##// END OF EJS Templates
changegroup: pass all state into group()...
Gregory Szorc -
r39044:8c84f1ef default
parent child Browse files
Show More
@@ -1,1416 +1,1438 b''
1 1 # changegroup.py - Mercurial changegroup manipulation functions
2 2 #
3 3 # Copyright 2006 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import os
11 11 import struct
12 12 import weakref
13 13
14 14 from .i18n import _
15 15 from .node import (
16 16 hex,
17 17 nullid,
18 18 nullrev,
19 19 short,
20 20 )
21 21
22 22 from .thirdparty import (
23 23 attr,
24 24 )
25 25
26 26 from . import (
27 27 dagutil,
28 28 error,
29 29 match as matchmod,
30 30 mdiff,
31 31 phases,
32 32 pycompat,
33 33 repository,
34 34 revlog,
35 35 util,
36 36 )
37 37
38 38 from .utils import (
39 39 stringutil,
40 40 )
41 41
# Fixed-size binary delta headers for the changegroup wire-format versions.
# The field meanings are established by the cg*unpacker._deltaheader()
# implementations below.
# cg1: node, p1, p2, linknode (delta base is implicit).
_CHANGEGROUPV1_DELTA_HEADER = struct.Struct("20s20s20s20s")
# cg2: node, p1, p2, deltabase, linknode (explicit delta base).
_CHANGEGROUPV2_DELTA_HEADER = struct.Struct("20s20s20s20s20s")
# cg3: node, p1, p2, deltabase, linknode, flags (big-endian unsigned short).
_CHANGEGROUPV3_DELTA_HEADER = struct.Struct(">20s20s20s20s20sH")

# Repository requirement string used when the lfs extension is active.
LFS_REQUIREMENT = 'lfs'

# Local alias: read exactly N bytes from a stream or abort.
readexactly = util.readexactly
49 49
def getchunk(stream):
    """Read the next length-prefixed chunk from ``stream``.

    Returns the chunk payload as a string, or the empty string when the
    zero-length terminator chunk is encountered. Aborts on a corrupt
    length prefix.
    """
    header = readexactly(stream, 4)
    length = struct.unpack(">l", header)[0]
    if length > 4:
        # The encoded length includes the 4-byte prefix itself.
        return readexactly(stream, length - 4)
    if length:
        # Anything in 1..4 (or negative) cannot be a valid chunk.
        raise error.Abort(_("invalid chunk length %d") % length)
    return ""
59 59
def chunkheader(length):
    """Build the 4-byte big-endian size prefix for a ``length``-byte payload.

    The transmitted size counts the prefix itself, hence the extra 4.
    """
    return struct.pack(">l", 4 + length)
63 63
def closechunk():
    """Return the zero-length chunk that terminates a delta group.

    On the wire this is four zero bytes (a length prefix of 0).
    """
    return struct.pack(">l", 0)
67 67
def _fileheader(path):
    """Return the chunk introducing the filelog section for ``path``.

    Layout is the standard length prefix followed by the path bytes.
    """
    header = chunkheader(len(path))
    return header + path
71 71
def writechunks(ui, chunks, filename, vfs=None):
    """Write chunks to a file and return its filename.

    The stream is assumed to be a bundle file.
    Existing files will not be overwritten.
    If no filename is specified, a temporary file is created.
    """
    fh = None
    cleanup = None
    try:
        if not filename:
            fd, filename = pycompat.mkstemp(prefix="hg-bundle-", suffix=".hg")
            fh = os.fdopen(fd, r"wb")
            # The temp file must be removed if writing fails below.
            cleanup = filename
        elif vfs:
            fh = vfs.open(filename, "wb")
        else:
            # Increase default buffer size because default is usually
            # small (4k is common on Linux).
            fh = open(filename, "wb", 131072)

        for chunk in chunks:
            fh.write(chunk)
        # All chunks landed on disk: disarm the cleanup path.
        cleanup = None
        return filename
    finally:
        if fh is not None:
            fh.close()
        if cleanup is not None:
            if filename and vfs:
                vfs.unlink(cleanup)
            else:
                os.unlink(cleanup)
105 105
class cg1unpacker(object):
    """Unpacker for cg1 changegroup streams.

    A changegroup unpacker handles the framing of the revision data in
    the wire format. Most consumers will want to use the apply()
    method to add the changes from the changegroup to a repository.

    If you're forwarding a changegroup unmodified to another consumer,
    use getchunks(), which returns an iterator of changegroup
    chunks. This is mostly useful for cases where you need to know the
    data stream has ended by observing the end of the changegroup.

    deltachunk() is useful only if you're applying delta data. Most
    consumers should prefer apply() instead.

    A few other public methods exist. Those are used only for
    bundlerepo and some debug commands - their use is discouraged.
    """
    # Struct for the fixed-size delta header: node, p1, p2, linknode.
    deltaheader = _CHANGEGROUPV1_DELTA_HEADER
    deltaheadersize = deltaheader.size
    version = '01'
    _grouplistcount = 1 # One list of files after the manifests

    def __init__(self, fh, alg, extras=None):
        """Wrap stream ``fh`` whose payload is compressed with ``alg``.

        ``alg`` is a bundle compression type name (None is treated as
        'UN', i.e. uncompressed). ``extras`` is an optional dict exposed
        unchanged as ``self.extras``. Raises error.Abort for an unknown
        compression type.
        """
        if alg is None:
            alg = 'UN'
        if alg not in util.compengines.supportedbundletypes:
            raise error.Abort(_('unknown stream compression type: %s')
                              % alg)
        if alg == 'BZ':
            # NOTE(review): presumably the 'BZ' magic has already been
            # consumed from the stream by the caller, hence the special
            # truncated-bzip2 engine — confirm against bundle readers.
            alg = '_truncatedBZ'

        compengine = util.compengines.forbundletype(alg)
        self._stream = compengine.decompressorreader(fh)
        self._type = alg
        self.extras = extras or {}
        # Optional zero-argument callable invoked once per chunk read
        # (used as a progress hook by apply()).
        self.callback = None

    # These methods (compressed, read, seek, tell) all appear to only
    # be used by bundlerepo, but it's a little hard to tell.
    def compressed(self):
        return self._type is not None and self._type != 'UN'
    def read(self, l):
        return self._stream.read(l)
    def seek(self, pos):
        return self._stream.seek(pos)
    def tell(self):
        return self._stream.tell()
    def close(self):
        return self._stream.close()

    def _chunklength(self):
        """Read the next chunk's length prefix; return the payload size.

        Returns 0 at a group terminator; aborts on a corrupt length.
        Fires self.callback (if set) for every non-terminator chunk.
        """
        d = readexactly(self._stream, 4)
        l = struct.unpack(">l", d)[0]
        if l <= 4:
            if l:
                raise error.Abort(_("invalid chunk length %d") % l)
            return 0
        if self.callback:
            self.callback()
        # The wire length includes the 4-byte prefix itself.
        return l - 4

    def changelogheader(self):
        """v10 does not have a changelog header chunk"""
        return {}

    def manifestheader(self):
        """v10 does not have a manifest header chunk"""
        return {}

    def filelogheader(self):
        """return the header of the filelogs chunk, v10 only has the filename"""
        l = self._chunklength()
        if not l:
            return {}
        fname = readexactly(self._stream, l)
        return {'filename': fname}

    def _deltaheader(self, headertuple, prevnode):
        """Decode a cg1 delta header tuple.

        cg1 carries no explicit delta base: the delta chains off the
        previously transmitted node, or off p1 for the first entry of a
        group (prevnode is None). cg1 also has no per-revision flags.
        """
        node, p1, p2, cs = headertuple
        if prevnode is None:
            deltabase = p1
        else:
            deltabase = prevnode
        flags = 0
        return node, p1, p2, deltabase, cs, flags

    def deltachunk(self, prevnode):
        """Read one delta entry from the stream.

        Returns a 7-tuple (node, p1, p2, linknode, deltabase, delta,
        flags), or {} when a group terminator is reached.
        """
        l = self._chunklength()
        if not l:
            return {}
        headerdata = readexactly(self._stream, self.deltaheadersize)
        header = self.deltaheader.unpack(headerdata)
        delta = readexactly(self._stream, l - self.deltaheadersize)
        node, p1, p2, deltabase, cs, flags = self._deltaheader(header, prevnode)
        return (node, p1, p2, cs, deltabase, delta, flags)

    def getchunks(self):
        """returns all the chunks contains in the bundle

        Used when you need to forward the binary stream to a file or another
        network API. To do so, it parse the changegroup data, otherwise it will
        block in case of sshrepo because it don't know the end of the stream.
        """
        # For changegroup 1 and 2, we expect 3 parts: changelog, manifestlog,
        # and a list of filelogs. For changegroup 3, we expect 4 parts:
        # changelog, manifestlog, a list of tree manifestlogs, and a list of
        # filelogs.
        #
        # Changelog and manifestlog parts are terminated with empty chunks. The
        # tree and file parts are a list of entry sections. Each entry section
        # is a series of chunks terminating in an empty chunk. The list of these
        # entry sections is terminated in yet another empty chunk, so we know
        # we've reached the end of the tree/file list when we reach an empty
        # chunk that was proceeded by no non-empty chunks.

        parts = 0
        while parts < 2 + self._grouplistcount:
            noentries = True
            while True:
                chunk = getchunk(self)
                if not chunk:
                    # The first two empty chunks represent the end of the
                    # changelog and the manifestlog portions. The remaining
                    # empty chunks represent either A) the end of individual
                    # tree or file entries in the file list, or B) the end of
                    # the entire list. It's the end of the entire list if there
                    # were no entries (i.e. noentries is True).
                    if parts < 2:
                        parts += 1
                    elif noentries:
                        parts += 1
                    break
                noentries = False
                yield chunkheader(len(chunk))
                pos = 0
                # Re-emit the payload in at most 1 MiB slices.
                while pos < len(chunk):
                    next = pos + 2**20
                    yield chunk[pos:next]
                    pos = next
            yield closechunk()

    def _unpackmanifests(self, repo, revmap, trp, prog):
        """Apply the manifest portion of the stream to the manifest log."""
        self.callback = prog.increment
        # no need to check for empty manifest group here:
        # if the result of the merge of 1 and 2 is the same in 3 and 4,
        # no new manifest will be created and the manifest group will
        # be empty during the pull
        self.manifestheader()
        deltas = self.deltaiter()
        repo.manifestlog.addgroup(deltas, revmap, trp)
        prog.complete()
        self.callback = None

    def apply(self, repo, tr, srctype, url, targetphase=phases.draft,
              expectedtotal=None):
        """Add the changegroup returned by source.read() to this repo.
        srctype is a string like 'push', 'pull', or 'unbundle'. url is
        the URL of the repo where this changegroup is coming from.

        Return an integer summarizing the change to this repo:
        - nothing changed or no source: 0
        - more heads than before: 1+added heads (2..n)
        - fewer heads than before: -1-removed heads (-2..-n)
        - number of heads stays the same: 1
        """
        repo = repo.unfiltered()
        def csmap(x):
            repo.ui.debug("add changeset %s\n" % short(x))
            return len(cl)

        def revmap(x):
            return cl.rev(x)

        changesets = files = revisions = 0

        try:
            # The transaction may already carry source information. In this
            # case we use the top level data. We overwrite the argument
            # because we need to use the top level value (if they exist)
            # in this function.
            srctype = tr.hookargs.setdefault('source', srctype)
            url = tr.hookargs.setdefault('url', url)
            repo.hook('prechangegroup',
                      throw=True, **pycompat.strkwargs(tr.hookargs))

            # write changelog data to temp files so concurrent readers
            # will not see an inconsistent view
            cl = repo.changelog
            cl.delayupdate(tr)
            oldheads = set(cl.heads())

            trp = weakref.proxy(tr)
            # pull off the changeset group
            repo.ui.status(_("adding changesets\n"))
            clstart = len(cl)
            progress = repo.ui.makeprogress(_('changesets'), unit=_('chunks'),
                                            total=expectedtotal)
            self.callback = progress.increment

            # Collect, per applied changeset, the set of files it touches
            # so the file-count progress total below is accurate.
            efiles = set()
            def onchangelog(cl, node):
                efiles.update(cl.readfiles(node))

            self.changelogheader()
            deltas = self.deltaiter()
            cgnodes = cl.addgroup(deltas, csmap, trp, addrevisioncb=onchangelog)
            efiles = len(efiles)

            if not cgnodes:
                repo.ui.develwarn('applied empty changegroup',
                                  config='warn-empty-changegroup')
            clend = len(cl)
            changesets = clend - clstart
            progress.complete()
            self.callback = None

            # pull off the manifest group
            repo.ui.status(_("adding manifests\n"))
            # We know that we'll never have more manifests than we had
            # changesets.
            progress = repo.ui.makeprogress(_('manifests'), unit=_('chunks'),
                                            total=changesets)
            self._unpackmanifests(repo, revmap, trp, progress)

            needfiles = {}
            if repo.ui.configbool('server', 'validate'):
                cl = repo.changelog
                ml = repo.manifestlog
                # validate incoming csets have their manifests
                for cset in pycompat.xrange(clstart, clend):
                    mfnode = cl.changelogrevision(cset).manifest
                    mfest = ml[mfnode].readdelta()
                    # store file cgnodes we must see
                    for f, n in mfest.iteritems():
                        needfiles.setdefault(f, set()).add(n)

            # process the files
            repo.ui.status(_("adding file changes\n"))
            newrevs, newfiles = _addchangegroupfiles(
                repo, self, revmap, trp, efiles, needfiles)
            revisions += newrevs
            files += newfiles

            # Head-count delta, not counting heads that close a branch.
            deltaheads = 0
            if oldheads:
                heads = cl.heads()
                deltaheads = len(heads) - len(oldheads)
                for h in heads:
                    if h not in oldheads and repo[h].closesbranch():
                        deltaheads -= 1
            htext = ""
            if deltaheads:
                htext = _(" (%+d heads)") % deltaheads

            repo.ui.status(_("added %d changesets"
                             " with %d changes to %d files%s\n")
                           % (changesets, revisions, files, htext))
            repo.invalidatevolatilesets()

            if changesets > 0:
                if 'node' not in tr.hookargs:
                    tr.hookargs['node'] = hex(cl.node(clstart))
                    tr.hookargs['node_last'] = hex(cl.node(clend - 1))
                    hookargs = dict(tr.hookargs)
                else:
                    hookargs = dict(tr.hookargs)
                    hookargs['node'] = hex(cl.node(clstart))
                    hookargs['node_last'] = hex(cl.node(clend - 1))
                repo.hook('pretxnchangegroup',
                          throw=True, **pycompat.strkwargs(hookargs))

            added = [cl.node(r) for r in pycompat.xrange(clstart, clend)]
            phaseall = None
            if srctype in ('push', 'serve'):
                # Old servers can not push the boundary themselves.
                # New servers won't push the boundary if changeset already
                # exists locally as secret
                #
                # We should not use added here but the list of all change in
                # the bundle
                if repo.publishing():
                    targetphase = phaseall = phases.public
                else:
                    # closer target phase computation

                    # Those changesets have been pushed from the
                    # outside, their phases are going to be pushed
                    # alongside. Therefor `targetphase` is
                    # ignored.
                    targetphase = phaseall = phases.draft
            if added:
                phases.registernew(repo, tr, targetphase, added)
            if phaseall is not None:
                phases.advanceboundary(repo, tr, phaseall, cgnodes)

            if changesets > 0:

                def runhooks():
                    # These hooks run when the lock releases, not when the
                    # transaction closes. So it's possible for the changelog
                    # to have changed since we last saw it.
                    if clstart >= len(repo):
                        return

                    repo.hook("changegroup", **pycompat.strkwargs(hookargs))

                    for n in added:
                        args = hookargs.copy()
                        args['node'] = hex(n)
                        del args['node_last']
                        repo.hook("incoming", **pycompat.strkwargs(args))

                    newheads = [h for h in repo.heads()
                                if h not in oldheads]
                    repo.ui.log("incoming",
                                "%d incoming changes - new heads: %s\n",
                                len(added),
                                ', '.join([hex(c[:6]) for c in newheads]))

                tr.addpostclose('changegroup-runhooks-%020i' % clstart,
                                lambda tr: repo._afterlock(runhooks))
        finally:
            repo.ui.flush()
        # never return 0 here:
        if deltaheads < 0:
            ret = deltaheads - 1
        else:
            ret = deltaheads + 1
        return ret

    def deltaiter(self):
        """
        returns an iterator of the deltas in this changegroup

        Useful for passing to the underlying storage system to be stored.
        """
        chain = None
        for chunkdata in iter(lambda: self.deltachunk(chain), {}):
            # Chunkdata: (node, p1, p2, cs, deltabase, delta, flags)
            yield chunkdata
            # Subsequent cg1 deltas implicitly chain off this node.
            chain = chunkdata[0]
448 448
class cg2unpacker(cg1unpacker):
    """Unpacker for cg2 streams.

    cg2 streams add support for generaldelta, so the delta header
    format is slightly different. All other features about the data
    remain the same.
    """
    deltaheader = _CHANGEGROUPV2_DELTA_HEADER
    deltaheadersize = deltaheader.size
    version = '02'

    def _deltaheader(self, headertuple, prevnode):
        # cg2 transmits an explicit delta base, so ``prevnode`` plays no
        # role here; cg2 also carries no per-revision flags.
        node, p1, p2, deltabase, cs = headertuple
        flags = 0
        return node, p1, p2, deltabase, cs, flags
464 464
class cg3unpacker(cg2unpacker):
    """Unpacker for cg3 streams.

    cg3 streams add support for exchanging treemanifests and revlog
    flags. It adds the revlog flags to the delta header and an empty chunk
    separating manifests and files.
    """
    deltaheader = _CHANGEGROUPV3_DELTA_HEADER
    deltaheadersize = deltaheader.size
    version = '03'
    _grouplistcount = 2 # One list of manifests and one list of files

    def _deltaheader(self, headertuple, prevnode):
        # The cg3 header already carries every field we need, including
        # the per-revision storage flags; pass it through unchanged.
        node, p1, p2, deltabase, cs, flags = headertuple
        return node, p1, p2, deltabase, cs, flags

    def _unpackmanifests(self, repo, revmap, trp, prog):
        # Apply the root manifest group first, then any tree manifest
        # (dirlog) groups that precede the file list.
        super(cg3unpacker, self)._unpackmanifests(repo, revmap, trp, prog)
        for chunkdata in iter(self.filelogheader, {}):
            # If we get here, there are directory manifests in the changegroup
            dirname = chunkdata["filename"]
            repo.ui.debug("adding %s revisions\n" % dirname)
            dirlog = repo.manifestlog._revlog.dirlog(dirname)
            deltas = self.deltaiter()
            if not dirlog.addgroup(deltas, revmap, trp):
                raise error.Abort(_("received dir revlog group is empty"))
491 491
class headerlessfixup(object):
    """File-like wrapper that replays already-consumed header bytes.

    ``h`` holds bytes that were read from ``fh`` ahead of time; reads
    are satisfied from that buffer first and fall through to the
    underlying stream once it is exhausted.
    """
    def __init__(self, fh, h):
        self._h = h
        self._fh = fh

    def read(self, n):
        buffered = self._h
        if not buffered:
            return readexactly(self._fh, n)
        d, self._h = buffered[:n], buffered[n:]
        if len(d) < n:
            # Buffer ran dry mid-read: top up from the real stream.
            d += readexactly(self._fh, n - len(d))
        return d
503 503
# Immutable value object (frozen attrs class with __slots__) describing
# one entry of a changegroup stream.
@attr.s(slots=True, frozen=True)
class revisiondelta(object):
    """Describes a delta entry in a changegroup.

    Captured data is sufficient to serialize the delta into multiple
    formats.
    """
    # 20 byte node of this revision.
    node = attr.ib()
    # 20 byte nodes of parent revisions.
    p1node = attr.ib()
    p2node = attr.ib()
    # 20 byte node of node this delta is against.
    basenode = attr.ib()
    # 20 byte node of changeset revision this delta is associated with.
    linknode = attr.ib()
    # 2 bytes of flags to apply to revision data.
    flags = attr.ib()
    # Iterable of chunks holding raw delta data.
    deltachunks = attr.ib()
524 524
525 525 def _sortnodesnormal(store, nodes, reorder):
526 526 """Sort nodes for changegroup generation and turn into revnums."""
527 527 # for generaldelta revlogs, we linearize the revs; this will both be
528 528 # much quicker and generate a much smaller bundle
529 529 if (store._generaldelta and reorder is None) or reorder:
530 530 dag = dagutil.revlogdag(store)
531 531 return dag.linearize(set(store.rev(n) for n in nodes))
532 532 else:
533 533 return sorted([store.rev(n) for n in nodes])
534 534
535 535 def _sortnodesellipsis(store, nodes, cl, lookup):
536 536 """Sort nodes for changegroup generation and turn into revnums."""
537 537 # Ellipses serving mode.
538 538 #
539 539 # In a perfect world, we'd generate better ellipsis-ified graphs
540 540 # for non-changelog revlogs. In practice, we haven't started doing
541 541 # that yet, so the resulting DAGs for the manifestlog and filelogs
542 542 # are actually full of bogus parentage on all the ellipsis
543 543 # nodes. This has the side effect that, while the contents are
544 544 # correct, the individual DAGs might be completely out of whack in
545 545 # a case like 882681bc3166 and its ancestors (back about 10
546 546 # revisions or so) in the main hg repo.
547 547 #
548 548 # The one invariant we *know* holds is that the new (potentially
549 549 # bogus) DAG shape will be valid if we order the nodes in the
550 550 # order that they're introduced in dramatis personae by the
551 551 # changelog, so what we do is we sort the non-changelog histories
552 552 # by the order in which they are used by the changelog.
553 553 key = lambda n: cl.rev(lookup(n))
554 554 return [store.rev(n) for n in sorted(nodes, key=key)]
555 555
def _revisiondeltanormal(store, rev, prev, linknode, deltaparentfn):
    """Construct a revision delta for non-ellipses changegroup generation.

    ``deltaparentfn`` picks the delta base revision. Censored revisions
    (either side of the delta) are shipped as full texts with a diff
    header standing in for the base; a nullrev base also forces a full
    text; everything else is a real delta against the chosen base.
    """
    node = store.node(rev)
    p1rev, p2rev = store.parentrevs(rev)
    base = deltaparentfn(store, rev, p1rev, p2rev, prev)

    prefix = ''
    if store.iscensored(base) or store.iscensored(rev):
        # Deltas against censored data cannot be trusted; send the raw
        # text (or the censorship tombstone) instead.
        try:
            delta = store.revision(node, raw=True)
        except error.CensoredNodeError as e:
            delta = e.tombstone
        if base == nullrev:
            prefix = mdiff.trivialdiffheader(len(delta))
        else:
            prefix = mdiff.replacediffheader(store.rawsize(base), len(delta))
    elif base == nullrev:
        delta = store.revision(node, raw=True)
        prefix = mdiff.trivialdiffheader(len(delta))
    else:
        delta = store.revdiff(base, rev)

    p1node, p2node = store.parents(node)

    return revisiondelta(
        node=node,
        p1node=p1node,
        p2node=p2node,
        basenode=store.node(base),
        linknode=linknode,
        flags=store.flags(rev),
        deltachunks=(prefix, delta),
    )
589 589
def _revisiondeltanarrow(cl, store, ischangelog, rev, linkrev,
                         linknode, clrevtolocalrev, fullclnodes,
                         precomputedellipsis):
    """Construct a revision delta for an ellipsis (narrow) node.

    The revision is emitted as a full text (basenode is nullid) with the
    REVIDX_ELLIPSIS flag added, and its parents are remapped onto the
    ellipsis-ized DAG recorded in ``precomputedellipsis``.
    """
    linkparents = precomputedellipsis[linkrev]
    def local(clrev):
        """Turn a changelog revnum into a local revnum.

        The ellipsis dag is stored as revnums on the changelog,
        but when we're producing ellipsis entries for
        non-changelog revlogs, we need to turn those numbers into
        something local. This does that for us, and during the
        changelog sending phase will also expand the stored
        mappings as needed.
        """
        if clrev == nullrev:
            return nullrev

        if ischangelog:
            return clrev

        # Walk the ellipsis-ized changelog breadth-first looking for a
        # change that has been linked from the current revlog.
        #
        # For a flat manifest revlog only a single step should be necessary
        # as all relevant changelog entries are relevant to the flat
        # manifest.
        #
        # For a filelog or tree manifest dirlog however not every changelog
        # entry will have been relevant, so we need to skip some changelog
        # nodes even after ellipsis-izing.
        walk = [clrev]
        while walk:
            p = walk[0]
            walk = walk[1:]
            if p in clrevtolocalrev:
                return clrevtolocalrev[p]
            elif p in fullclnodes:
                walk.extend([pp for pp in cl.parentrevs(p)
                             if pp != nullrev])
            elif p in precomputedellipsis:
                walk.extend([pp for pp in precomputedellipsis[p]
                             if pp != nullrev])
            else:
                # In this case, we've got an ellipsis with parents
                # outside the current bundle (likely an
                # incremental pull). We "know" that we can use the
                # value of this same revlog at whatever revision
                # is pointed to by linknode. "Know" is in scare
                # quotes because I haven't done enough examination
                # of edge cases to convince myself this is really
                # a fact - it works for all the (admittedly
                # thorough) cases in our testsuite, but I would be
                # somewhat unsurprised to find a case in the wild
                # where this breaks down a bit. That said, I don't
                # know if it would hurt anything.
                for i in pycompat.xrange(rev, 0, -1):
                    if store.linkrev(i) == clrev:
                        return i
                # We failed to resolve a parent for this node, so
                # we crash the changegroup construction.
                raise error.Abort(
                    'unable to resolve parent while packing %r %r'
                    ' for changeset %r' % (store.indexfile, rev, clrev))

        return nullrev

    if not linkparents or (
        store.parentrevs(rev) == (nullrev, nullrev)):
        p1, p2 = nullrev, nullrev
    elif len(linkparents) == 1:
        p1, = sorted(local(p) for p in linkparents)
        p2 = nullrev
    else:
        p1, p2 = sorted(local(p) for p in linkparents)

    n = store.node(rev)
    p1n, p2n = store.node(p1), store.node(p2)
    flags = store.flags(rev)
    flags |= revlog.REVIDX_ELLIPSIS

    # TODO: try and actually send deltas for ellipsis data blocks
    data = store.revision(n)
    diffheader = mdiff.trivialdiffheader(len(data))

    return revisiondelta(
        node=n,
        p1node=p1n,
        p2node=p2n,
        basenode=nullid,
        linknode=linknode,
        flags=flags,
        deltachunks=(diffheader, data),
    )
683 683
684 684 class cgpacker(object):
    def __init__(self, repo, filematcher, version, allowreorder,
                 deltaparentfn, builddeltaheader, manifestsend,
                 bundlecaps=None, ellipses=False,
                 shallow=False, ellipsisroots=None, fullnodes=None):
        """Given a source repo, construct a bundler.

        filematcher is a matcher that matches on files to include in the
        changegroup. Used to facilitate sparse changegroups.

        allowreorder controls whether reordering of revisions is allowed.
        This value is used when ``bundle.reorder`` is ``auto`` or isn't
        set.

        deltaparentfn is a callable that resolves the delta parent for
        a specific revision.

        builddeltaheader is a callable that constructs the header for a group
        delta.

        manifestsend is a chunk to send after manifests have been fully emitted.

        ellipses indicates whether ellipsis serving mode is enabled.

        bundlecaps is optional and can be used to specify the set of
        capabilities which can be used to build the bundle. While bundlecaps is
        unused in core Mercurial, extensions rely on this feature to communicate
        capabilities to customize the changegroup packer.

        shallow indicates whether shallow data might be sent. The packer may
        need to pack file contents not introduced by the changes being packed.

        fullnodes is the set of changelog nodes which should not be ellipsis
        nodes. We store this rather than the set of nodes that should be
        ellipsis because for very large histories we expect this to be
        significantly smaller.
        """
        assert filematcher
        self._filematcher = filematcher

        self.version = version
        self._deltaparentfn = deltaparentfn
        self._builddeltaheader = builddeltaheader
        self._manifestsend = manifestsend
        self._ellipses = ellipses

        # Set of capabilities we can use to build the bundle.
        if bundlecaps is None:
            bundlecaps = set()
        self._bundlecaps = bundlecaps
        self._isshallow = shallow
        self._fullclnodes = fullnodes

        # Maps ellipsis revs to their roots at the changelog level.
        self._precomputedellipsis = ellipsisroots

        # experimental config: bundle.reorder
        # 'auto' defers to the per-store default; any other value is
        # parsed as a boolean override.
        reorder = repo.ui.config('bundle', 'reorder')
        if reorder == 'auto':
            self._reorder = allowreorder
        else:
            self._reorder = stringutil.parsebool(reorder)

        self._repo = repo

        # Only emit per-revlog size notes when verbose but not debugging
        # (debug output has its own reporting).
        if self._repo.ui.verbose and not self._repo.ui.debugflag:
            self._verbosenote = self._repo.ui.note
        else:
            self._verbosenote = lambda s: None
753 753
754 def group(self, revs, store, ischangelog, lookup, units=None,
755 clrevtolocalrev=None):
754 def group(self, repo, revs, store, ischangelog, lookup, deltaparentfn,
755 deltaheaderfn, units=None,
756 ellipses=False, clrevtolocalrev=None, fullclnodes=None,
757 precomputedellipsis=None):
756 758 """Calculate a delta group, yielding a sequence of changegroup chunks
757 759 (strings).
758 760
759 761 Given a list of changeset revs, return a set of deltas and
760 762 metadata corresponding to nodes. The first delta is
761 763 first parent(nodelist[0]) -> nodelist[0], the receiver is
762 764 guaranteed to have this parent as it has all history before
763 765 these changesets. In the case firstparent is nullrev the
764 766 changegroup starts with a full revision.
765 767
766 768 If units is not None, progress detail will be generated, units specifies
767 769 the type of revlog that is touched (changelog, manifest, etc.).
768 770 """
769 771 # if we don't have any revisions touched by these changesets, bail
770 772 if len(revs) == 0:
771 773 yield closechunk()
772 774 return
773 775
774 cl = self._repo.changelog
776 cl = repo.changelog
775 777
776 778 # add the parent of the first rev
777 779 p = store.parentrevs(revs[0])[0]
778 780 revs.insert(0, p)
779 781
780 782 # build deltas
781 783 progress = None
782 784 if units is not None:
783 progress = self._repo.ui.makeprogress(_('bundling'), unit=units,
785 progress = repo.ui.makeprogress(_('bundling'), unit=units,
784 786 total=(len(revs) - 1))
785 787 for r in pycompat.xrange(len(revs) - 1):
786 788 if progress:
787 789 progress.update(r + 1)
788 790 prev, curr = revs[r], revs[r + 1]
789 791 linknode = lookup(store.node(curr))
790 792
791 if self._ellipses:
793 if ellipses:
792 794 linkrev = cl.rev(linknode)
793 795 clrevtolocalrev[linkrev] = curr
794 796
795 797 # This is a node to send in full, because the changeset it
796 798 # corresponds to was a full changeset.
797 if linknode in self._fullclnodes:
799 if linknode in fullclnodes:
798 800 delta = _revisiondeltanormal(store, curr, prev, linknode,
799 self._deltaparentfn)
800 elif linkrev not in self._precomputedellipsis:
801 deltaparentfn)
802 elif linkrev not in precomputedellipsis:
801 803 delta = None
802 804 else:
803 805 delta = _revisiondeltanarrow(
804 806 cl, store, ischangelog, curr, linkrev, linknode,
805 clrevtolocalrev, self._fullclnodes,
806 self._precomputedellipsis)
807 clrevtolocalrev, fullclnodes,
808 precomputedellipsis)
807 809 else:
808 810 delta = _revisiondeltanormal(store, curr, prev, linknode,
809 self._deltaparentfn)
811 deltaparentfn)
810 812
811 813 if not delta:
812 814 continue
813 815
814 meta = self._builddeltaheader(delta)
816 meta = deltaheaderfn(delta)
815 817 l = len(meta) + sum(len(x) for x in delta.deltachunks)
816 818 yield chunkheader(l)
817 819 yield meta
818 820 for x in delta.deltachunks:
819 821 yield x
820 822
821 823 if progress:
822 824 progress.complete()
823 825
824 826 yield closechunk()
825 827
    def generate(self, commonrevs, clnodes, fastpathlinkrev, source):
        """Yield a sequence of changegroup byte chunks.

        Emits the changelog, then manifests (and tree manifests where
        applicable), then filelogs, followed by the final terminator
        chunk, firing the 'outgoing' hook at the end.
        """

        repo = self._repo
        cl = repo.changelog

        self._verbosenote(_('uncompressed size of bundle content:\n'))
        size = 0

        # The changelog chunks also populate clstate as a side effect;
        # it is only fully populated once the chunk stream is consumed.
        clstate, chunks = self._generatechangelog(cl, clnodes)
        for chunk in chunks:
            size += len(chunk)
            yield chunk

        self._verbosenote(_('%8.i (changelog)\n') % size)

        clrevorder = clstate['clrevorder']
        mfs = clstate['mfs']
        changedfiles = clstate['changedfiles']

        # We need to make sure that the linkrev in the changegroup refers to
        # the first changeset that introduced the manifest or file revision.
        # The fastpath is usually safer than the slowpath, because the filelogs
        # are walked in revlog order.
        #
        # When taking the slowpath with reorder=None and the manifest revlog
        # uses generaldelta, the manifest may be walked in the "wrong" order.
        # Without 'clrevorder', we would get an incorrect linkrev (see fix in
        # cc0ff93d0c0c).
        #
        # When taking the fastpath, we are only vulnerable to reordering
        # of the changelog itself. The changelog never uses generaldelta, so
        # it is only reordered when reorder=True. To handle this case, we
        # simply take the slowpath, which already has the 'clrevorder' logic.
        # This was also fixed in cc0ff93d0c0c.
        fastpathlinkrev = fastpathlinkrev and not self._reorder
        # Treemanifests don't work correctly with fastpathlinkrev
        # either, because we don't discover which directory nodes to
        # send along with files. This could probably be fixed.
        fastpathlinkrev = fastpathlinkrev and (
            'treemanifest' not in repo.requirements)

        fnodes = {} # needed file nodes

        for chunk in self.generatemanifests(commonrevs, clrevorder,
                fastpathlinkrev, mfs, fnodes, source,
                clstate['clrevtomanifestrev']):
            yield chunk

        mfdicts = None
        if self._ellipses and self._isshallow:
            # Shallow ellipsis serving may need manifest contents to
            # locate file revisions not introduced by packed changesets.
            mfdicts = [(self._repo.manifestlog[n].read(), lr)
                       for (n, lr) in mfs.iteritems()]

        mfs.clear()
        clrevs = set(cl.rev(x) for x in clnodes)

        for chunk in self.generatefiles(changedfiles, commonrevs,
                                        source, mfdicts, fastpathlinkrev,
                                        fnodes, clrevs):
            yield chunk

        # Terminates the entire changegroup stream.
        yield closechunk()

        if clnodes:
            repo.hook('outgoing', node=hex(clnodes[0]), source=source)
892 894
893 895 def _generatechangelog(self, cl, nodes):
894 896 """Generate data for changelog chunks.
895 897
896 898 Returns a 2-tuple of a dict containing state and an iterable of
897 899 byte chunks. The state will not be fully populated until the
898 900 chunk stream has been fully consumed.
899 901 """
900 902 clrevorder = {}
901 903 mfs = {} # needed manifests
902 904 mfl = self._repo.manifestlog
903 905 # TODO violates storage abstraction.
904 906 mfrevlog = mfl._revlog
905 907 changedfiles = set()
906 908 clrevtomanifestrev = {}
907 909
908 910 # Callback for the changelog, used to collect changed files and
909 911 # manifest nodes.
910 912 # Returns the linkrev node (identity in the changelog case).
911 913 def lookupcl(x):
912 914 c = cl.read(x)
913 915 clrevorder[x] = len(clrevorder)
914 916
915 917 if self._ellipses:
916 918 # Only update mfs if x is going to be sent. Otherwise we
917 919 # end up with bogus linkrevs specified for manifests and
918 920 # we skip some manifest nodes that we should otherwise
919 921 # have sent.
920 922 if (x in self._fullclnodes
921 923 or cl.rev(x) in self._precomputedellipsis):
922 924 n = c[0]
923 925 # Record the first changeset introducing this manifest
924 926 # version.
925 927 mfs.setdefault(n, x)
926 928 # Set this narrow-specific dict so we have the lowest
927 929 # manifest revnum to look up for this cl revnum. (Part of
928 930 # mapping changelog ellipsis parents to manifest ellipsis
929 931 # parents)
930 932 clrevtomanifestrev.setdefault(cl.rev(x), mfrevlog.rev(n))
931 933 # We can't trust the changed files list in the changeset if the
932 934 # client requested a shallow clone.
933 935 if self._isshallow:
934 936 changedfiles.update(mfl[c[0]].read().keys())
935 937 else:
936 938 changedfiles.update(c[3])
937 939 else:
938 940
939 941 n = c[0]
940 942 # record the first changeset introducing this manifest version
941 943 mfs.setdefault(n, x)
942 944 # Record a complete list of potentially-changed files in
943 945 # this manifest.
944 946 changedfiles.update(c[3])
945 947
946 948 return x
947 949
948 950 # Changelog doesn't benefit from reordering revisions. So send out
949 951 # revisions in store order.
950 952 revs = sorted(cl.rev(n) for n in nodes)
951 953
952 954 state = {
953 955 'clrevorder': clrevorder,
954 956 'mfs': mfs,
955 957 'changedfiles': changedfiles,
956 958 'clrevtomanifestrev': clrevtomanifestrev,
957 959 }
958 960
959 gen = self.group(revs, cl, True, lookupcl, units=_('changesets'),
960 clrevtolocalrev={})
961 gen = self.group(self._repo, revs, cl, True, lookupcl,
962 self._deltaparentfn, self._builddeltaheader,
963 ellipses=self._ellipses,
964 units=_('changesets'),
965 clrevtolocalrev={},
966 fullclnodes=self._fullclnodes,
967 precomputedellipsis=self._precomputedellipsis)
961 968
962 969 return state, gen
963 970
964 971 def generatemanifests(self, commonrevs, clrevorder, fastpathlinkrev, mfs,
965 972 fnodes, source, clrevtolocalrev):
966 973 """Returns an iterator of changegroup chunks containing manifests.
967 974
968 975 `source` is unused here, but is used by extensions like remotefilelog to
969 976 change what is sent based in pulls vs pushes, etc.
970 977 """
971 978 repo = self._repo
972 979 cl = repo.changelog
973 980 mfl = repo.manifestlog
974 981 dirlog = mfl._revlog.dirlog
975 982 tmfnodes = {'': mfs}
976 983
977 984 # Callback for the manifest, used to collect linkrevs for filelog
978 985 # revisions.
979 986 # Returns the linkrev node (collected in lookupcl).
980 987 def makelookupmflinknode(dir, nodes):
981 988 if fastpathlinkrev:
982 989 assert not dir
983 990 return mfs.__getitem__
984 991
985 992 def lookupmflinknode(x):
986 993 """Callback for looking up the linknode for manifests.
987 994
988 995 Returns the linkrev node for the specified manifest.
989 996
990 997 SIDE EFFECT:
991 998
992 999 1) fclnodes gets populated with the list of relevant
993 1000 file nodes if we're not using fastpathlinkrev
994 1001 2) When treemanifests are in use, collects treemanifest nodes
995 1002 to send
996 1003
997 1004 Note that this means manifests must be completely sent to
998 1005 the client before you can trust the list of files and
999 1006 treemanifests to send.
1000 1007 """
1001 1008 clnode = nodes[x]
1002 1009 mdata = mfl.get(dir, x).readfast(shallow=True)
1003 1010 for p, n, fl in mdata.iterentries():
1004 1011 if fl == 't': # subdirectory manifest
1005 1012 subdir = dir + p + '/'
1006 1013 tmfclnodes = tmfnodes.setdefault(subdir, {})
1007 1014 tmfclnode = tmfclnodes.setdefault(n, clnode)
1008 1015 if clrevorder[clnode] < clrevorder[tmfclnode]:
1009 1016 tmfclnodes[n] = clnode
1010 1017 else:
1011 1018 f = dir + p
1012 1019 fclnodes = fnodes.setdefault(f, {})
1013 1020 fclnode = fclnodes.setdefault(n, clnode)
1014 1021 if clrevorder[clnode] < clrevorder[fclnode]:
1015 1022 fclnodes[n] = clnode
1016 1023 return clnode
1017 1024 return lookupmflinknode
1018 1025
1019 1026 size = 0
1020 1027 while tmfnodes:
1021 1028 dir, nodes = tmfnodes.popitem()
1022 1029 store = dirlog(dir)
1023 1030
1024 1031 if not self._filematcher.visitdir(store._dir[:-1] or '.'):
1025 1032 prunednodes = []
1026 1033 else:
1027 1034 frev, flr = store.rev, store.linkrev
1028 1035 prunednodes = [n for n in nodes
1029 1036 if flr(frev(n)) not in commonrevs]
1030 1037
1031 1038 if dir and not prunednodes:
1032 1039 continue
1033 1040
1034 1041 lookupfn = makelookupmflinknode(dir, nodes)
1035 1042
1036 1043 if self._ellipses:
1037 1044 revs = _sortnodesellipsis(store, prunednodes, cl,
1038 1045 lookupfn)
1039 1046 else:
1040 1047 revs = _sortnodesnormal(store, prunednodes,
1041 1048 self._reorder)
1042 1049
1043 1050 if dir:
1044 1051 assert self.version == b'03'
1045 1052 chunk = _fileheader(dir)
1046 1053 size += len(chunk)
1047 1054 yield chunk
1048 1055
1049 for chunk in self.group(revs, store, False, lookupfn,
1056 it = self.group(
1057 self._repo, revs, store, False, lookupfn,
1058 self._deltaparentfn, self._builddeltaheader,
1059 ellipses=self._ellipses,
1050 1060 units=_('manifests'),
1051 clrevtolocalrev=clrevtolocalrev):
1061 clrevtolocalrev=clrevtolocalrev,
1062 fullclnodes=self._fullclnodes,
1063 precomputedellipsis=self._precomputedellipsis)
1064
1065 for chunk in it:
1052 1066 size += len(chunk)
1053 1067 yield chunk
1054 1068
1055 1069 self._verbosenote(_('%8.i (manifests)\n') % size)
1056 1070 yield self._manifestsend
1057 1071
1058 1072 # The 'source' parameter is useful for extensions
1059 1073 def generatefiles(self, changedfiles, commonrevs, source,
1060 1074 mfdicts, fastpathlinkrev, fnodes, clrevs):
1061 1075 changedfiles = list(filter(self._filematcher, changedfiles))
1062 1076
1063 1077 if not fastpathlinkrev:
1064 1078 def normallinknodes(unused, fname):
1065 1079 return fnodes.get(fname, {})
1066 1080 else:
1067 1081 cln = self._repo.changelog.node
1068 1082
1069 1083 def normallinknodes(store, fname):
1070 1084 flinkrev = store.linkrev
1071 1085 fnode = store.node
1072 1086 revs = ((r, flinkrev(r)) for r in store)
1073 1087 return dict((fnode(r), cln(lr))
1074 1088 for r, lr in revs if lr in clrevs)
1075 1089
1076 1090 clrevtolocalrev = {}
1077 1091
1078 1092 if self._isshallow:
1079 1093 # In a shallow clone, the linknodes callback needs to also include
1080 1094 # those file nodes that are in the manifests we sent but weren't
1081 1095 # introduced by those manifests.
1082 1096 commonctxs = [self._repo[c] for c in commonrevs]
1083 1097 clrev = self._repo.changelog.rev
1084 1098
1085 1099 # Defining this function has a side-effect of overriding the
1086 1100 # function of the same name that was passed in as an argument.
1087 1101 # TODO have caller pass in appropriate function.
1088 1102 def linknodes(flog, fname):
1089 1103 for c in commonctxs:
1090 1104 try:
1091 1105 fnode = c.filenode(fname)
1092 1106 clrevtolocalrev[c.rev()] = flog.rev(fnode)
1093 1107 except error.ManifestLookupError:
1094 1108 pass
1095 1109 links = normallinknodes(flog, fname)
1096 1110 if len(links) != len(mfdicts):
1097 1111 for mf, lr in mfdicts:
1098 1112 fnode = mf.get(fname, None)
1099 1113 if fnode in links:
1100 1114 links[fnode] = min(links[fnode], lr, key=clrev)
1101 1115 elif fnode:
1102 1116 links[fnode] = lr
1103 1117 return links
1104 1118 else:
1105 1119 linknodes = normallinknodes
1106 1120
1107 1121 repo = self._repo
1108 1122 cl = repo.changelog
1109 1123 progress = repo.ui.makeprogress(_('bundling'), unit=_('files'),
1110 1124 total=len(changedfiles))
1111 1125 for i, fname in enumerate(sorted(changedfiles)):
1112 1126 filerevlog = repo.file(fname)
1113 1127 if not filerevlog:
1114 1128 raise error.Abort(_("empty or missing file data for %s") %
1115 1129 fname)
1116 1130
1117 1131 clrevtolocalrev.clear()
1118 1132
1119 1133 linkrevnodes = linknodes(filerevlog, fname)
1120 1134 # Lookup for filenodes, we collected the linkrev nodes above in the
1121 1135 # fastpath case and with lookupmf in the slowpath case.
1122 1136 def lookupfilelog(x):
1123 1137 return linkrevnodes[x]
1124 1138
1125 1139 frev, flr = filerevlog.rev, filerevlog.linkrev
1126 1140 filenodes = [n for n in linkrevnodes
1127 1141 if flr(frev(n)) not in commonrevs]
1128 1142
1129 1143 if filenodes:
1130 1144 if self._ellipses:
1131 1145 revs = _sortnodesellipsis(filerevlog, filenodes,
1132 1146 cl, lookupfilelog)
1133 1147 else:
1134 1148 revs = _sortnodesnormal(filerevlog, filenodes,
1135 1149 self._reorder)
1136 1150
1137 1151 progress.update(i + 1, item=fname)
1138 1152 h = _fileheader(fname)
1139 1153 size = len(h)
1140 1154 yield h
1141 for chunk in self.group(revs, filerevlog, False, lookupfilelog,
1142 clrevtolocalrev=clrevtolocalrev):
1155
1156 it = self.group(
1157 self._repo, revs, filerevlog, False, lookupfilelog,
1158 self._deltaparentfn, self._builddeltaheader,
1159 ellipses=self._ellipses,
1160 clrevtolocalrev=clrevtolocalrev,
1161 fullclnodes=self._fullclnodes,
1162 precomputedellipsis=self._precomputedellipsis)
1163
1164 for chunk in it:
1143 1165 size += len(chunk)
1144 1166 yield chunk
1145 1167 self._verbosenote(_('%8.i %s\n') % (size, fname))
1146 1168 progress.complete()
1147 1169
1148 1170 def _deltaparentprev(store, rev, p1, p2, prev):
1149 1171 """Resolve a delta parent to the previous revision.
1150 1172
1151 1173 Used for version 1 changegroups, which don't support generaldelta.
1152 1174 """
1153 1175 return prev
1154 1176
def _deltaparentgeneraldelta(store, rev, p1, p2, prev):
    """Resolve a delta parent when general deltas are supported."""
    dp = store.deltaparent(rev)

    if dp == nullrev:
        if store.storedeltachains:
            # Avoid sending full revisions when delta parent is null. Pick
            # prev in that case. It's tempting to pick p1 in this case, as
            # p1 will be smaller in the common case. However, computing a
            # delta against p1 may require resolving the raw text of p1,
            # which could be expensive. The revlog caches should have prev
            # cached, meaning less CPU for changegroup generation. There is
            # likely room to add a flag and/or config option to control
            # this behavior.
            base = prev
        else:
            # revlog is configured to use full snapshot for a reason,
            # stick to full snapshot.
            base = nullrev
    elif dp in (p1, p2, prev):
        base = dp
    else:
        # Pick prev when we can't be sure remote has the base revision.
        return prev

    # Fall back to a full snapshot if the store can't delta from base.
    if base != nullrev and not store.candelta(base, rev):
        base = nullrev

    return base
1181 1203
1182 1204 def _deltaparentellipses(store, rev, p1, p2, prev):
1183 1205 """Resolve a delta parent when in ellipses mode."""
1184 1206 # TODO: send better deltas when in narrow mode.
1185 1207 #
1186 1208 # changegroup.group() loops over revisions to send,
1187 1209 # including revisions we'll skip. What this means is that
1188 1210 # `prev` will be a potentially useless delta base for all
1189 1211 # ellipsis nodes, as the client likely won't have it. In
1190 1212 # the future we should do bookkeeping about which nodes
1191 1213 # have been sent to the client, and try to be
1192 1214 # significantly smarter about delta bases. This is
1193 1215 # slightly tricky because this same code has to work for
1194 1216 # all revlogs, and we don't have the linkrev/linknode here.
1195 1217 return p1
1196 1218
def _makecg1packer(repo, filematcher, bundlecaps, ellipses=False,
                   shallow=False, ellipsisroots=None, fullnodes=None):
    """Construct a cgpacker for version '01' changegroups."""
    def builddeltaheader(d):
        # cg1 headers carry no base node or flags.
        return _CHANGEGROUPV1_DELTA_HEADER.pack(
            d.node, d.p1node, d.p2node, d.linknode)

    return cgpacker(repo, filematcher, b'01',
                    deltaparentfn=_deltaparentprev,
                    allowreorder=None,
                    builddeltaheader=builddeltaheader,
                    manifestsend=b'',
                    bundlecaps=bundlecaps,
                    ellipses=ellipses,
                    shallow=shallow,
                    ellipsisroots=ellipsisroots,
                    fullnodes=fullnodes)
1212 1234
def _makecg2packer(repo, filematcher, bundlecaps, ellipses=False,
                   shallow=False, ellipsisroots=None, fullnodes=None):
    """Construct a cgpacker for version '02' changegroups."""
    def builddeltaheader(d):
        return _CHANGEGROUPV2_DELTA_HEADER.pack(
            d.node, d.p1node, d.p2node, d.basenode, d.linknode)

    # Since generaldelta is directly supported by cg2, reordering
    # generally doesn't help, so we disable it by default (treating
    # bundle.reorder=auto just like bundle.reorder=False).
    return cgpacker(repo, filematcher, b'02',
                    deltaparentfn=_deltaparentgeneraldelta,
                    allowreorder=False,
                    builddeltaheader=builddeltaheader,
                    manifestsend=b'',
                    bundlecaps=bundlecaps,
                    ellipses=ellipses,
                    shallow=shallow,
                    ellipsisroots=ellipsisroots,
                    fullnodes=fullnodes)
1231 1253
def _makecg3packer(repo, filematcher, bundlecaps, ellipses=False,
                   shallow=False, ellipsisroots=None, fullnodes=None):
    """Construct a cgpacker for version '03' changegroups."""
    def builddeltaheader(d):
        # cg3 headers additionally carry the revlog flags.
        return _CHANGEGROUPV3_DELTA_HEADER.pack(
            d.node, d.p1node, d.p2node, d.basenode, d.linknode, d.flags)

    # Ellipsis serving needs its own delta-parent strategy.
    if ellipses:
        deltaparentfn = _deltaparentellipses
    else:
        deltaparentfn = _deltaparentgeneraldelta

    return cgpacker(repo, filematcher, b'03',
                    deltaparentfn=deltaparentfn,
                    allowreorder=False,
                    builddeltaheader=builddeltaheader,
                    manifestsend=closechunk(),
                    bundlecaps=bundlecaps,
                    ellipses=ellipses,
                    shallow=shallow,
                    ellipsisroots=ellipsisroots,
                    fullnodes=fullnodes)
1250 1272
# Maps changegroup version -> (packer factory, unpacker class).
_packermap = {'01': (_makecg1packer, cg1unpacker),
              # cg2 adds support for exchanging generaldelta
              '02': (_makecg2packer, cg2unpacker),
              # cg3 adds support for exchanging revlog flags and treemanifests
              '03': (_makecg3packer, cg3unpacker),
}
1257 1279
def allsupportedversions(repo):
    """Return the set of changegroup versions known to this code.

    '03' is only included when the repo opts in via the
    experimental.changegroup3 / experimental.treemanifest configs or the
    'treemanifest' requirement.
    """
    versions = set(_packermap)
    wantsv3 = (repo.ui.configbool('experimental', 'changegroup3')
               or repo.ui.configbool('experimental', 'treemanifest')
               or 'treemanifest' in repo.requirements)
    if not wantsv3:
        versions.discard('03')
    return versions
1265 1287
# Changegroup versions that can be applied to the repo
def supportedincomingversions(repo):
    # Every known version can currently be ingested; the asymmetry with
    # supportedoutgoingversions exists because some repo requirements
    # restrict what we can produce, not what we can apply.
    return allsupportedversions(repo)
1269 1291
# Changegroup versions that can be created from the repo
def supportedoutgoingversions(repo):
    """Return the changegroup versions this repo is able to produce."""
    versions = allsupportedversions(repo)

    if 'treemanifest' in repo.requirements:
        # Versions 01 and 02 support only flat manifests and it's just too
        # expensive to convert between the flat manifest and tree manifest on
        # the fly. Since tree manifests are hashed differently, all of history
        # would have to be converted. Instead, we simply don't even pretend to
        # support versions 01 and 02.
        versions.difference_update(('01', '02'))
    if repository.NARROW_REQUIREMENT in repo.requirements:
        # Versions 01 and 02 don't support revlog flags, and we need to
        # support that for stripping and unbundling to work.
        versions.difference_update(('01', '02'))
    if LFS_REQUIREMENT in repo.requirements:
        # Versions 01 and 02 don't support revlog flags, and we need to
        # mark LFS entries with REVIDX_EXTSTORED.
        versions.difference_update(('01', '02'))

    return versions
1293 1315
def localversion(repo):
    """Return the best changegroup version for local-use bundles."""
    # Finds the best version to use for bundles that are meant to be used
    # locally, such as those from strip and shelve, and temporary bundles.
    return max(supportedoutgoingversions(repo))
1298 1320
def safeversion(repo):
    """Return the smallest version assumed safe for this repo's clients.

    For example, all hg versions that support generaldelta also support
    changegroup 02, so '01' is dropped for generaldelta repos.
    """
    candidates = supportedoutgoingversions(repo)
    if 'generaldelta' in repo.requirements:
        candidates.discard('01')
    assert candidates
    return min(candidates)
1308 1330
def getbundler(version, repo, bundlecaps=None, filematcher=None,
               ellipses=False, shallow=False, ellipsisroots=None,
               fullnodes=None):
    """Validate arguments and instantiate the packer for ``version``."""
    assert version in supportedoutgoingversions(repo)

    if filematcher is None:
        filematcher = matchmod.alwaysmatcher(repo.root, '')

    # cg1 has no way to express a subset of files.
    if version == '01' and not filematcher.always():
        raise error.ProgrammingError('version 01 changegroups do not support '
                                     'sparse file matchers')

    # Ellipsis nodes rely on revlog flags, which only cg3 carries.
    if ellipses and version in (b'01', b'02'):
        raise error.Abort(
            _('ellipsis nodes require at least cg3 on client and server, '
              'but negotiated version %s') % version)

    # Requested files could include files not in the local store. So
    # filter those out.
    filematcher = matchmod.intersectmatchers(repo.narrowmatch(),
                                             filematcher)

    makepacker = _packermap[version][0]
    return makepacker(repo, filematcher, bundlecaps, ellipses=ellipses,
                      shallow=shallow, ellipsisroots=ellipsisroots,
                      fullnodes=fullnodes)
1335 1357
def getunbundler(version, fh, alg, extras=None):
    """Return an unpacker for ``version`` reading from stream ``fh``.

    ``alg`` and ``extras`` are passed straight through to the unpacker
    constructor (presumably the compression algorithm and extra header
    values -- confirm against the cg*unpacker classes).
    """
    return _packermap[version][1](fh, alg, extras=extras)
1338 1360
def _changegroupinfo(repo, nodes, source):
    """Report the outgoing changesets to the ui (status and debug)."""
    ui = repo.ui
    if ui.verbose or source == 'bundle':
        ui.status(_("%d changesets found\n") % len(nodes))
    if ui.debugflag:
        ui.debug("list of changesets:\n")
        for node in nodes:
            ui.debug("%s\n" % hex(node))
1346 1368
def makechangegroup(repo, outgoing, version, source, fastpath=False,
                    bundlecaps=None):
    """Build a changegroup for ``outgoing`` and wrap it in an unpacker."""
    stream = makestream(repo, outgoing, version, source,
                        fastpath=fastpath, bundlecaps=bundlecaps)
    return getunbundler(version, util.chunkbuffer(stream), None,
                        {'clcount': len(outgoing.missing) })
1353 1375
def makestream(repo, outgoing, version, source, fastpath=False,
               bundlecaps=None, filematcher=None):
    """Return a stream of changegroup chunks for the ``outgoing`` set."""
    bundler = getbundler(version, repo, bundlecaps=bundlecaps,
                         filematcher=filematcher)

    repo = repo.unfiltered()
    common = outgoing.common
    missing = outgoing.missing
    heads = outgoing.missingheads
    heads.sort()
    # We go through the fast path if we get told to, or if all (unfiltered
    # heads have been requested (since we then know there all linkrevs will
    # be pulled by the client).
    fastpathlinkrev = fastpath or (
        repo.filtername is None and heads == sorted(repo.heads()))

    repo.hook('preoutgoing', throw=True, source=source)
    _changegroupinfo(repo, missing, source)
    return bundler.generate(common, missing, fastpathlinkrev, source)
1373 1395
def _addchangegroupfiles(repo, source, revmap, trp, expectedfiles, needfiles):
    """Consume the file portion of changegroup ``source`` into the repo.

    Iterates the per-file chunk groups in the stream, adds each group to
    the matching filelog, and cross-checks the received nodes against
    ``needfiles`` (a dict of filename -> set of expected nodes, mutated
    in place). ``revmap`` and ``trp`` are forwarded to ``fl.addgroup``;
    presumably the linkrev mapping and the transaction -- confirm at the
    call site. ``expectedfiles`` only sizes the progress bar.

    Returns a ``(revisions, files)`` tuple of added counts. Raises
    ``error.Abort`` on empty groups, censored delta bases, unexpected
    nodes, or promised-but-missing file data.
    """
    revisions = 0
    files = 0
    progress = repo.ui.makeprogress(_('files'), unit=_('files'),
                                    total=expectedfiles)
    # A falsy ({}) filelog header marks the end of the file stream.
    for chunkdata in iter(source.filelogheader, {}):
        files += 1
        f = chunkdata["filename"]
        repo.ui.debug("adding %s revisions\n" % f)
        progress.increment()
        fl = repo.file(f)
        o = len(fl)  # revision count before this group is applied
        try:
            deltas = source.deltaiter()
            if not fl.addgroup(deltas, revmap, trp):
                raise error.Abort(_("received file revlog group is empty"))
        except error.CensoredBaseError as e:
            raise error.Abort(_("received delta base is censored: %s") % e)
        revisions += len(fl) - o
        if f in needfiles:
            needs = needfiles[f]
            # Tick off each newly added node; anything unexpected aborts.
            for new in pycompat.xrange(o, len(fl)):
                n = fl.node(new)
                if n in needs:
                    needs.remove(n)
                else:
                    raise error.Abort(
                        _("received spurious file revlog entry"))
            if not needs:
                del needfiles[f]
    progress.complete()

    # Anything left in needfiles was expected but never arrived; verify
    # it already exists locally, otherwise the repo is incomplete.
    for f, needs in needfiles.iteritems():
        fl = repo.file(f)
        for n in needs:
            try:
                fl.rev(n)
            except error.LookupError:
                raise error.Abort(
                    _('missing file data for %s:%s - run hg verify') %
                    (f, hex(n)))

    return revisions, files
General Comments 0
You need to be logged in to leave comments. Login now