changegroup: move fullnodes into cgpacker...
Gregory Szorc
r38945:1af339c2 default
@@ -1,1429 +1,1435 @@
1 1 # changegroup.py - Mercurial changegroup manipulation functions
2 2 #
3 3 # Copyright 2006 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import os
11 11 import struct
12 12 import weakref
13 13
14 14 from .i18n import _
15 15 from .node import (
16 16 hex,
17 17 nullid,
18 18 nullrev,
19 19 short,
20 20 )
21 21
22 22 from .thirdparty import (
23 23 attr,
24 24 )
25 25
26 26 from . import (
27 27 dagutil,
28 28 error,
29 29 manifest,
30 30 match as matchmod,
31 31 mdiff,
32 32 phases,
33 33 pycompat,
34 34 repository,
35 35 revlog,
36 36 util,
37 37 )
38 38
39 39 from .utils import (
40 40 stringutil,
41 41 )
42 42
43 43 _CHANGEGROUPV1_DELTA_HEADER = struct.Struct("20s20s20s20s")
44 44 _CHANGEGROUPV2_DELTA_HEADER = struct.Struct("20s20s20s20s20s")
45 45 _CHANGEGROUPV3_DELTA_HEADER = struct.Struct(">20s20s20s20s20sH")
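For reference, the fixed header sizes these format strings imply (my reading of the struct definitions above, matching the field order used by each unpacker's _deltaheader below):

    # cg1: node, p1, p2, linknode                        -> 4 * 20 = 80 bytes
    # cg2: node, p1, p2, deltabase, linknode             -> 5 * 20 = 100 bytes
    # cg3: the cg2 fields plus 2 bytes of revlog flags   -> 102 bytes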
46 46
47 47 LFS_REQUIREMENT = 'lfs'
48 48
49 49 readexactly = util.readexactly
50 50
51 51 def getchunk(stream):
52 52 """return the next chunk from stream as a string"""
53 53 d = readexactly(stream, 4)
54 54 l = struct.unpack(">l", d)[0]
55 55 if l <= 4:
56 56 if l:
57 57 raise error.Abort(_("invalid chunk length %d") % l)
58 58 return ""
59 59 return readexactly(stream, l - 4)
60 60
61 61 def chunkheader(length):
62 62 """return a changegroup chunk header (string)"""
63 63 return struct.pack(">l", length + 4)
64 64
65 65 def closechunk():
66 66 """return a changegroup chunk header (string) for a zero-length chunk"""
67 67 return struct.pack(">l", 0)
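A minimal sketch of how the three helpers above compose (illustrative only; the payloads are made up): each chunk is its payload prefixed by a big-endian 32-bit length that counts the four length bytes themselves, and a zero-length chunk terminates a group.

    # Frame two payloads as changegroup chunks and terminate the group.
    payloads = [b'first delta', b'second delta']
    framed = b''.join(chunkheader(len(p)) + p for p in payloads) + closechunk()
    # getchunk() on a stream of `framed` would yield the payloads back,
    # returning "" at the terminating empty chunk.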
68 68
69 69 def writechunks(ui, chunks, filename, vfs=None):
70 70 """Write chunks to a file and return its filename.
71 71
72 72 The stream is assumed to be a bundle file.
73 73 Existing files will not be overwritten.
74 74 If no filename is specified, a temporary file is created.
75 75 """
76 76 fh = None
77 77 cleanup = None
78 78 try:
79 79 if filename:
80 80 if vfs:
81 81 fh = vfs.open(filename, "wb")
82 82 else:
83 83 # Increase default buffer size because the default is usually
84 84 # small (4k is common on Linux).
85 85 fh = open(filename, "wb", 131072)
86 86 else:
87 87 fd, filename = pycompat.mkstemp(prefix="hg-bundle-", suffix=".hg")
88 88 fh = os.fdopen(fd, r"wb")
89 89 cleanup = filename
90 90 for c in chunks:
91 91 fh.write(c)
92 92 cleanup = None
93 93 return filename
94 94 finally:
95 95 if fh is not None:
96 96 fh.close()
97 97 if cleanup is not None:
98 98 if filename and vfs:
99 99 vfs.unlink(cleanup)
100 100 else:
101 101 os.unlink(cleanup)
102 102
103 103 class cg1unpacker(object):
104 104 """Unpacker for cg1 changegroup streams.
105 105
106 106 A changegroup unpacker handles the framing of the revision data in
107 107 the wire format. Most consumers will want to use the apply()
108 108 method to add the changes from the changegroup to a repository.
109 109
110 110 If you're forwarding a changegroup unmodified to another consumer,
111 111 use getchunks(), which returns an iterator of changegroup
112 112 chunks. This is mostly useful for cases where you need to know the
113 113 data stream has ended by observing the end of the changegroup.
114 114
115 115 deltachunk() is useful only if you're applying delta data. Most
116 116 consumers should prefer apply() instead.
117 117
118 118 A few other public methods exist. Those are used only for
119 119 bundlerepo and some debug commands - their use is discouraged.
120 120 """
121 121 deltaheader = _CHANGEGROUPV1_DELTA_HEADER
122 122 deltaheadersize = deltaheader.size
123 123 version = '01'
124 124 _grouplistcount = 1 # One list of files after the manifests
125 125
126 126 def __init__(self, fh, alg, extras=None):
127 127 if alg is None:
128 128 alg = 'UN'
129 129 if alg not in util.compengines.supportedbundletypes:
130 130 raise error.Abort(_('unknown stream compression type: %s')
131 131 % alg)
132 132 if alg == 'BZ':
133 133 alg = '_truncatedBZ'
134 134
135 135 compengine = util.compengines.forbundletype(alg)
136 136 self._stream = compengine.decompressorreader(fh)
137 137 self._type = alg
138 138 self.extras = extras or {}
139 139 self.callback = None
140 140
141 141 # These methods (compressed, read, seek, tell) all appear to only
142 142 # be used by bundlerepo, but it's a little hard to tell.
143 143 def compressed(self):
144 144 return self._type is not None and self._type != 'UN'
145 145 def read(self, l):
146 146 return self._stream.read(l)
147 147 def seek(self, pos):
148 148 return self._stream.seek(pos)
149 149 def tell(self):
150 150 return self._stream.tell()
151 151 def close(self):
152 152 return self._stream.close()
153 153
154 154 def _chunklength(self):
155 155 d = readexactly(self._stream, 4)
156 156 l = struct.unpack(">l", d)[0]
157 157 if l <= 4:
158 158 if l:
159 159 raise error.Abort(_("invalid chunk length %d") % l)
160 160 return 0
161 161 if self.callback:
162 162 self.callback()
163 163 return l - 4
164 164
165 165 def changelogheader(self):
166 166 """v10 does not have a changelog header chunk"""
167 167 return {}
168 168
169 169 def manifestheader(self):
170 170 """v10 does not have a manifest header chunk"""
171 171 return {}
172 172
173 173 def filelogheader(self):
174 174 """return the header of the filelogs chunk, v10 only has the filename"""
175 175 l = self._chunklength()
176 176 if not l:
177 177 return {}
178 178 fname = readexactly(self._stream, l)
179 179 return {'filename': fname}
180 180
181 181 def _deltaheader(self, headertuple, prevnode):
182 182 node, p1, p2, cs = headertuple
183 183 if prevnode is None:
184 184 deltabase = p1
185 185 else:
186 186 deltabase = prevnode
187 187 flags = 0
188 188 return node, p1, p2, deltabase, cs, flags
189 189
190 190 def deltachunk(self, prevnode):
191 191 l = self._chunklength()
192 192 if not l:
193 193 return {}
194 194 headerdata = readexactly(self._stream, self.deltaheadersize)
195 195 header = self.deltaheader.unpack(headerdata)
196 196 delta = readexactly(self._stream, l - self.deltaheadersize)
197 197 node, p1, p2, deltabase, cs, flags = self._deltaheader(header, prevnode)
198 198 return (node, p1, p2, cs, deltabase, delta, flags)
199 199
200 200 def getchunks(self):
201 201 """returns all the chunks contains in the bundle
202 202
203 203 Used when you need to forward the binary stream to a file or another
204 204 network API. To do so, it parses the changegroup data, since otherwise
205 205 it would block on an sshrepo, not knowing where the stream ends.
206 206 """
207 207 # For changegroup 1 and 2, we expect 3 parts: changelog, manifestlog,
208 208 # and a list of filelogs. For changegroup 3, we expect 4 parts:
209 209 # changelog, manifestlog, a list of tree manifestlogs, and a list of
210 210 # filelogs.
211 211 #
212 212 # Changelog and manifestlog parts are terminated with empty chunks. The
213 213 # tree and file parts are a list of entry sections. Each entry section
214 214 # is a series of chunks terminating in an empty chunk. The list of these
215 215 # entry sections is terminated in yet another empty chunk, so we know
216 216 # we've reached the end of the tree/file list when we reach an empty
217 217 # chunk that was preceded by no non-empty chunks.
218 218
219 219 parts = 0
220 220 while parts < 2 + self._grouplistcount:
221 221 noentries = True
222 222 while True:
223 223 chunk = getchunk(self)
224 224 if not chunk:
225 225 # The first two empty chunks represent the end of the
226 226 # changelog and the manifestlog portions. The remaining
227 227 # empty chunks represent either A) the end of individual
228 228 # tree or file entries in the file list, or B) the end of
229 229 # the entire list. It's the end of the entire list if there
230 230 # were no entries (i.e. noentries is True).
231 231 if parts < 2:
232 232 parts += 1
233 233 elif noentries:
234 234 parts += 1
235 235 break
236 236 noentries = False
237 237 yield chunkheader(len(chunk))
238 238 pos = 0
239 239 while pos < len(chunk):
240 240 next = pos + 2**20
241 241 yield chunk[pos:next]
242 242 pos = next
243 243 yield closechunk()
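Per the class docstring, getchunks() is the tool for forwarding a changegroup verbatim; a hedged sketch (the unpacker instance and output path are assumed):

    # Forward the framed changegroup stream to a file unmodified.
    with open('forwarded.cg', 'wb') as fh:
        for chunk in unpacker.getchunks():
            fh.write(chunk)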
244 244
245 245 def _unpackmanifests(self, repo, revmap, trp, prog):
246 246 self.callback = prog.increment
247 247 # no need to check for empty manifest group here:
248 248 # if the result of the merge of 1 and 2 is the same in 3 and 4,
249 249 # no new manifest will be created and the manifest group will
250 250 # be empty during the pull
251 251 self.manifestheader()
252 252 deltas = self.deltaiter()
253 253 repo.manifestlog.addgroup(deltas, revmap, trp)
254 254 prog.complete()
255 255 self.callback = None
256 256
257 257 def apply(self, repo, tr, srctype, url, targetphase=phases.draft,
258 258 expectedtotal=None):
259 259 """Add the changegroup returned by source.read() to this repo.
260 260 srctype is a string like 'push', 'pull', or 'unbundle'. url is
261 261 the URL of the repo where this changegroup is coming from.
262 262
263 263 Return an integer summarizing the change to this repo:
264 264 - nothing changed or no source: 0
265 265 - more heads than before: 1+added heads (2..n)
266 266 - fewer heads than before: -1-removed heads (-2..-n)
267 267 - number of heads stays the same: 1
268 268 """
269 269 repo = repo.unfiltered()
270 270 def csmap(x):
271 271 repo.ui.debug("add changeset %s\n" % short(x))
272 272 return len(cl)
273 273
274 274 def revmap(x):
275 275 return cl.rev(x)
276 276
277 277 changesets = files = revisions = 0
278 278
279 279 try:
280 280 # The transaction may already carry source information. In this
281 281 # case we use the top level data. We overwrite the arguments
282 282 # because we need to use the top level values (if they exist)
283 283 # in this function.
284 284 srctype = tr.hookargs.setdefault('source', srctype)
285 285 url = tr.hookargs.setdefault('url', url)
286 286 repo.hook('prechangegroup',
287 287 throw=True, **pycompat.strkwargs(tr.hookargs))
288 288
289 289 # write changelog data to temp files so concurrent readers
290 290 # will not see an inconsistent view
291 291 cl = repo.changelog
292 292 cl.delayupdate(tr)
293 293 oldheads = set(cl.heads())
294 294
295 295 trp = weakref.proxy(tr)
296 296 # pull off the changeset group
297 297 repo.ui.status(_("adding changesets\n"))
298 298 clstart = len(cl)
299 299 progress = repo.ui.makeprogress(_('changesets'), unit=_('chunks'),
300 300 total=expectedtotal)
301 301 self.callback = progress.increment
302 302
303 303 efiles = set()
304 304 def onchangelog(cl, node):
305 305 efiles.update(cl.readfiles(node))
306 306
307 307 self.changelogheader()
308 308 deltas = self.deltaiter()
309 309 cgnodes = cl.addgroup(deltas, csmap, trp, addrevisioncb=onchangelog)
310 310 efiles = len(efiles)
311 311
312 312 if not cgnodes:
313 313 repo.ui.develwarn('applied empty changegroup',
314 314 config='warn-empty-changegroup')
315 315 clend = len(cl)
316 316 changesets = clend - clstart
317 317 progress.complete()
318 318 self.callback = None
319 319
320 320 # pull off the manifest group
321 321 repo.ui.status(_("adding manifests\n"))
322 322 # We know that we'll never have more manifests than we had
323 323 # changesets.
324 324 progress = repo.ui.makeprogress(_('manifests'), unit=_('chunks'),
325 325 total=changesets)
326 326 self._unpackmanifests(repo, revmap, trp, progress)
327 327
328 328 needfiles = {}
329 329 if repo.ui.configbool('server', 'validate'):
330 330 cl = repo.changelog
331 331 ml = repo.manifestlog
332 332 # validate incoming csets have their manifests
333 333 for cset in pycompat.xrange(clstart, clend):
334 334 mfnode = cl.changelogrevision(cset).manifest
335 335 mfest = ml[mfnode].readdelta()
336 336 # store file nodes we must see
337 337 for f, n in mfest.iteritems():
338 338 needfiles.setdefault(f, set()).add(n)
339 339
340 340 # process the files
341 341 repo.ui.status(_("adding file changes\n"))
342 342 newrevs, newfiles = _addchangegroupfiles(
343 343 repo, self, revmap, trp, efiles, needfiles)
344 344 revisions += newrevs
345 345 files += newfiles
346 346
347 347 deltaheads = 0
348 348 if oldheads:
349 349 heads = cl.heads()
350 350 deltaheads = len(heads) - len(oldheads)
351 351 for h in heads:
352 352 if h not in oldheads and repo[h].closesbranch():
353 353 deltaheads -= 1
354 354 htext = ""
355 355 if deltaheads:
356 356 htext = _(" (%+d heads)") % deltaheads
357 357
358 358 repo.ui.status(_("added %d changesets"
359 359 " with %d changes to %d files%s\n")
360 360 % (changesets, revisions, files, htext))
361 361 repo.invalidatevolatilesets()
362 362
363 363 if changesets > 0:
364 364 if 'node' not in tr.hookargs:
365 365 tr.hookargs['node'] = hex(cl.node(clstart))
366 366 tr.hookargs['node_last'] = hex(cl.node(clend - 1))
367 367 hookargs = dict(tr.hookargs)
368 368 else:
369 369 hookargs = dict(tr.hookargs)
370 370 hookargs['node'] = hex(cl.node(clstart))
371 371 hookargs['node_last'] = hex(cl.node(clend - 1))
372 372 repo.hook('pretxnchangegroup',
373 373 throw=True, **pycompat.strkwargs(hookargs))
374 374
375 375 added = [cl.node(r) for r in pycompat.xrange(clstart, clend)]
376 376 phaseall = None
377 377 if srctype in ('push', 'serve'):
378 378 # Old servers can not push the boundary themselves.
379 379 # New servers won't push the boundary if the changeset already
380 380 # exists locally as secret.
381 381 #
382 382 # We should not use `added` here but the list of all changes in
383 383 # the bundle.
384 384 if repo.publishing():
385 385 targetphase = phaseall = phases.public
386 386 else:
387 387 # closer target phase computation
388 388
389 389 # Those changesets have been pushed from the
390 390 # outside; their phases are going to be pushed
391 391 # alongside. Therefore `targetphase` is
392 392 # ignored.
393 393 targetphase = phaseall = phases.draft
394 394 if added:
395 395 phases.registernew(repo, tr, targetphase, added)
396 396 if phaseall is not None:
397 397 phases.advanceboundary(repo, tr, phaseall, cgnodes)
398 398
399 399 if changesets > 0:
400 400
401 401 def runhooks():
402 402 # These hooks run when the lock releases, not when the
403 403 # transaction closes. So it's possible for the changelog
404 404 # to have changed since we last saw it.
405 405 if clstart >= len(repo):
406 406 return
407 407
408 408 repo.hook("changegroup", **pycompat.strkwargs(hookargs))
409 409
410 410 for n in added:
411 411 args = hookargs.copy()
412 412 args['node'] = hex(n)
413 413 del args['node_last']
414 414 repo.hook("incoming", **pycompat.strkwargs(args))
415 415
416 416 newheads = [h for h in repo.heads()
417 417 if h not in oldheads]
418 418 repo.ui.log("incoming",
419 419 "%d incoming changes - new heads: %s\n",
420 420 len(added),
421 421 ', '.join([hex(c[:6]) for c in newheads]))
422 422
423 423 tr.addpostclose('changegroup-runhooks-%020i' % clstart,
424 424 lambda tr: repo._afterlock(runhooks))
425 425 finally:
426 426 repo.ui.flush()
427 427 # never return 0 here:
428 428 if deltaheads < 0:
429 429 ret = deltaheads - 1
430 430 else:
431 431 ret = deltaheads + 1
432 432 return ret
433 433
434 434 def deltaiter(self):
435 435 """
436 436 returns an iterator of the deltas in this changegroup
437 437
438 438 Useful for passing to the underlying storage system to be stored.
439 439 """
440 440 chain = None
441 441 for chunkdata in iter(lambda: self.deltachunk(chain), {}):
442 442 # Chunkdata: (node, p1, p2, cs, deltabase, delta, flags)
443 443 yield chunkdata
444 444 chain = chunkdata[0]
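This mirrors how the module itself consumes deltaiter() (see _unpackmanifests() above): read the group header, then hand the iterator straight to storage. A sketch with assumed stand-ins for the unpacker, a revlog-like store, a linkrev mapper and a transaction:

    # Apply one manifest group from an unpacker to a store.
    unpacker.manifestheader()              # consume the (empty for v10) header
    deltas = unpacker.deltaiter()
    store.addgroup(deltas, linkmapper, tr)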
445 445
446 446 class cg2unpacker(cg1unpacker):
447 447 """Unpacker for cg2 streams.
448 448
449 449 cg2 streams add support for generaldelta, so the delta header
450 450 format is slightly different. All other features about the data
451 451 remain the same.
452 452 """
453 453 deltaheader = _CHANGEGROUPV2_DELTA_HEADER
454 454 deltaheadersize = deltaheader.size
455 455 version = '02'
456 456
457 457 def _deltaheader(self, headertuple, prevnode):
458 458 node, p1, p2, deltabase, cs = headertuple
459 459 flags = 0
460 460 return node, p1, p2, deltabase, cs, flags
461 461
462 462 class cg3unpacker(cg2unpacker):
463 463 """Unpacker for cg3 streams.
464 464
465 465 cg3 streams add support for exchanging treemanifests and revlog
466 466 flags. It adds the revlog flags to the delta header and an empty chunk
467 467 separating manifests and files.
468 468 """
469 469 deltaheader = _CHANGEGROUPV3_DELTA_HEADER
470 470 deltaheadersize = deltaheader.size
471 471 version = '03'
472 472 _grouplistcount = 2 # One list of manifests and one list of files
473 473
474 474 def _deltaheader(self, headertuple, prevnode):
475 475 node, p1, p2, deltabase, cs, flags = headertuple
476 476 return node, p1, p2, deltabase, cs, flags
477 477
478 478 def _unpackmanifests(self, repo, revmap, trp, prog):
479 479 super(cg3unpacker, self)._unpackmanifests(repo, revmap, trp, prog)
480 480 for chunkdata in iter(self.filelogheader, {}):
481 481 # If we get here, there are directory manifests in the changegroup
482 482 d = chunkdata["filename"]
483 483 repo.ui.debug("adding %s revisions\n" % d)
484 484 dirlog = repo.manifestlog._revlog.dirlog(d)
485 485 deltas = self.deltaiter()
486 486 if not dirlog.addgroup(deltas, revmap, trp):
487 487 raise error.Abort(_("received dir revlog group is empty"))
488 488
489 489 class headerlessfixup(object):
490 490 def __init__(self, fh, h):
491 491 self._h = h
492 492 self._fh = fh
493 493 def read(self, n):
494 494 if self._h:
495 495 d, self._h = self._h[:n], self._h[n:]
496 496 if len(d) < n:
497 497 d += readexactly(self._fh, n - len(d))
498 498 return d
499 499 return readexactly(self._fh, n)
500 500
501 501 @attr.s(slots=True, frozen=True)
502 502 class revisiondelta(object):
503 503 """Describes a delta entry in a changegroup.
504 504
505 505 Captured data is sufficient to serialize the delta into multiple
506 506 formats.
507 507 """
508 508 # 20 byte node of this revision.
509 509 node = attr.ib()
510 510 # 20 byte nodes of parent revisions.
511 511 p1node = attr.ib()
512 512 p2node = attr.ib()
513 513 # 20 byte node of node this delta is against.
514 514 basenode = attr.ib()
515 515 # 20 byte node of changeset revision this delta is associated with.
516 516 linknode = attr.ib()
517 517 # 2 bytes of flags to apply to revision data.
518 518 flags = attr.ib()
519 519 # Iterable of chunks holding raw delta data.
520 520 deltachunks = attr.ib()
521 521
522 522 class cgpacker(object):
523 523 def __init__(self, repo, filematcher, version, allowreorder,
524 524 useprevdelta, builddeltaheader, manifestsend,
525 525 sendtreemanifests, bundlecaps=None, ellipses=False,
526 shallow=False, ellipsisroots=None):
526 shallow=False, ellipsisroots=None, fullnodes=None):
527 527 """Given a source repo, construct a bundler.
528 528
529 529 filematcher is a matcher that matches on files to include in the
530 530 changegroup. Used to facilitate sparse changegroups.
531 531
532 532 allowreorder controls whether reordering of revisions is allowed.
533 533 This value is used when ``bundle.reorder`` is ``auto`` or isn't
534 534 set.
535 535
536 536 useprevdelta controls whether revisions should always delta against
537 537 the previous revision in the changegroup.
538 538
539 539 builddeltaheader is a callable that constructs the header for a group
540 540 delta.
541 541
542 542 manifestsend is a chunk to send after manifests have been fully emitted.
543 543
544 544 sendtreemanifests indicates whether tree manifests should be emitted.
545 545
546 546 ellipses indicates whether ellipsis serving mode is enabled.
547 547
548 548 bundlecaps is optional and can be used to specify the set of
549 549 capabilities which can be used to build the bundle. While bundlecaps is
550 550 unused in core Mercurial, extensions rely on this feature to communicate
551 551 capabilities to customize the changegroup packer.
552 552
553 553 shallow indicates whether shallow data might be sent. The packer may
554 554 need to pack file contents not introduced by the changes being packed.
555
556 fullnodes is the list of nodes which should not be ellipsis nodes. We
557 store this rather than the set of nodes that should be ellipsis because
558 for very large histories we expect this to be significantly smaller.
555 559 """
556 560 assert filematcher
557 561 self._filematcher = filematcher
558 562
559 563 self.version = version
560 564 self._useprevdelta = useprevdelta
561 565 self._builddeltaheader = builddeltaheader
562 566 self._manifestsend = manifestsend
563 567 self._sendtreemanifests = sendtreemanifests
564 568 self._ellipses = ellipses
565 569
566 570 # Set of capabilities we can use to build the bundle.
567 571 if bundlecaps is None:
568 572 bundlecaps = set()
569 573 self._bundlecaps = bundlecaps
570 574 self._isshallow = shallow
575 self._fullnodes = fullnodes
571 576
572 577 # Maps ellipsis revs to their roots at the changelog level.
573 578 self._precomputedellipsis = ellipsisroots
574 579
575 580 # experimental config: bundle.reorder
576 581 reorder = repo.ui.config('bundle', 'reorder')
577 582 if reorder == 'auto':
578 583 self._reorder = allowreorder
579 584 else:
580 585 self._reorder = stringutil.parsebool(reorder)
581 586
582 587 self._repo = repo
583 588
584 589 if self._repo.ui.verbose and not self._repo.ui.debugflag:
585 590 self._verbosenote = self._repo.ui.note
586 591 else:
587 592 self._verbosenote = lambda s: None
588 593
589 594 # TODO the functionality keyed off of this should probably be
590 595 # controlled via arguments to group() that influence behavior.
591 596 self._changelogdone = False
592 597
593 598 # Maps CL revs to per-revlog revisions. Cleared in _close() at
594 599 # the end of each group.
595 600 self._clrevtolocalrev = {}
596 601 self._nextclrevtolocalrev = {}
597 602
598 603 # Maps changelog nodes to changelog revs. Filled in once
599 604 # during changelog stage and then left unmodified.
600 605 self._clnodetorev = {}
601 606
602 607 def _close(self):
603 608 # Ellipses serving mode.
604 609 self._clrevtolocalrev.clear()
605 610 if self._nextclrevtolocalrev:
606 611 self._clrevtolocalrev = self._nextclrevtolocalrev
607 612 self._nextclrevtolocalrev = {}
608 613 self._changelogdone = True
609 614
610 615 return closechunk()
611 616
612 617 def _fileheader(self, fname):
613 618 return chunkheader(len(fname)) + fname
614 619
615 620 # Extracted both for clarity and for overriding in extensions.
616 621 def _sortgroup(self, store, nodelist, lookup):
617 622 """Sort nodes for change group and turn them into revnums."""
618 623 # Ellipses serving mode.
619 624 #
620 625 # In a perfect world, we'd generate better ellipsis-ified graphs
621 626 # for non-changelog revlogs. In practice, we haven't started doing
622 627 # that yet, so the resulting DAGs for the manifestlog and filelogs
623 628 # are actually full of bogus parentage on all the ellipsis
624 629 # nodes. This has the side effect that, while the contents are
625 630 # correct, the individual DAGs might be completely out of whack in
626 631 # a case like 882681bc3166 and its ancestors (back about 10
627 632 # revisions or so) in the main hg repo.
628 633 #
629 634 # The one invariant we *know* holds is that the new (potentially
630 635 # bogus) DAG shape will be valid if we order the nodes in the
631 636 # order that they're introduced in dramatis personae by the
632 637 # changelog, so what we do is we sort the non-changelog histories
633 638 # by the order in which they are used by the changelog.
634 639 if self._ellipses and self._clnodetorev:
635 640 key = lambda n: self._clnodetorev[lookup(n)]
636 641 return [store.rev(n) for n in sorted(nodelist, key=key)]
637 642
638 643 # for generaldelta revlogs, we linearize the revs; this will both be
639 644 # much quicker and generate a much smaller bundle
640 645 if (store._generaldelta and self._reorder is None) or self._reorder:
641 646 dag = dagutil.revlogdag(store)
642 647 return dag.linearize(set(store.rev(n) for n in nodelist))
643 648 else:
644 649 return sorted([store.rev(n) for n in nodelist])
645 650
646 651 def group(self, nodelist, store, lookup, units=None):
647 652 """Calculate a delta group, yielding a sequence of changegroup chunks
648 653 (strings).
649 654
650 655 Given a list of changeset revs, return a set of deltas and
651 656 metadata corresponding to nodes. The first delta is
652 657 first parent(nodelist[0]) -> nodelist[0], the receiver is
653 658 guaranteed to have this parent as it has all history before
654 659 these changesets. In the case firstparent is nullrev the
655 660 changegroup starts with a full revision.
656 661
657 662 If units is not None, progress detail will be generated, units specifies
658 663 the type of revlog that is touched (changelog, manifest, etc.).
659 664 """
660 665 # if we don't have any revisions touched by these changesets, bail
661 666 if len(nodelist) == 0:
662 667 yield self._close()
663 668 return
664 669
665 670 revs = self._sortgroup(store, nodelist, lookup)
666 671
667 672 # add the parent of the first rev
668 673 p = store.parentrevs(revs[0])[0]
669 674 revs.insert(0, p)
670 675
671 676 # build deltas
672 677 progress = None
673 678 if units is not None:
674 679 progress = self._repo.ui.makeprogress(_('bundling'), unit=units,
675 680 total=(len(revs) - 1))
676 681 for r in pycompat.xrange(len(revs) - 1):
677 682 if progress:
678 683 progress.update(r + 1)
679 684 prev, curr = revs[r], revs[r + 1]
680 685 linknode = lookup(store.node(curr))
681 686 for c in self._revchunk(store, curr, prev, linknode):
682 687 yield c
683 688
684 689 if progress:
685 690 progress.complete()
686 691 yield self._close()
687 692
688 693 # filter any nodes that claim to be part of the known set
689 694 def _prune(self, store, missing, commonrevs):
690 695 # TODO this violates storage abstraction for manifests.
691 696 if isinstance(store, manifest.manifestrevlog):
692 697 if not self._filematcher.visitdir(store._dir[:-1] or '.'):
693 698 return []
694 699
695 700 rr, rl = store.rev, store.linkrev
696 701 return [n for n in missing if rl(rr(n)) not in commonrevs]
697 702
698 703 def _packmanifests(self, dir, mfnodes, lookuplinknode):
699 704 """Pack flat manifests into a changegroup stream."""
700 705 assert not dir
701 706 for chunk in self.group(mfnodes, self._repo.manifestlog._revlog,
702 707 lookuplinknode, units=_('manifests')):
703 708 yield chunk
704 709
705 710 def _packtreemanifests(self, dir, mfnodes, lookuplinknode):
706 711 """Version of _packmanifests that operates on directory manifests.
707 712
708 713 Encodes the directory name in the output so multiple manifests
709 714 can be sent.
710 715 """
711 716 assert self.version == b'03'
712 717
713 718 if dir:
714 719 yield self._fileheader(dir)
715 720
716 721 # TODO violates storage abstractions by assuming revlogs.
717 722 dirlog = self._repo.manifestlog._revlog.dirlog(dir)
718 723 for chunk in self.group(mfnodes, dirlog, lookuplinknode,
719 724 units=_('manifests')):
720 725 yield chunk
721 726
722 727 def generate(self, commonrevs, clnodes, fastpathlinkrev, source):
723 728 '''yield a sequence of changegroup chunks (strings)'''
724 729 repo = self._repo
725 730 cl = repo.changelog
726 731
727 732 clrevorder = {}
728 733 mfs = {} # needed manifests
729 734 fnodes = {} # needed file nodes
730 735 mfl = repo.manifestlog
731 736 # TODO violates storage abstraction.
732 737 mfrevlog = mfl._revlog
733 738 changedfiles = set()
734 739
735 740 # Callback for the changelog, used to collect changed files and
736 741 # manifest nodes.
737 742 # Returns the linkrev node (identity in the changelog case).
738 743 def lookupcl(x):
739 744 c = cl.read(x)
740 745 clrevorder[x] = len(clrevorder)
741 746
742 747 if self._ellipses:
743 748 # Only update mfs if x is going to be sent. Otherwise we
744 749 # end up with bogus linkrevs specified for manifests and
745 750 # we skip some manifest nodes that we should otherwise
746 751 # have sent.
747 if (x in self._full_nodes
752 if (x in self._fullnodes
748 753 or cl.rev(x) in self._precomputedellipsis):
749 754 n = c[0]
750 755 # Record the first changeset introducing this manifest
751 756 # version.
752 757 mfs.setdefault(n, x)
753 758 # Set this narrow-specific dict so we have the lowest
754 759 # manifest revnum to look up for this cl revnum. (Part of
755 760 # mapping changelog ellipsis parents to manifest ellipsis
756 761 # parents)
757 762 self._nextclrevtolocalrev.setdefault(cl.rev(x),
758 763 mfrevlog.rev(n))
759 764 # We can't trust the changed files list in the changeset if the
760 765 # client requested a shallow clone.
761 766 if self._isshallow:
762 767 changedfiles.update(mfl[c[0]].read().keys())
763 768 else:
764 769 changedfiles.update(c[3])
765 770 else:
766 771
767 772 n = c[0]
768 773 # record the first changeset introducing this manifest version
769 774 mfs.setdefault(n, x)
770 775 # Record a complete list of potentially-changed files in
771 776 # this manifest.
772 777 changedfiles.update(c[3])
773 778
774 779 return x
775 780
776 781 self._verbosenote(_('uncompressed size of bundle content:\n'))
777 782 size = 0
778 783 for chunk in self.group(clnodes, cl, lookupcl, units=_('changesets')):
779 784 size += len(chunk)
780 785 yield chunk
781 786 self._verbosenote(_('%8.i (changelog)\n') % size)
782 787
783 788 # We need to make sure that the linkrev in the changegroup refers to
784 789 # the first changeset that introduced the manifest or file revision.
785 790 # The fastpath is usually safer than the slowpath, because the filelogs
786 791 # are walked in revlog order.
787 792 #
788 793 # When taking the slowpath with reorder=None and the manifest revlog
789 794 # uses generaldelta, the manifest may be walked in the "wrong" order.
790 795 # Without 'clrevorder', we would get an incorrect linkrev (see fix in
791 796 # cc0ff93d0c0c).
792 797 #
793 798 # When taking the fastpath, we are only vulnerable to reordering
794 799 # of the changelog itself. The changelog never uses generaldelta, so
795 800 # it is only reordered when reorder=True. To handle this case, we
796 801 # simply take the slowpath, which already has the 'clrevorder' logic.
797 802 # This was also fixed in cc0ff93d0c0c.
798 803 fastpathlinkrev = fastpathlinkrev and not self._reorder
799 804 # Treemanifests don't work correctly with fastpathlinkrev
800 805 # either, because we don't discover which directory nodes to
801 806 # send along with files. This could probably be fixed.
802 807 fastpathlinkrev = fastpathlinkrev and (
803 808 'treemanifest' not in repo.requirements)
804 809
805 810 for chunk in self.generatemanifests(commonrevs, clrevorder,
806 811 fastpathlinkrev, mfs, fnodes, source):
807 812 yield chunk
808 813
809 814 if self._ellipses:
810 815 mfdicts = None
811 816 if self._isshallow:
812 817 mfdicts = [(self._repo.manifestlog[n].read(), lr)
813 818 for (n, lr) in mfs.iteritems()]
814 819
815 820 mfs.clear()
816 821 clrevs = set(cl.rev(x) for x in clnodes)
817 822
818 823 if not fastpathlinkrev:
819 824 def linknodes(unused, fname):
820 825 return fnodes.get(fname, {})
821 826 else:
822 827 cln = cl.node
823 828 def linknodes(filerevlog, fname):
824 829 llr = filerevlog.linkrev
825 830 fln = filerevlog.node
826 831 revs = ((r, llr(r)) for r in filerevlog)
827 832 return dict((fln(r), cln(lr)) for r, lr in revs if lr in clrevs)
828 833
829 834 if self._ellipses:
830 835 # We need to pass the mfdicts variable down into
831 836 # generatefiles(), but more than one command might have
832 837 # wrapped generatefiles so we can't modify the function
833 838 # signature. Instead, we pass the data to ourselves using an
834 839 # instance attribute. I'm sorry.
835 840 self._mfdicts = mfdicts
836 841
837 842 for chunk in self.generatefiles(changedfiles, linknodes, commonrevs,
838 843 source):
839 844 yield chunk
840 845
841 846 yield self._close()
842 847
843 848 if clnodes:
844 849 repo.hook('outgoing', node=hex(clnodes[0]), source=source)
845 850
846 851 def generatemanifests(self, commonrevs, clrevorder, fastpathlinkrev, mfs,
847 852 fnodes, source):
848 853 """Returns an iterator of changegroup chunks containing manifests.
849 854
850 855 `source` is unused here, but is used by extensions like remotefilelog to
851 856 change what is sent based on pulls vs pushes, etc.
852 857 """
853 858 repo = self._repo
854 859 mfl = repo.manifestlog
855 860 dirlog = mfl._revlog.dirlog
856 861 tmfnodes = {'': mfs}
857 862
858 863 # Callback for the manifest, used to collect linkrevs for filelog
859 864 # revisions.
860 865 # Returns the linkrev node (collected in lookupcl).
861 866 def makelookupmflinknode(dir, nodes):
862 867 if fastpathlinkrev:
863 868 assert not dir
864 869 return mfs.__getitem__
865 870
866 871 def lookupmflinknode(x):
867 872 """Callback for looking up the linknode for manifests.
868 873
869 874 Returns the linkrev node for the specified manifest.
870 875
871 876 SIDE EFFECT:
872 877
873 878 1) fclnodes gets populated with the list of relevant
874 879 file nodes if we're not using fastpathlinkrev
875 880 2) When treemanifests are in use, collects treemanifest nodes
876 881 to send
877 882
878 883 Note that this means manifests must be completely sent to
879 884 the client before you can trust the list of files and
880 885 treemanifests to send.
881 886 """
882 887 clnode = nodes[x]
883 888 mdata = mfl.get(dir, x).readfast(shallow=True)
884 889 for p, n, fl in mdata.iterentries():
885 890 if fl == 't': # subdirectory manifest
886 891 subdir = dir + p + '/'
887 892 tmfclnodes = tmfnodes.setdefault(subdir, {})
888 893 tmfclnode = tmfclnodes.setdefault(n, clnode)
889 894 if clrevorder[clnode] < clrevorder[tmfclnode]:
890 895 tmfclnodes[n] = clnode
891 896 else:
892 897 f = dir + p
893 898 fclnodes = fnodes.setdefault(f, {})
894 899 fclnode = fclnodes.setdefault(n, clnode)
895 900 if clrevorder[clnode] < clrevorder[fclnode]:
896 901 fclnodes[n] = clnode
897 902 return clnode
898 903 return lookupmflinknode
899 904
900 905 fn = (self._packtreemanifests if self._sendtreemanifests
901 906 else self._packmanifests)
902 907 size = 0
903 908 while tmfnodes:
904 909 dir, nodes = tmfnodes.popitem()
905 910 prunednodes = self._prune(dirlog(dir), nodes, commonrevs)
906 911 if not dir or prunednodes:
907 912 for x in fn(dir, prunednodes, makelookupmflinknode(dir, nodes)):
908 913 size += len(x)
909 914 yield x
910 915 self._verbosenote(_('%8.i (manifests)\n') % size)
911 916 yield self._manifestsend
912 917
913 918 # The 'source' parameter is useful for extensions
914 919 def generatefiles(self, changedfiles, linknodes, commonrevs, source):
915 920 changedfiles = list(filter(self._filematcher, changedfiles))
916 921
917 922 if self._isshallow:
918 923 # See comment in generate() for why this sadness is a thing.
919 924 mfdicts = self._mfdicts
920 925 del self._mfdicts
921 926 # In a shallow clone, the linknodes callback needs to also include
922 927 # those file nodes that are in the manifests we sent but weren't
923 928 # introduced by those manifests.
924 929 commonctxs = [self._repo[c] for c in commonrevs]
925 930 oldlinknodes = linknodes
926 931 clrev = self._repo.changelog.rev
927 932
928 933 # Defining this function has a side-effect of overriding the
929 934 # function of the same name that was passed in as an argument.
930 935 # TODO have caller pass in appropriate function.
931 936 def linknodes(flog, fname):
932 937 for c in commonctxs:
933 938 try:
934 939 fnode = c.filenode(fname)
935 940 self._clrevtolocalrev[c.rev()] = flog.rev(fnode)
936 941 except error.ManifestLookupError:
937 942 pass
938 943 links = oldlinknodes(flog, fname)
939 944 if len(links) != len(mfdicts):
940 945 for mf, lr in mfdicts:
941 946 fnode = mf.get(fname, None)
942 947 if fnode in links:
943 948 links[fnode] = min(links[fnode], lr, key=clrev)
944 949 elif fnode:
945 950 links[fnode] = lr
946 951 return links
947 952
948 953 return self._generatefiles(changedfiles, linknodes, commonrevs, source)
949 954
950 955 def _generatefiles(self, changedfiles, linknodes, commonrevs, source):
951 956 repo = self._repo
952 957 progress = repo.ui.makeprogress(_('bundling'), unit=_('files'),
953 958 total=len(changedfiles))
954 959 for i, fname in enumerate(sorted(changedfiles)):
955 960 filerevlog = repo.file(fname)
956 961 if not filerevlog:
957 962 raise error.Abort(_("empty or missing file data for %s") %
958 963 fname)
959 964
960 965 linkrevnodes = linknodes(filerevlog, fname)
961 966 # Lookup table for filenodes; we collected the linkrev nodes above in
962 967 # the fastpath case and with lookupmflinknode in the slowpath case.
963 968 def lookupfilelog(x):
964 969 return linkrevnodes[x]
965 970
966 971 filenodes = self._prune(filerevlog, linkrevnodes, commonrevs)
967 972 if filenodes:
968 973 progress.update(i + 1, item=fname)
969 974 h = self._fileheader(fname)
970 975 size = len(h)
971 976 yield h
972 977 for chunk in self.group(filenodes, filerevlog, lookupfilelog):
973 978 size += len(chunk)
974 979 yield chunk
975 980 self._verbosenote(_('%8.i %s\n') % (size, fname))
976 981 progress.complete()
977 982
978 983 def _deltaparent(self, store, rev, p1, p2, prev):
979 984 if self._useprevdelta:
980 985 if not store.candelta(prev, rev):
981 986 raise error.ProgrammingError(
982 987 'cg1 should not be used in this case')
983 988 return prev
984 989
985 990 # Narrow ellipses mode.
986 991 if self._ellipses:
987 992 # TODO: send better deltas when in narrow mode.
988 993 #
989 994 # changegroup.group() loops over revisions to send,
990 995 # including revisions we'll skip. What this means is that
991 996 # `prev` will be a potentially useless delta base for all
992 997 # ellipsis nodes, as the client likely won't have it. In
993 998 # the future we should do bookkeeping about which nodes
994 999 # have been sent to the client, and try to be
995 1000 # significantly smarter about delta bases. This is
996 1001 # slightly tricky because this same code has to work for
997 1002 # all revlogs, and we don't have the linkrev/linknode here.
998 1003 return p1
999 1004
1000 1005 dp = store.deltaparent(rev)
1001 1006 if dp == nullrev and store.storedeltachains:
1002 1007 # Avoid sending full revisions when delta parent is null. Pick prev
1003 1008 # in that case. It's tempting to pick p1 in this case, as p1 will
1004 1009 # be smaller in the common case. However, computing a delta against
1005 1010 # p1 may require resolving the raw text of p1, which could be
1006 1011 # expensive. The revlog caches should have prev cached, meaning
1007 1012 # less CPU for changegroup generation. There is likely room to add
1008 1013 # a flag and/or config option to control this behavior.
1009 1014 base = prev
1010 1015 elif dp == nullrev:
1011 1016 # revlog is configured to use full snapshot for a reason,
1012 1017 # stick to full snapshot.
1013 1018 base = nullrev
1014 1019 elif dp not in (p1, p2, prev):
1015 1020 # Pick prev when we can't be sure remote has the base revision.
1016 1021 return prev
1017 1022 else:
1018 1023 base = dp
1019 1024
1020 1025 if base != nullrev and not store.candelta(base, rev):
1021 1026 base = nullrev
1022 1027
1023 1028 return base
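Summarizing the base selection above (an editor's reading, not normative):

    # 1. _useprevdelta (cg1 semantics)        -> prev
    # 2. ellipses mode                        -> p1 (client may not have prev)
    # 3. deltaparent null, chains stored      -> prev (likely cached, cheap)
    # 4. deltaparent null, chains not stored  -> full snapshot (nullrev)
    # 5. deltaparent not in (p1, p2, prev)    -> prev (remote may lack the base)
    # 6. otherwise                            -> deltaparent
    # Bases from rules 3 and 6 additionally fall back to nullrev when
    # store.candelta(base, rev) rejects the pair.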
1024 1029
1025 1030 def _revchunk(self, store, rev, prev, linknode):
1026 1031 if self._ellipses:
1027 1032 fn = self._revisiondeltanarrow
1028 1033 else:
1029 1034 fn = self._revisiondeltanormal
1030 1035
1031 1036 delta = fn(store, rev, prev, linknode)
1032 1037 if not delta:
1033 1038 return
1034 1039
1035 1040 meta = self._builddeltaheader(delta)
1036 1041 l = len(meta) + sum(len(x) for x in delta.deltachunks)
1037 1042
1038 1043 yield chunkheader(l)
1039 1044 yield meta
1040 1045 for x in delta.deltachunks:
1041 1046 yield x
1042 1047
1043 1048 def _revisiondeltanormal(self, store, rev, prev, linknode):
1044 1049 node = store.node(rev)
1045 1050 p1, p2 = store.parentrevs(rev)
1046 1051 base = self._deltaparent(store, rev, p1, p2, prev)
1047 1052
1048 1053 prefix = ''
1049 1054 if store.iscensored(base) or store.iscensored(rev):
1050 1055 try:
1051 1056 delta = store.revision(node, raw=True)
1052 1057 except error.CensoredNodeError as e:
1053 1058 delta = e.tombstone
1054 1059 if base == nullrev:
1055 1060 prefix = mdiff.trivialdiffheader(len(delta))
1056 1061 else:
1057 1062 baselen = store.rawsize(base)
1058 1063 prefix = mdiff.replacediffheader(baselen, len(delta))
1059 1064 elif base == nullrev:
1060 1065 delta = store.revision(node, raw=True)
1061 1066 prefix = mdiff.trivialdiffheader(len(delta))
1062 1067 else:
1063 1068 delta = store.revdiff(base, rev)
1064 1069 p1n, p2n = store.parents(node)
1065 1070
1066 1071 return revisiondelta(
1067 1072 node=node,
1068 1073 p1node=p1n,
1069 1074 p2node=p2n,
1070 1075 basenode=store.node(base),
1071 1076 linknode=linknode,
1072 1077 flags=store.flags(rev),
1073 1078 deltachunks=(prefix, delta),
1074 1079 )
1075 1080
1076 1081 def _revisiondeltanarrow(self, store, rev, prev, linknode):
1077 1082 # build up some mapping information that's useful later. See
1078 1083 # the local() nested function below.
1079 1084 if not self._changelogdone:
1080 1085 self._clnodetorev[linknode] = rev
1081 1086 linkrev = rev
1082 1087 self._clrevtolocalrev[linkrev] = rev
1083 1088 else:
1084 1089 linkrev = self._clnodetorev[linknode]
1085 1090 self._clrevtolocalrev[linkrev] = rev
1086 1091
1087 1092 # This is a node to send in full, because the changeset it
1088 1093 # corresponds to was a full changeset.
1089 if linknode in self._full_nodes:
1094 if linknode in self._fullnodes:
1090 1095 return self._revisiondeltanormal(store, rev, prev, linknode)
1091 1096
1092 1097 # At this point, a node can either be one we should skip or an
1093 1098 # ellipsis. If it's not an ellipsis, bail immediately.
1094 1099 if linkrev not in self._precomputedellipsis:
1095 1100 return
1096 1101
1097 1102 linkparents = self._precomputedellipsis[linkrev]
1098 1103 def local(clrev):
1099 1104 """Turn a changelog revnum into a local revnum.
1100 1105
1101 1106 The ellipsis dag is stored as revnums on the changelog,
1102 1107 but when we're producing ellipsis entries for
1103 1108 non-changelog revlogs, we need to turn those numbers into
1104 1109 something local. This does that for us, and during the
1105 1110 changelog sending phase will also expand the stored
1106 1111 mappings as needed.
1107 1112 """
1108 1113 if clrev == nullrev:
1109 1114 return nullrev
1110 1115
1111 1116 if not self._changelogdone:
1112 1117 # If we're doing the changelog, it's possible that we
1113 1118 # have a parent that is already on the client, and we
1114 1119 # need to store some extra mapping information so that
1115 1120 # our contained ellipsis nodes will be able to resolve
1116 1121 # their parents.
1117 1122 if clrev not in self._clrevtolocalrev:
1118 1123 clnode = store.node(clrev)
1119 1124 self._clnodetorev[clnode] = clrev
1120 1125 return clrev
1121 1126
1122 1127 # Walk the ellipsis-ized changelog breadth-first looking for a
1123 1128 # change that has been linked from the current revlog.
1124 1129 #
1125 1130 # For a flat manifest revlog only a single step should be necessary
1126 1131 # as all relevant changelog entries are relevant to the flat
1127 1132 # manifest.
1128 1133 #
1129 1134 # For a filelog or tree manifest dirlog however not every changelog
1130 1135 # entry will have been relevant, so we need to skip some changelog
1131 1136 # nodes even after ellipsis-izing.
1132 1137 walk = [clrev]
1133 1138 while walk:
1134 1139 p = walk[0]
1135 1140 walk = walk[1:]
1136 1141 if p in self._clrevtolocalrev:
1137 1142 return self._clrevtolocalrev[p]
1138 elif p in self._full_nodes:
1143 elif p in self._fullnodes:
1139 1144 walk.extend([pp for pp in self._repo.changelog.parentrevs(p)
1140 1145 if pp != nullrev])
1141 1146 elif p in self._precomputedellipsis:
1142 1147 walk.extend([pp for pp in self._precomputedellipsis[p]
1143 1148 if pp != nullrev])
1144 1149 else:
1145 1150 # In this case, we've got an ellipsis with parents
1146 1151 # outside the current bundle (likely an
1147 1152 # incremental pull). We "know" that we can use the
1148 1153 # value of this same revlog at whatever revision
1149 1154 # is pointed to by linknode. "Know" is in scare
1150 1155 # quotes because I haven't done enough examination
1151 1156 # of edge cases to convince myself this is really
1152 1157 # a fact - it works for all the (admittedly
1153 1158 # thorough) cases in our testsuite, but I would be
1154 1159 # somewhat unsurprised to find a case in the wild
1155 1160 # where this breaks down a bit. That said, I don't
1156 1161 # know if it would hurt anything.
1157 1162 for i in pycompat.xrange(rev, 0, -1):
1158 1163 if store.linkrev(i) == clrev:
1159 1164 return i
1160 1165 # We failed to resolve a parent for this node, so
1161 1166 # we crash the changegroup construction.
1162 1167 raise error.Abort(
1163 1168 'unable to resolve parent while packing %r %r'
1164 1169 ' for changeset %r' % (store.indexfile, rev, clrev))
1165 1170
1166 1171 return nullrev
1167 1172
1168 1173 if not linkparents or (
1169 1174 store.parentrevs(rev) == (nullrev, nullrev)):
1170 1175 p1, p2 = nullrev, nullrev
1171 1176 elif len(linkparents) == 1:
1172 1177 p1, = sorted(local(p) for p in linkparents)
1173 1178 p2 = nullrev
1174 1179 else:
1175 1180 p1, p2 = sorted(local(p) for p in linkparents)
1176 1181
1177 1182 n = store.node(rev)
1178 1183 p1n, p2n = store.node(p1), store.node(p2)
1179 1184 flags = store.flags(rev)
1180 1185 flags |= revlog.REVIDX_ELLIPSIS
1181 1186
1182 1187 # TODO: try and actually send deltas for ellipsis data blocks
1183 1188 data = store.revision(n)
1184 1189 diffheader = mdiff.trivialdiffheader(len(data))
1185 1190
1186 1191 return revisiondelta(
1187 1192 node=n,
1188 1193 p1node=p1n,
1189 1194 p2node=p2n,
1190 1195 basenode=nullid,
1191 1196 linknode=linknode,
1192 1197 flags=flags,
1193 1198 deltachunks=(diffheader, data),
1194 1199 )
1195 1200
1196 1201 def _makecg1packer(repo, filematcher, bundlecaps, ellipses=False,
1197 shallow=False, ellipsisroots=None):
1202 shallow=False, ellipsisroots=None, fullnodes=None):
1198 1203 builddeltaheader = lambda d: _CHANGEGROUPV1_DELTA_HEADER.pack(
1199 1204 d.node, d.p1node, d.p2node, d.linknode)
1200 1205
1201 1206 return cgpacker(repo, filematcher, b'01',
1202 1207 useprevdelta=True,
1203 1208 allowreorder=None,
1204 1209 builddeltaheader=builddeltaheader,
1205 1210 manifestsend=b'',
1206 1211 sendtreemanifests=False,
1207 1212 bundlecaps=bundlecaps,
1208 1213 ellipses=ellipses,
1209 1214 shallow=shallow,
1210 ellipsisroots=ellipsisroots)
1215 ellipsisroots=ellipsisroots,
1216 fullnodes=fullnodes)
1211 1217
1212 1218 def _makecg2packer(repo, filematcher, bundlecaps, ellipses=False,
1213 shallow=False, ellipsisroots=None):
1219 shallow=False, ellipsisroots=None, fullnodes=None):
1214 1220 builddeltaheader = lambda d: _CHANGEGROUPV2_DELTA_HEADER.pack(
1215 1221 d.node, d.p1node, d.p2node, d.basenode, d.linknode)
1216 1222
1217 1223 # Since generaldelta is directly supported by cg2, reordering
1218 1224 # generally doesn't help, so we disable it by default (treating
1219 1225 # bundle.reorder=auto just like bundle.reorder=False).
1220 1226 return cgpacker(repo, filematcher, b'02',
1221 1227 useprevdelta=False,
1222 1228 allowreorder=False,
1223 1229 builddeltaheader=builddeltaheader,
1224 1230 manifestsend=b'',
1225 1231 sendtreemanifests=False,
1226 1232 bundlecaps=bundlecaps,
1227 1233 ellipses=ellipses,
1228 1234 shallow=shallow,
1229 ellipsisroots=ellipsisroots)
1235 ellipsisroots=ellipsisroots,
1236 fullnodes=fullnodes)
1230 1237
1231 1238 def _makecg3packer(repo, filematcher, bundlecaps, ellipses=False,
1232 shallow=False, ellipsisroots=None):
1239 shallow=False, ellipsisroots=None, fullnodes=None):
1233 1240 builddeltaheader = lambda d: _CHANGEGROUPV3_DELTA_HEADER.pack(
1234 1241 d.node, d.p1node, d.p2node, d.basenode, d.linknode, d.flags)
1235 1242
1236 1243 return cgpacker(repo, filematcher, b'03',
1237 1244 useprevdelta=False,
1238 1245 allowreorder=False,
1239 1246 builddeltaheader=builddeltaheader,
1240 1247 manifestsend=closechunk(),
1241 1248 sendtreemanifests=True,
1242 1249 bundlecaps=bundlecaps,
1243 1250 ellipses=ellipses,
1244 1251 shallow=shallow,
1245 ellipsisroots=ellipsisroots)
1252 ellipsisroots=ellipsisroots,
1253 fullnodes=fullnodes)
1246 1254
1247 1255 _packermap = {'01': (_makecg1packer, cg1unpacker),
1248 1256 # cg2 adds support for exchanging generaldelta
1249 1257 '02': (_makecg2packer, cg2unpacker),
1250 1258 # cg3 adds support for exchanging revlog flags and treemanifests
1251 1259 '03': (_makecg3packer, cg3unpacker),
1252 1260 }
1253 1261
1254 1262 def allsupportedversions(repo):
1255 1263 versions = set(_packermap.keys())
1256 1264 if not (repo.ui.configbool('experimental', 'changegroup3') or
1257 1265 repo.ui.configbool('experimental', 'treemanifest') or
1258 1266 'treemanifest' in repo.requirements):
1259 1267 versions.discard('03')
1260 1268 return versions
1261 1269
1262 1270 # Changegroup versions that can be applied to the repo
1263 1271 def supportedincomingversions(repo):
1264 1272 return allsupportedversions(repo)
1265 1273
1266 1274 # Changegroup versions that can be created from the repo
1267 1275 def supportedoutgoingversions(repo):
1268 1276 versions = allsupportedversions(repo)
1269 1277 if 'treemanifest' in repo.requirements:
1270 1278 # Versions 01 and 02 support only flat manifests and it's just too
1271 1279 # expensive to convert between the flat manifest and tree manifest on
1272 1280 # the fly. Since tree manifests are hashed differently, all of history
1273 1281 # would have to be converted. Instead, we simply don't even pretend to
1274 1282 # support versions 01 and 02.
1275 1283 versions.discard('01')
1276 1284 versions.discard('02')
1277 1285 if repository.NARROW_REQUIREMENT in repo.requirements:
1278 1286 # Versions 01 and 02 don't support revlog flags, and we need to
1279 1287 # support that for stripping and unbundling to work.
1280 1288 versions.discard('01')
1281 1289 versions.discard('02')
1282 1290 if LFS_REQUIREMENT in repo.requirements:
1283 1291 # Versions 01 and 02 don't support revlog flags, and we need to
1284 1292 # mark LFS entries with REVIDX_EXTSTORED.
1285 1293 versions.discard('01')
1286 1294 versions.discard('02')
1287 1295
1288 1296 return versions
1289 1297
1290 1298 def localversion(repo):
1291 1299 # Finds the best version to use for bundles that are meant to be used
1292 1300 # locally, such as those from strip and shelve, and temporary bundles.
1293 1301 return max(supportedoutgoingversions(repo))
1294 1302
1295 1303 def safeversion(repo):
1296 1304 # Finds the smallest version that it's safe to assume clients of the repo
1297 1305 # will support. For example, all hg versions that support generaldelta also
1298 1306 # support changegroup 02.
1299 1307 versions = supportedoutgoingversions(repo)
1300 1308 if 'generaldelta' in repo.requirements:
1301 1309 versions.discard('01')
1302 1310 assert versions
1303 1311 return min(versions)
1304 1312
1305 1313 def getbundler(version, repo, bundlecaps=None, filematcher=None,
1306 ellipses=False, shallow=False, ellipsisroots=None):
1314 ellipses=False, shallow=False, ellipsisroots=None,
1315 fullnodes=None):
1307 1316 assert version in supportedoutgoingversions(repo)
1308 1317
1309 1318 if filematcher is None:
1310 1319 filematcher = matchmod.alwaysmatcher(repo.root, '')
1311 1320
1312 1321 if version == '01' and not filematcher.always():
1313 1322 raise error.ProgrammingError('version 01 changegroups do not support '
1314 1323 'sparse file matchers')
1315 1324
1316 1325 if ellipses and version in (b'01', b'02'):
1317 1326 raise error.Abort(
1318 1327 _('ellipsis nodes require at least cg3 on client and server, '
1319 1328 'but negotiated version %s') % version)
1320 1329
1321 1330 # Requested files could include files not in the local store. So
1322 1331 # filter those out.
1323 1332 filematcher = matchmod.intersectmatchers(repo.narrowmatch(),
1324 1333 filematcher)
1325 1334
1326 1335 fn = _packermap[version][0]
1327 1336 return fn(repo, filematcher, bundlecaps, ellipses=ellipses,
1328 shallow=shallow, ellipsisroots=ellipsisroots)
1337 shallow=shallow, ellipsisroots=ellipsisroots,
1338 fullnodes=fullnodes)
1329 1339
1330 1340 def getunbundler(version, fh, alg, extras=None):
1331 1341 return _packermap[version][1](fh, alg, extras=extras)
1332 1342
1333 1343 def _changegroupinfo(repo, nodes, source):
1334 1344 if repo.ui.verbose or source == 'bundle':
1335 1345 repo.ui.status(_("%d changesets found\n") % len(nodes))
1336 1346 if repo.ui.debugflag:
1337 1347 repo.ui.debug("list of changesets:\n")
1338 1348 for node in nodes:
1339 1349 repo.ui.debug("%s\n" % hex(node))
1340 1350
1341 1351 def makechangegroup(repo, outgoing, version, source, fastpath=False,
1342 1352 bundlecaps=None):
1343 1353 cgstream = makestream(repo, outgoing, version, source,
1344 1354 fastpath=fastpath, bundlecaps=bundlecaps)
1345 1355 return getunbundler(version, util.chunkbuffer(cgstream), None,
1346 1356 {'clcount': len(outgoing.missing) })
1347 1357
1348 1358 def makestream(repo, outgoing, version, source, fastpath=False,
1349 1359 bundlecaps=None, filematcher=None):
1350 1360 bundler = getbundler(version, repo, bundlecaps=bundlecaps,
1351 1361 filematcher=filematcher)
1352 1362
1353 1363 repo = repo.unfiltered()
1354 1364 commonrevs = outgoing.common
1355 1365 csets = outgoing.missing
1356 1366 heads = outgoing.missingheads
1357 1367 # We go through the fast path if we get told to, or if all (unfiltered)
1358 1368 # heads have been requested (since we then know that all linkrevs will
1359 1369 # be pulled by the client).
1360 1370 heads.sort()
1361 1371 fastpathlinkrev = fastpath or (
1362 1372 repo.filtername is None and heads == sorted(repo.heads()))
1363 1373
1364 1374 repo.hook('preoutgoing', throw=True, source=source)
1365 1375 _changegroupinfo(repo, csets, source)
1366 1376 return bundler.generate(commonrevs, csets, fastpathlinkrev, source)
1367 1377
1368 1378 def _addchangegroupfiles(repo, source, revmap, trp, expectedfiles, needfiles):
1369 1379 revisions = 0
1370 1380 files = 0
1371 1381 progress = repo.ui.makeprogress(_('files'), unit=_('files'),
1372 1382 total=expectedfiles)
1373 1383 for chunkdata in iter(source.filelogheader, {}):
1374 1384 files += 1
1375 1385 f = chunkdata["filename"]
1376 1386 repo.ui.debug("adding %s revisions\n" % f)
1377 1387 progress.increment()
1378 1388 fl = repo.file(f)
1379 1389 o = len(fl)
1380 1390 try:
1381 1391 deltas = source.deltaiter()
1382 1392 if not fl.addgroup(deltas, revmap, trp):
1383 1393 raise error.Abort(_("received file revlog group is empty"))
1384 1394 except error.CensoredBaseError as e:
1385 1395 raise error.Abort(_("received delta base is censored: %s") % e)
1386 1396 revisions += len(fl) - o
1387 1397 if f in needfiles:
1388 1398 needs = needfiles[f]
1389 1399 for new in pycompat.xrange(o, len(fl)):
1390 1400 n = fl.node(new)
1391 1401 if n in needs:
1392 1402 needs.remove(n)
1393 1403 else:
1394 1404 raise error.Abort(
1395 1405 _("received spurious file revlog entry"))
1396 1406 if not needs:
1397 1407 del needfiles[f]
1398 1408 progress.complete()
1399 1409
1400 1410 for f, needs in needfiles.iteritems():
1401 1411 fl = repo.file(f)
1402 1412 for n in needs:
1403 1413 try:
1404 1414 fl.rev(n)
1405 1415 except error.LookupError:
1406 1416 raise error.Abort(
1407 1417 _('missing file data for %s:%s - run hg verify') %
1408 1418 (f, hex(n)))
1409 1419
1410 1420 return revisions, files
1411 1421
1412 1422 def _packellipsischangegroup(repo, common, match, relevant_nodes,
1413 1423 ellipsisroots, visitnodes, depth, source, version):
1414 1424 # The set of relevant (non-ellipsis) nodes is handed to the packer up
1415 1425 # front as ``fullnodes``. If a linknode isn't in that set, the packer
1416 1426 # knows it has an ellipsis node and defers sending that node's data;
1417 1427 # pending ellipsis nodes are detected and flushed at close time.
1419 1429 packer = getbundler(version, repo, filematcher=match,
1420 1430 ellipses=True,
1421 1431 shallow=depth is not None,
1422 ellipsisroots=ellipsisroots)
1423 # Give the packer the list of nodes which should not be
1424 # ellipsis nodes. We store this rather than the set of nodes
1425 # that should be an ellipsis because for very large histories
1426 # we expect this to be significantly smaller.
1427 packer._full_nodes = relevant_nodes
1432 ellipsisroots=ellipsisroots,
1433 fullnodes=relevant_nodes)
1428 1434
1429 1435 return packer.generate(common, visitnodes, False, source)
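With this change a caller passes the full-nodes set through getbundler() rather than poking a private attribute afterwards, as _packellipsischangegroup() now demonstrates. A hypothetical standalone sketch (repo, match, roots, relevantnodes, common and visitnodes are assumed to exist):

    # Build an ellipsis-capable cg3 packer and generate the stream.
    packer = getbundler('03', repo, filematcher=match,
                        ellipses=True, shallow=False,
                        ellipsisroots=roots, fullnodes=relevantnodes)
    stream = packer.generate(common, visitnodes, False, 'pull')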