changegroup: specify ellipses mode explicitly...
Gregory Szorc
r38944:1469584a default
@@ -1,1423 +1,1429 b''
1 1 # changegroup.py - Mercurial changegroup manipulation functions
2 2 #
3 3 # Copyright 2006 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import os
11 11 import struct
12 12 import weakref
13 13
14 14 from .i18n import _
15 15 from .node import (
16 16 hex,
17 17 nullid,
18 18 nullrev,
19 19 short,
20 20 )
21 21
22 22 from .thirdparty import (
23 23 attr,
24 24 )
25 25
26 26 from . import (
27 27 dagutil,
28 28 error,
29 29 manifest,
30 30 match as matchmod,
31 31 mdiff,
32 32 phases,
33 33 pycompat,
34 34 repository,
35 35 revlog,
36 36 util,
37 37 )
38 38
39 39 from .utils import (
40 40 stringutil,
41 41 )
42 42
43 43 _CHANGEGROUPV1_DELTA_HEADER = struct.Struct("20s20s20s20s")
44 44 _CHANGEGROUPV2_DELTA_HEADER = struct.Struct("20s20s20s20s20s")
45 45 _CHANGEGROUPV3_DELTA_HEADER = struct.Struct(">20s20s20s20s20sH")
46 46
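The three fixed-width delta headers above differ only in the fields each version carries: cg1 packs node, p1, p2 and linknode; cg2 adds a delta base; cg3 appends a big-endian 2-byte flags field. Their sizes follow directly from the struct formats, as this quick check against the module shows:

    assert _CHANGEGROUPV1_DELTA_HEADER.size == 80   # 4 x 20-byte nodes
    assert _CHANGEGROUPV2_DELTA_HEADER.size == 100  # + 20-byte delta base
    assert _CHANGEGROUPV3_DELTA_HEADER.size == 102  # + 2-byte flags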
47 47 LFS_REQUIREMENT = 'lfs'
48 48
49 49 readexactly = util.readexactly
50 50
51 51 def getchunk(stream):
52 52 """return the next chunk from stream as a string"""
53 53 d = readexactly(stream, 4)
54 54 l = struct.unpack(">l", d)[0]
55 55 if l <= 4:
56 56 if l:
57 57 raise error.Abort(_("invalid chunk length %d") % l)
58 58 return ""
59 59 return readexactly(stream, l - 4)
60 60
61 61 def chunkheader(length):
62 62 """return a changegroup chunk header (string)"""
63 63 return struct.pack(">l", length + 4)
64 64
65 65 def closechunk():
66 66 """return a changegroup chunk header (string) for a zero-length chunk"""
67 67 return struct.pack(">l", 0)
68 68
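Together, getchunk(), chunkheader() and closechunk() define the chunk framing used throughout: a 4-byte big-endian length that counts itself, then the payload, with a zero length terminating a group. A minimal standalone sketch of the same framing (plain Python, independent of this module; frame/unframe are illustrative names):

    import struct

    def frame(payload):
        # The length prefix counts its own 4 bytes plus the payload.
        return struct.pack(">l", len(payload) + 4) + payload

    def unframe(buf, pos=0):
        # Returns (payload, newpos); an empty payload marks a terminator.
        (l,) = struct.unpack_from(">l", buf, pos)
        if l == 0:
            return b'', pos + 4
        return buf[pos + 4:pos + l], pos + l

    stream = frame(b'hello') + struct.pack(">l", 0)
    payload, pos = unframe(stream)
    assert payload == b'hello'
    assert unframe(stream, pos)[0] == b''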
69 69 def writechunks(ui, chunks, filename, vfs=None):
70 70 """Write chunks to a file and return its filename.
71 71
72 72 The stream is assumed to be a bundle file.
73 73 Existing files will not be overwritten.
74 74 If no filename is specified, a temporary file is created.
75 75 """
76 76 fh = None
77 77 cleanup = None
78 78 try:
79 79 if filename:
80 80 if vfs:
81 81 fh = vfs.open(filename, "wb")
82 82 else:
83 83 # Increase default buffer size because default is usually
84 84 # small (4k is common on Linux).
85 85 fh = open(filename, "wb", 131072)
86 86 else:
87 87 fd, filename = pycompat.mkstemp(prefix="hg-bundle-", suffix=".hg")
88 88 fh = os.fdopen(fd, r"wb")
89 89 cleanup = filename
90 90 for c in chunks:
91 91 fh.write(c)
92 92 cleanup = None
93 93 return filename
94 94 finally:
95 95 if fh is not None:
96 96 fh.close()
97 97 if cleanup is not None:
98 98 if filename and vfs:
99 99 vfs.unlink(cleanup)
100 100 else:
101 101 os.unlink(cleanup)
102 102
103 103 class cg1unpacker(object):
104 104 """Unpacker for cg1 changegroup streams.
105 105
106 106 A changegroup unpacker handles the framing of the revision data in
107 107 the wire format. Most consumers will want to use the apply()
108 108 method to add the changes from the changegroup to a repository.
109 109
110 110 If you're forwarding a changegroup unmodified to another consumer,
111 111 use getchunks(), which returns an iterator of changegroup
112 112 chunks. This is mostly useful for cases where you need to know the
113 113 data stream has ended by observing the end of the changegroup.
114 114
115 115 deltachunk() is useful only if you're applying delta data. Most
116 116 consumers should prefer apply() instead.
117 117
118 118 A few other public methods exist. Those are used only for
119 119 bundlerepo and some debug commands - their use is discouraged.
120 120 """
121 121 deltaheader = _CHANGEGROUPV1_DELTA_HEADER
122 122 deltaheadersize = deltaheader.size
123 123 version = '01'
124 124 _grouplistcount = 1 # One list of files after the manifests
125 125
126 126 def __init__(self, fh, alg, extras=None):
127 127 if alg is None:
128 128 alg = 'UN'
129 129 if alg not in util.compengines.supportedbundletypes:
130 130 raise error.Abort(_('unknown stream compression type: %s')
131 131 % alg)
132 132 if alg == 'BZ':
133 133 alg = '_truncatedBZ'
134 134
135 135 compengine = util.compengines.forbundletype(alg)
136 136 self._stream = compengine.decompressorreader(fh)
137 137 self._type = alg
138 138 self.extras = extras or {}
139 139 self.callback = None
140 140
141 141 # These methods (compressed, read, seek, tell) all appear to only
142 142 # be used by bundlerepo, but it's a little hard to tell.
143 143 def compressed(self):
144 144 return self._type is not None and self._type != 'UN'
145 145 def read(self, l):
146 146 return self._stream.read(l)
147 147 def seek(self, pos):
148 148 return self._stream.seek(pos)
149 149 def tell(self):
150 150 return self._stream.tell()
151 151 def close(self):
152 152 return self._stream.close()
153 153
154 154 def _chunklength(self):
155 155 d = readexactly(self._stream, 4)
156 156 l = struct.unpack(">l", d)[0]
157 157 if l <= 4:
158 158 if l:
159 159 raise error.Abort(_("invalid chunk length %d") % l)
160 160 return 0
161 161 if self.callback:
162 162 self.callback()
163 163 return l - 4
164 164
165 165 def changelogheader(self):
166 166 """v10 does not have a changelog header chunk"""
167 167 return {}
168 168
169 169 def manifestheader(self):
170 170 """v10 does not have a manifest header chunk"""
171 171 return {}
172 172
173 173 def filelogheader(self):
174 174 """return the header of the filelogs chunk, v10 only has the filename"""
175 175 l = self._chunklength()
176 176 if not l:
177 177 return {}
178 178 fname = readexactly(self._stream, l)
179 179 return {'filename': fname}
180 180
181 181 def _deltaheader(self, headertuple, prevnode):
182 182 node, p1, p2, cs = headertuple
183 183 if prevnode is None:
184 184 deltabase = p1
185 185 else:
186 186 deltabase = prevnode
187 187 flags = 0
188 188 return node, p1, p2, deltabase, cs, flags
189 189
190 190 def deltachunk(self, prevnode):
191 191 l = self._chunklength()
192 192 if not l:
193 193 return {}
194 194 headerdata = readexactly(self._stream, self.deltaheadersize)
195 195 header = self.deltaheader.unpack(headerdata)
196 196 delta = readexactly(self._stream, l - self.deltaheadersize)
197 197 node, p1, p2, deltabase, cs, flags = self._deltaheader(header, prevnode)
198 198 return (node, p1, p2, cs, deltabase, delta, flags)
199 199
200 200 def getchunks(self):
201 201 """returns all the chunks contained in the bundle
202 202
203 203 Used when you need to forward the binary stream to a file or another
204 204 network API. To do so, it parses the changegroup data; otherwise it
205 205 would block on an sshrepo, since it doesn't know where the stream ends.
206 206 """
207 207 # For changegroup 1 and 2, we expect 3 parts: changelog, manifestlog,
208 208 # and a list of filelogs. For changegroup 3, we expect 4 parts:
209 209 # changelog, manifestlog, a list of tree manifestlogs, and a list of
210 210 # filelogs.
211 211 #
212 212 # Changelog and manifestlog parts are terminated with empty chunks. The
213 213 # tree and file parts are a list of entry sections. Each entry section
214 214 # is a series of chunks terminating in an empty chunk. The list of these
215 215 # entry sections is terminated in yet another empty chunk, so we know
216 216 # we've reached the end of the tree/file list when we reach an empty
217 217 # chunk that was preceded by no non-empty chunks.
218 218
219 219 parts = 0
220 220 while parts < 2 + self._grouplistcount:
221 221 noentries = True
222 222 while True:
223 223 chunk = getchunk(self)
224 224 if not chunk:
225 225 # The first two empty chunks represent the end of the
226 226 # changelog and the manifestlog portions. The remaining
227 227 # empty chunks represent either A) the end of individual
228 228 # tree or file entries in the file list, or B) the end of
229 229 # the entire list. It's the end of the entire list if there
230 230 # were no entries (i.e. noentries is True).
231 231 if parts < 2:
232 232 parts += 1
233 233 elif noentries:
234 234 parts += 1
235 235 break
236 236 noentries = False
237 237 yield chunkheader(len(chunk))
238 238 pos = 0
239 239 while pos < len(chunk):
240 240 next = pos + 2**20
241 241 yield chunk[pos:next]
242 242 pos = next
243 243 yield closechunk()
244 244
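getchunks() pairs naturally with writechunks() above whenever a bundle should be spooled rather than applied. A hedged sketch (spoolchangegroup is a hypothetical helper; cg is an unpacker instance and ui a Mercurial ui object):

    def spoolchangegroup(ui, cg):
        # getchunks() already yields the raw framed byte stream,
        # terminators included, so no re-framing is needed. Passing
        # filename=None makes writechunks() create a temp file and
        # return its path.
        return writechunks(ui, cg.getchunks(), None)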
245 245 def _unpackmanifests(self, repo, revmap, trp, prog):
246 246 self.callback = prog.increment
247 247 # no need to check for empty manifest group here:
248 248 # if the result of the merge of 1 and 2 is the same in 3 and 4,
249 249 # no new manifest will be created and the manifest group will
250 250 # be empty during the pull
251 251 self.manifestheader()
252 252 deltas = self.deltaiter()
253 253 repo.manifestlog.addgroup(deltas, revmap, trp)
254 254 prog.complete()
255 255 self.callback = None
256 256
257 257 def apply(self, repo, tr, srctype, url, targetphase=phases.draft,
258 258 expectedtotal=None):
259 259 """Add the changegroup returned by source.read() to this repo.
260 260 srctype is a string like 'push', 'pull', or 'unbundle'. url is
261 261 the URL of the repo where this changegroup is coming from.
262 262
263 263 Return an integer summarizing the change to this repo:
264 264 - nothing changed or no source: 0
265 265 - more heads than before: 1+added heads (2..n)
266 266 - fewer heads than before: -1-removed heads (-2..-n)
267 267 - number of heads stays the same: 1
268 268 """
269 269 repo = repo.unfiltered()
270 270 def csmap(x):
271 271 repo.ui.debug("add changeset %s\n" % short(x))
272 272 return len(cl)
273 273
274 274 def revmap(x):
275 275 return cl.rev(x)
276 276
277 277 changesets = files = revisions = 0
278 278
279 279 try:
280 280 # The transaction may already carry source information. In that
281 281 # case we use the top-level data. We overwrite the arguments
282 282 # because we need to use the top-level values (if they exist)
283 283 # in this function.
284 284 srctype = tr.hookargs.setdefault('source', srctype)
285 285 url = tr.hookargs.setdefault('url', url)
286 286 repo.hook('prechangegroup',
287 287 throw=True, **pycompat.strkwargs(tr.hookargs))
288 288
289 289 # write changelog data to temp files so concurrent readers
290 290 # will not see an inconsistent view
291 291 cl = repo.changelog
292 292 cl.delayupdate(tr)
293 293 oldheads = set(cl.heads())
294 294
295 295 trp = weakref.proxy(tr)
296 296 # pull off the changeset group
297 297 repo.ui.status(_("adding changesets\n"))
298 298 clstart = len(cl)
299 299 progress = repo.ui.makeprogress(_('changesets'), unit=_('chunks'),
300 300 total=expectedtotal)
301 301 self.callback = progress.increment
302 302
303 303 efiles = set()
304 304 def onchangelog(cl, node):
305 305 efiles.update(cl.readfiles(node))
306 306
307 307 self.changelogheader()
308 308 deltas = self.deltaiter()
309 309 cgnodes = cl.addgroup(deltas, csmap, trp, addrevisioncb=onchangelog)
310 310 efiles = len(efiles)
311 311
312 312 if not cgnodes:
313 313 repo.ui.develwarn('applied empty changegroup',
314 314 config='warn-empty-changegroup')
315 315 clend = len(cl)
316 316 changesets = clend - clstart
317 317 progress.complete()
318 318 self.callback = None
319 319
320 320 # pull off the manifest group
321 321 repo.ui.status(_("adding manifests\n"))
322 322 # We know that we'll never have more manifests than we had
323 323 # changesets.
324 324 progress = repo.ui.makeprogress(_('manifests'), unit=_('chunks'),
325 325 total=changesets)
326 326 self._unpackmanifests(repo, revmap, trp, progress)
327 327
328 328 needfiles = {}
329 329 if repo.ui.configbool('server', 'validate'):
330 330 cl = repo.changelog
331 331 ml = repo.manifestlog
332 332 # validate incoming csets have their manifests
333 333 for cset in pycompat.xrange(clstart, clend):
334 334 mfnode = cl.changelogrevision(cset).manifest
335 335 mfest = ml[mfnode].readdelta()
336 336 # store file cgnodes we must see
337 337 for f, n in mfest.iteritems():
338 338 needfiles.setdefault(f, set()).add(n)
339 339
340 340 # process the files
341 341 repo.ui.status(_("adding file changes\n"))
342 342 newrevs, newfiles = _addchangegroupfiles(
343 343 repo, self, revmap, trp, efiles, needfiles)
344 344 revisions += newrevs
345 345 files += newfiles
346 346
347 347 deltaheads = 0
348 348 if oldheads:
349 349 heads = cl.heads()
350 350 deltaheads = len(heads) - len(oldheads)
351 351 for h in heads:
352 352 if h not in oldheads and repo[h].closesbranch():
353 353 deltaheads -= 1
354 354 htext = ""
355 355 if deltaheads:
356 356 htext = _(" (%+d heads)") % deltaheads
357 357
358 358 repo.ui.status(_("added %d changesets"
359 359 " with %d changes to %d files%s\n")
360 360 % (changesets, revisions, files, htext))
361 361 repo.invalidatevolatilesets()
362 362
363 363 if changesets > 0:
364 364 if 'node' not in tr.hookargs:
365 365 tr.hookargs['node'] = hex(cl.node(clstart))
366 366 tr.hookargs['node_last'] = hex(cl.node(clend - 1))
367 367 hookargs = dict(tr.hookargs)
368 368 else:
369 369 hookargs = dict(tr.hookargs)
370 370 hookargs['node'] = hex(cl.node(clstart))
371 371 hookargs['node_last'] = hex(cl.node(clend - 1))
372 372 repo.hook('pretxnchangegroup',
373 373 throw=True, **pycompat.strkwargs(hookargs))
374 374
375 375 added = [cl.node(r) for r in pycompat.xrange(clstart, clend)]
376 376 phaseall = None
377 377 if srctype in ('push', 'serve'):
378 378 # Old servers cannot push the boundary themselves.
379 379 # New servers won't push the boundary if the changeset already
380 380 # exists locally as secret.
381 381 #
382 382 # We should not use 'added' here but the list of all changes in
383 383 # the bundle.
384 384 if repo.publishing():
385 385 targetphase = phaseall = phases.public
386 386 else:
387 387 # closer target phase computation
388 388
389 389 # These changesets have been pushed from the
390 390 # outside and their phases are going to be pushed
391 391 # alongside. Therefore `targetphase` is
392 392 # ignored.
393 393 targetphase = phaseall = phases.draft
394 394 if added:
395 395 phases.registernew(repo, tr, targetphase, added)
396 396 if phaseall is not None:
397 397 phases.advanceboundary(repo, tr, phaseall, cgnodes)
398 398
399 399 if changesets > 0:
400 400
401 401 def runhooks():
402 402 # These hooks run when the lock releases, not when the
403 403 # transaction closes. So it's possible for the changelog
404 404 # to have changed since we last saw it.
405 405 if clstart >= len(repo):
406 406 return
407 407
408 408 repo.hook("changegroup", **pycompat.strkwargs(hookargs))
409 409
410 410 for n in added:
411 411 args = hookargs.copy()
412 412 args['node'] = hex(n)
413 413 del args['node_last']
414 414 repo.hook("incoming", **pycompat.strkwargs(args))
415 415
416 416 newheads = [h for h in repo.heads()
417 417 if h not in oldheads]
418 418 repo.ui.log("incoming",
419 419 "%d incoming changes - new heads: %s\n",
420 420 len(added),
421 421 ', '.join([hex(c[:6]) for c in newheads]))
422 422
423 423 tr.addpostclose('changegroup-runhooks-%020i' % clstart,
424 424 lambda tr: repo._afterlock(runhooks))
425 425 finally:
426 426 repo.ui.flush()
427 427 # never return 0 here:
428 428 if deltaheads < 0:
429 429 ret = deltaheads - 1
430 430 else:
431 431 ret = deltaheads + 1
432 432 return ret
433 433
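Per the docstring, apply() never returns 0 for a successful application: the sign encodes whether heads were added or removed, offset by one. A hedged helper showing how a caller could decode the value (illustrative only):

    def describeheads(ret):
        # Inverse of the encoding at the end of apply().
        if ret == 0:
            return 'nothing changed'
        if ret == 1:
            return 'head count unchanged'
        if ret > 1:
            return '%d head(s) added' % (ret - 1)
        return '%d head(s) removed' % (-ret - 1)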
434 434 def deltaiter(self):
435 435 """
436 436 returns an iterator of the deltas in this changegroup
437 437
438 438 Useful for passing to the underlying storage system to be stored.
439 439 """
440 440 chain = None
441 441 for chunkdata in iter(lambda: self.deltachunk(chain), {}):
442 442 # Chunkdata: (node, p1, p2, cs, deltabase, delta, flags)
443 443 yield chunkdata
444 444 chain = chunkdata[0]
445 445
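Each tuple from deltaiter() follows the layout noted in the comment, with the delta base of one chunk defaulting to the node of the previous one (the chain). A hedged consumer sketch (listnodes is an illustrative name; the unpacker is assumed to be positioned at the start of a group, e.g. after changelogheader()):

    def listnodes(unpacker):
        # Collect the nodes introduced by one delta group.
        nodes = []
        for node, p1, p2, cs, deltabase, delta, flags in unpacker.deltaiter():
            nodes.append(node)
        return nodes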
446 446 class cg2unpacker(cg1unpacker):
447 447 """Unpacker for cg2 streams.
448 448
449 449 cg2 streams add support for generaldelta, so the delta header
450 450 format is slightly different. All other features about the data
451 451 remain the same.
452 452 """
453 453 deltaheader = _CHANGEGROUPV2_DELTA_HEADER
454 454 deltaheadersize = deltaheader.size
455 455 version = '02'
456 456
457 457 def _deltaheader(self, headertuple, prevnode):
458 458 node, p1, p2, deltabase, cs = headertuple
459 459 flags = 0
460 460 return node, p1, p2, deltabase, cs, flags
461 461
462 462 class cg3unpacker(cg2unpacker):
463 463 """Unpacker for cg3 streams.
464 464
465 465 cg3 streams add support for exchanging treemanifests and revlog
466 466 flags. It adds the revlog flags to the delta header and an empty chunk
467 467 separating manifests and files.
468 468 """
469 469 deltaheader = _CHANGEGROUPV3_DELTA_HEADER
470 470 deltaheadersize = deltaheader.size
471 471 version = '03'
472 472 _grouplistcount = 2 # One list of manifests and one list of files
473 473
474 474 def _deltaheader(self, headertuple, prevnode):
475 475 node, p1, p2, deltabase, cs, flags = headertuple
476 476 return node, p1, p2, deltabase, cs, flags
477 477
478 478 def _unpackmanifests(self, repo, revmap, trp, prog):
479 479 super(cg3unpacker, self)._unpackmanifests(repo, revmap, trp, prog)
480 480 for chunkdata in iter(self.filelogheader, {}):
481 481 # If we get here, there are directory manifests in the changegroup
482 482 d = chunkdata["filename"]
483 483 repo.ui.debug("adding %s revisions\n" % d)
484 484 dirlog = repo.manifestlog._revlog.dirlog(d)
485 485 deltas = self.deltaiter()
486 486 if not dirlog.addgroup(deltas, revmap, trp):
487 487 raise error.Abort(_("received dir revlog group is empty"))
488 488
489 489 class headerlessfixup(object):
490 490 def __init__(self, fh, h):
491 491 self._h = h
492 492 self._fh = fh
493 493 def read(self, n):
494 494 if self._h:
495 495 d, self._h = self._h[:n], self._h[n:]
496 496 if len(d) < n:
497 497 d += readexactly(self._fh, n - len(d))
498 498 return d
499 499 return readexactly(self._fh, n)
500 500
501 501 @attr.s(slots=True, frozen=True)
502 502 class revisiondelta(object):
503 503 """Describes a delta entry in a changegroup.
504 504
505 505 Captured data is sufficient to serialize the delta into multiple
506 506 formats.
507 507 """
508 508 # 20 byte node of this revision.
509 509 node = attr.ib()
510 510 # 20 byte nodes of parent revisions.
511 511 p1node = attr.ib()
512 512 p2node = attr.ib()
513 513 # 20 byte node of node this delta is against.
514 514 basenode = attr.ib()
515 515 # 20 byte node of changeset revision this delta is associated with.
516 516 linknode = attr.ib()
517 517 # 2 bytes of flags to apply to revision data.
518 518 flags = attr.ib()
519 519 # Iterable of chunks holding raw delta data.
520 520 deltachunks = attr.ib()
521 521
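Because the class is frozen, all fields are supplied at construction time. A hedged example with placeholder 20-byte nodes (real callers fill these from a revlog, as _revisiondeltanormal() does below):

    delta = revisiondelta(
        node=b'\x11' * 20,
        p1node=b'\x22' * 20,
        p2node=nullid,           # no second parent
        basenode=b'\x22' * 20,   # delta computed against p1
        linknode=b'\x33' * 20,
        flags=0,
        deltachunks=(b'', b'<raw delta bytes>'),
    )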
522 522 class cgpacker(object):
523 523 def __init__(self, repo, filematcher, version, allowreorder,
524 524 useprevdelta, builddeltaheader, manifestsend,
525 sendtreemanifests, bundlecaps=None, shallow=False,
526 ellipsisroots=None):
525 sendtreemanifests, bundlecaps=None, ellipses=False,
526 shallow=False, ellipsisroots=None):
527 527 """Given a source repo, construct a bundler.
528 528
529 529 filematcher is a matcher that matches on files to include in the
530 530 changegroup. Used to facilitate sparse changegroups.
531 531
532 532 allowreorder controls whether reordering of revisions is allowed.
533 533 This value is used when ``bundle.reorder`` is ``auto`` or isn't
534 534 set.
535 535
536 536 useprevdelta controls whether revisions should always delta against
537 537 the previous revision in the changegroup.
538 538
539 539 builddeltaheader is a callable that constructs the header for a group
540 540 delta.
541 541
542 542 manifestsend is a chunk to send after manifests have been fully emitted.
543 543
544 544 sendtreemanifests indicates whether tree manifests should be emitted.
545 545
546 ellipses indicates whether ellipsis serving mode is enabled.
547
546 548 bundlecaps is optional and can be used to specify the set of
547 549 capabilities which can be used to build the bundle. While bundlecaps is
548 550 unused in core Mercurial, extensions rely on this feature to communicate
549 551 capabilities to customize the changegroup packer.
550 552
551 553 shallow indicates whether shallow data might be sent. The packer may
552 554 need to pack file contents not introduced by the changes being packed.
553 555 """
554 556 assert filematcher
555 557 self._filematcher = filematcher
556 558
557 559 self.version = version
558 560 self._useprevdelta = useprevdelta
559 561 self._builddeltaheader = builddeltaheader
560 562 self._manifestsend = manifestsend
561 563 self._sendtreemanifests = sendtreemanifests
564 self._ellipses = ellipses
562 565
563 566 # Set of capabilities we can use to build the bundle.
564 567 if bundlecaps is None:
565 568 bundlecaps = set()
566 569 self._bundlecaps = bundlecaps
567 570 self._isshallow = shallow
568 571
569 572 # Maps ellipsis revs to their roots at the changelog level.
570 573 self._precomputedellipsis = ellipsisroots
571 574
572 575 # experimental config: bundle.reorder
573 576 reorder = repo.ui.config('bundle', 'reorder')
574 577 if reorder == 'auto':
575 578 self._reorder = allowreorder
576 579 else:
577 580 self._reorder = stringutil.parsebool(reorder)
578 581
579 582 self._repo = repo
580 583
581 584 if self._repo.ui.verbose and not self._repo.ui.debugflag:
582 585 self._verbosenote = self._repo.ui.note
583 586 else:
584 587 self._verbosenote = lambda s: None
585 588
586 589 # TODO the functionality keyed off of this should probably be
587 590 # controlled via arguments to group() that influence behavior.
588 591 self._changelogdone = False
589 592
590 593 # Maps CL revs to per-revlog revisions. Cleared in _close() at
591 594 # the end of each group.
592 595 self._clrevtolocalrev = {}
593 596 self._nextclrevtolocalrev = {}
594 597
595 598 # Maps changelog nodes to changelog revs. Filled in once
596 599 # during changelog stage and then left unmodified.
597 600 self._clnodetorev = {}
598 601
599 602 def _close(self):
600 603 # Ellipses serving mode.
601 604 self._clrevtolocalrev.clear()
602 605 if self._nextclrevtolocalrev:
603 606 self._clrevtolocalrev = self._nextclrevtolocalrev
604 607 self._nextclrevtolocalrev = {}
605 608 self._changelogdone = True
606 609
607 610 return closechunk()
608 611
609 612 def _fileheader(self, fname):
610 613 return chunkheader(len(fname)) + fname
611 614
612 615 # Extracted both for clarity and for overriding in extensions.
613 616 def _sortgroup(self, store, nodelist, lookup):
614 617 """Sort nodes for change group and turn them into revnums."""
615 618 # Ellipses serving mode.
616 619 #
617 620 # In a perfect world, we'd generate better ellipsis-ified graphs
618 621 # for non-changelog revlogs. In practice, we haven't started doing
619 622 # that yet, so the resulting DAGs for the manifestlog and filelogs
620 623 # are actually full of bogus parentage on all the ellipsis
621 624 # nodes. This has the side effect that, while the contents are
622 625 # correct, the individual DAGs might be completely out of whack in
623 626 # a case like 882681bc3166 and its ancestors (back about 10
624 627 # revisions or so) in the main hg repo.
625 628 #
626 629 # The one invariant we *know* holds is that the new (potentially
627 630 # bogus) DAG shape will be valid if we order the nodes in the
628 631 # order that they're introduced in dramatis personae by the
629 632 # changelog, so what we do is we sort the non-changelog histories
630 633 # by the order in which they are used by the changelog.
631 if util.safehasattr(self, '_full_nodes') and self._clnodetorev:
634 if self._ellipses and self._clnodetorev:
632 635 key = lambda n: self._clnodetorev[lookup(n)]
633 636 return [store.rev(n) for n in sorted(nodelist, key=key)]
634 637
635 638 # for generaldelta revlogs, we linearize the revs; this will both be
636 639 # much quicker and generate a much smaller bundle
637 640 if (store._generaldelta and self._reorder is None) or self._reorder:
638 641 dag = dagutil.revlogdag(store)
639 642 return dag.linearize(set(store.rev(n) for n in nodelist))
640 643 else:
641 644 return sorted([store.rev(n) for n in nodelist])
642 645
643 646 def group(self, nodelist, store, lookup, units=None):
644 647 """Calculate a delta group, yielding a sequence of changegroup chunks
645 648 (strings).
646 649
647 650 Given a list of changeset revs, return a set of deltas and
648 651 metadata corresponding to nodes. The first delta is
649 652 first parent(nodelist[0]) -> nodelist[0], the receiver is
650 653 guaranteed to have this parent as it has all history before
651 654 these changesets. In the case firstparent is nullrev the
652 655 changegroup starts with a full revision.
653 656
654 657 If units is not None, progress detail will be generated, units specifies
655 658 the type of revlog that is touched (changelog, manifest, etc.).
656 659 """
657 660 # if we don't have any revisions touched by these changesets, bail
658 661 if len(nodelist) == 0:
659 662 yield self._close()
660 663 return
661 664
662 665 revs = self._sortgroup(store, nodelist, lookup)
663 666
664 667 # add the parent of the first rev
665 668 p = store.parentrevs(revs[0])[0]
666 669 revs.insert(0, p)
667 670
668 671 # build deltas
669 672 progress = None
670 673 if units is not None:
671 674 progress = self._repo.ui.makeprogress(_('bundling'), unit=units,
672 675 total=(len(revs) - 1))
673 676 for r in pycompat.xrange(len(revs) - 1):
674 677 if progress:
675 678 progress.update(r + 1)
676 679 prev, curr = revs[r], revs[r + 1]
677 680 linknode = lookup(store.node(curr))
678 681 for c in self._revchunk(store, curr, prev, linknode):
679 682 yield c
680 683
681 684 if progress:
682 685 progress.complete()
683 686 yield self._close()
684 687
685 688 # filter any nodes that claim to be part of the known set
686 689 def _prune(self, store, missing, commonrevs):
687 690 # TODO this violates storage abstraction for manifests.
688 691 if isinstance(store, manifest.manifestrevlog):
689 692 if not self._filematcher.visitdir(store._dir[:-1] or '.'):
690 693 return []
691 694
692 695 rr, rl = store.rev, store.linkrev
693 696 return [n for n in missing if rl(rr(n)) not in commonrevs]
694 697
695 698 def _packmanifests(self, dir, mfnodes, lookuplinknode):
696 699 """Pack flat manifests into a changegroup stream."""
697 700 assert not dir
698 701 for chunk in self.group(mfnodes, self._repo.manifestlog._revlog,
699 702 lookuplinknode, units=_('manifests')):
700 703 yield chunk
701 704
702 705 def _packtreemanifests(self, dir, mfnodes, lookuplinknode):
703 706 """Version of _packmanifests that operates on directory manifests.
704 707
705 708 Encodes the directory name in the output so multiple manifests
706 709 can be sent.
707 710 """
708 711 assert self.version == b'03'
709 712
710 713 if dir:
711 714 yield self._fileheader(dir)
712 715
713 716 # TODO violates storage abstractions by assuming revlogs.
714 717 dirlog = self._repo.manifestlog._revlog.dirlog(dir)
715 718 for chunk in self.group(mfnodes, dirlog, lookuplinknode,
716 719 units=_('manifests')):
717 720 yield chunk
718 721
719 722 def generate(self, commonrevs, clnodes, fastpathlinkrev, source):
720 723 '''yield a sequence of changegroup chunks (strings)'''
721 724 repo = self._repo
722 725 cl = repo.changelog
723 726
724 727 clrevorder = {}
725 728 mfs = {} # needed manifests
726 729 fnodes = {} # needed file nodes
727 730 mfl = repo.manifestlog
728 731 # TODO violates storage abstraction.
729 732 mfrevlog = mfl._revlog
730 733 changedfiles = set()
731 734
732 ellipsesmode = util.safehasattr(self, '_full_nodes')
733
734 735 # Callback for the changelog, used to collect changed files and
735 736 # manifest nodes.
736 737 # Returns the linkrev node (identity in the changelog case).
737 738 def lookupcl(x):
738 739 c = cl.read(x)
739 740 clrevorder[x] = len(clrevorder)
740 741
741 if ellipsesmode:
742 if self._ellipses:
742 743 # Only update mfs if x is going to be sent. Otherwise we
743 744 # end up with bogus linkrevs specified for manifests and
744 745 # we skip some manifest nodes that we should otherwise
745 746 # have sent.
746 747 if (x in self._full_nodes
747 748 or cl.rev(x) in self._precomputedellipsis):
748 749 n = c[0]
749 750 # Record the first changeset introducing this manifest
750 751 # version.
751 752 mfs.setdefault(n, x)
752 753 # Set this narrow-specific dict so we have the lowest
753 754 # manifest revnum to look up for this cl revnum. (Part of
754 755 # mapping changelog ellipsis parents to manifest ellipsis
755 756 # parents)
756 757 self._nextclrevtolocalrev.setdefault(cl.rev(x),
757 758 mfrevlog.rev(n))
758 759 # We can't trust the changed files list in the changeset if the
759 760 # client requested a shallow clone.
760 761 if self._isshallow:
761 762 changedfiles.update(mfl[c[0]].read().keys())
762 763 else:
763 764 changedfiles.update(c[3])
764 765 else:
765 766
766 767 n = c[0]
767 768 # record the first changeset introducing this manifest version
768 769 mfs.setdefault(n, x)
769 770 # Record a complete list of potentially-changed files in
770 771 # this manifest.
771 772 changedfiles.update(c[3])
772 773
773 774 return x
774 775
775 776 self._verbosenote(_('uncompressed size of bundle content:\n'))
776 777 size = 0
777 778 for chunk in self.group(clnodes, cl, lookupcl, units=_('changesets')):
778 779 size += len(chunk)
779 780 yield chunk
780 781 self._verbosenote(_('%8.i (changelog)\n') % size)
781 782
782 783 # We need to make sure that the linkrev in the changegroup refers to
783 784 # the first changeset that introduced the manifest or file revision.
784 785 # The fastpath is usually safer than the slowpath, because the filelogs
785 786 # are walked in revlog order.
786 787 #
787 788 # When taking the slowpath with reorder=None and the manifest revlog
788 789 # uses generaldelta, the manifest may be walked in the "wrong" order.
789 790 # Without 'clrevorder', we would get an incorrect linkrev (see fix in
790 791 # cc0ff93d0c0c).
791 792 #
792 793 # When taking the fastpath, we are only vulnerable to reordering
793 794 # of the changelog itself. The changelog never uses generaldelta, so
794 795 # it is only reordered when reorder=True. To handle this case, we
795 796 # simply take the slowpath, which already has the 'clrevorder' logic.
796 797 # This was also fixed in cc0ff93d0c0c.
797 798 fastpathlinkrev = fastpathlinkrev and not self._reorder
798 799 # Treemanifests don't work correctly with fastpathlinkrev
799 800 # either, because we don't discover which directory nodes to
800 801 # send along with files. This could probably be fixed.
801 802 fastpathlinkrev = fastpathlinkrev and (
802 803 'treemanifest' not in repo.requirements)
803 804
804 805 for chunk in self.generatemanifests(commonrevs, clrevorder,
805 806 fastpathlinkrev, mfs, fnodes, source):
806 807 yield chunk
807 808
808 if ellipsesmode:
809 if self._ellipses:
809 810 mfdicts = None
810 811 if self._isshallow:
811 812 mfdicts = [(self._repo.manifestlog[n].read(), lr)
812 813 for (n, lr) in mfs.iteritems()]
813 814
814 815 mfs.clear()
815 816 clrevs = set(cl.rev(x) for x in clnodes)
816 817
817 818 if not fastpathlinkrev:
818 819 def linknodes(unused, fname):
819 820 return fnodes.get(fname, {})
820 821 else:
821 822 cln = cl.node
822 823 def linknodes(filerevlog, fname):
823 824 llr = filerevlog.linkrev
824 825 fln = filerevlog.node
825 826 revs = ((r, llr(r)) for r in filerevlog)
826 827 return dict((fln(r), cln(lr)) for r, lr in revs if lr in clrevs)
827 828
828 if ellipsesmode:
829 if self._ellipses:
829 830 # We need to pass the mfdicts variable down into
830 831 # generatefiles(), but more than one command might have
831 832 # wrapped generatefiles so we can't modify the function
832 833 # signature. Instead, we pass the data to ourselves using an
833 834 # instance attribute. I'm sorry.
834 835 self._mfdicts = mfdicts
835 836
836 837 for chunk in self.generatefiles(changedfiles, linknodes, commonrevs,
837 838 source):
838 839 yield chunk
839 840
840 841 yield self._close()
841 842
842 843 if clnodes:
843 844 repo.hook('outgoing', node=hex(clnodes[0]), source=source)
844 845
845 846 def generatemanifests(self, commonrevs, clrevorder, fastpathlinkrev, mfs,
846 847 fnodes, source):
847 848 """Returns an iterator of changegroup chunks containing manifests.
848 849
849 850 `source` is unused here, but is used by extensions like remotefilelog to
850 851 change what is sent based in pulls vs pushes, etc.
851 852 """
852 853 repo = self._repo
853 854 mfl = repo.manifestlog
854 855 dirlog = mfl._revlog.dirlog
855 856 tmfnodes = {'': mfs}
856 857
857 858 # Callback for the manifest, used to collect linkrevs for filelog
858 859 # revisions.
859 860 # Returns the linkrev node (collected in lookupcl).
860 861 def makelookupmflinknode(dir, nodes):
861 862 if fastpathlinkrev:
862 863 assert not dir
863 864 return mfs.__getitem__
864 865
865 866 def lookupmflinknode(x):
866 867 """Callback for looking up the linknode for manifests.
867 868
868 869 Returns the linkrev node for the specified manifest.
869 870
870 871 SIDE EFFECT:
871 872
872 873 1) fclnodes gets populated with the list of relevant
873 874 file nodes if we're not using fastpathlinkrev
874 875 2) When treemanifests are in use, collects treemanifest nodes
875 876 to send
876 877
877 878 Note that this means manifests must be completely sent to
878 879 the client before you can trust the list of files and
879 880 treemanifests to send.
880 881 """
881 882 clnode = nodes[x]
882 883 mdata = mfl.get(dir, x).readfast(shallow=True)
883 884 for p, n, fl in mdata.iterentries():
884 885 if fl == 't': # subdirectory manifest
885 886 subdir = dir + p + '/'
886 887 tmfclnodes = tmfnodes.setdefault(subdir, {})
887 888 tmfclnode = tmfclnodes.setdefault(n, clnode)
888 889 if clrevorder[clnode] < clrevorder[tmfclnode]:
889 890 tmfclnodes[n] = clnode
890 891 else:
891 892 f = dir + p
892 893 fclnodes = fnodes.setdefault(f, {})
893 894 fclnode = fclnodes.setdefault(n, clnode)
894 895 if clrevorder[clnode] < clrevorder[fclnode]:
895 896 fclnodes[n] = clnode
896 897 return clnode
897 898 return lookupmflinknode
898 899
899 900 fn = (self._packtreemanifests if self._sendtreemanifests
900 901 else self._packmanifests)
901 902 size = 0
902 903 while tmfnodes:
903 904 dir, nodes = tmfnodes.popitem()
904 905 prunednodes = self._prune(dirlog(dir), nodes, commonrevs)
905 906 if not dir or prunednodes:
906 907 for x in fn(dir, prunednodes, makelookupmflinknode(dir, nodes)):
907 908 size += len(x)
908 909 yield x
909 910 self._verbosenote(_('%8.i (manifests)\n') % size)
910 911 yield self._manifestsend
911 912
912 913 # The 'source' parameter is useful for extensions
913 914 def generatefiles(self, changedfiles, linknodes, commonrevs, source):
914 915 changedfiles = list(filter(self._filematcher, changedfiles))
915 916
916 917 if self._isshallow:
917 918 # See comment in generate() for why this sadness is a thing.
918 919 mfdicts = self._mfdicts
919 920 del self._mfdicts
920 921 # In a shallow clone, the linknodes callback needs to also include
921 922 # those file nodes that are in the manifests we sent but weren't
922 923 # introduced by those manifests.
923 924 commonctxs = [self._repo[c] for c in commonrevs]
924 925 oldlinknodes = linknodes
925 926 clrev = self._repo.changelog.rev
926 927
927 928 # Defining this function has a side-effect of overriding the
928 929 # function of the same name that was passed in as an argument.
929 930 # TODO have caller pass in appropriate function.
930 931 def linknodes(flog, fname):
931 932 for c in commonctxs:
932 933 try:
933 934 fnode = c.filenode(fname)
934 935 self._clrevtolocalrev[c.rev()] = flog.rev(fnode)
935 936 except error.ManifestLookupError:
936 937 pass
937 938 links = oldlinknodes(flog, fname)
938 939 if len(links) != len(mfdicts):
939 940 for mf, lr in mfdicts:
940 941 fnode = mf.get(fname, None)
941 942 if fnode in links:
942 943 links[fnode] = min(links[fnode], lr, key=clrev)
943 944 elif fnode:
944 945 links[fnode] = lr
945 946 return links
946 947
947 948 return self._generatefiles(changedfiles, linknodes, commonrevs, source)
948 949
949 950 def _generatefiles(self, changedfiles, linknodes, commonrevs, source):
950 951 repo = self._repo
951 952 progress = repo.ui.makeprogress(_('bundling'), unit=_('files'),
952 953 total=len(changedfiles))
953 954 for i, fname in enumerate(sorted(changedfiles)):
954 955 filerevlog = repo.file(fname)
955 956 if not filerevlog:
956 957 raise error.Abort(_("empty or missing file data for %s") %
957 958 fname)
958 959
959 960 linkrevnodes = linknodes(filerevlog, fname)
960 961 # Lookup for filenodes, we collected the linkrev nodes above in the
961 962 # fastpath case and with lookupmf in the slowpath case.
962 963 def lookupfilelog(x):
963 964 return linkrevnodes[x]
964 965
965 966 filenodes = self._prune(filerevlog, linkrevnodes, commonrevs)
966 967 if filenodes:
967 968 progress.update(i + 1, item=fname)
968 969 h = self._fileheader(fname)
969 970 size = len(h)
970 971 yield h
971 972 for chunk in self.group(filenodes, filerevlog, lookupfilelog):
972 973 size += len(chunk)
973 974 yield chunk
974 975 self._verbosenote(_('%8.i %s\n') % (size, fname))
975 976 progress.complete()
976 977
977 978 def _deltaparent(self, store, rev, p1, p2, prev):
978 979 if self._useprevdelta:
979 980 if not store.candelta(prev, rev):
980 981 raise error.ProgrammingError(
981 982 'cg1 should not be used in this case')
982 983 return prev
983 984
984 985 # Narrow ellipses mode.
985 if util.safehasattr(self, '_full_nodes'):
986 if self._ellipses:
986 987 # TODO: send better deltas when in narrow mode.
987 988 #
988 989 # changegroup.group() loops over revisions to send,
989 990 # including revisions we'll skip. What this means is that
990 991 # `prev` will be a potentially useless delta base for all
991 992 # ellipsis nodes, as the client likely won't have it. In
992 993 # the future we should do bookkeeping about which nodes
993 994 # have been sent to the client, and try to be
994 995 # significantly smarter about delta bases. This is
995 996 # slightly tricky because this same code has to work for
996 997 # all revlogs, and we don't have the linkrev/linknode here.
997 998 return p1
998 999
999 1000 dp = store.deltaparent(rev)
1000 1001 if dp == nullrev and store.storedeltachains:
1001 1002 # Avoid sending full revisions when delta parent is null. Pick prev
1002 1003 # in that case. It's tempting to pick p1 in this case, as p1 will
1003 1004 # be smaller in the common case. However, computing a delta against
1004 1005 # p1 may require resolving the raw text of p1, which could be
1005 1006 # expensive. The revlog caches should have prev cached, meaning
1006 1007 # less CPU for changegroup generation. There is likely room to add
1007 1008 # a flag and/or config option to control this behavior.
1008 1009 base = prev
1009 1010 elif dp == nullrev:
1010 1011 # revlog is configured to use full snapshot for a reason,
1011 1012 # stick to full snapshot.
1012 1013 base = nullrev
1013 1014 elif dp not in (p1, p2, prev):
1014 1015 # Pick prev when we can't be sure remote has the base revision.
1015 1016 return prev
1016 1017 else:
1017 1018 base = dp
1018 1019
1019 1020 if base != nullrev and not store.candelta(base, rev):
1020 1021 base = nullrev
1021 1022
1022 1023 return base
1023 1024
1024 1025 def _revchunk(self, store, rev, prev, linknode):
1025 if util.safehasattr(self, '_full_nodes'):
1026 if self._ellipses:
1026 1027 fn = self._revisiondeltanarrow
1027 1028 else:
1028 1029 fn = self._revisiondeltanormal
1029 1030
1030 1031 delta = fn(store, rev, prev, linknode)
1031 1032 if not delta:
1032 1033 return
1033 1034
1034 1035 meta = self._builddeltaheader(delta)
1035 1036 l = len(meta) + sum(len(x) for x in delta.deltachunks)
1036 1037
1037 1038 yield chunkheader(l)
1038 1039 yield meta
1039 1040 for x in delta.deltachunks:
1040 1041 yield x
1041 1042
1042 1043 def _revisiondeltanormal(self, store, rev, prev, linknode):
1043 1044 node = store.node(rev)
1044 1045 p1, p2 = store.parentrevs(rev)
1045 1046 base = self._deltaparent(store, rev, p1, p2, prev)
1046 1047
1047 1048 prefix = ''
1048 1049 if store.iscensored(base) or store.iscensored(rev):
1049 1050 try:
1050 1051 delta = store.revision(node, raw=True)
1051 1052 except error.CensoredNodeError as e:
1052 1053 delta = e.tombstone
1053 1054 if base == nullrev:
1054 1055 prefix = mdiff.trivialdiffheader(len(delta))
1055 1056 else:
1056 1057 baselen = store.rawsize(base)
1057 1058 prefix = mdiff.replacediffheader(baselen, len(delta))
1058 1059 elif base == nullrev:
1059 1060 delta = store.revision(node, raw=True)
1060 1061 prefix = mdiff.trivialdiffheader(len(delta))
1061 1062 else:
1062 1063 delta = store.revdiff(base, rev)
1063 1064 p1n, p2n = store.parents(node)
1064 1065
1065 1066 return revisiondelta(
1066 1067 node=node,
1067 1068 p1node=p1n,
1068 1069 p2node=p2n,
1069 1070 basenode=store.node(base),
1070 1071 linknode=linknode,
1071 1072 flags=store.flags(rev),
1072 1073 deltachunks=(prefix, delta),
1073 1074 )
1074 1075
1075 1076 def _revisiondeltanarrow(self, store, rev, prev, linknode):
1076 1077 # build up some mapping information that's useful later. See
1077 1078 # the local() nested function below.
1078 1079 if not self._changelogdone:
1079 1080 self._clnodetorev[linknode] = rev
1080 1081 linkrev = rev
1081 1082 self._clrevtolocalrev[linkrev] = rev
1082 1083 else:
1083 1084 linkrev = self._clnodetorev[linknode]
1084 1085 self._clrevtolocalrev[linkrev] = rev
1085 1086
1086 1087 # This is a node to send in full, because the changeset it
1087 1088 # corresponds to was a full changeset.
1088 1089 if linknode in self._full_nodes:
1089 1090 return self._revisiondeltanormal(store, rev, prev, linknode)
1090 1091
1091 1092 # At this point, a node can either be one we should skip or an
1092 1093 # ellipsis. If it's not an ellipsis, bail immediately.
1093 1094 if linkrev not in self._precomputedellipsis:
1094 1095 return
1095 1096
1096 1097 linkparents = self._precomputedellipsis[linkrev]
1097 1098 def local(clrev):
1098 1099 """Turn a changelog revnum into a local revnum.
1099 1100
1100 1101 The ellipsis dag is stored as revnums on the changelog,
1101 1102 but when we're producing ellipsis entries for
1102 1103 non-changelog revlogs, we need to turn those numbers into
1103 1104 something local. This does that for us, and during the
1104 1105 changelog sending phase will also expand the stored
1105 1106 mappings as needed.
1106 1107 """
1107 1108 if clrev == nullrev:
1108 1109 return nullrev
1109 1110
1110 1111 if not self._changelogdone:
1111 1112 # If we're doing the changelog, it's possible that we
1112 1113 # have a parent that is already on the client, and we
1113 1114 # need to store some extra mapping information so that
1114 1115 # our contained ellipsis nodes will be able to resolve
1115 1116 # their parents.
1116 1117 if clrev not in self._clrevtolocalrev:
1117 1118 clnode = store.node(clrev)
1118 1119 self._clnodetorev[clnode] = clrev
1119 1120 return clrev
1120 1121
1121 1122 # Walk the ellipsis-ized changelog breadth-first looking for a
1122 1123 # change that has been linked from the current revlog.
1123 1124 #
1124 1125 # For a flat manifest revlog only a single step should be necessary
1125 1126 # as all relevant changelog entries are relevant to the flat
1126 1127 # manifest.
1127 1128 #
1128 1129 # For a filelog or tree manifest dirlog however not every changelog
1129 1130 # entry will have been relevant, so we need to skip some changelog
1130 1131 # nodes even after ellipsis-izing.
1131 1132 walk = [clrev]
1132 1133 while walk:
1133 1134 p = walk[0]
1134 1135 walk = walk[1:]
1135 1136 if p in self._clrevtolocalrev:
1136 1137 return self._clrevtolocalrev[p]
1137 1138 elif p in self._full_nodes:
1138 1139 walk.extend([pp for pp in self._repo.changelog.parentrevs(p)
1139 1140 if pp != nullrev])
1140 1141 elif p in self._precomputedellipsis:
1141 1142 walk.extend([pp for pp in self._precomputedellipsis[p]
1142 1143 if pp != nullrev])
1143 1144 else:
1144 1145 # In this case, we've got an ellipsis with parents
1145 1146 # outside the current bundle (likely an
1146 1147 # incremental pull). We "know" that we can use the
1147 1148 # value of this same revlog at whatever revision
1148 1149 # is pointed to by linknode. "Know" is in scare
1149 1150 # quotes because I haven't done enough examination
1150 1151 # of edge cases to convince myself this is really
1151 1152 # a fact - it works for all the (admittedly
1152 1153 # thorough) cases in our testsuite, but I would be
1153 1154 # somewhat unsurprised to find a case in the wild
1154 1155 # where this breaks down a bit. That said, I don't
1155 1156 # know if it would hurt anything.
1156 1157 for i in pycompat.xrange(rev, 0, -1):
1157 1158 if store.linkrev(i) == clrev:
1158 1159 return i
1159 1160 # We failed to resolve a parent for this node, so
1160 1161 # we crash the changegroup construction.
1161 1162 raise error.Abort(
1162 1163 'unable to resolve parent while packing %r %r'
1163 1164 ' for changeset %r' % (store.indexfile, rev, clrev))
1164 1165
1165 1166 return nullrev
1166 1167
1167 1168 if not linkparents or (
1168 1169 store.parentrevs(rev) == (nullrev, nullrev)):
1169 1170 p1, p2 = nullrev, nullrev
1170 1171 elif len(linkparents) == 1:
1171 1172 p1, = sorted(local(p) for p in linkparents)
1172 1173 p2 = nullrev
1173 1174 else:
1174 1175 p1, p2 = sorted(local(p) for p in linkparents)
1175 1176
1176 1177 n = store.node(rev)
1177 1178 p1n, p2n = store.node(p1), store.node(p2)
1178 1179 flags = store.flags(rev)
1179 1180 flags |= revlog.REVIDX_ELLIPSIS
1180 1181
1181 1182 # TODO: try and actually send deltas for ellipsis data blocks
1182 1183 data = store.revision(n)
1183 1184 diffheader = mdiff.trivialdiffheader(len(data))
1184 1185
1185 1186 return revisiondelta(
1186 1187 node=n,
1187 1188 p1node=p1n,
1188 1189 p2node=p2n,
1189 1190 basenode=nullid,
1190 1191 linknode=linknode,
1191 1192 flags=flags,
1192 1193 deltachunks=(diffheader, data),
1193 1194 )
1194 1195
1195 def _makecg1packer(repo, filematcher, bundlecaps, shallow=False,
1196 ellipsisroots=None):
1196 def _makecg1packer(repo, filematcher, bundlecaps, ellipses=False,
1197 shallow=False, ellipsisroots=None):
1197 1198 builddeltaheader = lambda d: _CHANGEGROUPV1_DELTA_HEADER.pack(
1198 1199 d.node, d.p1node, d.p2node, d.linknode)
1199 1200
1200 1201 return cgpacker(repo, filematcher, b'01',
1201 1202 useprevdelta=True,
1202 1203 allowreorder=None,
1203 1204 builddeltaheader=builddeltaheader,
1204 1205 manifestsend=b'',
1205 1206 sendtreemanifests=False,
1206 1207 bundlecaps=bundlecaps,
1208 ellipses=ellipses,
1207 1209 shallow=shallow,
1208 1210 ellipsisroots=ellipsisroots)
1209 1211
1210 def _makecg2packer(repo, filematcher, bundlecaps, shallow=False,
1211 ellipsisroots=None):
1212 def _makecg2packer(repo, filematcher, bundlecaps, ellipses=False,
1213 shallow=False, ellipsisroots=None):
1212 1214 builddeltaheader = lambda d: _CHANGEGROUPV2_DELTA_HEADER.pack(
1213 1215 d.node, d.p1node, d.p2node, d.basenode, d.linknode)
1214 1216
1215 1217 # Since generaldelta is directly supported by cg2, reordering
1216 1218 # generally doesn't help, so we disable it by default (treating
1217 1219 # bundle.reorder=auto just like bundle.reorder=False).
1218 1220 return cgpacker(repo, filematcher, b'02',
1219 1221 useprevdelta=False,
1220 1222 allowreorder=False,
1221 1223 builddeltaheader=builddeltaheader,
1222 1224 manifestsend=b'',
1223 1225 sendtreemanifests=False,
1224 1226 bundlecaps=bundlecaps,
1227 ellipses=ellipses,
1225 1228 shallow=shallow,
1226 1229 ellipsisroots=ellipsisroots)
1227 1230
1228 def _makecg3packer(repo, filematcher, bundlecaps, shallow=False,
1229 ellipsisroots=None):
1231 def _makecg3packer(repo, filematcher, bundlecaps, ellipses=False,
1232 shallow=False, ellipsisroots=None):
1230 1233 builddeltaheader = lambda d: _CHANGEGROUPV3_DELTA_HEADER.pack(
1231 1234 d.node, d.p1node, d.p2node, d.basenode, d.linknode, d.flags)
1232 1235
1233 1236 return cgpacker(repo, filematcher, b'03',
1234 1237 useprevdelta=False,
1235 1238 allowreorder=False,
1236 1239 builddeltaheader=builddeltaheader,
1237 1240 manifestsend=closechunk(),
1238 1241 sendtreemanifests=True,
1239 1242 bundlecaps=bundlecaps,
1243 ellipses=ellipses,
1240 1244 shallow=shallow,
1241 1245 ellipsisroots=ellipsisroots)
1242 1246
1243 1247 _packermap = {'01': (_makecg1packer, cg1unpacker),
1244 1248 # cg2 adds support for exchanging generaldelta
1245 1249 '02': (_makecg2packer, cg2unpacker),
1246 1250 # cg3 adds support for exchanging revlog flags and treemanifests
1247 1251 '03': (_makecg3packer, cg3unpacker),
1248 1252 }
1249 1253
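The table maps a negotiated version string to a (packer factory, unpacker class) pair, which keeps getbundler() and getunbundler() below symmetric. A hedged lookup sketch:

    makepacker, unpackerclass = _packermap['02']
    # makepacker(repo, filematcher, bundlecaps, ...) builds a cgpacker;
    # unpackerclass(fh, alg) wraps a (possibly compressed) stream.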
1250 1254 def allsupportedversions(repo):
1251 1255 versions = set(_packermap.keys())
1252 1256 if not (repo.ui.configbool('experimental', 'changegroup3') or
1253 1257 repo.ui.configbool('experimental', 'treemanifest') or
1254 1258 'treemanifest' in repo.requirements):
1255 1259 versions.discard('03')
1256 1260 return versions
1257 1261
1258 1262 # Changegroup versions that can be applied to the repo
1259 1263 def supportedincomingversions(repo):
1260 1264 return allsupportedversions(repo)
1261 1265
1262 1266 # Changegroup versions that can be created from the repo
1263 1267 def supportedoutgoingversions(repo):
1264 1268 versions = allsupportedversions(repo)
1265 1269 if 'treemanifest' in repo.requirements:
1266 1270 # Versions 01 and 02 support only flat manifests and it's just too
1267 1271 # expensive to convert between the flat manifest and tree manifest on
1268 1272 # the fly. Since tree manifests are hashed differently, all of history
1269 1273 # would have to be converted. Instead, we simply don't even pretend to
1270 1274 # support versions 01 and 02.
1271 1275 versions.discard('01')
1272 1276 versions.discard('02')
1273 1277 if repository.NARROW_REQUIREMENT in repo.requirements:
1274 1278 # Versions 01 and 02 don't support revlog flags, and we need to
1275 1279 # support that for stripping and unbundling to work.
1276 1280 versions.discard('01')
1277 1281 versions.discard('02')
1278 1282 if LFS_REQUIREMENT in repo.requirements:
1279 1283 # Versions 01 and 02 don't support revlog flags, and we need to
1280 1284 # mark LFS entries with REVIDX_EXTSTORED.
1281 1285 versions.discard('01')
1282 1286 versions.discard('02')
1283 1287
1284 1288 return versions
1285 1289
1286 1290 def localversion(repo):
1287 1291 # Finds the best version to use for bundles that are meant to be used
1288 1292 # locally, such as those from strip and shelve, and temporary bundles.
1289 1293 return max(supportedoutgoingversions(repo))
1290 1294
1291 1295 def safeversion(repo):
1292 1296 # Finds the smallest version that it's safe to assume clients of the repo
1293 1297 # will support. For example, all hg versions that support generaldelta also
1294 1298 # support changegroup 02.
1295 1299 versions = supportedoutgoingversions(repo)
1296 1300 if 'generaldelta' in repo.requirements:
1297 1301 versions.discard('01')
1298 1302 assert versions
1299 1303 return min(versions)
1300 1304
1301 1305 def getbundler(version, repo, bundlecaps=None, filematcher=None,
1302 shallow=False, ellipsisroots=None):
1306 ellipses=False, shallow=False, ellipsisroots=None):
1303 1307 assert version in supportedoutgoingversions(repo)
1304 1308
1305 1309 if filematcher is None:
1306 1310 filematcher = matchmod.alwaysmatcher(repo.root, '')
1307 1311
1308 1312 if version == '01' and not filematcher.always():
1309 1313 raise error.ProgrammingError('version 01 changegroups do not support '
1310 1314 'sparse file matchers')
1311 1315
1316 if ellipses and version in (b'01', b'02'):
1317 raise error.Abort(
1318 _('ellipsis nodes require at least cg3 on client and server, '
1319 'but negotiated version %s') % version)
1320
1312 1321 # Requested files could include files not in the local store. So
1313 1322 # filter those out.
1314 1323 filematcher = matchmod.intersectmatchers(repo.narrowmatch(),
1315 1324 filematcher)
1316 1325
1317 1326 fn = _packermap[version][0]
1318 return fn(repo, filematcher, bundlecaps, shallow=shallow,
1319 ellipsisroots=ellipsisroots)
1327 return fn(repo, filematcher, bundlecaps, ellipses=ellipses,
1328 shallow=shallow, ellipsisroots=ellipsisroots)
1320 1329
1321 1330 def getunbundler(version, fh, alg, extras=None):
1322 1331 return _packermap[version][1](fh, alg, extras=extras)
1323 1332
1324 1333 def _changegroupinfo(repo, nodes, source):
1325 1334 if repo.ui.verbose or source == 'bundle':
1326 1335 repo.ui.status(_("%d changesets found\n") % len(nodes))
1327 1336 if repo.ui.debugflag:
1328 1337 repo.ui.debug("list of changesets:\n")
1329 1338 for node in nodes:
1330 1339 repo.ui.debug("%s\n" % hex(node))
1331 1340
1332 1341 def makechangegroup(repo, outgoing, version, source, fastpath=False,
1333 1342 bundlecaps=None):
1334 1343 cgstream = makestream(repo, outgoing, version, source,
1335 1344 fastpath=fastpath, bundlecaps=bundlecaps)
1336 1345 return getunbundler(version, util.chunkbuffer(cgstream), None,
1337 1346 {'clcount': len(outgoing.missing) })
1338 1347
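Taken together, makestream() produces the chunk iterator and makechangegroup() wraps it back into an unpacker for local reuse (e.g. strip backups). A hedged sketch, assuming repo and an outgoing set obtained from discovery:

    version = localversion(repo)   # best version for local bundles
    cg = makechangegroup(repo, outgoing, version, 'strip')
    # cg is an unpacker whose apply() can replay the changesets.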
1339 1348 def makestream(repo, outgoing, version, source, fastpath=False,
1340 1349 bundlecaps=None, filematcher=None):
1341 1350 bundler = getbundler(version, repo, bundlecaps=bundlecaps,
1342 1351 filematcher=filematcher)
1343 1352
1344 1353 repo = repo.unfiltered()
1345 1354 commonrevs = outgoing.common
1346 1355 csets = outgoing.missing
1347 1356 heads = outgoing.missingheads
1348 1357 # We go through the fast path if we get told to, or if all (unfiltered)
1349 1358 # heads have been requested (since we then know that all linkrevs will
1350 1359 # be pulled by the client).
1351 1360 heads.sort()
1352 1361 fastpathlinkrev = fastpath or (
1353 1362 repo.filtername is None and heads == sorted(repo.heads()))
1354 1363
1355 1364 repo.hook('preoutgoing', throw=True, source=source)
1356 1365 _changegroupinfo(repo, csets, source)
1357 1366 return bundler.generate(commonrevs, csets, fastpathlinkrev, source)
1358 1367
1359 1368 def _addchangegroupfiles(repo, source, revmap, trp, expectedfiles, needfiles):
1360 1369 revisions = 0
1361 1370 files = 0
1362 1371 progress = repo.ui.makeprogress(_('files'), unit=_('files'),
1363 1372 total=expectedfiles)
1364 1373 for chunkdata in iter(source.filelogheader, {}):
1365 1374 files += 1
1366 1375 f = chunkdata["filename"]
1367 1376 repo.ui.debug("adding %s revisions\n" % f)
1368 1377 progress.increment()
1369 1378 fl = repo.file(f)
1370 1379 o = len(fl)
1371 1380 try:
1372 1381 deltas = source.deltaiter()
1373 1382 if not fl.addgroup(deltas, revmap, trp):
1374 1383 raise error.Abort(_("received file revlog group is empty"))
1375 1384 except error.CensoredBaseError as e:
1376 1385 raise error.Abort(_("received delta base is censored: %s") % e)
1377 1386 revisions += len(fl) - o
1378 1387 if f in needfiles:
1379 1388 needs = needfiles[f]
1380 1389 for new in pycompat.xrange(o, len(fl)):
1381 1390 n = fl.node(new)
1382 1391 if n in needs:
1383 1392 needs.remove(n)
1384 1393 else:
1385 1394 raise error.Abort(
1386 1395 _("received spurious file revlog entry"))
1387 1396 if not needs:
1388 1397 del needfiles[f]
1389 1398 progress.complete()
1390 1399
1391 1400 for f, needs in needfiles.iteritems():
1392 1401 fl = repo.file(f)
1393 1402 for n in needs:
1394 1403 try:
1395 1404 fl.rev(n)
1396 1405 except error.LookupError:
1397 1406 raise error.Abort(
1398 1407 _('missing file data for %s:%s - run hg verify') %
1399 1408 (f, hex(n)))
1400 1409
1401 1410 return revisions, files
1402 1411
1403 1412 def _packellipsischangegroup(repo, common, match, relevant_nodes,
1404 1413 ellipsisroots, visitnodes, depth, source, version):
1405 if version in ('01', '02'):
1406 raise error.Abort(
1407 'ellipsis nodes require at least cg3 on client and server, '
1408 'but negotiated version %s' % version)
1409 1414 # We wrap cgpacker._revchunk, using a side channel to pass
1410 1415 # relevant_nodes into that area. Then if linknode isn't in the
1411 1416 # set, we know we have an ellipsis node and we should defer
1412 1417 # sending that node's data. We override close() to detect
1413 1418 # pending ellipsis nodes and flush them.
1414 1419 packer = getbundler(version, repo, filematcher=match,
1420 ellipses=True,
1415 1421 shallow=depth is not None,
1416 1422 ellipsisroots=ellipsisroots)
1417 1423 # Give the packer the list of nodes which should not be
1418 1424 # ellipsis nodes. We store this rather than the set of nodes
1419 1425 # that should be an ellipsis because for very large histories
1420 1426 # we expect this to be significantly smaller.
1421 1427 packer._full_nodes = relevant_nodes
1422 1428
1423 1429 return packer.generate(common, visitnodes, False, source)