##// END OF EJS Templates
changegroup: pass ellipsis roots into cgpacker constructor...
Gregory Szorc -
r38943:ad4c4cc9 default
parent child Browse files
Show More
@@ -1,1413 +1,1423 b''
1 1 # changegroup.py - Mercurial changegroup manipulation functions
2 2 #
3 3 # Copyright 2006 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import os
11 11 import struct
12 12 import weakref
13 13
14 14 from .i18n import _
15 15 from .node import (
16 16 hex,
17 17 nullid,
18 18 nullrev,
19 19 short,
20 20 )
21 21
22 22 from .thirdparty import (
23 23 attr,
24 24 )
25 25
26 26 from . import (
27 27 dagutil,
28 28 error,
29 29 manifest,
30 30 match as matchmod,
31 31 mdiff,
32 32 phases,
33 33 pycompat,
34 34 repository,
35 35 revlog,
36 36 util,
37 37 )
38 38
39 39 from .utils import (
40 40 stringutil,
41 41 )
42 42
43 43 _CHANGEGROUPV1_DELTA_HEADER = struct.Struct("20s20s20s20s")
44 44 _CHANGEGROUPV2_DELTA_HEADER = struct.Struct("20s20s20s20s20s")
45 45 _CHANGEGROUPV3_DELTA_HEADER = struct.Struct(">20s20s20s20s20sH")
46 46
47 47 LFS_REQUIREMENT = 'lfs'
48 48
49 49 readexactly = util.readexactly
50 50
def getchunk(stream):
    """Return the next framed chunk from *stream* as a string.

    A chunk is prefixed by a 4-byte big-endian length that includes the
    header itself; an empty string is returned for a terminating
    (zero-length) chunk.
    """
    header = readexactly(stream, 4)
    length = struct.unpack(">l", header)[0]
    if length > 4:
        return readexactly(stream, length - 4)
    if length:
        raise error.Abort(_("invalid chunk length %d") % length)
    return ""
60 60
def chunkheader(length):
    """return a changegroup chunk header (string)"""
    # The framed length counts the 4-byte header itself.
    framedlen = length + 4
    return struct.pack(">l", framedlen)
64 64
def closechunk():
    """return a changegroup chunk header (string) for a zero-length chunk"""
    # A zero framed length marks the end of a group.
    terminator = 0
    return struct.pack(">l", terminator)
68 68
def writechunks(ui, chunks, filename, vfs=None):
    """Write chunks to a file and return its filename.

    The stream is assumed to be a bundle file.
    Existing files will not be overwritten.
    If no filename is specified, a temporary file is created.

    On failure, a temporary file that was created is removed again;
    an explicitly named file is left in place (possibly partial).
    """
    fh = None
    cleanup = None
    try:
        if not filename:
            fd, filename = pycompat.mkstemp(prefix="hg-bundle-", suffix=".hg")
            fh = os.fdopen(fd, r"wb")
            # Only temporary files are removed on error.
            cleanup = filename
        elif vfs:
            fh = vfs.open(filename, "wb")
        else:
            # Increase default buffer size because default is usually
            # small (4k is common on Linux).
            fh = open(filename, "wb", 131072)
        for chunk in chunks:
            fh.write(chunk)
        # Reaching here means success; disarm the error cleanup.
        cleanup = None
        return filename
    finally:
        if fh is not None:
            fh.close()
        if cleanup is not None:
            if filename and vfs:
                vfs.unlink(cleanup)
            else:
                os.unlink(cleanup)
102 102
class cg1unpacker(object):
    """Unpacker for cg1 changegroup streams.

    A changegroup unpacker handles the framing of the revision data in
    the wire format. Most consumers will want to use the apply()
    method to add the changes from the changegroup to a repository.

    If you're forwarding a changegroup unmodified to another consumer,
    use getchunks(), which returns an iterator of changegroup
    chunks. This is mostly useful for cases where you need to know the
    data stream has ended by observing the end of the changegroup.

    deltachunk() is useful only if you're applying delta data. Most
    consumers should prefer apply() instead.

    A few other public methods exist. Those are used only for
    bundlerepo and some debug commands - their use is discouraged.
    """
    # Wire-format constants for version 1 streams.
    deltaheader = _CHANGEGROUPV1_DELTA_HEADER
    deltaheadersize = deltaheader.size
    version = '01'
    _grouplistcount = 1 # One list of files after the manifests

    def __init__(self, fh, alg, extras=None):
        """Wrap file-like ``fh`` containing data compressed with ``alg``.

        ``alg`` of None is treated as uncompressed ('UN'). Raises
        error.Abort for compression types the bundle machinery does not
        know about.
        """
        if alg is None:
            alg = 'UN'
        if alg not in util.compengines.supportedbundletypes:
            raise error.Abort(_('unknown stream compression type: %s')
                              % alg)
        if alg == 'BZ':
            # NOTE(review): presumably the 'BZ' magic has already been
            # consumed from the stream, hence the truncated variant —
            # confirm against util.compengines.
            alg = '_truncatedBZ'

        compengine = util.compengines.forbundletype(alg)
        self._stream = compengine.decompressorreader(fh)
        self._type = alg
        self.extras = extras or {}
        # Optional no-arg callable invoked once per chunk read; used by
        # apply() for progress reporting.
        self.callback = None

    # These methods (compressed, read, seek, tell) all appear to only
    # be used by bundlerepo, but it's a little hard to tell.
    def compressed(self):
        return self._type is not None and self._type != 'UN'
    def read(self, l):
        return self._stream.read(l)
    def seek(self, pos):
        return self._stream.seek(pos)
    def tell(self):
        return self._stream.tell()
    def close(self):
        return self._stream.close()

    def _chunklength(self):
        """Read a chunk header and return the payload length (0 at end).

        Also fires the progress callback, if one is installed.
        """
        d = readexactly(self._stream, 4)
        l = struct.unpack(">l", d)[0]
        if l <= 4:
            if l:
                raise error.Abort(_("invalid chunk length %d") % l)
            return 0
        if self.callback:
            self.callback()
        return l - 4

    def changelogheader(self):
        """v10 does not have a changelog header chunk"""
        return {}

    def manifestheader(self):
        """v10 does not have a manifest header chunk"""
        return {}

    def filelogheader(self):
        """return the header of the filelogs chunk, v10 only has the filename"""
        l = self._chunklength()
        if not l:
            return {}
        fname = readexactly(self._stream, l)
        return {'filename': fname}

    def _deltaheader(self, headertuple, prevnode):
        """Decode a cg1 delta header tuple into its component fields.

        cg1 has no explicit delta base: deltas are against the previous
        node in the stream, or p1 for the first entry of a chain.
        """
        node, p1, p2, cs = headertuple
        if prevnode is None:
            deltabase = p1
        else:
            deltabase = prevnode
        flags = 0
        return node, p1, p2, deltabase, cs, flags

    def deltachunk(self, prevnode):
        """Read one delta entry; returns {} at the end of a group."""
        l = self._chunklength()
        if not l:
            return {}
        headerdata = readexactly(self._stream, self.deltaheadersize)
        header = self.deltaheader.unpack(headerdata)
        delta = readexactly(self._stream, l - self.deltaheadersize)
        node, p1, p2, deltabase, cs, flags = self._deltaheader(header, prevnode)
        return (node, p1, p2, cs, deltabase, delta, flags)

    def getchunks(self):
        """returns all the chunks contains in the bundle

        Used when you need to forward the binary stream to a file or another
        network API. To do so, it parse the changegroup data, otherwise it will
        block in case of sshrepo because it don't know the end of the stream.
        """
        # For changegroup 1 and 2, we expect 3 parts: changelog, manifestlog,
        # and a list of filelogs. For changegroup 3, we expect 4 parts:
        # changelog, manifestlog, a list of tree manifestlogs, and a list of
        # filelogs.
        #
        # Changelog and manifestlog parts are terminated with empty chunks. The
        # tree and file parts are a list of entry sections. Each entry section
        # is a series of chunks terminating in an empty chunk. The list of these
        # entry sections is terminated in yet another empty chunk, so we know
        # we've reached the end of the tree/file list when we reach an empty
        # chunk that was proceeded by no non-empty chunks.

        parts = 0
        while parts < 2 + self._grouplistcount:
            noentries = True
            while True:
                chunk = getchunk(self)
                if not chunk:
                    # The first two empty chunks represent the end of the
                    # changelog and the manifestlog portions. The remaining
                    # empty chunks represent either A) the end of individual
                    # tree or file entries in the file list, or B) the end of
                    # the entire list. It's the end of the entire list if there
                    # were no entries (i.e. noentries is True).
                    if parts < 2:
                        parts += 1
                    elif noentries:
                        parts += 1
                    break
                noentries = False
                yield chunkheader(len(chunk))
                pos = 0
                # Re-emit the payload in at most 1MB slices.
                while pos < len(chunk):
                    next = pos + 2**20
                    yield chunk[pos:next]
                    pos = next
        yield closechunk()

    def _unpackmanifests(self, repo, revmap, trp, prog):
        """Apply the manifest portion of the stream to the repository."""
        self.callback = prog.increment
        # no need to check for empty manifest group here:
        # if the result of the merge of 1 and 2 is the same in 3 and 4,
        # no new manifest will be created and the manifest group will
        # be empty during the pull
        self.manifestheader()
        deltas = self.deltaiter()
        repo.manifestlog.addgroup(deltas, revmap, trp)
        prog.complete()
        self.callback = None

    def apply(self, repo, tr, srctype, url, targetphase=phases.draft,
              expectedtotal=None):
        """Add the changegroup returned by source.read() to this repo.
        srctype is a string like 'push', 'pull', or 'unbundle'. url is
        the URL of the repo where this changegroup is coming from.

        Return an integer summarizing the change to this repo:
        - nothing changed or no source: 0
        - more heads than before: 1+added heads (2..n)
        - fewer heads than before: -1-removed heads (-2..-n)
        - number of heads stays the same: 1
        """
        repo = repo.unfiltered()
        def csmap(x):
            repo.ui.debug("add changeset %s\n" % short(x))
            return len(cl)

        def revmap(x):
            return cl.rev(x)

        changesets = files = revisions = 0

        try:
            # The transaction may already carry source information. In this
            # case we use the top level data. We overwrite the argument
            # because we need to use the top level value (if they exist)
            # in this function.
            srctype = tr.hookargs.setdefault('source', srctype)
            url = tr.hookargs.setdefault('url', url)
            repo.hook('prechangegroup',
                      throw=True, **pycompat.strkwargs(tr.hookargs))

            # write changelog data to temp files so concurrent readers
            # will not see an inconsistent view
            cl = repo.changelog
            cl.delayupdate(tr)
            oldheads = set(cl.heads())

            trp = weakref.proxy(tr)
            # pull off the changeset group
            repo.ui.status(_("adding changesets\n"))
            clstart = len(cl)
            progress = repo.ui.makeprogress(_('changesets'), unit=_('chunks'),
                                            total=expectedtotal)
            self.callback = progress.increment

            # Collect the union of files touched, used to size the
            # file-stage progress bar below.
            efiles = set()
            def onchangelog(cl, node):
                efiles.update(cl.readfiles(node))

            self.changelogheader()
            deltas = self.deltaiter()
            cgnodes = cl.addgroup(deltas, csmap, trp, addrevisioncb=onchangelog)
            efiles = len(efiles)

            if not cgnodes:
                repo.ui.develwarn('applied empty changegroup',
                                  config='warn-empty-changegroup')
            clend = len(cl)
            changesets = clend - clstart
            progress.complete()
            self.callback = None

            # pull off the manifest group
            repo.ui.status(_("adding manifests\n"))
            # We know that we'll never have more manifests than we had
            # changesets.
            progress = repo.ui.makeprogress(_('manifests'), unit=_('chunks'),
                                            total=changesets)
            self._unpackmanifests(repo, revmap, trp, progress)

            needfiles = {}
            if repo.ui.configbool('server', 'validate'):
                cl = repo.changelog
                ml = repo.manifestlog
                # validate incoming csets have their manifests
                for cset in pycompat.xrange(clstart, clend):
                    mfnode = cl.changelogrevision(cset).manifest
                    mfest = ml[mfnode].readdelta()
                    # store file cgnodes we must see
                    for f, n in mfest.iteritems():
                        needfiles.setdefault(f, set()).add(n)

            # process the files
            repo.ui.status(_("adding file changes\n"))
            newrevs, newfiles = _addchangegroupfiles(
                repo, self, revmap, trp, efiles, needfiles)
            revisions += newrevs
            files += newfiles

            deltaheads = 0
            if oldheads:
                heads = cl.heads()
                deltaheads = len(heads) - len(oldheads)
                for h in heads:
                    # Heads that close a branch don't count toward the
                    # user-visible head delta.
                    if h not in oldheads and repo[h].closesbranch():
                        deltaheads -= 1
            htext = ""
            if deltaheads:
                htext = _(" (%+d heads)") % deltaheads

            repo.ui.status(_("added %d changesets"
                             " with %d changes to %d files%s\n")
                           % (changesets, revisions, files, htext))
            repo.invalidatevolatilesets()

            if changesets > 0:
                if 'node' not in tr.hookargs:
                    tr.hookargs['node'] = hex(cl.node(clstart))
                    tr.hookargs['node_last'] = hex(cl.node(clend - 1))
                    hookargs = dict(tr.hookargs)
                else:
                    hookargs = dict(tr.hookargs)
                    hookargs['node'] = hex(cl.node(clstart))
                    hookargs['node_last'] = hex(cl.node(clend - 1))
                repo.hook('pretxnchangegroup',
                          throw=True, **pycompat.strkwargs(hookargs))

            added = [cl.node(r) for r in pycompat.xrange(clstart, clend)]
            phaseall = None
            if srctype in ('push', 'serve'):
                # Old servers can not push the boundary themselves.
                # New servers won't push the boundary if changeset already
                # exists locally as secret
                #
                # We should not use added here but the list of all change in
                # the bundle
                if repo.publishing():
                    targetphase = phaseall = phases.public
                else:
                    # closer target phase computation

                    # Those changesets have been pushed from the
                    # outside, their phases are going to be pushed
                    # alongside. Therefor `targetphase` is
                    # ignored.
                    targetphase = phaseall = phases.draft
            if added:
                phases.registernew(repo, tr, targetphase, added)
            if phaseall is not None:
                phases.advanceboundary(repo, tr, phaseall, cgnodes)

            if changesets > 0:

                def runhooks():
                    # These hooks run when the lock releases, not when the
                    # transaction closes. So it's possible for the changelog
                    # to have changed since we last saw it.
                    if clstart >= len(repo):
                        return

                    repo.hook("changegroup", **pycompat.strkwargs(hookargs))

                    for n in added:
                        args = hookargs.copy()
                        args['node'] = hex(n)
                        del args['node_last']
                        repo.hook("incoming", **pycompat.strkwargs(args))

                    newheads = [h for h in repo.heads()
                                if h not in oldheads]
                    repo.ui.log("incoming",
                                "%d incoming changes - new heads: %s\n",
                                len(added),
                                ', '.join([hex(c[:6]) for c in newheads]))

                tr.addpostclose('changegroup-runhooks-%020i' % clstart,
                                lambda tr: repo._afterlock(runhooks))
        finally:
            repo.ui.flush()
        # never return 0 here:
        if deltaheads < 0:
            ret = deltaheads - 1
        else:
            ret = deltaheads + 1
        return ret

    def deltaiter(self):
        """
        returns an iterator of the deltas in this changegroup

        Useful for passing to the underlying storage system to be stored.
        """
        chain = None
        for chunkdata in iter(lambda: self.deltachunk(chain), {}):
            # Chunkdata: (node, p1, p2, cs, deltabase, delta, flags)
            yield chunkdata
            chain = chunkdata[0]
445 445
class cg2unpacker(cg1unpacker):
    """Unpacker for cg2 streams.

    cg2 streams add support for generaldelta, so the delta header
    format is slightly different. All other features about the data
    remain the same.
    """
    deltaheader = _CHANGEGROUPV2_DELTA_HEADER
    deltaheadersize = deltaheader.size
    version = '02'

    def _deltaheader(self, headertuple, prevnode):
        # cg2 carries the delta base explicitly in the header, so
        # prevnode is unused here; no revlog flags are transmitted.
        node, p1, p2, deltabase, cs = headertuple
        flags = 0
        return node, p1, p2, deltabase, cs, flags
461 461
class cg3unpacker(cg2unpacker):
    """Unpacker for cg3 streams.

    cg3 streams add support for exchanging treemanifests and revlog
    flags. It adds the revlog flags to the delta header and an empty chunk
    separating manifests and files.
    """
    deltaheader = _CHANGEGROUPV3_DELTA_HEADER
    deltaheadersize = deltaheader.size
    version = '03'
    _grouplistcount = 2 # One list of manifests and one list of files

    def _deltaheader(self, headertuple, prevnode):
        # cg3 headers carry the delta base and revlog flags directly;
        # prevnode is unused.
        node, p1, p2, deltabase, cs, flags = headertuple
        return node, p1, p2, deltabase, cs, flags

    def _unpackmanifests(self, repo, revmap, trp, prog):
        """Apply flat manifests, then any per-directory (tree) groups.

        Each tree group is announced by a filelog-style header naming
        the directory; an empty header terminates the list.
        """
        super(cg3unpacker, self)._unpackmanifests(repo, revmap, trp, prog)
        for chunkdata in iter(self.filelogheader, {}):
            # If we get here, there are directory manifests in the changegroup
            d = chunkdata["filename"]
            repo.ui.debug("adding %s revisions\n" % d)
            dirlog = repo.manifestlog._revlog.dirlog(d)
            deltas = self.deltaiter()
            if not dirlog.addgroup(deltas, revmap, trp):
                raise error.Abort(_("received dir revlog group is empty"))
488 488
class headerlessfixup(object):
    """File-like wrapper that re-serves already-consumed header bytes.

    Reads drain the buffered header ``h`` first, then fall through to
    the underlying file handle ``fh``.
    """
    def __init__(self, fh, h):
        self._fh = fh
        self._h = h

    def read(self, n):
        if not self._h:
            return readexactly(self._fh, n)
        d = self._h[:n]
        self._h = self._h[n:]
        if len(d) < n:
            # Header exhausted mid-read; top up from the real stream.
            d += readexactly(self._fh, n - len(d))
        return d
500 500
@attr.s(slots=True, frozen=True)
class revisiondelta(object):
    """Describes a delta entry in a changegroup.

    Captured data is sufficient to serialize the delta into multiple
    formats.
    """
    # 20 byte node of this revision.
    node = attr.ib()
    # 20 byte nodes of parent revisions.
    p1node = attr.ib()
    p2node = attr.ib()
    # 20 byte node of node this delta is against.
    basenode = attr.ib()
    # 20 byte node of changeset revision this delta is associated with.
    linknode = attr.ib()
    # 2 bytes of flags to apply to revision data.
    flags = attr.ib()
    # Iterable of chunks holding raw delta data.
    deltachunks = attr.ib()
521 521
522 522 class cgpacker(object):
    def __init__(self, repo, filematcher, version, allowreorder,
                 useprevdelta, builddeltaheader, manifestsend,
                 sendtreemanifests, bundlecaps=None, shallow=False,
                 ellipsisroots=None):
        """Given a source repo, construct a bundler.

        filematcher is a matcher that matches on files to include in the
        changegroup. Used to facilitate sparse changegroups.

        allowreorder controls whether reordering of revisions is allowed.
        This value is used when ``bundle.reorder`` is ``auto`` or isn't
        set.

        useprevdelta controls whether revisions should always delta against
        the previous revision in the changegroup.

        builddeltaheader is a callable that constructs the header for a group
        delta.

        manifestsend is a chunk to send after manifests have been fully emitted.

        sendtreemanifests indicates whether tree manifests should be emitted.

        bundlecaps is optional and can be used to specify the set of
        capabilities which can be used to build the bundle. While bundlecaps is
        unused in core Mercurial, extensions rely on this feature to communicate
        capabilities to customize the changegroup packer.

        shallow indicates whether shallow data might be sent. The packer may
        need to pack file contents not introduced by the changes being packed.

        ellipsisroots is an optional precomputed mapping of ellipsis revs to
        their roots at the changelog level, used in ellipses serving mode.
        """
        assert filematcher
        self._filematcher = filematcher

        self.version = version
        self._useprevdelta = useprevdelta
        self._builddeltaheader = builddeltaheader
        self._manifestsend = manifestsend
        self._sendtreemanifests = sendtreemanifests

        # Set of capabilities we can use to build the bundle.
        if bundlecaps is None:
            bundlecaps = set()
        self._bundlecaps = bundlecaps
        self._isshallow = shallow

        # Maps ellipsis revs to their roots at the changelog level.
        self._precomputedellipsis = ellipsisroots

        # experimental config: bundle.reorder
        reorder = repo.ui.config('bundle', 'reorder')
        if reorder == 'auto':
            self._reorder = allowreorder
        else:
            self._reorder = stringutil.parsebool(reorder)

        self._repo = repo

        if self._repo.ui.verbose and not self._repo.ui.debugflag:
            self._verbosenote = self._repo.ui.note
        else:
            self._verbosenote = lambda s: None

        # TODO the functionality keyed off of this should probably be
        # controlled via arguments to group() that influence behavior.
        self._changelogdone = False

        # Maps CL revs to per-revlog revisions. Cleared in close() at
        # the end of each group.
        self._clrevtolocalrev = {}
        self._nextclrevtolocalrev = {}

        # Maps changelog nodes to changelog revs. Filled in once
        # during changelog stage and then left unmodified.
        self._clnodetorev = {}
594 598
595 599 def _close(self):
596 600 # Ellipses serving mode.
597 601 self._clrevtolocalrev.clear()
598 602 if self._nextclrevtolocalrev:
599 603 self.clrevtolocalrev = self._nextclrevtolocalrev
600 604 self._nextclrevtolocalrev.clear()
601 605 self._changelogdone = True
602 606
603 607 return closechunk()
604 608
    def _fileheader(self, fname):
        """Return the chunk announcing filelog/section name ``fname``."""
        return chunkheader(len(fname)) + fname
607 611
    # Extracted both for clarity and for overriding in extensions.
    def _sortgroup(self, store, nodelist, lookup):
        """Sort nodes for change group and turn them into revnums.

        ``lookup`` maps a node in ``store`` to its linkrev node.
        """
        # Ellipses serving mode.
        #
        # In a perfect world, we'd generate better ellipsis-ified graphs
        # for non-changelog revlogs. In practice, we haven't started doing
        # that yet, so the resulting DAGs for the manifestlog and filelogs
        # are actually full of bogus parentage on all the ellipsis
        # nodes. This has the side effect that, while the contents are
        # correct, the individual DAGs might be completely out of whack in
        # a case like 882681bc3166 and its ancestors (back about 10
        # revisions or so) in the main hg repo.
        #
        # The one invariant we *know* holds is that the new (potentially
        # bogus) DAG shape will be valid if we order the nodes in the
        # order that they're introduced in dramatis personae by the
        # changelog, so what we do is we sort the non-changelog histories
        # by the order in which they are used by the changelog.
        if util.safehasattr(self, '_full_nodes') and self._clnodetorev:
            key = lambda n: self._clnodetorev[lookup(n)]
            return [store.rev(n) for n in sorted(nodelist, key=key)]

        # for generaldelta revlogs, we linearize the revs; this will both be
        # much quicker and generate a much smaller bundle
        if (store._generaldelta and self._reorder is None) or self._reorder:
            dag = dagutil.revlogdag(store)
            return dag.linearize(set(store.rev(n) for n in nodelist))
        else:
            return sorted([store.rev(n) for n in nodelist])
638 642
    def group(self, nodelist, store, lookup, units=None):
        """Calculate a delta group, yielding a sequence of changegroup chunks
        (strings).

        Given a list of changeset revs, return a set of deltas and
        metadata corresponding to nodes. The first delta is
        first parent(nodelist[0]) -> nodelist[0], the receiver is
        guaranteed to have this parent as it has all history before
        these changesets. In the case firstparent is nullrev the
        changegroup starts with a full revision.

        If units is not None, progress detail will be generated, units specifies
        the type of revlog that is touched (changelog, manifest, etc.).
        """
        # if we don't have any revisions touched by these changesets, bail
        if len(nodelist) == 0:
            yield self._close()
            return

        revs = self._sortgroup(store, nodelist, lookup)

        # add the parent of the first rev
        p = store.parentrevs(revs[0])[0]
        revs.insert(0, p)

        # build deltas
        progress = None
        if units is not None:
            progress = self._repo.ui.makeprogress(_('bundling'), unit=units,
                                                  total=(len(revs) - 1))
        # Walk consecutive pairs: each rev is delta'd against its
        # predecessor in the sorted order.
        for r in pycompat.xrange(len(revs) - 1):
            if progress:
                progress.update(r + 1)
            prev, curr = revs[r], revs[r + 1]
            linknode = lookup(store.node(curr))
            for c in self._revchunk(store, curr, prev, linknode):
                yield c

        if progress:
            progress.complete()
        yield self._close()
680 684
681 685 # filter any nodes that claim to be part of the known set
682 686 def _prune(self, store, missing, commonrevs):
683 687 # TODO this violates storage abstraction for manifests.
684 688 if isinstance(store, manifest.manifestrevlog):
685 689 if not self._filematcher.visitdir(store._dir[:-1] or '.'):
686 690 return []
687 691
688 692 rr, rl = store.rev, store.linkrev
689 693 return [n for n in missing if rl(rr(n)) not in commonrevs]
690 694
691 695 def _packmanifests(self, dir, mfnodes, lookuplinknode):
692 696 """Pack flat manifests into a changegroup stream."""
693 697 assert not dir
694 698 for chunk in self.group(mfnodes, self._repo.manifestlog._revlog,
695 699 lookuplinknode, units=_('manifests')):
696 700 yield chunk
697 701
698 702 def _packtreemanifests(self, dir, mfnodes, lookuplinknode):
699 703 """Version of _packmanifests that operates on directory manifests.
700 704
701 705 Encodes the directory name in the output so multiple manifests
702 706 can be sent.
703 707 """
704 708 assert self.version == b'03'
705 709
706 710 if dir:
707 711 yield self._fileheader(dir)
708 712
709 713 # TODO violates storage abstractions by assuming revlogs.
710 714 dirlog = self._repo.manifestlog._revlog.dirlog(dir)
711 715 for chunk in self.group(mfnodes, dirlog, lookuplinknode,
712 716 units=_('manifests')):
713 717 yield chunk
714 718
    def generate(self, commonrevs, clnodes, fastpathlinkrev, source):
        '''yield a sequence of changegroup chunks (strings)'''
        repo = self._repo
        cl = repo.changelog

        clrevorder = {}
        mfs = {} # needed manifests
        fnodes = {} # needed file nodes
        mfl = repo.manifestlog
        # TODO violates storage abstraction.
        mfrevlog = mfl._revlog
        changedfiles = set()

        # Ellipses serving mode is signalled by narrow-specific state
        # installed on the instance by an extension.
        ellipsesmode = util.safehasattr(self, '_full_nodes')

        # Callback for the changelog, used to collect changed files and
        # manifest nodes.
        # Returns the linkrev node (identity in the changelog case).
        def lookupcl(x):
            c = cl.read(x)
            clrevorder[x] = len(clrevorder)

            if ellipsesmode:
                # Only update mfs if x is going to be sent. Otherwise we
                # end up with bogus linkrevs specified for manifests and
                # we skip some manifest nodes that we should otherwise
                # have sent.
                if (x in self._full_nodes
                    or cl.rev(x) in self._precomputedellipsis):
                    n = c[0]
                    # Record the first changeset introducing this manifest
                    # version.
                    mfs.setdefault(n, x)
                    # Set this narrow-specific dict so we have the lowest
                    # manifest revnum to look up for this cl revnum. (Part of
                    # mapping changelog ellipsis parents to manifest ellipsis
                    # parents)
                    self._nextclrevtolocalrev.setdefault(cl.rev(x),
                                                         mfrevlog.rev(n))
                # We can't trust the changed files list in the changeset if the
                # client requested a shallow clone.
                if self._isshallow:
                    changedfiles.update(mfl[c[0]].read().keys())
                else:
                    changedfiles.update(c[3])
            else:

                n = c[0]
                # record the first changeset introducing this manifest version
                mfs.setdefault(n, x)
                # Record a complete list of potentially-changed files in
                # this manifest.
                changedfiles.update(c[3])

            return x

        self._verbosenote(_('uncompressed size of bundle content:\n'))
        size = 0
        for chunk in self.group(clnodes, cl, lookupcl, units=_('changesets')):
            size += len(chunk)
            yield chunk
        self._verbosenote(_('%8.i (changelog)\n') % size)

        # We need to make sure that the linkrev in the changegroup refers to
        # the first changeset that introduced the manifest or file revision.
        # The fastpath is usually safer than the slowpath, because the filelogs
        # are walked in revlog order.
        #
        # When taking the slowpath with reorder=None and the manifest revlog
        # uses generaldelta, the manifest may be walked in the "wrong" order.
        # Without 'clrevorder', we would get an incorrect linkrev (see fix in
        # cc0ff93d0c0c).
        #
        # When taking the fastpath, we are only vulnerable to reordering
        # of the changelog itself. The changelog never uses generaldelta, so
        # it is only reordered when reorder=True. To handle this case, we
        # simply take the slowpath, which already has the 'clrevorder' logic.
        # This was also fixed in cc0ff93d0c0c.
        fastpathlinkrev = fastpathlinkrev and not self._reorder
        # Treemanifests don't work correctly with fastpathlinkrev
        # either, because we don't discover which directory nodes to
        # send along with files. This could probably be fixed.
        fastpathlinkrev = fastpathlinkrev and (
            'treemanifest' not in repo.requirements)

        for chunk in self.generatemanifests(commonrevs, clrevorder,
                fastpathlinkrev, mfs, fnodes, source):
            yield chunk

        if ellipsesmode:
            mfdicts = None
            if self._isshallow:
                mfdicts = [(self._repo.manifestlog[n].read(), lr)
                           for (n, lr) in mfs.iteritems()]

        mfs.clear()
        clrevs = set(cl.rev(x) for x in clnodes)

        if not fastpathlinkrev:
            def linknodes(unused, fname):
                return fnodes.get(fname, {})
        else:
            cln = cl.node
            def linknodes(filerevlog, fname):
                llr = filerevlog.linkrev
                fln = filerevlog.node
                revs = ((r, llr(r)) for r in filerevlog)
                return dict((fln(r), cln(lr)) for r, lr in revs if lr in clrevs)

        if ellipsesmode:
            # We need to pass the mfdicts variable down into
            # generatefiles(), but more than one command might have
            # wrapped generatefiles so we can't modify the function
            # signature. Instead, we pass the data to ourselves using an
            # instance attribute. I'm sorry.
            self._mfdicts = mfdicts

        for chunk in self.generatefiles(changedfiles, linknodes, commonrevs,
                                        source):
            yield chunk

        yield self._close()

        if clnodes:
            repo.hook('outgoing', node=hex(clnodes[0]), source=source)
840 844
841 845 def generatemanifests(self, commonrevs, clrevorder, fastpathlinkrev, mfs,
842 846 fnodes, source):
843 847 """Returns an iterator of changegroup chunks containing manifests.
844 848
845 849 `source` is unused here, but is used by extensions like remotefilelog to
846 850 change what is sent based in pulls vs pushes, etc.
847 851 """
848 852 repo = self._repo
849 853 mfl = repo.manifestlog
850 854 dirlog = mfl._revlog.dirlog
851 855 tmfnodes = {'': mfs}
852 856
853 857 # Callback for the manifest, used to collect linkrevs for filelog
854 858 # revisions.
855 859 # Returns the linkrev node (collected in lookupcl).
856 860 def makelookupmflinknode(dir, nodes):
857 861 if fastpathlinkrev:
858 862 assert not dir
859 863 return mfs.__getitem__
860 864
861 865 def lookupmflinknode(x):
862 866 """Callback for looking up the linknode for manifests.
863 867
864 868 Returns the linkrev node for the specified manifest.
865 869
866 870 SIDE EFFECT:
867 871
868 872 1) fclnodes gets populated with the list of relevant
869 873 file nodes if we're not using fastpathlinkrev
870 874 2) When treemanifests are in use, collects treemanifest nodes
871 875 to send
872 876
873 877 Note that this means manifests must be completely sent to
874 878 the client before you can trust the list of files and
875 879 treemanifests to send.
876 880 """
877 881 clnode = nodes[x]
878 882 mdata = mfl.get(dir, x).readfast(shallow=True)
879 883 for p, n, fl in mdata.iterentries():
880 884 if fl == 't': # subdirectory manifest
881 885 subdir = dir + p + '/'
882 886 tmfclnodes = tmfnodes.setdefault(subdir, {})
883 887 tmfclnode = tmfclnodes.setdefault(n, clnode)
884 888 if clrevorder[clnode] < clrevorder[tmfclnode]:
885 889 tmfclnodes[n] = clnode
886 890 else:
887 891 f = dir + p
888 892 fclnodes = fnodes.setdefault(f, {})
889 893 fclnode = fclnodes.setdefault(n, clnode)
890 894 if clrevorder[clnode] < clrevorder[fclnode]:
891 895 fclnodes[n] = clnode
892 896 return clnode
893 897 return lookupmflinknode
894 898
895 899 fn = (self._packtreemanifests if self._sendtreemanifests
896 900 else self._packmanifests)
897 901 size = 0
898 902 while tmfnodes:
899 903 dir, nodes = tmfnodes.popitem()
900 904 prunednodes = self._prune(dirlog(dir), nodes, commonrevs)
901 905 if not dir or prunednodes:
902 906 for x in fn(dir, prunednodes, makelookupmflinknode(dir, nodes)):
903 907 size += len(x)
904 908 yield x
905 909 self._verbosenote(_('%8.i (manifests)\n') % size)
906 910 yield self._manifestsend
907 911
908 912 # The 'source' parameter is useful for extensions
909 913 def generatefiles(self, changedfiles, linknodes, commonrevs, source):
910 914 changedfiles = list(filter(self._filematcher, changedfiles))
911 915
912 916 if self._isshallow:
913 917 # See comment in generate() for why this sadness is a thing.
914 918 mfdicts = self._mfdicts
915 919 del self._mfdicts
916 920 # In a shallow clone, the linknodes callback needs to also include
917 921 # those file nodes that are in the manifests we sent but weren't
918 922 # introduced by those manifests.
919 923 commonctxs = [self._repo[c] for c in commonrevs]
920 924 oldlinknodes = linknodes
921 925 clrev = self._repo.changelog.rev
922 926
923 927 # Defining this function has a side-effect of overriding the
924 928 # function of the same name that was passed in as an argument.
925 929 # TODO have caller pass in appropriate function.
926 930 def linknodes(flog, fname):
927 931 for c in commonctxs:
928 932 try:
929 933 fnode = c.filenode(fname)
930 934 self._clrevtolocalrev[c.rev()] = flog.rev(fnode)
931 935 except error.ManifestLookupError:
932 936 pass
933 937 links = oldlinknodes(flog, fname)
934 938 if len(links) != len(mfdicts):
935 939 for mf, lr in mfdicts:
936 940 fnode = mf.get(fname, None)
937 941 if fnode in links:
938 942 links[fnode] = min(links[fnode], lr, key=clrev)
939 943 elif fnode:
940 944 links[fnode] = lr
941 945 return links
942 946
943 947 return self._generatefiles(changedfiles, linknodes, commonrevs, source)
944 948
945 949 def _generatefiles(self, changedfiles, linknodes, commonrevs, source):
946 950 repo = self._repo
947 951 progress = repo.ui.makeprogress(_('bundling'), unit=_('files'),
948 952 total=len(changedfiles))
949 953 for i, fname in enumerate(sorted(changedfiles)):
950 954 filerevlog = repo.file(fname)
951 955 if not filerevlog:
952 956 raise error.Abort(_("empty or missing file data for %s") %
953 957 fname)
954 958
955 959 linkrevnodes = linknodes(filerevlog, fname)
956 960 # Lookup for filenodes, we collected the linkrev nodes above in the
957 961 # fastpath case and with lookupmf in the slowpath case.
958 962 def lookupfilelog(x):
959 963 return linkrevnodes[x]
960 964
961 965 filenodes = self._prune(filerevlog, linkrevnodes, commonrevs)
962 966 if filenodes:
963 967 progress.update(i + 1, item=fname)
964 968 h = self._fileheader(fname)
965 969 size = len(h)
966 970 yield h
967 971 for chunk in self.group(filenodes, filerevlog, lookupfilelog):
968 972 size += len(chunk)
969 973 yield chunk
970 974 self._verbosenote(_('%8.i %s\n') % (size, fname))
971 975 progress.complete()
972 976
973 977 def _deltaparent(self, store, rev, p1, p2, prev):
974 978 if self._useprevdelta:
975 979 if not store.candelta(prev, rev):
976 980 raise error.ProgrammingError(
977 981 'cg1 should not be used in this case')
978 982 return prev
979 983
980 984 # Narrow ellipses mode.
981 985 if util.safehasattr(self, '_full_nodes'):
982 986 # TODO: send better deltas when in narrow mode.
983 987 #
984 988 # changegroup.group() loops over revisions to send,
985 989 # including revisions we'll skip. What this means is that
986 990 # `prev` will be a potentially useless delta base for all
987 991 # ellipsis nodes, as the client likely won't have it. In
988 992 # the future we should do bookkeeping about which nodes
989 993 # have been sent to the client, and try to be
990 994 # significantly smarter about delta bases. This is
991 995 # slightly tricky because this same code has to work for
992 996 # all revlogs, and we don't have the linkrev/linknode here.
993 997 return p1
994 998
995 999 dp = store.deltaparent(rev)
996 1000 if dp == nullrev and store.storedeltachains:
997 1001 # Avoid sending full revisions when delta parent is null. Pick prev
998 1002 # in that case. It's tempting to pick p1 in this case, as p1 will
999 1003 # be smaller in the common case. However, computing a delta against
1000 1004 # p1 may require resolving the raw text of p1, which could be
1001 1005 # expensive. The revlog caches should have prev cached, meaning
1002 1006 # less CPU for changegroup generation. There is likely room to add
1003 1007 # a flag and/or config option to control this behavior.
1004 1008 base = prev
1005 1009 elif dp == nullrev:
1006 1010 # revlog is configured to use full snapshot for a reason,
1007 1011 # stick to full snapshot.
1008 1012 base = nullrev
1009 1013 elif dp not in (p1, p2, prev):
1010 1014 # Pick prev when we can't be sure remote has the base revision.
1011 1015 return prev
1012 1016 else:
1013 1017 base = dp
1014 1018
1015 1019 if base != nullrev and not store.candelta(base, rev):
1016 1020 base = nullrev
1017 1021
1018 1022 return base
1019 1023
1020 1024 def _revchunk(self, store, rev, prev, linknode):
1021 1025 if util.safehasattr(self, '_full_nodes'):
1022 1026 fn = self._revisiondeltanarrow
1023 1027 else:
1024 1028 fn = self._revisiondeltanormal
1025 1029
1026 1030 delta = fn(store, rev, prev, linknode)
1027 1031 if not delta:
1028 1032 return
1029 1033
1030 1034 meta = self._builddeltaheader(delta)
1031 1035 l = len(meta) + sum(len(x) for x in delta.deltachunks)
1032 1036
1033 1037 yield chunkheader(l)
1034 1038 yield meta
1035 1039 for x in delta.deltachunks:
1036 1040 yield x
1037 1041
1038 1042 def _revisiondeltanormal(self, store, rev, prev, linknode):
1039 1043 node = store.node(rev)
1040 1044 p1, p2 = store.parentrevs(rev)
1041 1045 base = self._deltaparent(store, rev, p1, p2, prev)
1042 1046
1043 1047 prefix = ''
1044 1048 if store.iscensored(base) or store.iscensored(rev):
1045 1049 try:
1046 1050 delta = store.revision(node, raw=True)
1047 1051 except error.CensoredNodeError as e:
1048 1052 delta = e.tombstone
1049 1053 if base == nullrev:
1050 1054 prefix = mdiff.trivialdiffheader(len(delta))
1051 1055 else:
1052 1056 baselen = store.rawsize(base)
1053 1057 prefix = mdiff.replacediffheader(baselen, len(delta))
1054 1058 elif base == nullrev:
1055 1059 delta = store.revision(node, raw=True)
1056 1060 prefix = mdiff.trivialdiffheader(len(delta))
1057 1061 else:
1058 1062 delta = store.revdiff(base, rev)
1059 1063 p1n, p2n = store.parents(node)
1060 1064
1061 1065 return revisiondelta(
1062 1066 node=node,
1063 1067 p1node=p1n,
1064 1068 p2node=p2n,
1065 1069 basenode=store.node(base),
1066 1070 linknode=linknode,
1067 1071 flags=store.flags(rev),
1068 1072 deltachunks=(prefix, delta),
1069 1073 )
1070 1074
1071 1075 def _revisiondeltanarrow(self, store, rev, prev, linknode):
1072 1076 # build up some mapping information that's useful later. See
1073 1077 # the local() nested function below.
1074 1078 if not self._changelogdone:
1075 1079 self._clnodetorev[linknode] = rev
1076 1080 linkrev = rev
1077 1081 self._clrevtolocalrev[linkrev] = rev
1078 1082 else:
1079 1083 linkrev = self._clnodetorev[linknode]
1080 1084 self._clrevtolocalrev[linkrev] = rev
1081 1085
1082 1086 # This is a node to send in full, because the changeset it
1083 1087 # corresponds to was a full changeset.
1084 1088 if linknode in self._full_nodes:
1085 1089 return self._revisiondeltanormal(store, rev, prev, linknode)
1086 1090
1087 1091 # At this point, a node can either be one we should skip or an
1088 1092 # ellipsis. If it's not an ellipsis, bail immediately.
1089 if linkrev not in self._precomputed_ellipsis:
1093 if linkrev not in self._precomputedellipsis:
1090 1094 return
1091 1095
1092 linkparents = self._precomputed_ellipsis[linkrev]
1096 linkparents = self._precomputedellipsis[linkrev]
1093 1097 def local(clrev):
1094 1098 """Turn a changelog revnum into a local revnum.
1095 1099
1096 1100 The ellipsis dag is stored as revnums on the changelog,
1097 1101 but when we're producing ellipsis entries for
1098 1102 non-changelog revlogs, we need to turn those numbers into
1099 1103 something local. This does that for us, and during the
1100 1104 changelog sending phase will also expand the stored
1101 1105 mappings as needed.
1102 1106 """
1103 1107 if clrev == nullrev:
1104 1108 return nullrev
1105 1109
1106 1110 if not self._changelogdone:
1107 1111 # If we're doing the changelog, it's possible that we
1108 1112 # have a parent that is already on the client, and we
1109 1113 # need to store some extra mapping information so that
1110 1114 # our contained ellipsis nodes will be able to resolve
1111 1115 # their parents.
1112 1116 if clrev not in self._clrevtolocalrev:
1113 1117 clnode = store.node(clrev)
1114 1118 self._clnodetorev[clnode] = clrev
1115 1119 return clrev
1116 1120
1117 1121 # Walk the ellipsis-ized changelog breadth-first looking for a
1118 1122 # change that has been linked from the current revlog.
1119 1123 #
1120 1124 # For a flat manifest revlog only a single step should be necessary
1121 1125 # as all relevant changelog entries are relevant to the flat
1122 1126 # manifest.
1123 1127 #
1124 1128 # For a filelog or tree manifest dirlog however not every changelog
1125 1129 # entry will have been relevant, so we need to skip some changelog
1126 1130 # nodes even after ellipsis-izing.
1127 1131 walk = [clrev]
1128 1132 while walk:
1129 1133 p = walk[0]
1130 1134 walk = walk[1:]
1131 1135 if p in self._clrevtolocalrev:
1132 1136 return self._clrevtolocalrev[p]
1133 1137 elif p in self._full_nodes:
1134 1138 walk.extend([pp for pp in self._repo.changelog.parentrevs(p)
1135 1139 if pp != nullrev])
1136 elif p in self._precomputed_ellipsis:
1137 walk.extend([pp for pp in self._precomputed_ellipsis[p]
1140 elif p in self._precomputedellipsis:
1141 walk.extend([pp for pp in self._precomputedellipsis[p]
1138 1142 if pp != nullrev])
1139 1143 else:
1140 1144 # In this case, we've got an ellipsis with parents
1141 1145 # outside the current bundle (likely an
1142 1146 # incremental pull). We "know" that we can use the
1143 1147 # value of this same revlog at whatever revision
1144 1148 # is pointed to by linknode. "Know" is in scare
1145 1149 # quotes because I haven't done enough examination
1146 1150 # of edge cases to convince myself this is really
1147 1151 # a fact - it works for all the (admittedly
1148 1152 # thorough) cases in our testsuite, but I would be
1149 1153 # somewhat unsurprised to find a case in the wild
1150 1154 # where this breaks down a bit. That said, I don't
1151 1155 # know if it would hurt anything.
1152 1156 for i in pycompat.xrange(rev, 0, -1):
1153 1157 if store.linkrev(i) == clrev:
1154 1158 return i
1155 1159 # We failed to resolve a parent for this node, so
1156 1160 # we crash the changegroup construction.
1157 1161 raise error.Abort(
1158 1162 'unable to resolve parent while packing %r %r'
1159 1163 ' for changeset %r' % (store.indexfile, rev, clrev))
1160 1164
1161 1165 return nullrev
1162 1166
1163 1167 if not linkparents or (
1164 1168 store.parentrevs(rev) == (nullrev, nullrev)):
1165 1169 p1, p2 = nullrev, nullrev
1166 1170 elif len(linkparents) == 1:
1167 1171 p1, = sorted(local(p) for p in linkparents)
1168 1172 p2 = nullrev
1169 1173 else:
1170 1174 p1, p2 = sorted(local(p) for p in linkparents)
1171 1175
1172 1176 n = store.node(rev)
1173 1177 p1n, p2n = store.node(p1), store.node(p2)
1174 1178 flags = store.flags(rev)
1175 1179 flags |= revlog.REVIDX_ELLIPSIS
1176 1180
1177 1181 # TODO: try and actually send deltas for ellipsis data blocks
1178 1182 data = store.revision(n)
1179 1183 diffheader = mdiff.trivialdiffheader(len(data))
1180 1184
1181 1185 return revisiondelta(
1182 1186 node=n,
1183 1187 p1node=p1n,
1184 1188 p2node=p2n,
1185 1189 basenode=nullid,
1186 1190 linknode=linknode,
1187 1191 flags=flags,
1188 1192 deltachunks=(diffheader, data),
1189 1193 )
1190 1194
def _makecg1packer(repo, filematcher, bundlecaps, shallow=False,
                   ellipsisroots=None):
    """Construct a cgpacker emitting version 01 changegroups."""
    builddeltaheader = lambda d: _CHANGEGROUPV1_DELTA_HEADER.pack(
        d.node, d.p1node, d.p2node, d.linknode)

    return cgpacker(repo, filematcher, b'01',
                    useprevdelta=True,
                    allowreorder=None,
                    builddeltaheader=builddeltaheader,
                    manifestsend=b'',
                    sendtreemanifests=False,
                    bundlecaps=bundlecaps,
                    shallow=shallow,
                    ellipsisroots=ellipsisroots)
1203 1209
def _makecg2packer(repo, filematcher, bundlecaps, shallow=False,
                   ellipsisroots=None):
    """Construct a cgpacker emitting version 02 changegroups."""
    builddeltaheader = lambda d: _CHANGEGROUPV2_DELTA_HEADER.pack(
        d.node, d.p1node, d.p2node, d.basenode, d.linknode)

    # Since generaldelta is directly supported by cg2, reordering
    # generally doesn't help, so we disable it by default (treating
    # bundle.reorder=auto just like bundle.reorder=False).
    return cgpacker(repo, filematcher, b'02',
                    useprevdelta=False,
                    allowreorder=False,
                    builddeltaheader=builddeltaheader,
                    manifestsend=b'',
                    sendtreemanifests=False,
                    bundlecaps=bundlecaps,
                    shallow=shallow,
                    ellipsisroots=ellipsisroots)
1219 1227
def _makecg3packer(repo, filematcher, bundlecaps, shallow=False,
                   ellipsisroots=None):
    """Construct a cgpacker emitting version 03 changegroups.

    cg3 carries revision flags and tree manifests, and terminates the
    manifest section with an explicit close chunk.
    """
    builddeltaheader = lambda d: _CHANGEGROUPV3_DELTA_HEADER.pack(
        d.node, d.p1node, d.p2node, d.basenode, d.linknode, d.flags)

    return cgpacker(repo, filematcher, b'03',
                    useprevdelta=False,
                    allowreorder=False,
                    builddeltaheader=builddeltaheader,
                    manifestsend=closechunk(),
                    sendtreemanifests=True,
                    bundlecaps=bundlecaps,
                    shallow=shallow,
                    ellipsisroots=ellipsisroots)
1232 1242
# Maps changegroup version -> (packer factory, unpacker class).
_packermap = {'01': (_makecg1packer, cg1unpacker),
              # cg2 adds support for exchanging generaldelta
              '02': (_makecg2packer, cg2unpacker),
              # cg3 adds support for exchanging revlog flags and treemanifests
              '03': (_makecg3packer, cg3unpacker),
}
1239 1249
def allsupportedversions(repo):
    """Return the set of changegroup versions this code knows about,
    gated on the repo's treemanifest/cg3 configuration."""
    versions = set(_packermap.keys())
    if not (repo.ui.configbool('experimental', 'changegroup3') or
            repo.ui.configbool('experimental', 'treemanifest') or
            'treemanifest' in repo.requirements):
        versions.discard('03')
    return versions
1247 1257
# Changegroup versions that can be applied to the repo
def supportedincomingversions(repo):
    return allsupportedversions(repo)
1251 1261
# Changegroup versions that can be created from the repo
def supportedoutgoingversions(repo):
    versions = allsupportedversions(repo)
    if 'treemanifest' in repo.requirements:
        # Versions 01 and 02 support only flat manifests and it's just too
        # expensive to convert between the flat manifest and tree manifest on
        # the fly. Since tree manifests are hashed differently, all of history
        # would have to be converted. Instead, we simply don't even pretend to
        # support versions 01 and 02.
        versions.discard('01')
        versions.discard('02')
    if repository.NARROW_REQUIREMENT in repo.requirements:
        # Versions 01 and 02 don't support revlog flags, and we need to
        # support that for stripping and unbundling to work.
        versions.discard('01')
        versions.discard('02')
    if LFS_REQUIREMENT in repo.requirements:
        # Versions 01 and 02 don't support revlog flags, and we need to
        # mark LFS entries with REVIDX_EXTSTORED.
        versions.discard('01')
        versions.discard('02')

    return versions
1275 1285
def localversion(repo):
    # Finds the best version to use for bundles that are meant to be used
    # locally, such as those from strip and shelve, and temporary bundles.
    return max(supportedoutgoingversions(repo))
1280 1290
def safeversion(repo):
    # Finds the smallest version that it's safe to assume clients of the repo
    # will support. For example, all hg versions that support generaldelta also
    # support changegroup 02.
    versions = supportedoutgoingversions(repo)
    if 'generaldelta' in repo.requirements:
        versions.discard('01')
    assert versions
    return min(versions)
1290 1300
def getbundler(version, repo, bundlecaps=None, filematcher=None,
               shallow=False, ellipsisroots=None):
    """Return a cgpacker for ``version``, validating matcher support.

    Raises ``error.ProgrammingError`` for a sparse matcher with cg1.
    """
    assert version in supportedoutgoingversions(repo)

    if filematcher is None:
        filematcher = matchmod.alwaysmatcher(repo.root, '')

    if version == '01' and not filematcher.always():
        raise error.ProgrammingError('version 01 changegroups do not support '
                                     'sparse file matchers')

    # Requested files could include files not in the local store. So
    # filter those out.
    filematcher = matchmod.intersectmatchers(repo.narrowmatch(),
                                             filematcher)

    fn = _packermap[version][0]
    return fn(repo, filematcher, bundlecaps, shallow=shallow,
              ellipsisroots=ellipsisroots)
1309 1320
def getunbundler(version, fh, alg, extras=None):
    """Return the unpacker for ``version`` reading from stream ``fh``."""
    return _packermap[version][1](fh, alg, extras=extras)
1312 1323
def _changegroupinfo(repo, nodes, source):
    """Report the outgoing changeset count (and list, when debugging)."""
    if repo.ui.verbose or source == 'bundle':
        repo.ui.status(_("%d changesets found\n") % len(nodes))
    if repo.ui.debugflag:
        repo.ui.debug("list of changesets:\n")
        for node in nodes:
            repo.ui.debug("%s\n" % hex(node))
1320 1331
def makechangegroup(repo, outgoing, version, source, fastpath=False,
                    bundlecaps=None):
    """Build a changegroup stream and wrap it in the matching unbundler."""
    cgstream = makestream(repo, outgoing, version, source,
                          fastpath=fastpath, bundlecaps=bundlecaps)
    return getunbundler(version, util.chunkbuffer(cgstream), None,
                        {'clcount': len(outgoing.missing) })
1327 1338
def makestream(repo, outgoing, version, source, fastpath=False,
               bundlecaps=None, filematcher=None):
    """Return a generator of changegroup chunks for ``outgoing`` changesets."""
    bundler = getbundler(version, repo, bundlecaps=bundlecaps,
                         filematcher=filematcher)

    repo = repo.unfiltered()
    commonrevs = outgoing.common
    csets = outgoing.missing
    heads = outgoing.missingheads
    # We go through the fast path if we get told to, or if all (unfiltered
    # heads have been requested (since we then know there all linkrevs will
    # be pulled by the client).
    heads.sort()
    fastpathlinkrev = fastpath or (
            repo.filtername is None and heads == sorted(repo.heads()))

    repo.hook('preoutgoing', throw=True, source=source)
    _changegroupinfo(repo, csets, source)
    return bundler.generate(commonrevs, csets, fastpathlinkrev, source)
1347 1358
def _addchangegroupfiles(repo, source, revmap, trp, expectedfiles, needfiles):
    """Apply the file portion of an incoming changegroup.

    Returns ``(revisions, files)`` counts. Raises ``error.Abort`` on
    empty groups, censored delta bases, spurious entries, or missing
    file data.
    """
    revisions = 0
    files = 0
    progress = repo.ui.makeprogress(_('files'), unit=_('files'),
                                    total=expectedfiles)
    for chunkdata in iter(source.filelogheader, {}):
        files += 1
        f = chunkdata["filename"]
        repo.ui.debug("adding %s revisions\n" % f)
        progress.increment()
        fl = repo.file(f)
        o = len(fl)
        try:
            deltas = source.deltaiter()
            if not fl.addgroup(deltas, revmap, trp):
                raise error.Abort(_("received file revlog group is empty"))
        except error.CensoredBaseError as e:
            raise error.Abort(_("received delta base is censored: %s") % e)
        revisions += len(fl) - o
        if f in needfiles:
            needs = needfiles[f]
            for new in pycompat.xrange(o, len(fl)):
                n = fl.node(new)
                if n in needs:
                    needs.remove(n)
                else:
                    raise error.Abort(
                        _("received spurious file revlog entry"))
            if not needs:
                del needfiles[f]
    progress.complete()

    for f, needs in needfiles.iteritems():
        fl = repo.file(f)
        for n in needs:
            try:
                fl.rev(n)
            except error.LookupError:
                raise error.Abort(
                    _('missing file data for %s:%s - run hg verify') %
                    (f, hex(n)))

    return revisions, files
1391 1402
def _packellipsischangegroup(repo, common, match, relevant_nodes,
                             ellipsisroots, visitnodes, depth, source, version):
    """Produce a changegroup stream with ellipsis (narrow) semantics.

    Raises ``error.Abort`` for versions below cg3, which cannot carry
    the required revlog flags.
    """
    if version in ('01', '02'):
        raise error.Abort(
            'ellipsis nodes require at least cg3 on client and server, '
            'but negotiated version %s' % version)
    # We wrap cg1packer.revchunk, using a side channel to pass
    # relevant_nodes into that area. Then if linknode isn't in the
    # set, we know we have an ellipsis node and we should defer
    # sending that node's data. We override close() to detect
    # pending ellipsis nodes and flush them.
    packer = getbundler(version, repo, filematcher=match,
                        shallow=depth is not None,
                        ellipsisroots=ellipsisroots)
    # Give the packer the list of nodes which should not be
    # ellipsis nodes. We store this rather than the set of nodes
    # that should be an ellipsis because for very large histories
    # we expect this to be significantly smaller.
    packer._full_nodes = relevant_nodes

    return packer.generate(common, visitnodes, False, source)
General Comments 0
You need to be logged in to leave comments. Login now