# Unified diff of mercurial/changegroup.py
# Changeset: r39016:4a202bcc (branch: default)
# Author:    Gregory Szorc
# Summary:   changegroup: factor changelogdone into an argument
# Note: the numbers at the start of each line below are the diff's
# old/new line-number gutters from the review tool, not file content.
@@ -1,1438 +1,1434
1 1 # changegroup.py - Mercurial changegroup manipulation functions
2 2 #
3 3 # Copyright 2006 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import os
11 11 import struct
12 12 import weakref
13 13
14 14 from .i18n import _
15 15 from .node import (
16 16 hex,
17 17 nullid,
18 18 nullrev,
19 19 short,
20 20 )
21 21
22 22 from .thirdparty import (
23 23 attr,
24 24 )
25 25
26 26 from . import (
27 27 dagutil,
28 28 error,
29 29 manifest,
30 30 match as matchmod,
31 31 mdiff,
32 32 phases,
33 33 pycompat,
34 34 repository,
35 35 revlog,
36 36 util,
37 37 )
38 38
39 39 from .utils import (
40 40 stringutil,
41 41 )
42 42
43 43 _CHANGEGROUPV1_DELTA_HEADER = struct.Struct("20s20s20s20s")
44 44 _CHANGEGROUPV2_DELTA_HEADER = struct.Struct("20s20s20s20s20s")
45 45 _CHANGEGROUPV3_DELTA_HEADER = struct.Struct(">20s20s20s20s20sH")
46 46
47 47 LFS_REQUIREMENT = 'lfs'
48 48
49 49 readexactly = util.readexactly
50 50
51 51 def getchunk(stream):
52 52 """return the next chunk from stream as a string"""
53 53 d = readexactly(stream, 4)
54 54 l = struct.unpack(">l", d)[0]
55 55 if l <= 4:
56 56 if l:
57 57 raise error.Abort(_("invalid chunk length %d") % l)
58 58 return ""
59 59 return readexactly(stream, l - 4)
60 60
61 61 def chunkheader(length):
62 62 """return a changegroup chunk header (string)"""
63 63 return struct.pack(">l", length + 4)
64 64
65 65 def closechunk():
66 66 """return a changegroup chunk header (string) for a zero-length chunk"""
67 67 return struct.pack(">l", 0)
68 68
69 69 def writechunks(ui, chunks, filename, vfs=None):
70 70 """Write chunks to a file and return its filename.
71 71
72 72 The stream is assumed to be a bundle file.
73 73 Existing files will not be overwritten.
74 74 If no filename is specified, a temporary file is created.
75 75 """
76 76 fh = None
77 77 cleanup = None
78 78 try:
79 79 if filename:
80 80 if vfs:
81 81 fh = vfs.open(filename, "wb")
82 82 else:
83 83 # Increase default buffer size because default is usually
84 84 # small (4k is common on Linux).
85 85 fh = open(filename, "wb", 131072)
86 86 else:
87 87 fd, filename = pycompat.mkstemp(prefix="hg-bundle-", suffix=".hg")
88 88 fh = os.fdopen(fd, r"wb")
89 89 cleanup = filename
90 90 for c in chunks:
91 91 fh.write(c)
92 92 cleanup = None
93 93 return filename
94 94 finally:
95 95 if fh is not None:
96 96 fh.close()
97 97 if cleanup is not None:
98 98 if filename and vfs:
99 99 vfs.unlink(cleanup)
100 100 else:
101 101 os.unlink(cleanup)
102 102
103 103 class cg1unpacker(object):
104 104 """Unpacker for cg1 changegroup streams.
105 105
106 106 A changegroup unpacker handles the framing of the revision data in
107 107 the wire format. Most consumers will want to use the apply()
108 108 method to add the changes from the changegroup to a repository.
109 109
110 110 If you're forwarding a changegroup unmodified to another consumer,
111 111 use getchunks(), which returns an iterator of changegroup
112 112 chunks. This is mostly useful for cases where you need to know the
113 113 data stream has ended by observing the end of the changegroup.
114 114
115 115 deltachunk() is useful only if you're applying delta data. Most
116 116 consumers should prefer apply() instead.
117 117
118 118 A few other public methods exist. Those are used only for
119 119 bundlerepo and some debug commands - their use is discouraged.
120 120 """
121 121 deltaheader = _CHANGEGROUPV1_DELTA_HEADER
122 122 deltaheadersize = deltaheader.size
123 123 version = '01'
124 124 _grouplistcount = 1 # One list of files after the manifests
125 125
126 126 def __init__(self, fh, alg, extras=None):
127 127 if alg is None:
128 128 alg = 'UN'
129 129 if alg not in util.compengines.supportedbundletypes:
130 130 raise error.Abort(_('unknown stream compression type: %s')
131 131 % alg)
132 132 if alg == 'BZ':
133 133 alg = '_truncatedBZ'
134 134
135 135 compengine = util.compengines.forbundletype(alg)
136 136 self._stream = compengine.decompressorreader(fh)
137 137 self._type = alg
138 138 self.extras = extras or {}
139 139 self.callback = None
140 140
141 141 # These methods (compressed, read, seek, tell) all appear to only
142 142 # be used by bundlerepo, but it's a little hard to tell.
143 143 def compressed(self):
144 144 return self._type is not None and self._type != 'UN'
145 145 def read(self, l):
146 146 return self._stream.read(l)
147 147 def seek(self, pos):
148 148 return self._stream.seek(pos)
149 149 def tell(self):
150 150 return self._stream.tell()
151 151 def close(self):
152 152 return self._stream.close()
153 153
154 154 def _chunklength(self):
155 155 d = readexactly(self._stream, 4)
156 156 l = struct.unpack(">l", d)[0]
157 157 if l <= 4:
158 158 if l:
159 159 raise error.Abort(_("invalid chunk length %d") % l)
160 160 return 0
161 161 if self.callback:
162 162 self.callback()
163 163 return l - 4
164 164
165 165 def changelogheader(self):
166 166 """v10 does not have a changelog header chunk"""
167 167 return {}
168 168
169 169 def manifestheader(self):
170 170 """v10 does not have a manifest header chunk"""
171 171 return {}
172 172
173 173 def filelogheader(self):
174 174 """return the header of the filelogs chunk, v10 only has the filename"""
175 175 l = self._chunklength()
176 176 if not l:
177 177 return {}
178 178 fname = readexactly(self._stream, l)
179 179 return {'filename': fname}
180 180
181 181 def _deltaheader(self, headertuple, prevnode):
182 182 node, p1, p2, cs = headertuple
183 183 if prevnode is None:
184 184 deltabase = p1
185 185 else:
186 186 deltabase = prevnode
187 187 flags = 0
188 188 return node, p1, p2, deltabase, cs, flags
189 189
190 190 def deltachunk(self, prevnode):
191 191 l = self._chunklength()
192 192 if not l:
193 193 return {}
194 194 headerdata = readexactly(self._stream, self.deltaheadersize)
195 195 header = self.deltaheader.unpack(headerdata)
196 196 delta = readexactly(self._stream, l - self.deltaheadersize)
197 197 node, p1, p2, deltabase, cs, flags = self._deltaheader(header, prevnode)
198 198 return (node, p1, p2, cs, deltabase, delta, flags)
199 199
200 200 def getchunks(self):
201 201 """returns all the chunks contains in the bundle
202 202
203 203 Used when you need to forward the binary stream to a file or another
204 204 network API. To do so, it parse the changegroup data, otherwise it will
205 205 block in case of sshrepo because it don't know the end of the stream.
206 206 """
207 207 # For changegroup 1 and 2, we expect 3 parts: changelog, manifestlog,
208 208 # and a list of filelogs. For changegroup 3, we expect 4 parts:
209 209 # changelog, manifestlog, a list of tree manifestlogs, and a list of
210 210 # filelogs.
211 211 #
212 212 # Changelog and manifestlog parts are terminated with empty chunks. The
213 213 # tree and file parts are a list of entry sections. Each entry section
214 214 # is a series of chunks terminating in an empty chunk. The list of these
215 215 # entry sections is terminated in yet another empty chunk, so we know
216 216 # we've reached the end of the tree/file list when we reach an empty
217 217 # chunk that was proceeded by no non-empty chunks.
218 218
219 219 parts = 0
220 220 while parts < 2 + self._grouplistcount:
221 221 noentries = True
222 222 while True:
223 223 chunk = getchunk(self)
224 224 if not chunk:
225 225 # The first two empty chunks represent the end of the
226 226 # changelog and the manifestlog portions. The remaining
227 227 # empty chunks represent either A) the end of individual
228 228 # tree or file entries in the file list, or B) the end of
229 229 # the entire list. It's the end of the entire list if there
230 230 # were no entries (i.e. noentries is True).
231 231 if parts < 2:
232 232 parts += 1
233 233 elif noentries:
234 234 parts += 1
235 235 break
236 236 noentries = False
237 237 yield chunkheader(len(chunk))
238 238 pos = 0
239 239 while pos < len(chunk):
240 240 next = pos + 2**20
241 241 yield chunk[pos:next]
242 242 pos = next
243 243 yield closechunk()
244 244
245 245 def _unpackmanifests(self, repo, revmap, trp, prog):
246 246 self.callback = prog.increment
247 247 # no need to check for empty manifest group here:
248 248 # if the result of the merge of 1 and 2 is the same in 3 and 4,
249 249 # no new manifest will be created and the manifest group will
250 250 # be empty during the pull
251 251 self.manifestheader()
252 252 deltas = self.deltaiter()
253 253 repo.manifestlog.addgroup(deltas, revmap, trp)
254 254 prog.complete()
255 255 self.callback = None
256 256
257 257 def apply(self, repo, tr, srctype, url, targetphase=phases.draft,
258 258 expectedtotal=None):
259 259 """Add the changegroup returned by source.read() to this repo.
260 260 srctype is a string like 'push', 'pull', or 'unbundle'. url is
261 261 the URL of the repo where this changegroup is coming from.
262 262
263 263 Return an integer summarizing the change to this repo:
264 264 - nothing changed or no source: 0
265 265 - more heads than before: 1+added heads (2..n)
266 266 - fewer heads than before: -1-removed heads (-2..-n)
267 267 - number of heads stays the same: 1
268 268 """
269 269 repo = repo.unfiltered()
270 270 def csmap(x):
271 271 repo.ui.debug("add changeset %s\n" % short(x))
272 272 return len(cl)
273 273
274 274 def revmap(x):
275 275 return cl.rev(x)
276 276
277 277 changesets = files = revisions = 0
278 278
279 279 try:
280 280 # The transaction may already carry source information. In this
281 281 # case we use the top level data. We overwrite the argument
282 282 # because we need to use the top level value (if they exist)
283 283 # in this function.
284 284 srctype = tr.hookargs.setdefault('source', srctype)
285 285 url = tr.hookargs.setdefault('url', url)
286 286 repo.hook('prechangegroup',
287 287 throw=True, **pycompat.strkwargs(tr.hookargs))
288 288
289 289 # write changelog data to temp files so concurrent readers
290 290 # will not see an inconsistent view
291 291 cl = repo.changelog
292 292 cl.delayupdate(tr)
293 293 oldheads = set(cl.heads())
294 294
295 295 trp = weakref.proxy(tr)
296 296 # pull off the changeset group
297 297 repo.ui.status(_("adding changesets\n"))
298 298 clstart = len(cl)
299 299 progress = repo.ui.makeprogress(_('changesets'), unit=_('chunks'),
300 300 total=expectedtotal)
301 301 self.callback = progress.increment
302 302
303 303 efiles = set()
304 304 def onchangelog(cl, node):
305 305 efiles.update(cl.readfiles(node))
306 306
307 307 self.changelogheader()
308 308 deltas = self.deltaiter()
309 309 cgnodes = cl.addgroup(deltas, csmap, trp, addrevisioncb=onchangelog)
310 310 efiles = len(efiles)
311 311
312 312 if not cgnodes:
313 313 repo.ui.develwarn('applied empty changegroup',
314 314 config='warn-empty-changegroup')
315 315 clend = len(cl)
316 316 changesets = clend - clstart
317 317 progress.complete()
318 318 self.callback = None
319 319
320 320 # pull off the manifest group
321 321 repo.ui.status(_("adding manifests\n"))
322 322 # We know that we'll never have more manifests than we had
323 323 # changesets.
324 324 progress = repo.ui.makeprogress(_('manifests'), unit=_('chunks'),
325 325 total=changesets)
326 326 self._unpackmanifests(repo, revmap, trp, progress)
327 327
328 328 needfiles = {}
329 329 if repo.ui.configbool('server', 'validate'):
330 330 cl = repo.changelog
331 331 ml = repo.manifestlog
332 332 # validate incoming csets have their manifests
333 333 for cset in pycompat.xrange(clstart, clend):
334 334 mfnode = cl.changelogrevision(cset).manifest
335 335 mfest = ml[mfnode].readdelta()
336 336 # store file cgnodes we must see
337 337 for f, n in mfest.iteritems():
338 338 needfiles.setdefault(f, set()).add(n)
339 339
340 340 # process the files
341 341 repo.ui.status(_("adding file changes\n"))
342 342 newrevs, newfiles = _addchangegroupfiles(
343 343 repo, self, revmap, trp, efiles, needfiles)
344 344 revisions += newrevs
345 345 files += newfiles
346 346
347 347 deltaheads = 0
348 348 if oldheads:
349 349 heads = cl.heads()
350 350 deltaheads = len(heads) - len(oldheads)
351 351 for h in heads:
352 352 if h not in oldheads and repo[h].closesbranch():
353 353 deltaheads -= 1
354 354 htext = ""
355 355 if deltaheads:
356 356 htext = _(" (%+d heads)") % deltaheads
357 357
358 358 repo.ui.status(_("added %d changesets"
359 359 " with %d changes to %d files%s\n")
360 360 % (changesets, revisions, files, htext))
361 361 repo.invalidatevolatilesets()
362 362
363 363 if changesets > 0:
364 364 if 'node' not in tr.hookargs:
365 365 tr.hookargs['node'] = hex(cl.node(clstart))
366 366 tr.hookargs['node_last'] = hex(cl.node(clend - 1))
367 367 hookargs = dict(tr.hookargs)
368 368 else:
369 369 hookargs = dict(tr.hookargs)
370 370 hookargs['node'] = hex(cl.node(clstart))
371 371 hookargs['node_last'] = hex(cl.node(clend - 1))
372 372 repo.hook('pretxnchangegroup',
373 373 throw=True, **pycompat.strkwargs(hookargs))
374 374
375 375 added = [cl.node(r) for r in pycompat.xrange(clstart, clend)]
376 376 phaseall = None
377 377 if srctype in ('push', 'serve'):
378 378 # Old servers can not push the boundary themselves.
379 379 # New servers won't push the boundary if changeset already
380 380 # exists locally as secret
381 381 #
382 382 # We should not use added here but the list of all change in
383 383 # the bundle
384 384 if repo.publishing():
385 385 targetphase = phaseall = phases.public
386 386 else:
387 387 # closer target phase computation
388 388
389 389 # Those changesets have been pushed from the
390 390 # outside, their phases are going to be pushed
391 391 # alongside. Therefor `targetphase` is
392 392 # ignored.
393 393 targetphase = phaseall = phases.draft
394 394 if added:
395 395 phases.registernew(repo, tr, targetphase, added)
396 396 if phaseall is not None:
397 397 phases.advanceboundary(repo, tr, phaseall, cgnodes)
398 398
399 399 if changesets > 0:
400 400
401 401 def runhooks():
402 402 # These hooks run when the lock releases, not when the
403 403 # transaction closes. So it's possible for the changelog
404 404 # to have changed since we last saw it.
405 405 if clstart >= len(repo):
406 406 return
407 407
408 408 repo.hook("changegroup", **pycompat.strkwargs(hookargs))
409 409
410 410 for n in added:
411 411 args = hookargs.copy()
412 412 args['node'] = hex(n)
413 413 del args['node_last']
414 414 repo.hook("incoming", **pycompat.strkwargs(args))
415 415
416 416 newheads = [h for h in repo.heads()
417 417 if h not in oldheads]
418 418 repo.ui.log("incoming",
419 419 "%d incoming changes - new heads: %s\n",
420 420 len(added),
421 421 ', '.join([hex(c[:6]) for c in newheads]))
422 422
423 423 tr.addpostclose('changegroup-runhooks-%020i' % clstart,
424 424 lambda tr: repo._afterlock(runhooks))
425 425 finally:
426 426 repo.ui.flush()
427 427 # never return 0 here:
428 428 if deltaheads < 0:
429 429 ret = deltaheads - 1
430 430 else:
431 431 ret = deltaheads + 1
432 432 return ret
433 433
434 434 def deltaiter(self):
435 435 """
436 436 returns an iterator of the deltas in this changegroup
437 437
438 438 Useful for passing to the underlying storage system to be stored.
439 439 """
440 440 chain = None
441 441 for chunkdata in iter(lambda: self.deltachunk(chain), {}):
442 442 # Chunkdata: (node, p1, p2, cs, deltabase, delta, flags)
443 443 yield chunkdata
444 444 chain = chunkdata[0]
445 445
446 446 class cg2unpacker(cg1unpacker):
447 447 """Unpacker for cg2 streams.
448 448
449 449 cg2 streams add support for generaldelta, so the delta header
450 450 format is slightly different. All other features about the data
451 451 remain the same.
452 452 """
453 453 deltaheader = _CHANGEGROUPV2_DELTA_HEADER
454 454 deltaheadersize = deltaheader.size
455 455 version = '02'
456 456
457 457 def _deltaheader(self, headertuple, prevnode):
458 458 node, p1, p2, deltabase, cs = headertuple
459 459 flags = 0
460 460 return node, p1, p2, deltabase, cs, flags
461 461
462 462 class cg3unpacker(cg2unpacker):
463 463 """Unpacker for cg3 streams.
464 464
465 465 cg3 streams add support for exchanging treemanifests and revlog
466 466 flags. It adds the revlog flags to the delta header and an empty chunk
467 467 separating manifests and files.
468 468 """
469 469 deltaheader = _CHANGEGROUPV3_DELTA_HEADER
470 470 deltaheadersize = deltaheader.size
471 471 version = '03'
472 472 _grouplistcount = 2 # One list of manifests and one list of files
473 473
474 474 def _deltaheader(self, headertuple, prevnode):
475 475 node, p1, p2, deltabase, cs, flags = headertuple
476 476 return node, p1, p2, deltabase, cs, flags
477 477
478 478 def _unpackmanifests(self, repo, revmap, trp, prog):
479 479 super(cg3unpacker, self)._unpackmanifests(repo, revmap, trp, prog)
480 480 for chunkdata in iter(self.filelogheader, {}):
481 481 # If we get here, there are directory manifests in the changegroup
482 482 d = chunkdata["filename"]
483 483 repo.ui.debug("adding %s revisions\n" % d)
484 484 dirlog = repo.manifestlog._revlog.dirlog(d)
485 485 deltas = self.deltaiter()
486 486 if not dirlog.addgroup(deltas, revmap, trp):
487 487 raise error.Abort(_("received dir revlog group is empty"))
488 488
489 489 class headerlessfixup(object):
490 490 def __init__(self, fh, h):
491 491 self._h = h
492 492 self._fh = fh
493 493 def read(self, n):
494 494 if self._h:
495 495 d, self._h = self._h[:n], self._h[n:]
496 496 if len(d) < n:
497 497 d += readexactly(self._fh, n - len(d))
498 498 return d
499 499 return readexactly(self._fh, n)
500 500
501 501 @attr.s(slots=True, frozen=True)
502 502 class revisiondelta(object):
503 503 """Describes a delta entry in a changegroup.
504 504
505 505 Captured data is sufficient to serialize the delta into multiple
506 506 formats.
507 507 """
508 508 # 20 byte node of this revision.
509 509 node = attr.ib()
510 510 # 20 byte nodes of parent revisions.
511 511 p1node = attr.ib()
512 512 p2node = attr.ib()
513 513 # 20 byte node of node this delta is against.
514 514 basenode = attr.ib()
515 515 # 20 byte node of changeset revision this delta is associated with.
516 516 linknode = attr.ib()
517 517 # 2 bytes of flags to apply to revision data.
518 518 flags = attr.ib()
519 519 # Iterable of chunks holding raw delta data.
520 520 deltachunks = attr.ib()
521 521
522 522 class cgpacker(object):
523 523 def __init__(self, repo, filematcher, version, allowreorder,
524 524 deltaparentfn, builddeltaheader, manifestsend,
525 525 bundlecaps=None, ellipses=False,
526 526 shallow=False, ellipsisroots=None, fullnodes=None):
527 527 """Given a source repo, construct a bundler.
528 528
529 529 filematcher is a matcher that matches on files to include in the
530 530 changegroup. Used to facilitate sparse changegroups.
531 531
532 532 allowreorder controls whether reordering of revisions is allowed.
533 533 This value is used when ``bundle.reorder`` is ``auto`` or isn't
534 534 set.
535 535
536 536 deltaparentfn is a callable that resolves the delta parent for
537 537 a specific revision.
538 538
539 539 builddeltaheader is a callable that constructs the header for a group
540 540 delta.
541 541
542 542 manifestsend is a chunk to send after manifests have been fully emitted.
543 543
544 544 ellipses indicates whether ellipsis serving mode is enabled.
545 545
546 546 bundlecaps is optional and can be used to specify the set of
547 547 capabilities which can be used to build the bundle. While bundlecaps is
548 548 unused in core Mercurial, extensions rely on this feature to communicate
549 549 capabilities to customize the changegroup packer.
550 550
551 551 shallow indicates whether shallow data might be sent. The packer may
552 552 need to pack file contents not introduced by the changes being packed.
553 553
554 554 fullnodes is the list of nodes which should not be ellipsis nodes. We
555 555 store this rather than the set of nodes that should be ellipsis because
556 556 for very large histories we expect this to be significantly smaller.
557 557 """
558 558 assert filematcher
559 559 self._filematcher = filematcher
560 560
561 561 self.version = version
562 562 self._deltaparentfn = deltaparentfn
563 563 self._builddeltaheader = builddeltaheader
564 564 self._manifestsend = manifestsend
565 565 self._ellipses = ellipses
566 566
567 567 # Set of capabilities we can use to build the bundle.
568 568 if bundlecaps is None:
569 569 bundlecaps = set()
570 570 self._bundlecaps = bundlecaps
571 571 self._isshallow = shallow
572 572 self._fullnodes = fullnodes
573 573
574 574 # Maps ellipsis revs to their roots at the changelog level.
575 575 self._precomputedellipsis = ellipsisroots
576 576
577 577 # experimental config: bundle.reorder
578 578 reorder = repo.ui.config('bundle', 'reorder')
579 579 if reorder == 'auto':
580 580 self._reorder = allowreorder
581 581 else:
582 582 self._reorder = stringutil.parsebool(reorder)
583 583
584 584 self._repo = repo
585 585
586 586 if self._repo.ui.verbose and not self._repo.ui.debugflag:
587 587 self._verbosenote = self._repo.ui.note
588 588 else:
589 589 self._verbosenote = lambda s: None
590 590
591 # TODO the functionality keyed off of this should probably be
592 # controlled via arguments to group() that influence behavior.
593 self._changelogdone = False
594
595 591 # Maps CL revs to per-revlog revisions. Cleared in close() at
596 592 # the end of each group.
597 593 self._clrevtolocalrev = {}
598 594 self._nextclrevtolocalrev = {}
599 595
600 596 # Maps changelog nodes to changelog revs. Filled in once
601 597 # during changelog stage and then left unmodified.
602 598 self._clnodetorev = {}
603 599
604 600 def _close(self):
605 601 # Ellipses serving mode.
606 602 self._clrevtolocalrev.clear()
607 603 if self._nextclrevtolocalrev is not None:
608 604 self._clrevtolocalrev = self._nextclrevtolocalrev
609 605 self._nextclrevtolocalrev = None
610 606
611 607 return closechunk()
612 608
613 609 def _fileheader(self, fname):
614 610 return chunkheader(len(fname)) + fname
615 611
616 612 # Extracted both for clarity and for overriding in extensions.
617 def _sortgroup(self, store, nodelist, lookup):
613 def _sortgroup(self, store, ischangelog, nodelist, lookup):
618 614 """Sort nodes for change group and turn them into revnums."""
619 615 # Ellipses serving mode.
620 616 #
621 617 # In a perfect world, we'd generate better ellipsis-ified graphs
622 618 # for non-changelog revlogs. In practice, we haven't started doing
623 619 # that yet, so the resulting DAGs for the manifestlog and filelogs
624 620 # are actually full of bogus parentage on all the ellipsis
625 621 # nodes. This has the side effect that, while the contents are
626 622 # correct, the individual DAGs might be completely out of whack in
627 623 # a case like 882681bc3166 and its ancestors (back about 10
628 624 # revisions or so) in the main hg repo.
629 625 #
630 626 # The one invariant we *know* holds is that the new (potentially
631 627 # bogus) DAG shape will be valid if we order the nodes in the
632 628 # order that they're introduced in dramatis personae by the
633 629 # changelog, so what we do is we sort the non-changelog histories
634 630 # by the order in which they are used by the changelog.
635 if self._ellipses and self._changelogdone:
631 if self._ellipses and not ischangelog:
636 632 key = lambda n: self._clnodetorev[lookup(n)]
637 633 return [store.rev(n) for n in sorted(nodelist, key=key)]
638 634
639 635 # for generaldelta revlogs, we linearize the revs; this will both be
640 636 # much quicker and generate a much smaller bundle
641 637 if (store._generaldelta and self._reorder is None) or self._reorder:
642 638 dag = dagutil.revlogdag(store)
643 639 return dag.linearize(set(store.rev(n) for n in nodelist))
644 640 else:
645 641 return sorted([store.rev(n) for n in nodelist])
646 642
647 def group(self, nodelist, store, lookup, units=None):
643 def group(self, nodelist, store, ischangelog, lookup, units=None):
648 644 """Calculate a delta group, yielding a sequence of changegroup chunks
649 645 (strings).
650 646
651 647 Given a list of changeset revs, return a set of deltas and
652 648 metadata corresponding to nodes. The first delta is
653 649 first parent(nodelist[0]) -> nodelist[0], the receiver is
654 650 guaranteed to have this parent as it has all history before
655 651 these changesets. In the case firstparent is nullrev the
656 652 changegroup starts with a full revision.
657 653
658 654 If units is not None, progress detail will be generated, units specifies
659 655 the type of revlog that is touched (changelog, manifest, etc.).
660 656 """
661 657 # if we don't have any revisions touched by these changesets, bail
662 658 if len(nodelist) == 0:
663 659 yield self._close()
664 660 return
665 661
666 revs = self._sortgroup(store, nodelist, lookup)
662 revs = self._sortgroup(store, ischangelog, nodelist, lookup)
667 663
668 664 # add the parent of the first rev
669 665 p = store.parentrevs(revs[0])[0]
670 666 revs.insert(0, p)
671 667
672 668 # build deltas
673 669 progress = None
674 670 if units is not None:
675 671 progress = self._repo.ui.makeprogress(_('bundling'), unit=units,
676 672 total=(len(revs) - 1))
677 673 for r in pycompat.xrange(len(revs) - 1):
678 674 if progress:
679 675 progress.update(r + 1)
680 676 prev, curr = revs[r], revs[r + 1]
681 677 linknode = lookup(store.node(curr))
682 for c in self._revchunk(store, curr, prev, linknode):
678 for c in self._revchunk(store, ischangelog, curr, prev, linknode):
683 679 yield c
684 680
685 681 if progress:
686 682 progress.complete()
687 683 yield self._close()
688 684
689 685 # filter any nodes that claim to be part of the known set
690 686 def _prune(self, store, missing, commonrevs):
691 687 # TODO this violates storage abstraction for manifests.
692 688 if isinstance(store, manifest.manifestrevlog):
693 689 if not self._filematcher.visitdir(store._dir[:-1] or '.'):
694 690 return []
695 691
696 692 rr, rl = store.rev, store.linkrev
697 693 return [n for n in missing if rl(rr(n)) not in commonrevs]
698 694
699 695 def _packmanifests(self, dir, mfnodes, lookuplinknode):
700 696 """Pack manifests into a changegroup stream.
701 697
702 698 Encodes the directory name in the output so multiple manifests
703 699 can be sent. Multiple manifests is not supported by cg1 and cg2.
704 700 """
705 701
706 702 if dir:
707 703 assert self.version == b'03'
708 704 yield self._fileheader(dir)
709 705
710 706 # TODO violates storage abstractions by assuming revlogs.
711 707 dirlog = self._repo.manifestlog._revlog.dirlog(dir)
712 for chunk in self.group(mfnodes, dirlog, lookuplinknode,
708 for chunk in self.group(mfnodes, dirlog, False, lookuplinknode,
713 709 units=_('manifests')):
714 710 yield chunk
715 711
716 712 def generate(self, commonrevs, clnodes, fastpathlinkrev, source):
717 713 """Yield a sequence of changegroup byte chunks."""
718 714
719 715 repo = self._repo
720 716 cl = repo.changelog
721 717
722 718 self._verbosenote(_('uncompressed size of bundle content:\n'))
723 719 size = 0
724 720
725 721 clstate, chunks = self._generatechangelog(cl, clnodes)
726 722 for chunk in chunks:
727 723 size += len(chunk)
728 724 yield chunk
729 725
730 726 self._verbosenote(_('%8.i (changelog)\n') % size)
731 727
732 self._changelogdone = True
733
734 728 clrevorder = clstate['clrevorder']
735 729 mfs = clstate['mfs']
736 730 changedfiles = clstate['changedfiles']
737 731
738 732 # We need to make sure that the linkrev in the changegroup refers to
739 733 # the first changeset that introduced the manifest or file revision.
740 734 # The fastpath is usually safer than the slowpath, because the filelogs
741 735 # are walked in revlog order.
742 736 #
743 737 # When taking the slowpath with reorder=None and the manifest revlog
744 738 # uses generaldelta, the manifest may be walked in the "wrong" order.
745 739 # Without 'clrevorder', we would get an incorrect linkrev (see fix in
746 740 # cc0ff93d0c0c).
747 741 #
748 742 # When taking the fastpath, we are only vulnerable to reordering
749 743 # of the changelog itself. The changelog never uses generaldelta, so
750 744 # it is only reordered when reorder=True. To handle this case, we
751 745 # simply take the slowpath, which already has the 'clrevorder' logic.
752 746 # This was also fixed in cc0ff93d0c0c.
753 747 fastpathlinkrev = fastpathlinkrev and not self._reorder
754 748 # Treemanifests don't work correctly with fastpathlinkrev
755 749 # either, because we don't discover which directory nodes to
756 750 # send along with files. This could probably be fixed.
757 751 fastpathlinkrev = fastpathlinkrev and (
758 752 'treemanifest' not in repo.requirements)
759 753
760 754 fnodes = {} # needed file nodes
761 755
762 756 for chunk in self.generatemanifests(commonrevs, clrevorder,
763 757 fastpathlinkrev, mfs, fnodes, source):
764 758 yield chunk
765 759
766 760 if self._ellipses:
767 761 mfdicts = None
768 762 if self._isshallow:
769 763 mfdicts = [(self._repo.manifestlog[n].read(), lr)
770 764 for (n, lr) in mfs.iteritems()]
771 765
772 766 mfs.clear()
773 767 clrevs = set(cl.rev(x) for x in clnodes)
774 768
775 769 if not fastpathlinkrev:
776 770 def linknodes(unused, fname):
777 771 return fnodes.get(fname, {})
778 772 else:
779 773 cln = cl.node
780 774 def linknodes(filerevlog, fname):
781 775 llr = filerevlog.linkrev
782 776 fln = filerevlog.node
783 777 revs = ((r, llr(r)) for r in filerevlog)
784 778 return dict((fln(r), cln(lr)) for r, lr in revs if lr in clrevs)
785 779
786 780 if self._ellipses:
787 781 # We need to pass the mfdicts variable down into
788 782 # generatefiles(), but more than one command might have
789 783 # wrapped generatefiles so we can't modify the function
790 784 # signature. Instead, we pass the data to ourselves using an
791 785 # instance attribute. I'm sorry.
792 786 self._mfdicts = mfdicts
793 787
794 788 for chunk in self.generatefiles(changedfiles, linknodes, commonrevs,
795 789 source):
796 790 yield chunk
797 791
798 792 yield self._close()
799 793
800 794 if clnodes:
801 795 repo.hook('outgoing', node=hex(clnodes[0]), source=source)
802 796
803 797 def _generatechangelog(self, cl, nodes):
804 798 """Generate data for changelog chunks.
805 799
806 800 Returns a 2-tuple of a dict containing state and an iterable of
807 801 byte chunks. The state will not be fully populated until the
808 802 chunk stream has been fully consumed.
809 803 """
810 804 clrevorder = {}
811 805 mfs = {} # needed manifests
812 806 mfl = self._repo.manifestlog
813 807 # TODO violates storage abstraction.
814 808 mfrevlog = mfl._revlog
815 809 changedfiles = set()
816 810
817 811 # Callback for the changelog, used to collect changed files and
818 812 # manifest nodes.
819 813 # Returns the linkrev node (identity in the changelog case).
820 814 def lookupcl(x):
821 815 c = cl.read(x)
822 816 clrevorder[x] = len(clrevorder)
823 817
824 818 if self._ellipses:
825 819 # Only update mfs if x is going to be sent. Otherwise we
826 820 # end up with bogus linkrevs specified for manifests and
827 821 # we skip some manifest nodes that we should otherwise
828 822 # have sent.
829 823 if (x in self._fullnodes
830 824 or cl.rev(x) in self._precomputedellipsis):
831 825 n = c[0]
832 826 # Record the first changeset introducing this manifest
833 827 # version.
834 828 mfs.setdefault(n, x)
835 829 # Set this narrow-specific dict so we have the lowest
836 830 # manifest revnum to look up for this cl revnum. (Part of
837 831 # mapping changelog ellipsis parents to manifest ellipsis
838 832 # parents)
839 833 self._nextclrevtolocalrev.setdefault(cl.rev(x),
840 834 mfrevlog.rev(n))
841 835 # We can't trust the changed files list in the changeset if the
842 836 # client requested a shallow clone.
843 837 if self._isshallow:
844 838 changedfiles.update(mfl[c[0]].read().keys())
845 839 else:
846 840 changedfiles.update(c[3])
847 841 else:
848 842
849 843 n = c[0]
850 844 # record the first changeset introducing this manifest version
851 845 mfs.setdefault(n, x)
852 846 # Record a complete list of potentially-changed files in
853 847 # this manifest.
854 848 changedfiles.update(c[3])
855 849
856 850 return x
857 851
858 852 state = {
859 853 'clrevorder': clrevorder,
860 854 'mfs': mfs,
861 855 'changedfiles': changedfiles,
862 856 }
863 857
864 gen = self.group(nodes, cl, lookupcl, units=_('changesets'))
858 gen = self.group(nodes, cl, True, lookupcl, units=_('changesets'))
865 859
866 860 return state, gen
867 861
    def generatemanifests(self, commonrevs, clrevorder, fastpathlinkrev, mfs,
                          fnodes, source):
        """Returns an iterator of changegroup chunks containing manifests.

        `source` is unused here, but is used by extensions like remotefilelog to
        change what is sent based in pulls vs pushes, etc.
        """
        repo = self._repo
        mfl = repo.manifestlog
        # TODO violates storage abstraction (reaches into the manifest
        # revlog for the per-directory log accessor).
        dirlog = mfl._revlog.dirlog
        # Work queue: directory path -> {manifest node -> linknode}.
        # Seeded with the root manifests; subdirectory entries are added
        # below as tree manifest ('t') entries are discovered.
        tmfnodes = {'': mfs}

        # Callback for the manifest, used to collect linkrevs for filelog
        # revisions.
        # Returns the linkrev node (collected in lookupcl).
        def makelookupmflinknode(dir, nodes):
            if fastpathlinkrev:
                # Fast path only applies at the root; linknodes were already
                # recorded in ``mfs`` by the changelog lookup callback.
                assert not dir
                return mfs.__getitem__

            def lookupmflinknode(x):
                """Callback for looking up the linknode for manifests.

                Returns the linkrev node for the specified manifest.

                SIDE EFFECT:

                1) fclnodes gets populated with the list of relevant
                   file nodes if we're not using fastpathlinkrev
                2) When treemanifests are in use, collects treemanifest nodes
                   to send

                Note that this means manifests must be completely sent to
                the client before you can trust the list of files and
                treemanifests to send.
                """
                clnode = nodes[x]
                mdata = mfl.get(dir, x).readfast(shallow=True)
                for p, n, fl in mdata.iterentries():
                    if fl == 't': # subdirectory manifest
                        subdir = dir + p + '/'
                        tmfclnodes = tmfnodes.setdefault(subdir, {})
                        tmfclnode = tmfclnodes.setdefault(n, clnode)
                        # Always link from the earliest introducing changeset.
                        if clrevorder[clnode] < clrevorder[tmfclnode]:
                            tmfclnodes[n] = clnode
                    else:
                        f = dir + p
                        fclnodes = fnodes.setdefault(f, {})
                        fclnode = fclnodes.setdefault(n, clnode)
                        if clrevorder[clnode] < clrevorder[fclnode]:
                            fclnodes[n] = clnode
                return clnode
            return lookupmflinknode

        size = 0
        # Drain the work queue; the lookup callback above may push new
        # subdirectory entries onto it while we iterate.
        while tmfnodes:
            dir, nodes = tmfnodes.popitem()
            prunednodes = self._prune(dirlog(dir), nodes, commonrevs)
            if not dir or prunednodes:
                for x in self._packmanifests(dir, prunednodes,
                                             makelookupmflinknode(dir, nodes)):
                    size += len(x)
                    yield x
        self._verbosenote(_('%8.i (manifests)\n') % size)
        yield self._manifestsend
933 927
    # The 'source' parameter is useful for extensions
    def generatefiles(self, changedfiles, linknodes, commonrevs, source):
        """Yield changegroup chunks for the changed files.

        ``linknodes`` is a callable taking (filelog, filename) and
        returning the {filenode: linknode} mapping of nodes to send.
        """
        changedfiles = list(filter(self._filematcher, changedfiles))

        if self._isshallow:
            # See comment in generate() for why this sadness is a thing.
            mfdicts = self._mfdicts
            del self._mfdicts
            # In a shallow clone, the linknodes callback needs to also include
            # those file nodes that are in the manifests we sent but weren't
            # introduced by those manifests.
            commonctxs = [self._repo[c] for c in commonrevs]
            oldlinknodes = linknodes
            clrev = self._repo.changelog.rev

            # Defining this function has a side-effect of overriding the
            # function of the same name that was passed in as an argument.
            # TODO have caller pass in appropriate function.
            def linknodes(flog, fname):
                for c in commonctxs:
                    try:
                        fnode = c.filenode(fname)
                        # Record the local filelog rev for this common
                        # changeset so later linkrev resolution can find it.
                        self._clrevtolocalrev[c.rev()] = flog.rev(fnode)
                    except error.ManifestLookupError:
                        # File absent from this common changeset; skip it.
                        pass
                links = oldlinknodes(flog, fname)
                if len(links) != len(mfdicts):
                    for mf, lr in mfdicts:
                        fnode = mf.get(fname, None)
                        if fnode in links:
                            # Prefer the earliest (lowest changelog rev) link.
                            links[fnode] = min(links[fnode], lr, key=clrev)
                        elif fnode:
                            links[fnode] = lr
                return links

        return self._generatefiles(changedfiles, linknodes, commonrevs, source)
970 964
971 965 def _generatefiles(self, changedfiles, linknodes, commonrevs, source):
972 966 repo = self._repo
973 967 progress = repo.ui.makeprogress(_('bundling'), unit=_('files'),
974 968 total=len(changedfiles))
975 969 for i, fname in enumerate(sorted(changedfiles)):
976 970 filerevlog = repo.file(fname)
977 971 if not filerevlog:
978 972 raise error.Abort(_("empty or missing file data for %s") %
979 973 fname)
980 974
981 975 linkrevnodes = linknodes(filerevlog, fname)
982 976 # Lookup for filenodes, we collected the linkrev nodes above in the
983 977 # fastpath case and with lookupmf in the slowpath case.
984 978 def lookupfilelog(x):
985 979 return linkrevnodes[x]
986 980
987 981 filenodes = self._prune(filerevlog, linkrevnodes, commonrevs)
988 982 if filenodes:
989 983 progress.update(i + 1, item=fname)
990 984 h = self._fileheader(fname)
991 985 size = len(h)
992 986 yield h
993 for chunk in self.group(filenodes, filerevlog, lookupfilelog):
987 for chunk in self.group(filenodes, filerevlog, False,
988 lookupfilelog):
994 989 size += len(chunk)
995 990 yield chunk
996 991 self._verbosenote(_('%8.i %s\n') % (size, fname))
997 992 progress.complete()
998 993
999 def _revchunk(self, store, rev, prev, linknode):
994 def _revchunk(self, store, ischangelog, rev, prev, linknode):
1000 995 if self._ellipses:
1001 996 fn = self._revisiondeltanarrow
1002 997 else:
1003 998 fn = self._revisiondeltanormal
1004 999
1005 delta = fn(store, rev, prev, linknode)
1000 delta = fn(store, ischangelog, rev, prev, linknode)
1006 1001 if not delta:
1007 1002 return
1008 1003
1009 1004 meta = self._builddeltaheader(delta)
1010 1005 l = len(meta) + sum(len(x) for x in delta.deltachunks)
1011 1006
1012 1007 yield chunkheader(l)
1013 1008 yield meta
1014 1009 for x in delta.deltachunks:
1015 1010 yield x
1016 1011
1017 def _revisiondeltanormal(self, store, rev, prev, linknode):
1012 def _revisiondeltanormal(self, store, ischangelog, rev, prev, linknode):
1018 1013 node = store.node(rev)
1019 1014 p1, p2 = store.parentrevs(rev)
1020 1015 base = self._deltaparentfn(store, rev, p1, p2, prev)
1021 1016
1022 1017 prefix = ''
1023 1018 if store.iscensored(base) or store.iscensored(rev):
1024 1019 try:
1025 1020 delta = store.revision(node, raw=True)
1026 1021 except error.CensoredNodeError as e:
1027 1022 delta = e.tombstone
1028 1023 if base == nullrev:
1029 1024 prefix = mdiff.trivialdiffheader(len(delta))
1030 1025 else:
1031 1026 baselen = store.rawsize(base)
1032 1027 prefix = mdiff.replacediffheader(baselen, len(delta))
1033 1028 elif base == nullrev:
1034 1029 delta = store.revision(node, raw=True)
1035 1030 prefix = mdiff.trivialdiffheader(len(delta))
1036 1031 else:
1037 1032 delta = store.revdiff(base, rev)
1038 1033 p1n, p2n = store.parents(node)
1039 1034
1040 1035 return revisiondelta(
1041 1036 node=node,
1042 1037 p1node=p1n,
1043 1038 p2node=p2n,
1044 1039 basenode=store.node(base),
1045 1040 linknode=linknode,
1046 1041 flags=store.flags(rev),
1047 1042 deltachunks=(prefix, delta),
1048 1043 )
1049 1044
1050 def _revisiondeltanarrow(self, store, rev, prev, linknode):
1045 def _revisiondeltanarrow(self, store, ischangelog, rev, prev, linknode):
1051 1046 # build up some mapping information that's useful later. See
1052 1047 # the local() nested function below.
1053 if not self._changelogdone:
1048 if ischangelog:
1054 1049 self._clnodetorev[linknode] = rev
1055 1050 linkrev = rev
1056 1051 self._clrevtolocalrev[linkrev] = rev
1057 1052 else:
1058 1053 linkrev = self._clnodetorev[linknode]
1059 1054 self._clrevtolocalrev[linkrev] = rev
1060 1055
1061 1056 # This is a node to send in full, because the changeset it
1062 1057 # corresponds to was a full changeset.
1063 1058 if linknode in self._fullnodes:
1064 return self._revisiondeltanormal(store, rev, prev, linknode)
1059 return self._revisiondeltanormal(store, ischangelog, rev, prev,
1060 linknode)
1065 1061
1066 1062 # At this point, a node can either be one we should skip or an
1067 1063 # ellipsis. If it's not an ellipsis, bail immediately.
1068 1064 if linkrev not in self._precomputedellipsis:
1069 1065 return
1070 1066
1071 1067 linkparents = self._precomputedellipsis[linkrev]
1072 1068 def local(clrev):
1073 1069 """Turn a changelog revnum into a local revnum.
1074 1070
1075 1071 The ellipsis dag is stored as revnums on the changelog,
1076 1072 but when we're producing ellipsis entries for
1077 1073 non-changelog revlogs, we need to turn those numbers into
1078 1074 something local. This does that for us, and during the
1079 1075 changelog sending phase will also expand the stored
1080 1076 mappings as needed.
1081 1077 """
1082 1078 if clrev == nullrev:
1083 1079 return nullrev
1084 1080
1085 if not self._changelogdone:
1081 if ischangelog:
1086 1082 # If we're doing the changelog, it's possible that we
1087 1083 # have a parent that is already on the client, and we
1088 1084 # need to store some extra mapping information so that
1089 1085 # our contained ellipsis nodes will be able to resolve
1090 1086 # their parents.
1091 1087 if clrev not in self._clrevtolocalrev:
1092 1088 clnode = store.node(clrev)
1093 1089 self._clnodetorev[clnode] = clrev
1094 1090 return clrev
1095 1091
1096 1092 # Walk the ellipsis-ized changelog breadth-first looking for a
1097 1093 # change that has been linked from the current revlog.
1098 1094 #
1099 1095 # For a flat manifest revlog only a single step should be necessary
1100 1096 # as all relevant changelog entries are relevant to the flat
1101 1097 # manifest.
1102 1098 #
1103 1099 # For a filelog or tree manifest dirlog however not every changelog
1104 1100 # entry will have been relevant, so we need to skip some changelog
1105 1101 # nodes even after ellipsis-izing.
1106 1102 walk = [clrev]
1107 1103 while walk:
1108 1104 p = walk[0]
1109 1105 walk = walk[1:]
1110 1106 if p in self._clrevtolocalrev:
1111 1107 return self._clrevtolocalrev[p]
1112 1108 elif p in self._fullnodes:
1113 1109 walk.extend([pp for pp in self._repo.changelog.parentrevs(p)
1114 1110 if pp != nullrev])
1115 1111 elif p in self._precomputedellipsis:
1116 1112 walk.extend([pp for pp in self._precomputedellipsis[p]
1117 1113 if pp != nullrev])
1118 1114 else:
1119 1115 # In this case, we've got an ellipsis with parents
1120 1116 # outside the current bundle (likely an
1121 1117 # incremental pull). We "know" that we can use the
1122 1118 # value of this same revlog at whatever revision
1123 1119 # is pointed to by linknode. "Know" is in scare
1124 1120 # quotes because I haven't done enough examination
1125 1121 # of edge cases to convince myself this is really
1126 1122 # a fact - it works for all the (admittedly
1127 1123 # thorough) cases in our testsuite, but I would be
1128 1124 # somewhat unsurprised to find a case in the wild
1129 1125 # where this breaks down a bit. That said, I don't
1130 1126 # know if it would hurt anything.
1131 1127 for i in pycompat.xrange(rev, 0, -1):
1132 1128 if store.linkrev(i) == clrev:
1133 1129 return i
1134 1130 # We failed to resolve a parent for this node, so
1135 1131 # we crash the changegroup construction.
1136 1132 raise error.Abort(
1137 1133 'unable to resolve parent while packing %r %r'
1138 1134 ' for changeset %r' % (store.indexfile, rev, clrev))
1139 1135
1140 1136 return nullrev
1141 1137
1142 1138 if not linkparents or (
1143 1139 store.parentrevs(rev) == (nullrev, nullrev)):
1144 1140 p1, p2 = nullrev, nullrev
1145 1141 elif len(linkparents) == 1:
1146 1142 p1, = sorted(local(p) for p in linkparents)
1147 1143 p2 = nullrev
1148 1144 else:
1149 1145 p1, p2 = sorted(local(p) for p in linkparents)
1150 1146
1151 1147 n = store.node(rev)
1152 1148 p1n, p2n = store.node(p1), store.node(p2)
1153 1149 flags = store.flags(rev)
1154 1150 flags |= revlog.REVIDX_ELLIPSIS
1155 1151
1156 1152 # TODO: try and actually send deltas for ellipsis data blocks
1157 1153 data = store.revision(n)
1158 1154 diffheader = mdiff.trivialdiffheader(len(data))
1159 1155
1160 1156 return revisiondelta(
1161 1157 node=n,
1162 1158 p1node=p1n,
1163 1159 p2node=p2n,
1164 1160 basenode=nullid,
1165 1161 linknode=linknode,
1166 1162 flags=flags,
1167 1163 deltachunks=(diffheader, data),
1168 1164 )
1169 1165
1170 1166 def _deltaparentprev(store, rev, p1, p2, prev):
1171 1167 """Resolve a delta parent to the previous revision.
1172 1168
1173 1169 Used for version 1 changegroups, which don't support generaldelta.
1174 1170 """
1175 1171 return prev
1176 1172
def _deltaparentgeneraldelta(store, rev, p1, p2, prev):
    """Resolve a delta parent when general deltas are supported."""
    dp = store.deltaparent(rev)

    if dp == nullrev:
        # The store holds a full snapshot here. When delta chains are in
        # use we still avoid sending full revisions and delta against
        # ``prev``: p1 might be smaller, but resolving p1's raw text can
        # be expensive, whereas prev should be cached by the revlog. (A
        # flag/config option could control this in the future.) When the
        # store deliberately disabled delta chains, honor the full
        # snapshot instead.
        base = prev if store.storedeltachains else nullrev
    elif dp not in (p1, p2, prev):
        # We can't be sure the remote has the stored delta base, so fall
        # back to prev.
        return prev
    else:
        base = dp

    # Ensure the chosen base is actually usable as a delta base.
    if base != nullrev and not store.candelta(base, rev):
        base = nullrev

    return base
1203 1199
1204 1200 def _deltaparentellipses(store, rev, p1, p2, prev):
1205 1201 """Resolve a delta parent when in ellipses mode."""
1206 1202 # TODO: send better deltas when in narrow mode.
1207 1203 #
1208 1204 # changegroup.group() loops over revisions to send,
1209 1205 # including revisions we'll skip. What this means is that
1210 1206 # `prev` will be a potentially useless delta base for all
1211 1207 # ellipsis nodes, as the client likely won't have it. In
1212 1208 # the future we should do bookkeeping about which nodes
1213 1209 # have been sent to the client, and try to be
1214 1210 # significantly smarter about delta bases. This is
1215 1211 # slightly tricky because this same code has to work for
1216 1212 # all revlogs, and we don't have the linkrev/linknode here.
1217 1213 return p1
1218 1214
def _makecg1packer(repo, filematcher, bundlecaps, ellipses=False,
                   shallow=False, ellipsisroots=None, fullnodes=None):
    """Construct a packer for version 01 changegroups."""
    def builddeltaheader(d):
        # cg1 headers carry no delta base: deltas are always against prev.
        return _CHANGEGROUPV1_DELTA_HEADER.pack(
            d.node, d.p1node, d.p2node, d.linknode)

    return cgpacker(repo, filematcher, b'01',
                    deltaparentfn=_deltaparentprev,
                    allowreorder=None,
                    builddeltaheader=builddeltaheader,
                    manifestsend=b'',
                    bundlecaps=bundlecaps,
                    ellipses=ellipses,
                    shallow=shallow,
                    ellipsisroots=ellipsisroots,
                    fullnodes=fullnodes)
1234 1230
def _makecg2packer(repo, filematcher, bundlecaps, ellipses=False,
                   shallow=False, ellipsisroots=None, fullnodes=None):
    """Construct a packer for version 02 changegroups."""
    def builddeltaheader(d):
        return _CHANGEGROUPV2_DELTA_HEADER.pack(
            d.node, d.p1node, d.p2node, d.basenode, d.linknode)

    # Since generaldelta is directly supported by cg2, reordering
    # generally doesn't help, so we disable it by default (treating
    # bundle.reorder=auto just like bundle.reorder=False).
    return cgpacker(repo, filematcher, b'02',
                    deltaparentfn=_deltaparentgeneraldelta,
                    allowreorder=False,
                    builddeltaheader=builddeltaheader,
                    manifestsend=b'',
                    bundlecaps=bundlecaps,
                    ellipses=ellipses,
                    shallow=shallow,
                    ellipsisroots=ellipsisroots,
                    fullnodes=fullnodes)
1253 1249
def _makecg3packer(repo, filematcher, bundlecaps, ellipses=False,
                   shallow=False, ellipsisroots=None, fullnodes=None):
    """Construct a packer for version 03 changegroups."""
    def builddeltaheader(d):
        # cg3 headers also carry the revlog flags.
        return _CHANGEGROUPV3_DELTA_HEADER.pack(
            d.node, d.p1node, d.p2node, d.basenode, d.linknode, d.flags)

    if ellipses:
        deltaparentfn = _deltaparentellipses
    else:
        deltaparentfn = _deltaparentgeneraldelta

    return cgpacker(repo, filematcher, b'03',
                    deltaparentfn=deltaparentfn,
                    allowreorder=False,
                    builddeltaheader=builddeltaheader,
                    manifestsend=closechunk(),
                    bundlecaps=bundlecaps,
                    ellipses=ellipses,
                    shallow=shallow,
                    ellipsisroots=ellipsisroots,
                    fullnodes=fullnodes)
1272 1268
# Map of changegroup version -> (packer factory, unpacker class).
_packermap = {'01': (_makecg1packer, cg1unpacker),
             # cg2 adds support for exchanging generaldelta
             '02': (_makecg2packer, cg2unpacker),
             # cg3 adds support for exchanging revlog flags and treemanifests
             '03': (_makecg3packer, cg3unpacker),
}
1279 1275
def allsupportedversions(repo):
    """Return the set of changegroup versions this code understands.

    Version '03' is only included when the repository or configuration
    opts into changegroup3 or tree manifests.
    """
    versions = set(_packermap)
    wantv03 = (repo.ui.configbool('experimental', 'changegroup3')
               or repo.ui.configbool('experimental', 'treemanifest')
               or 'treemanifest' in repo.requirements)
    if not wantv03:
        versions.discard('03')
    return versions
1287 1283
# Changegroup versions that can be applied to the repo
def supportedincomingversions(repo):
    """Return the changegroup versions the repo can receive.

    Currently identical to the full set of understood versions.
    """
    return allsupportedversions(repo)
1291 1287
# Changegroup versions that can be created from the repo
def supportedoutgoingversions(repo):
    """Return the changegroup versions we may produce for this repo."""
    versions = allsupportedversions(repo)

    if 'treemanifest' in repo.requirements:
        # Versions 01 and 02 support only flat manifests, and converting
        # between flat and tree manifests on the fly is far too expensive:
        # tree manifests hash differently, so all of history would have to
        # be converted. We simply don't pretend to support 01/02 here.
        versions -= {'01', '02'}
    if repository.NARROW_REQUIREMENT in repo.requirements:
        # Versions 01 and 02 don't support revlog flags, which stripping
        # and unbundling of narrow repos require.
        versions -= {'01', '02'}
    if LFS_REQUIREMENT in repo.requirements:
        # Versions 01 and 02 don't support revlog flags, and LFS entries
        # must be marked with REVIDX_EXTSTORED.
        versions -= {'01', '02'}

    return versions
1315 1311
def localversion(repo):
    """Return the best version for bundles that never leave this host.

    Used for strip/shelve backups and other temporary bundles, where we
    control both ends and can therefore pick the newest supported format.
    """
    versions = supportedoutgoingversions(repo)
    return max(versions)
1320 1316
def safeversion(repo):
    """Return the smallest version it is safe to assume clients support.

    For example, every hg version that supports generaldelta also
    supports changegroup 02, so '01' can be dropped for such repos.
    """
    versions = supportedoutgoingversions(repo)
    if 'generaldelta' in repo.requirements:
        versions -= {'01'}
    assert versions
    return min(versions)
1330 1326
def getbundler(version, repo, bundlecaps=None, filematcher=None,
               ellipses=False, shallow=False, ellipsisroots=None,
               fullnodes=None):
    """Return a changegroup packer for ``version``.

    Validates that the requested version/feature combination is legal,
    then delegates construction to the per-version factory registered
    in ``_packermap``.
    """
    assert version in supportedoutgoingversions(repo)

    if filematcher is None:
        # Default to matching every file in the repo.
        filematcher = matchmod.alwaysmatcher(repo.root, '')

    if version == '01' and not filematcher.always():
        raise error.ProgrammingError('version 01 changegroups do not support '
                                     'sparse file matchers')

    if ellipses and version in (b'01', b'02'):
        raise error.Abort(
            _('ellipsis nodes require at least cg3 on client and server, '
              'but negotiated version %s') % version)

    # Requested files could include files not in the local store. So
    # filter those out.
    filematcher = matchmod.intersectmatchers(repo.narrowmatch(),
                                             filematcher)

    fn = _packermap[version][0]
    return fn(repo, filematcher, bundlecaps, ellipses=ellipses,
              shallow=shallow, ellipsisroots=ellipsisroots,
              fullnodes=fullnodes)
1357 1353
def getunbundler(version, fh, alg, extras=None):
    """Return an unpacker for ``version`` reading from file object ``fh``.

    ``alg`` and ``extras`` are forwarded to the unpacker constructor
    (``alg`` presumably names the stream compression — see cgNunpacker).
    """
    return _packermap[version][1](fh, alg, extras=extras)
1360 1356
def _changegroupinfo(repo, nodes, source):
    """Emit status/debug output describing the outgoing changesets."""
    ui = repo.ui
    if ui.verbose or source == 'bundle':
        ui.status(_("%d changesets found\n") % len(nodes))
    if ui.debugflag:
        ui.debug("list of changesets:\n")
        for node in nodes:
            ui.debug("%s\n" % hex(node))
1368 1364
def makechangegroup(repo, outgoing, version, source, fastpath=False,
                    bundlecaps=None):
    """Build an unpacker over a freshly generated changegroup stream."""
    cgstream = makestream(repo, outgoing, version, source,
                          fastpath=fastpath, bundlecaps=bundlecaps)
    extras = {'clcount': len(outgoing.missing)}
    return getunbundler(version, util.chunkbuffer(cgstream), None, extras)
1375 1371
def makestream(repo, outgoing, version, source, fastpath=False,
               bundlecaps=None, filematcher=None):
    """Return an iterable of changegroup chunks for ``outgoing``.

    Fires the ``preoutgoing`` hook (which may abort) and prints the
    changeset summary before delegating to the packer's generate().
    """
    bundler = getbundler(version, repo, bundlecaps=bundlecaps,
                         filematcher=filematcher)

    repo = repo.unfiltered()
    commonrevs = outgoing.common
    csets = outgoing.missing
    heads = outgoing.missingheads
    # We go through the fast path if we get told to, or if all (unfiltered)
    # heads have been requested (since we then know all linkrevs will
    # be pulled by the client).
    heads.sort()
    fastpathlinkrev = fastpath or (
        repo.filtername is None and heads == sorted(repo.heads()))

    repo.hook('preoutgoing', throw=True, source=source)
    _changegroupinfo(repo, csets, source)
    return bundler.generate(commonrevs, csets, fastpathlinkrev, source)
1395 1391
def _addchangegroupfiles(repo, source, revmap, trp, expectedfiles, needfiles):
    """Apply the file-revision section of a changegroup to the repo.

    ``source`` is the changegroup unpacker, positioned at the filelog
    section; ``revmap`` and ``trp`` are passed through to
    ``filelog.addgroup``. ``needfiles`` maps filename -> set of nodes
    that must arrive; entries are checked off as revisions are added,
    and any node still missing afterwards aborts with a suggestion to
    run ``hg verify``.

    Returns a ``(revisions, files)`` tuple of counts.
    """
    revisions = 0
    files = 0
    progress = repo.ui.makeprogress(_('files'), unit=_('files'),
                                    total=expectedfiles)
    # iter(callable, sentinel): an empty header dict terminates the
    # filelog section of the stream.
    for chunkdata in iter(source.filelogheader, {}):
        files += 1
        f = chunkdata["filename"]
        repo.ui.debug("adding %s revisions\n" % f)
        progress.increment()
        fl = repo.file(f)
        o = len(fl)
        try:
            deltas = source.deltaiter()
            if not fl.addgroup(deltas, revmap, trp):
                raise error.Abort(_("received file revlog group is empty"))
        except error.CensoredBaseError as e:
            raise error.Abort(_("received delta base is censored: %s") % e)
        revisions += len(fl) - o
        if f in needfiles:
            needs = needfiles[f]
            # Check off the newly added revisions against what we expected.
            for new in pycompat.xrange(o, len(fl)):
                n = fl.node(new)
                if n in needs:
                    needs.remove(n)
                else:
                    raise error.Abort(
                        _("received spurious file revlog entry"))
            if not needs:
                del needfiles[f]
    progress.complete()

    # Anything still listed in needfiles never arrived; verify it exists
    # locally, otherwise the repository is missing data.
    for f, needs in needfiles.iteritems():
        fl = repo.file(f)
        for n in needs:
            try:
                fl.rev(n)
            except error.LookupError:
                raise error.Abort(
                    _('missing file data for %s:%s - run hg verify') %
                    (f, hex(n)))

    return revisions, files
General Comments 0
You need to be logged in to leave comments. Login now