##// END OF EJS Templates
changegroup: factor changelog chunk generation into own function...
Gregory Szorc -
r39012:f7228c90 default
parent child Browse files
Show More
@@ -1,1411 +1,1437 b''
1 1 # changegroup.py - Mercurial changegroup manipulation functions
2 2 #
3 3 # Copyright 2006 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import os
11 11 import struct
12 12 import weakref
13 13
14 14 from .i18n import _
15 15 from .node import (
16 16 hex,
17 17 nullid,
18 18 nullrev,
19 19 short,
20 20 )
21 21
22 22 from .thirdparty import (
23 23 attr,
24 24 )
25 25
26 26 from . import (
27 27 dagutil,
28 28 error,
29 29 manifest,
30 30 match as matchmod,
31 31 mdiff,
32 32 phases,
33 33 pycompat,
34 34 repository,
35 35 revlog,
36 36 util,
37 37 )
38 38
39 39 from .utils import (
40 40 stringutil,
41 41 )
42 42
43 43 _CHANGEGROUPV1_DELTA_HEADER = struct.Struct("20s20s20s20s")
44 44 _CHANGEGROUPV2_DELTA_HEADER = struct.Struct("20s20s20s20s20s")
45 45 _CHANGEGROUPV3_DELTA_HEADER = struct.Struct(">20s20s20s20s20sH")
46 46
47 47 LFS_REQUIREMENT = 'lfs'
48 48
49 49 readexactly = util.readexactly
50 50
def getchunk(stream):
    """Read and return the next framed chunk from *stream*.

    The 4-byte big-endian length prefix includes itself; a zero length
    marks the end of a group and yields an empty string. Any other
    length <= 4 is malformed and aborts.
    """
    header = readexactly(stream, 4)
    length = struct.unpack(">l", header)[0]
    if length > 4:
        return readexactly(stream, length - 4)
    if length:
        raise error.Abort(_("invalid chunk length %d") % length)
    return ""
60 60
def chunkheader(length):
    """Encode the framing header for a chunk carrying *length* payload bytes.

    The transmitted value counts the 4 header bytes themselves.
    """
    total = length + 4
    return struct.pack(">l", total)
64 64
def closechunk():
    """Return the header marking a zero-length (terminating) chunk."""
    zero = 0
    return struct.pack(">l", zero)
68 68
def writechunks(ui, chunks, filename, vfs=None):
    """Write chunks to a file and return its filename.

    The stream is assumed to be a bundle file.
    Existing files will not be overwritten.
    If no filename is specified, a temporary file is created.
    """
    fh = None
    cleanup = None
    try:
        if not filename:
            # No destination given: stage into a temp file and remember
            # it for removal should writing fail.
            fd, filename = pycompat.mkstemp(prefix="hg-bundle-", suffix=".hg")
            fh = os.fdopen(fd, r"wb")
            cleanup = filename
        elif vfs:
            fh = vfs.open(filename, "wb")
        else:
            # Bump the buffer size past the typical 4k default to cut
            # down on syscalls when writing large bundles.
            fh = open(filename, "wb", 131072)

        for chunk in chunks:
            fh.write(chunk)
        # Everything was written: disarm the failure cleanup.
        cleanup = None
        return filename
    finally:
        if fh is not None:
            fh.close()
        if cleanup is not None:
            if filename and vfs:
                vfs.unlink(cleanup)
            else:
                os.unlink(cleanup)
102 102
class cg1unpacker(object):
    """Unpacker for cg1 changegroup streams.

    A changegroup unpacker handles the framing of the revision data in
    the wire format. Most consumers will want to use the apply()
    method to add the changes from the changegroup to a repository.

    If you're forwarding a changegroup unmodified to another consumer,
    use getchunks(), which returns an iterator of changegroup
    chunks. This is mostly useful for cases where you need to know the
    data stream has ended by observing the end of the changegroup.

    deltachunk() is useful only if you're applying delta data. Most
    consumers should prefer apply() instead.

    A few other public methods exist. Those are used only for
    bundlerepo and some debug commands - their use is discouraged.
    """
    # struct describing the per-delta header for this stream version
    deltaheader = _CHANGEGROUPV1_DELTA_HEADER
    deltaheadersize = deltaheader.size
    version = '01'
    _grouplistcount = 1 # One list of files after the manifests

    def __init__(self, fh, alg, extras=None):
        """Wrap file-like ``fh``, decompressing with bundle algorithm ``alg``.

        ``alg`` of None is treated as uncompressed ('UN'). Raises
        error.Abort for an unknown compression type.
        """
        if alg is None:
            alg = 'UN'
        if alg not in util.compengines.supportedbundletypes:
            raise error.Abort(_('unknown stream compression type: %s')
                              % alg)
        if alg == 'BZ':
            # bundles carry a BZ payload whose stream header was already
            # consumed; use the engine variant that tolerates that
            alg = '_truncatedBZ'

        compengine = util.compengines.forbundletype(alg)
        self._stream = compengine.decompressorreader(fh)
        self._type = alg
        self.extras = extras or {}
        # per-chunk progress hook, set by consumers (see apply())
        self.callback = None

    # These methods (compressed, read, seek, tell) all appear to only
    # be used by bundlerepo, but it's a little hard to tell.
    def compressed(self):
        return self._type is not None and self._type != 'UN'
    def read(self, l):
        return self._stream.read(l)
    def seek(self, pos):
        return self._stream.seek(pos)
    def tell(self):
        return self._stream.tell()
    def close(self):
        return self._stream.close()

    def _chunklength(self):
        """Read the next chunk's payload length, firing the progress callback.

        Returns 0 at a terminating (zero-length) chunk; aborts on a
        malformed length.
        """
        d = readexactly(self._stream, 4)
        l = struct.unpack(">l", d)[0]
        if l <= 4:
            if l:
                raise error.Abort(_("invalid chunk length %d") % l)
            return 0
        if self.callback:
            self.callback()
        return l - 4

    def changelogheader(self):
        """v10 does not have a changelog header chunk"""
        return {}

    def manifestheader(self):
        """v10 does not have a manifest header chunk"""
        return {}

    def filelogheader(self):
        """return the header of the filelogs chunk, v10 only has the filename"""
        l = self._chunklength()
        if not l:
            return {}
        fname = readexactly(self._stream, l)
        return {'filename': fname}

    def _deltaheader(self, headertuple, prevnode):
        """Decode a cg1 delta header tuple into its components.

        cg1 has no explicit delta base on the wire: deltas chain against
        the previous node, or p1 for the first delta of a group.
        """
        node, p1, p2, cs = headertuple
        if prevnode is None:
            deltabase = p1
        else:
            deltabase = prevnode
        flags = 0
        return node, p1, p2, deltabase, cs, flags

    def deltachunk(self, prevnode):
        """Read one delta entry; returns {} at the end of the group."""
        l = self._chunklength()
        if not l:
            return {}
        headerdata = readexactly(self._stream, self.deltaheadersize)
        header = self.deltaheader.unpack(headerdata)
        delta = readexactly(self._stream, l - self.deltaheadersize)
        node, p1, p2, deltabase, cs, flags = self._deltaheader(header, prevnode)
        return (node, p1, p2, cs, deltabase, delta, flags)

    def getchunks(self):
        """returns all the chunks contains in the bundle

        Used when you need to forward the binary stream to a file or another
        network API. To do so, it parse the changegroup data, otherwise it will
        block in case of sshrepo because it don't know the end of the stream.
        """
        # For changegroup 1 and 2, we expect 3 parts: changelog, manifestlog,
        # and a list of filelogs. For changegroup 3, we expect 4 parts:
        # changelog, manifestlog, a list of tree manifestlogs, and a list of
        # filelogs.
        #
        # Changelog and manifestlog parts are terminated with empty chunks. The
        # tree and file parts are a list of entry sections. Each entry section
        # is a series of chunks terminating in an empty chunk. The list of these
        # entry sections is terminated in yet another empty chunk, so we know
        # we've reached the end of the tree/file list when we reach an empty
        # chunk that was proceeded by no non-empty chunks.

        parts = 0
        while parts < 2 + self._grouplistcount:
            noentries = True
            while True:
                chunk = getchunk(self)
                if not chunk:
                    # The first two empty chunks represent the end of the
                    # changelog and the manifestlog portions. The remaining
                    # empty chunks represent either A) the end of individual
                    # tree or file entries in the file list, or B) the end of
                    # the entire list. It's the end of the entire list if there
                    # were no entries (i.e. noentries is True).
                    if parts < 2:
                        parts += 1
                    elif noentries:
                        parts += 1
                    break
                noentries = False
                yield chunkheader(len(chunk))
                pos = 0
                # re-emit large chunks in 1MB slices to bound memory use
                while pos < len(chunk):
                    next = pos + 2**20
                    yield chunk[pos:next]
                    pos = next
            yield closechunk()

    def _unpackmanifests(self, repo, revmap, trp, prog):
        """Apply the manifest portion of the stream to the repository."""
        self.callback = prog.increment
        # no need to check for empty manifest group here:
        # if the result of the merge of 1 and 2 is the same in 3 and 4,
        # no new manifest will be created and the manifest group will
        # be empty during the pull
        self.manifestheader()
        deltas = self.deltaiter()
        repo.manifestlog.addgroup(deltas, revmap, trp)
        prog.complete()
        self.callback = None

    def apply(self, repo, tr, srctype, url, targetphase=phases.draft,
              expectedtotal=None):
        """Add the changegroup returned by source.read() to this repo.
        srctype is a string like 'push', 'pull', or 'unbundle'. url is
        the URL of the repo where this changegroup is coming from.

        Return an integer summarizing the change to this repo:
        - nothing changed or no source: 0
        - more heads than before: 1+added heads (2..n)
        - fewer heads than before: -1-removed heads (-2..-n)
        - number of heads stays the same: 1
        """
        repo = repo.unfiltered()
        # linkrev callback for the changelog: the node being added links
        # to itself, i.e. the next changelog rev
        def csmap(x):
            repo.ui.debug("add changeset %s\n" % short(x))
            return len(cl)

        def revmap(x):
            return cl.rev(x)

        changesets = files = revisions = 0

        try:
            # The transaction may already carry source information. In this
            # case we use the top level data. We overwrite the argument
            # because we need to use the top level value (if they exist)
            # in this function.
            srctype = tr.hookargs.setdefault('source', srctype)
            url = tr.hookargs.setdefault('url', url)
            repo.hook('prechangegroup',
                      throw=True, **pycompat.strkwargs(tr.hookargs))

            # write changelog data to temp files so concurrent readers
            # will not see an inconsistent view
            cl = repo.changelog
            cl.delayupdate(tr)
            oldheads = set(cl.heads())

            trp = weakref.proxy(tr)
            # pull off the changeset group
            repo.ui.status(_("adding changesets\n"))
            clstart = len(cl)
            progress = repo.ui.makeprogress(_('changesets'), unit=_('chunks'),
                                            total=expectedtotal)
            self.callback = progress.increment

            # collect the union of files touched, for the filelog progress
            # total further down
            efiles = set()
            def onchangelog(cl, node):
                efiles.update(cl.readfiles(node))

            self.changelogheader()
            deltas = self.deltaiter()
            cgnodes = cl.addgroup(deltas, csmap, trp, addrevisioncb=onchangelog)
            efiles = len(efiles)

            if not cgnodes:
                repo.ui.develwarn('applied empty changegroup',
                                  config='warn-empty-changegroup')
            clend = len(cl)
            changesets = clend - clstart
            progress.complete()
            self.callback = None

            # pull off the manifest group
            repo.ui.status(_("adding manifests\n"))
            # We know that we'll never have more manifests than we had
            # changesets.
            progress = repo.ui.makeprogress(_('manifests'), unit=_('chunks'),
                                            total=changesets)
            self._unpackmanifests(repo, revmap, trp, progress)

            needfiles = {}
            if repo.ui.configbool('server', 'validate'):
                cl = repo.changelog
                ml = repo.manifestlog
                # validate incoming csets have their manifests
                for cset in pycompat.xrange(clstart, clend):
                    mfnode = cl.changelogrevision(cset).manifest
                    mfest = ml[mfnode].readdelta()
                    # store file cgnodes we must see
                    for f, n in mfest.iteritems():
                        needfiles.setdefault(f, set()).add(n)

            # process the files
            repo.ui.status(_("adding file changes\n"))
            newrevs, newfiles = _addchangegroupfiles(
                repo, self, revmap, trp, efiles, needfiles)
            revisions += newrevs
            files += newfiles

            deltaheads = 0
            if oldheads:
                heads = cl.heads()
                deltaheads = len(heads) - len(oldheads)
                for h in heads:
                    # closed branch heads don't count toward the delta
                    if h not in oldheads and repo[h].closesbranch():
                        deltaheads -= 1
            htext = ""
            if deltaheads:
                htext = _(" (%+d heads)") % deltaheads

            repo.ui.status(_("added %d changesets"
                             " with %d changes to %d files%s\n")
                           % (changesets, revisions, files, htext))
            repo.invalidatevolatilesets()

            if changesets > 0:
                if 'node' not in tr.hookargs:
                    tr.hookargs['node'] = hex(cl.node(clstart))
                    tr.hookargs['node_last'] = hex(cl.node(clend - 1))
                    hookargs = dict(tr.hookargs)
                else:
                    hookargs = dict(tr.hookargs)
                    hookargs['node'] = hex(cl.node(clstart))
                    hookargs['node_last'] = hex(cl.node(clend - 1))
                repo.hook('pretxnchangegroup',
                          throw=True, **pycompat.strkwargs(hookargs))

            added = [cl.node(r) for r in pycompat.xrange(clstart, clend)]
            phaseall = None
            if srctype in ('push', 'serve'):
                # Old servers can not push the boundary themselves.
                # New servers won't push the boundary if changeset already
                # exists locally as secret
                #
                # We should not use added here but the list of all change in
                # the bundle
                if repo.publishing():
                    targetphase = phaseall = phases.public
                else:
                    # closer target phase computation

                    # Those changesets have been pushed from the
                    # outside, their phases are going to be pushed
                    # alongside. Therefor `targetphase` is
                    # ignored.
                    targetphase = phaseall = phases.draft
            if added:
                phases.registernew(repo, tr, targetphase, added)
            if phaseall is not None:
                phases.advanceboundary(repo, tr, phaseall, cgnodes)

            if changesets > 0:

                def runhooks():
                    # These hooks run when the lock releases, not when the
                    # transaction closes. So it's possible for the changelog
                    # to have changed since we last saw it.
                    if clstart >= len(repo):
                        return

                    repo.hook("changegroup", **pycompat.strkwargs(hookargs))

                    for n in added:
                        args = hookargs.copy()
                        args['node'] = hex(n)
                        del args['node_last']
                        repo.hook("incoming", **pycompat.strkwargs(args))

                    newheads = [h for h in repo.heads()
                                if h not in oldheads]
                    repo.ui.log("incoming",
                                "%d incoming changes - new heads: %s\n",
                                len(added),
                                ', '.join([hex(c[:6]) for c in newheads]))

                tr.addpostclose('changegroup-runhooks-%020i' % clstart,
                                lambda tr: repo._afterlock(runhooks))
        finally:
            repo.ui.flush()
        # never return 0 here:
        if deltaheads < 0:
            ret = deltaheads - 1
        else:
            ret = deltaheads + 1
        return ret

    def deltaiter(self):
        """
        returns an iterator of the deltas in this changegroup

        Useful for passing to the underlying storage system to be stored.
        """
        # each delta (after the first) chains against the previous node
        chain = None
        for chunkdata in iter(lambda: self.deltachunk(chain), {}):
            # Chunkdata: (node, p1, p2, cs, deltabase, delta, flags)
            yield chunkdata
            chain = chunkdata[0]
445 445
class cg2unpacker(cg1unpacker):
    """Unpacker for cg2 changegroup streams.

    cg2 streams add support for generaldelta, so the delta header
    format is slightly different. All other features about the data
    remain the same.
    """
    deltaheader = _CHANGEGROUPV2_DELTA_HEADER
    deltaheadersize = deltaheader.size
    version = '02'

    def _deltaheader(self, headertuple, prevnode):
        # cg2 transmits the delta base explicitly; there are no flags.
        node, p1, p2, deltabase, cs = headertuple
        return node, p1, p2, deltabase, cs, 0
461 461
class cg3unpacker(cg2unpacker):
    """Unpacker for cg3 streams.

    cg3 streams add support for exchanging treemanifests and revlog
    flags. It adds the revlog flags to the delta header and an empty chunk
    separating manifests and files.
    """
    deltaheader = _CHANGEGROUPV3_DELTA_HEADER
    deltaheadersize = deltaheader.size
    version = '03'
    _grouplistcount = 2 # One list of manifests and one list of files

    def _deltaheader(self, headertuple, prevnode):
        # cg3 carries both the delta base and revlog flags on the wire
        node, p1, p2, deltabase, cs, flags = headertuple
        return node, p1, p2, deltabase, cs, flags

    def _unpackmanifests(self, repo, revmap, trp, prog):
        # Process the root manifest group first, then any per-directory
        # (tree manifest) groups that follow it in the stream.
        super(cg3unpacker, self)._unpackmanifests(repo, revmap, trp, prog)
        for chunkdata in iter(self.filelogheader, {}):
            # If we get here, there are directory manifests in the changegroup
            d = chunkdata["filename"]
            repo.ui.debug("adding %s revisions\n" % d)
            # TODO(review): reaches into manifestlog._revlog; presumably
            # matches the storage-abstraction violations noted elsewhere
            dirlog = repo.manifestlog._revlog.dirlog(d)
            deltas = self.deltaiter()
            if not dirlog.addgroup(deltas, revmap, trp):
                raise error.Abort(_("received dir revlog group is empty"))
488 488
class headerlessfixup(object):
    """File-like wrapper that replays an already-consumed prefix.

    ``h`` is data that was read off ``fh`` before the stream was handed
    over; it is served back first, after which reads fall through to the
    underlying file object.
    """
    def __init__(self, fh, h):
        self._h = h
        self._fh = fh

    def read(self, n):
        if not self._h:
            return readexactly(self._fh, n)
        d, self._h = self._h[:n], self._h[n:]
        if len(d) < n:
            # buffered prefix exhausted mid-read: top up from the stream
            d += readexactly(self._fh, n - len(d))
        return d
500 500
@attr.s(slots=True, frozen=True)
class revisiondelta(object):
    """Describes a delta entry in a changegroup.

    Captured data is sufficient to serialize the delta into multiple
    formats (the cg1/cg2/cg3 wire headers are all derivable from it).
    Instances are immutable (frozen attrs class).
    """
    # 20 byte node of this revision.
    node = attr.ib()
    # 20 byte nodes of parent revisions.
    p1node = attr.ib()
    p2node = attr.ib()
    # 20 byte node of node this delta is against.
    basenode = attr.ib()
    # 20 byte node of changeset revision this delta is associated with.
    linknode = attr.ib()
    # 2 bytes of flags to apply to revision data.
    flags = attr.ib()
    # Iterable of chunks holding raw delta data.
    deltachunks = attr.ib()
521 521
522 522 class cgpacker(object):
    def __init__(self, repo, filematcher, version, allowreorder,
                 deltaparentfn, builddeltaheader, manifestsend,
                 bundlecaps=None, ellipses=False,
                 shallow=False, ellipsisroots=None, fullnodes=None):
        """Given a source repo, construct a bundler.

        filematcher is a matcher that matches on files to include in the
        changegroup. Used to facilitate sparse changegroups.

        allowreorder controls whether reordering of revisions is allowed.
        This value is used when ``bundle.reorder`` is ``auto`` or isn't
        set.

        deltaparentfn is a callable that resolves the delta parent for
        a specific revision.

        builddeltaheader is a callable that constructs the header for a group
        delta.

        manifestsend is a chunk to send after manifests have been fully emitted.

        ellipses indicates whether ellipsis serving mode is enabled.

        bundlecaps is optional and can be used to specify the set of
        capabilities which can be used to build the bundle. While bundlecaps is
        unused in core Mercurial, extensions rely on this feature to communicate
        capabilities to customize the changegroup packer.

        shallow indicates whether shallow data might be sent. The packer may
        need to pack file contents not introduced by the changes being packed.

        fullnodes is the list of nodes which should not be ellipsis nodes. We
        store this rather than the set of nodes that should be ellipsis because
        for very large histories we expect this to be significantly smaller.
        """
        assert filematcher
        self._filematcher = filematcher

        self.version = version
        self._deltaparentfn = deltaparentfn
        self._builddeltaheader = builddeltaheader
        self._manifestsend = manifestsend
        self._ellipses = ellipses

        # Set of capabilities we can use to build the bundle.
        if bundlecaps is None:
            bundlecaps = set()
        self._bundlecaps = bundlecaps
        self._isshallow = shallow
        self._fullnodes = fullnodes

        # Maps ellipsis revs to their roots at the changelog level.
        self._precomputedellipsis = ellipsisroots

        # experimental config: bundle.reorder
        # 'auto' defers to the caller-supplied allowreorder flag
        reorder = repo.ui.config('bundle', 'reorder')
        if reorder == 'auto':
            self._reorder = allowreorder
        else:
            self._reorder = stringutil.parsebool(reorder)

        self._repo = repo

        # verbose-only size notes; a no-op note keeps call sites simple
        if self._repo.ui.verbose and not self._repo.ui.debugflag:
            self._verbosenote = self._repo.ui.note
        else:
            self._verbosenote = lambda s: None

        # TODO the functionality keyed off of this should probably be
        # controlled via arguments to group() that influence behavior.
        self._changelogdone = False

        # Maps CL revs to per-revlog revisions. Cleared in close() at
        # the end of each group.
        self._clrevtolocalrev = {}
        self._nextclrevtolocalrev = {}

        # Maps changelog nodes to changelog revs. Filled in once
        # during changelog stage and then left unmodified.
        self._clnodetorev = {}
603 603
604 604 def _close(self):
605 605 # Ellipses serving mode.
606 606 self._clrevtolocalrev.clear()
607 607 if self._nextclrevtolocalrev is not None:
608 608 self._clrevtolocalrev = self._nextclrevtolocalrev
609 609 self._nextclrevtolocalrev = None
610 610 self._changelogdone = True
611 611
612 612 return closechunk()
613 613
614 614 def _fileheader(self, fname):
615 615 return chunkheader(len(fname)) + fname
616 616
    # Extracted both for clarity and for overriding in extensions.
    def _sortgroup(self, store, nodelist, lookup):
        """Sort nodes for change group and turn them into revnums.

        Returns a list of revision numbers in *store*, ordered per the
        rules below; ``lookup`` maps a node to its linkrev node.
        """
        # Ellipses serving mode.
        #
        # In a perfect world, we'd generate better ellipsis-ified graphs
        # for non-changelog revlogs. In practice, we haven't started doing
        # that yet, so the resulting DAGs for the manifestlog and filelogs
        # are actually full of bogus parentage on all the ellipsis
        # nodes. This has the side effect that, while the contents are
        # correct, the individual DAGs might be completely out of whack in
        # a case like 882681bc3166 and its ancestors (back about 10
        # revisions or so) in the main hg repo.
        #
        # The one invariant we *know* holds is that the new (potentially
        # bogus) DAG shape will be valid if we order the nodes in the
        # order that they're introduced in dramatis personae by the
        # changelog, so what we do is we sort the non-changelog histories
        # by the order in which they are used by the changelog.
        if self._ellipses and self._clnodetorev:
            key = lambda n: self._clnodetorev[lookup(n)]
            return [store.rev(n) for n in sorted(nodelist, key=key)]

        # for generaldelta revlogs, we linearize the revs; this will both be
        # much quicker and generate a much smaller bundle
        if (store._generaldelta and self._reorder is None) or self._reorder:
            dag = dagutil.revlogdag(store)
            return dag.linearize(set(store.rev(n) for n in nodelist))
        else:
            return sorted([store.rev(n) for n in nodelist])
647 647
    def group(self, nodelist, store, lookup, units=None):
        """Calculate a delta group, yielding a sequence of changegroup chunks
        (strings).

        Given a list of changeset revs, return a set of deltas and
        metadata corresponding to nodes. The first delta is
        first parent(nodelist[0]) -> nodelist[0], the receiver is
        guaranteed to have this parent as it has all history before
        these changesets. In the case firstparent is nullrev the
        changegroup starts with a full revision.

        If units is not None, progress detail will be generated, units specifies
        the type of revlog that is touched (changelog, manifest, etc.).
        """
        # if we don't have any revisions touched by these changesets, bail
        if len(nodelist) == 0:
            yield self._close()
            return

        revs = self._sortgroup(store, nodelist, lookup)

        # add the parent of the first rev: each delta is emitted against
        # the rev before it in the list, so the first needs its p1 in front
        p = store.parentrevs(revs[0])[0]
        revs.insert(0, p)

        # build deltas
        progress = None
        if units is not None:
            progress = self._repo.ui.makeprogress(_('bundling'), unit=units,
                                                  total=(len(revs) - 1))
        for r in pycompat.xrange(len(revs) - 1):
            if progress:
                progress.update(r + 1)
            prev, curr = revs[r], revs[r + 1]
            linknode = lookup(store.node(curr))
            for c in self._revchunk(store, curr, prev, linknode):
                yield c

        if progress:
            progress.complete()
        # terminating chunk also flips per-group state (see _close)
        yield self._close()
689 689
690 690 # filter any nodes that claim to be part of the known set
691 691 def _prune(self, store, missing, commonrevs):
692 692 # TODO this violates storage abstraction for manifests.
693 693 if isinstance(store, manifest.manifestrevlog):
694 694 if not self._filematcher.visitdir(store._dir[:-1] or '.'):
695 695 return []
696 696
697 697 rr, rl = store.rev, store.linkrev
698 698 return [n for n in missing if rl(rr(n)) not in commonrevs]
699 699
    def _packmanifests(self, dir, mfnodes, lookuplinknode):
        """Pack manifests into a changegroup stream.

        Encodes the directory name in the output so multiple manifests
        can be sent. Multiple manifests is not supported by cg1 and cg2.
        """

        if dir:
            # only cg3 can carry per-directory (tree) manifests
            assert self.version == b'03'
            yield self._fileheader(dir)

        # TODO violates storage abstractions by assuming revlogs.
        dirlog = self._repo.manifestlog._revlog.dirlog(dir)
        for chunk in self.group(mfnodes, dirlog, lookuplinknode,
                                units=_('manifests')):
            yield chunk
716 716
717 717 def generate(self, commonrevs, clnodes, fastpathlinkrev, source):
718 '''yield a sequence of changegroup chunks (strings)'''
718 """Yield a sequence of changegroup byte chunks."""
719
719 720 repo = self._repo
720 721 cl = repo.changelog
721 722
723 self._verbosenote(_('uncompressed size of bundle content:\n'))
724 size = 0
725
726 clstate, chunks = self._generatechangelog(cl, clnodes)
727 for chunk in chunks:
728 size += len(chunk)
729 yield chunk
730
731 self._verbosenote(_('%8.i (changelog)\n') % size)
732
733 clrevorder = clstate['clrevorder']
734 mfs = clstate['mfs']
735 changedfiles = clstate['changedfiles']
736
737 # We need to make sure that the linkrev in the changegroup refers to
738 # the first changeset that introduced the manifest or file revision.
739 # The fastpath is usually safer than the slowpath, because the filelogs
740 # are walked in revlog order.
741 #
742 # When taking the slowpath with reorder=None and the manifest revlog
743 # uses generaldelta, the manifest may be walked in the "wrong" order.
744 # Without 'clrevorder', we would get an incorrect linkrev (see fix in
745 # cc0ff93d0c0c).
746 #
747 # When taking the fastpath, we are only vulnerable to reordering
748 # of the changelog itself. The changelog never uses generaldelta, so
749 # it is only reordered when reorder=True. To handle this case, we
750 # simply take the slowpath, which already has the 'clrevorder' logic.
751 # This was also fixed in cc0ff93d0c0c.
752 fastpathlinkrev = fastpathlinkrev and not self._reorder
753 # Treemanifests don't work correctly with fastpathlinkrev
754 # either, because we don't discover which directory nodes to
755 # send along with files. This could probably be fixed.
756 fastpathlinkrev = fastpathlinkrev and (
757 'treemanifest' not in repo.requirements)
758
759 fnodes = {} # needed file nodes
760
761 for chunk in self.generatemanifests(commonrevs, clrevorder,
762 fastpathlinkrev, mfs, fnodes, source):
763 yield chunk
764
765 if self._ellipses:
766 mfdicts = None
767 if self._isshallow:
768 mfdicts = [(self._repo.manifestlog[n].read(), lr)
769 for (n, lr) in mfs.iteritems()]
770
771 mfs.clear()
772 clrevs = set(cl.rev(x) for x in clnodes)
773
774 if not fastpathlinkrev:
775 def linknodes(unused, fname):
776 return fnodes.get(fname, {})
777 else:
778 cln = cl.node
779 def linknodes(filerevlog, fname):
780 llr = filerevlog.linkrev
781 fln = filerevlog.node
782 revs = ((r, llr(r)) for r in filerevlog)
783 return dict((fln(r), cln(lr)) for r, lr in revs if lr in clrevs)
784
785 if self._ellipses:
786 # We need to pass the mfdicts variable down into
787 # generatefiles(), but more than one command might have
788 # wrapped generatefiles so we can't modify the function
789 # signature. Instead, we pass the data to ourselves using an
790 # instance attribute. I'm sorry.
791 self._mfdicts = mfdicts
792
793 for chunk in self.generatefiles(changedfiles, linknodes, commonrevs,
794 source):
795 yield chunk
796
797 yield self._close()
798
799 if clnodes:
800 repo.hook('outgoing', node=hex(clnodes[0]), source=source)
801
802 def _generatechangelog(self, cl, nodes):
803 """Generate data for changelog chunks.
804
805 Returns a 2-tuple of a dict containing state and an iterable of
806 byte chunks. The state will not be fully populated until the
807 chunk stream has been fully consumed.
808 """
722 809 clrevorder = {}
723 810 mfs = {} # needed manifests
724 fnodes = {} # needed file nodes
725 mfl = repo.manifestlog
811 mfl = self._repo.manifestlog
726 812 # TODO violates storage abstraction.
727 813 mfrevlog = mfl._revlog
728 814 changedfiles = set()
729 815
730 816 # Callback for the changelog, used to collect changed files and
731 817 # manifest nodes.
732 818 # Returns the linkrev node (identity in the changelog case).
733 819 def lookupcl(x):
734 820 c = cl.read(x)
735 821 clrevorder[x] = len(clrevorder)
736 822
737 823 if self._ellipses:
738 824 # Only update mfs if x is going to be sent. Otherwise we
739 825 # end up with bogus linkrevs specified for manifests and
740 826 # we skip some manifest nodes that we should otherwise
741 827 # have sent.
742 828 if (x in self._fullnodes
743 829 or cl.rev(x) in self._precomputedellipsis):
744 830 n = c[0]
745 831 # Record the first changeset introducing this manifest
746 832 # version.
747 833 mfs.setdefault(n, x)
748 834 # Set this narrow-specific dict so we have the lowest
749 835 # manifest revnum to look up for this cl revnum. (Part of
750 836 # mapping changelog ellipsis parents to manifest ellipsis
751 837 # parents)
752 838 self._nextclrevtolocalrev.setdefault(cl.rev(x),
753 839 mfrevlog.rev(n))
754 840 # We can't trust the changed files list in the changeset if the
755 841 # client requested a shallow clone.
756 842 if self._isshallow:
757 843 changedfiles.update(mfl[c[0]].read().keys())
758 844 else:
759 845 changedfiles.update(c[3])
760 846 else:
761 847
762 848 n = c[0]
763 849 # record the first changeset introducing this manifest version
764 850 mfs.setdefault(n, x)
765 851 # Record a complete list of potentially-changed files in
766 852 # this manifest.
767 853 changedfiles.update(c[3])
768 854
769 855 return x
770 856
771 self._verbosenote(_('uncompressed size of bundle content:\n'))
772 size = 0
773 for chunk in self.group(clnodes, cl, lookupcl, units=_('changesets')):
774 size += len(chunk)
775 yield chunk
776 self._verbosenote(_('%8.i (changelog)\n') % size)
777
778 # We need to make sure that the linkrev in the changegroup refers to
779 # the first changeset that introduced the manifest or file revision.
780 # The fastpath is usually safer than the slowpath, because the filelogs
781 # are walked in revlog order.
782 #
783 # When taking the slowpath with reorder=None and the manifest revlog
784 # uses generaldelta, the manifest may be walked in the "wrong" order.
785 # Without 'clrevorder', we would get an incorrect linkrev (see fix in
786 # cc0ff93d0c0c).
787 #
788 # When taking the fastpath, we are only vulnerable to reordering
789 # of the changelog itself. The changelog never uses generaldelta, so
790 # it is only reordered when reorder=True. To handle this case, we
791 # simply take the slowpath, which already has the 'clrevorder' logic.
792 # This was also fixed in cc0ff93d0c0c.
793 fastpathlinkrev = fastpathlinkrev and not self._reorder
794 # Treemanifests don't work correctly with fastpathlinkrev
795 # either, because we don't discover which directory nodes to
796 # send along with files. This could probably be fixed.
797 fastpathlinkrev = fastpathlinkrev and (
798 'treemanifest' not in repo.requirements)
799
800 for chunk in self.generatemanifests(commonrevs, clrevorder,
801 fastpathlinkrev, mfs, fnodes, source):
802 yield chunk
857 state = {
858 'clrevorder': clrevorder,
859 'mfs': mfs,
860 'changedfiles': changedfiles,
861 }
803 862
804 if self._ellipses:
805 mfdicts = None
806 if self._isshallow:
807 mfdicts = [(self._repo.manifestlog[n].read(), lr)
808 for (n, lr) in mfs.iteritems()]
809
810 mfs.clear()
811 clrevs = set(cl.rev(x) for x in clnodes)
863 gen = self.group(nodes, cl, lookupcl, units=_('changesets'))
812 864
813 if not fastpathlinkrev:
814 def linknodes(unused, fname):
815 return fnodes.get(fname, {})
816 else:
817 cln = cl.node
818 def linknodes(filerevlog, fname):
819 llr = filerevlog.linkrev
820 fln = filerevlog.node
821 revs = ((r, llr(r)) for r in filerevlog)
822 return dict((fln(r), cln(lr)) for r, lr in revs if lr in clrevs)
823
824 if self._ellipses:
825 # We need to pass the mfdicts variable down into
826 # generatefiles(), but more than one command might have
827 # wrapped generatefiles so we can't modify the function
828 # signature. Instead, we pass the data to ourselves using an
829 # instance attribute. I'm sorry.
830 self._mfdicts = mfdicts
831
832 for chunk in self.generatefiles(changedfiles, linknodes, commonrevs,
833 source):
834 yield chunk
835
836 yield self._close()
837
838 if clnodes:
839 repo.hook('outgoing', node=hex(clnodes[0]), source=source)
865 return state, gen
840 866
    def generatemanifests(self, commonrevs, clrevorder, fastpathlinkrev, mfs,
                          fnodes, source):
        """Returns an iterator of changegroup chunks containing manifests.

        `source` is unused here, but is used by extensions like remotefilelog to
        change what is sent based in pulls vs pushes, etc.
        """
        repo = self._repo
        mfl = repo.manifestlog
        dirlog = mfl._revlog.dirlog
        # Map of directory path -> {manifest node -> linknode}. Seeded with
        # the root manifests; subdirectory entries get added as tree
        # manifest entries are discovered during packing below.
        tmfnodes = {'': mfs}

        # Callback for the manifest, used to collect linkrevs for filelog
        # revisions.
        # Returns the linkrev node (collected in lookupcl).
        def makelookupmflinknode(dir, nodes):
            if fastpathlinkrev:
                # Fast path applies only to the root manifest, whose
                # linknodes were already collected in ``mfs``.
                assert not dir
                return mfs.__getitem__

            def lookupmflinknode(x):
                """Callback for looking up the linknode for manifests.

                Returns the linkrev node for the specified manifest.

                SIDE EFFECT:

                  1) fclnodes gets populated with the list of relevant
                     file nodes if we're not using fastpathlinkrev
                  2) When treemanifests are in use, collects treemanifest nodes
                     to send

                Note that this means manifests must be completely sent to
                the client before you can trust the list of files and
                treemanifests to send.
                """
                clnode = nodes[x]
                mdata = mfl.get(dir, x).readfast(shallow=True)
                for p, n, fl in mdata.iterentries():
                    if fl == 't': # subdirectory manifest
                        subdir = dir + p + '/'
                        tmfclnodes = tmfnodes.setdefault(subdir, {})
                        tmfclnode = tmfclnodes.setdefault(n, clnode)
                        # Keep the linknode pointing at the earliest
                        # changeset (per clrevorder) introducing this node.
                        if clrevorder[clnode] < clrevorder[tmfclnode]:
                            tmfclnodes[n] = clnode
                    else:
                        f = dir + p
                        fclnodes = fnodes.setdefault(f, {})
                        fclnode = fclnodes.setdefault(n, clnode)
                        if clrevorder[clnode] < clrevorder[fclnode]:
                            fclnodes[n] = clnode
                return clnode
            return lookupmflinknode

        size = 0
        while tmfnodes:
            dir, nodes = tmfnodes.popitem()
            prunednodes = self._prune(dirlog(dir), nodes, commonrevs)
            # The root manifest ('') is always emitted, even when all of
            # its nodes were pruned.
            if not dir or prunednodes:
                for x in self._packmanifests(dir, prunednodes,
                                             makelookupmflinknode(dir, nodes)):
                    size += len(x)
                    yield x
        self._verbosenote(_('%8.i (manifests)\n') % size)
        yield self._manifestsend
    # The 'source' parameter is useful for extensions
    def generatefiles(self, changedfiles, linknodes, commonrevs, source):
        """Returns an iterator of changegroup chunks for changed files.

        ``linknodes`` is a callable taking ``(filelog, filename)`` and
        returning a mapping of file nodes to linkrev nodes.
        """
        changedfiles = list(filter(self._filematcher, changedfiles))

        if self._isshallow:
            # See comment in generate() for why this sadness is a thing.
            mfdicts = self._mfdicts
            del self._mfdicts
            # In a shallow clone, the linknodes callback needs to also include
            # those file nodes that are in the manifests we sent but weren't
            # introduced by those manifests.
            commonctxs = [self._repo[c] for c in commonrevs]
            oldlinknodes = linknodes
            clrev = self._repo.changelog.rev

            # Defining this function has a side-effect of overriding the
            # function of the same name that was passed in as an argument.
            # TODO have caller pass in appropriate function.
            def linknodes(flog, fname):
                for c in commonctxs:
                    try:
                        # Record the local file revision for each common
                        # changeset so narrow bookkeeping stays accurate.
                        fnode = c.filenode(fname)
                        self._clrevtolocalrev[c.rev()] = flog.rev(fnode)
                    except error.ManifestLookupError:
                        pass
                links = oldlinknodes(flog, fname)
                if len(links) != len(mfdicts):
                    for mf, lr in mfdicts:
                        fnode = mf.get(fname, None)
                        if fnode in links:
                            # Prefer the earliest linkrev (by changelog
                            # revision) for this file node.
                            links[fnode] = min(links[fnode], lr, key=clrev)
                        elif fnode:
                            links[fnode] = lr
                return links

        return self._generatefiles(changedfiles, linknodes, commonrevs, source)
943 969
    def _generatefiles(self, changedfiles, linknodes, commonrevs, source):
        """Emit changegroup chunks for each changed file, in sorted order.

        For every file that still has nodes to send after pruning nodes
        the receiver already has, yields a file header chunk followed by
        the file's revision chunks.
        """
        repo = self._repo
        progress = repo.ui.makeprogress(_('bundling'), unit=_('files'),
                                        total=len(changedfiles))
        for i, fname in enumerate(sorted(changedfiles)):
            filerevlog = repo.file(fname)
            if not filerevlog:
                raise error.Abort(_("empty or missing file data for %s") %
                                  fname)

            linkrevnodes = linknodes(filerevlog, fname)
            # Lookup for filenodes, we collected the linkrev nodes above in the
            # fastpath case and with lookupmf in the slowpath case.
            def lookupfilelog(x):
                return linkrevnodes[x]

            filenodes = self._prune(filerevlog, linkrevnodes, commonrevs)
            if filenodes:
                progress.update(i + 1, item=fname)
                h = self._fileheader(fname)
                size = len(h)
                yield h
                for chunk in self.group(filenodes, filerevlog, lookupfilelog):
                    size += len(chunk)
                    yield chunk
                self._verbosenote(_('%8.i %s\n') % (size, fname))
        progress.complete()
971 997
    def _revchunk(self, store, rev, prev, linknode):
        """Yield the serialized chunks for a single revision.

        Yields nothing when the delta callback returns no delta (a
        revision skipped in ellipsis mode).
        """
        if self._ellipses:
            fn = self._revisiondeltanarrow
        else:
            fn = self._revisiondeltanormal

        delta = fn(store, rev, prev, linknode)
        if not delta:
            return

        meta = self._builddeltaheader(delta)
        # Total chunk length: header metadata plus all delta payload parts.
        l = len(meta) + sum(len(x) for x in delta.deltachunks)

        yield chunkheader(l)
        yield meta
        for x in delta.deltachunks:
            yield x
989 1015
    def _revisiondeltanormal(self, store, rev, prev, linknode):
        """Build a ``revisiondelta`` for ``rev`` in the normal (non-narrow)
        case.

        The delta base is chosen by the version-specific
        ``self._deltaparentfn``. Censored revisions are emitted as a full
        replacement text (possibly the tombstone) rather than a diff.
        """
        node = store.node(rev)
        p1, p2 = store.parentrevs(rev)
        base = self._deltaparentfn(store, rev, p1, p2, prev)

        prefix = ''
        if store.iscensored(base) or store.iscensored(rev):
            try:
                delta = store.revision(node, raw=True)
            except error.CensoredNodeError as e:
                # The tombstone stands in for the censored payload.
                delta = e.tombstone
            if base == nullrev:
                prefix = mdiff.trivialdiffheader(len(delta))
            else:
                baselen = store.rawsize(base)
                prefix = mdiff.replacediffheader(baselen, len(delta))
        elif base == nullrev:
            # No usable base: send the full raw text.
            delta = store.revision(node, raw=True)
            prefix = mdiff.trivialdiffheader(len(delta))
        else:
            delta = store.revdiff(base, rev)
        p1n, p2n = store.parents(node)

        return revisiondelta(
            node=node,
            p1node=p1n,
            p2node=p2n,
            basenode=store.node(base),
            linknode=linknode,
            flags=store.flags(rev),
            deltachunks=(prefix, delta),
        )
1022 1048
    def _revisiondeltanarrow(self, store, rev, prev, linknode):
        """Build a ``revisiondelta`` for ``rev`` in ellipsis (narrow) mode.

        Returns None when the revision should be skipped, delegates to
        ``_revisiondeltanormal`` when the linknode is a full changeset,
        and otherwise emits a full-text ellipsis revision whose parents
        are remapped through the precomputed ellipsis DAG.
        """
        # build up some mapping information that's useful later. See
        # the local() nested function below.
        if not self._changelogdone:
            self._clnodetorev[linknode] = rev
            linkrev = rev
            self._clrevtolocalrev[linkrev] = rev
        else:
            linkrev = self._clnodetorev[linknode]
            self._clrevtolocalrev[linkrev] = rev

        # This is a node to send in full, because the changeset it
        # corresponds to was a full changeset.
        if linknode in self._fullnodes:
            return self._revisiondeltanormal(store, rev, prev, linknode)

        # At this point, a node can either be one we should skip or an
        # ellipsis. If it's not an ellipsis, bail immediately.
        if linkrev not in self._precomputedellipsis:
            return

        linkparents = self._precomputedellipsis[linkrev]
        def local(clrev):
            """Turn a changelog revnum into a local revnum.

            The ellipsis dag is stored as revnums on the changelog,
            but when we're producing ellipsis entries for
            non-changelog revlogs, we need to turn those numbers into
            something local. This does that for us, and during the
            changelog sending phase will also expand the stored
            mappings as needed.
            """
            if clrev == nullrev:
                return nullrev

            if not self._changelogdone:
                # If we're doing the changelog, it's possible that we
                # have a parent that is already on the client, and we
                # need to store some extra mapping information so that
                # our contained ellipsis nodes will be able to resolve
                # their parents.
                if clrev not in self._clrevtolocalrev:
                    clnode = store.node(clrev)
                    self._clnodetorev[clnode] = clrev
                return clrev

            # Walk the ellipsis-ized changelog breadth-first looking for a
            # change that has been linked from the current revlog.
            #
            # For a flat manifest revlog only a single step should be necessary
            # as all relevant changelog entries are relevant to the flat
            # manifest.
            #
            # For a filelog or tree manifest dirlog however not every changelog
            # entry will have been relevant, so we need to skip some changelog
            # nodes even after ellipsis-izing.
            walk = [clrev]
            while walk:
                p = walk[0]
                walk = walk[1:]
                if p in self._clrevtolocalrev:
                    return self._clrevtolocalrev[p]
                elif p in self._fullnodes:
                    walk.extend([pp for pp in self._repo.changelog.parentrevs(p)
                                 if pp != nullrev])
                elif p in self._precomputedellipsis:
                    walk.extend([pp for pp in self._precomputedellipsis[p]
                                 if pp != nullrev])
                else:
                    # In this case, we've got an ellipsis with parents
                    # outside the current bundle (likely an
                    # incremental pull). We "know" that we can use the
                    # value of this same revlog at whatever revision
                    # is pointed to by linknode. "Know" is in scare
                    # quotes because I haven't done enough examination
                    # of edge cases to convince myself this is really
                    # a fact - it works for all the (admittedly
                    # thorough) cases in our testsuite, but I would be
                    # somewhat unsurprised to find a case in the wild
                    # where this breaks down a bit. That said, I don't
                    # know if it would hurt anything.
                    for i in pycompat.xrange(rev, 0, -1):
                        if store.linkrev(i) == clrev:
                            return i
                    # We failed to resolve a parent for this node, so
                    # we crash the changegroup construction.
                    raise error.Abort(
                        'unable to resolve parent while packing %r %r'
                        ' for changeset %r' % (store.indexfile, rev, clrev))

            return nullrev

        if not linkparents or (
            store.parentrevs(rev) == (nullrev, nullrev)):
            p1, p2 = nullrev, nullrev
        elif len(linkparents) == 1:
            p1, = sorted(local(p) for p in linkparents)
            p2 = nullrev
        else:
            p1, p2 = sorted(local(p) for p in linkparents)

        n = store.node(rev)
        p1n, p2n = store.node(p1), store.node(p2)
        flags = store.flags(rev)
        flags |= revlog.REVIDX_ELLIPSIS

        # TODO: try and actually send deltas for ellipsis data blocks
        data = store.revision(n)
        diffheader = mdiff.trivialdiffheader(len(data))

        return revisiondelta(
            node=n,
            p1node=p1n,
            p2node=p2n,
            basenode=nullid,
            linknode=linknode,
            flags=flags,
            deltachunks=(diffheader, data),
        )
1142 1168
1143 1169 def _deltaparentprev(store, rev, p1, p2, prev):
1144 1170 """Resolve a delta parent to the previous revision.
1145 1171
1146 1172 Used for version 1 changegroups, which don't support generaldelta.
1147 1173 """
1148 1174 return prev
1149 1175
def _deltaparentgeneraldelta(store, rev, p1, p2, prev):
    """Resolve a delta parent when general deltas are supported."""
    dp = store.deltaparent(rev)

    if dp == nullrev:
        if store.storedeltachains:
            # Avoid sending full revisions when delta parent is null. Pick
            # prev in that case. It's tempting to pick p1, as p1 will be
            # smaller in the common case. However, computing a delta
            # against p1 may require resolving the raw text of p1, which
            # could be expensive. The revlog caches should have prev
            # cached, meaning less CPU for changegroup generation. There
            # is likely room to add a flag and/or config option to
            # control this behavior.
            base = prev
        else:
            # revlog is configured to use full snapshot for a reason,
            # stick to full snapshot.
            base = nullrev
    elif dp in (p1, p2, prev):
        base = dp
    else:
        # Pick prev when we can't be sure remote has the base revision.
        return prev

    # Fall back to a full snapshot if the chosen base can't actually
    # serve as a delta parent for this revision.
    if base != nullrev and not store.candelta(base, rev):
        base = nullrev

    return base
1176 1202
1177 1203 def _deltaparentellipses(store, rev, p1, p2, prev):
1178 1204 """Resolve a delta parent when in ellipses mode."""
1179 1205 # TODO: send better deltas when in narrow mode.
1180 1206 #
1181 1207 # changegroup.group() loops over revisions to send,
1182 1208 # including revisions we'll skip. What this means is that
1183 1209 # `prev` will be a potentially useless delta base for all
1184 1210 # ellipsis nodes, as the client likely won't have it. In
1185 1211 # the future we should do bookkeeping about which nodes
1186 1212 # have been sent to the client, and try to be
1187 1213 # significantly smarter about delta bases. This is
1188 1214 # slightly tricky because this same code has to work for
1189 1215 # all revlogs, and we don't have the linkrev/linknode here.
1190 1216 return p1
1191 1217
def _makecg1packer(repo, filematcher, bundlecaps, ellipses=False,
                   shallow=False, ellipsisroots=None, fullnodes=None):
    """Create a packer that emits version '01' changegroups."""
    def builddeltaheader(d):
        # cg1 headers carry no delta base; the base is implicitly prev.
        return _CHANGEGROUPV1_DELTA_HEADER.pack(
            d.node, d.p1node, d.p2node, d.linknode)

    return cgpacker(repo, filematcher, b'01',
                    deltaparentfn=_deltaparentprev,
                    allowreorder=None,
                    builddeltaheader=builddeltaheader,
                    manifestsend=b'',
                    bundlecaps=bundlecaps,
                    ellipses=ellipses,
                    shallow=shallow,
                    ellipsisroots=ellipsisroots,
                    fullnodes=fullnodes)
1207 1233
def _makecg2packer(repo, filematcher, bundlecaps, ellipses=False,
                   shallow=False, ellipsisroots=None, fullnodes=None):
    """Create a packer that emits version '02' changegroups."""
    def builddeltaheader(d):
        # cg2 headers add an explicit delta base node.
        return _CHANGEGROUPV2_DELTA_HEADER.pack(
            d.node, d.p1node, d.p2node, d.basenode, d.linknode)

    # Since generaldelta is directly supported by cg2, reordering
    # generally doesn't help, so we disable it by default (treating
    # bundle.reorder=auto just like bundle.reorder=False).
    return cgpacker(repo, filematcher, b'02',
                    deltaparentfn=_deltaparentgeneraldelta,
                    allowreorder=False,
                    builddeltaheader=builddeltaheader,
                    manifestsend=b'',
                    bundlecaps=bundlecaps,
                    ellipses=ellipses,
                    shallow=shallow,
                    ellipsisroots=ellipsisroots,
                    fullnodes=fullnodes)
1226 1252
def _makecg3packer(repo, filematcher, bundlecaps, ellipses=False,
                   shallow=False, ellipsisroots=None, fullnodes=None):
    """Create a packer that emits version '03' changegroups."""
    def builddeltaheader(d):
        # cg3 headers add revlog flags on top of the cg2 fields.
        return _CHANGEGROUPV3_DELTA_HEADER.pack(
            d.node, d.p1node, d.p2node, d.basenode, d.linknode, d.flags)

    # Ellipsis serving requires special delta-parent selection.
    if ellipses:
        deltaparentfn = _deltaparentellipses
    else:
        deltaparentfn = _deltaparentgeneraldelta

    return cgpacker(repo, filematcher, b'03',
                    deltaparentfn=deltaparentfn,
                    allowreorder=False,
                    builddeltaheader=builddeltaheader,
                    manifestsend=closechunk(),
                    bundlecaps=bundlecaps,
                    ellipses=ellipses,
                    shallow=shallow,
                    ellipsisroots=ellipsisroots,
                    fullnodes=fullnodes)
1245 1271
# Maps changegroup version string -> (packer factory, unpacker class).
_packermap = {'01': (_makecg1packer, cg1unpacker),
             # cg2 adds support for exchanging generaldelta
             '02': (_makecg2packer, cg2unpacker),
             # cg3 adds support for exchanging revlog flags and treemanifests
             '03': (_makecg3packer, cg3unpacker),
}
1252 1278
def allsupportedversions(repo):
    """Return the set of changegroup versions this code understands."""
    versions = set(_packermap)
    # '03' is only offered when the repo opts into (or already requires)
    # treemanifest / changegroup3 support.
    wantv3 = (repo.ui.configbool('experimental', 'changegroup3')
              or repo.ui.configbool('experimental', 'treemanifest')
              or 'treemanifest' in repo.requirements)
    if not wantv3:
        versions.discard('03')
    return versions
1260 1286
# Changegroup versions that can be applied to the repo
def supportedincomingversions(repo):
    """Return the changegroup versions this repo can receive."""
    return allsupportedversions(repo)
1264 1290
# Changegroup versions that can be created from the repo
def supportedoutgoingversions(repo):
    """Return the changegroup versions this repo is able to produce."""
    versions = allsupportedversions(repo)
    if 'treemanifest' in repo.requirements:
        # Versions 01 and 02 support only flat manifests and it's just too
        # expensive to convert between the flat manifest and tree manifest on
        # the fly. Since tree manifests are hashed differently, all of history
        # would have to be converted. Instead, we simply don't even pretend to
        # support versions 01 and 02.
        versions.discard('01')
        versions.discard('02')
    if repository.NARROW_REQUIREMENT in repo.requirements:
        # Versions 01 and 02 don't support revlog flags, and we need to
        # support that for stripping and unbundling to work.
        versions.discard('01')
        versions.discard('02')
    if LFS_REQUIREMENT in repo.requirements:
        # Versions 01 and 02 don't support revlog flags, and we need to
        # mark LFS entries with REVIDX_EXTSTORED.
        versions.discard('01')
        versions.discard('02')

    return versions
1288 1314
def localversion(repo):
    """Return the best changegroup version for local-only bundles."""
    # Finds the best version to use for bundles that are meant to be used
    # locally, such as those from strip and shelve, and temporary bundles.
    return max(supportedoutgoingversions(repo))
1293 1319
def safeversion(repo):
    """Return the smallest version clients of this repo should support."""
    # Finds the smallest version that it's safe to assume clients of the repo
    # will support. For example, all hg versions that support generaldelta also
    # support changegroup 02.
    versions = supportedoutgoingversions(repo)
    if 'generaldelta' in repo.requirements:
        versions.discard('01')
    assert versions
    return min(versions)
1303 1329
def getbundler(version, repo, bundlecaps=None, filematcher=None,
               ellipses=False, shallow=False, ellipsisroots=None,
               fullnodes=None):
    """Instantiate the changegroup packer for ``version``.

    Rejects unsupported combinations: sparse file matchers with
    version '01', and ellipsis nodes with anything below cg3.
    """
    assert version in supportedoutgoingversions(repo)

    if filematcher is None:
        filematcher = matchmod.alwaysmatcher(repo.root, '')

    if version == '01' and not filematcher.always():
        raise error.ProgrammingError('version 01 changegroups do not support '
                                     'sparse file matchers')

    if ellipses and version in (b'01', b'02'):
        raise error.Abort(
            _('ellipsis nodes require at least cg3 on client and server, '
              'but negotiated version %s') % version)

    # Requested files could include files not in the local store. So
    # filter those out.
    filematcher = matchmod.intersectmatchers(repo.narrowmatch(),
                                             filematcher)

    fn = _packermap[version][0]
    return fn(repo, filematcher, bundlecaps, ellipses=ellipses,
              shallow=shallow, ellipsisroots=ellipsisroots,
              fullnodes=fullnodes)
1330 1356
def getunbundler(version, fh, alg, extras=None):
    """Instantiate the unpacker class registered for ``version``."""
    unpackercls = _packermap[version][1]
    return unpackercls(fh, alg, extras=extras)
1333 1359
def _changegroupinfo(repo, nodes, source):
    """Report the changesets being bundled to the ui.

    The summary count is shown when verbose or when bundling; the full
    node list is shown only in debug mode.
    """
    ui = repo.ui
    if not (ui.verbose or source == 'bundle'):
        return
    ui.status(_("%d changesets found\n") % len(nodes))
    if ui.debugflag:
        ui.debug("list of changesets:\n")
        for node in nodes:
            ui.debug("%s\n" % hex(node))
1341 1367
def makechangegroup(repo, outgoing, version, source, fastpath=False,
                    bundlecaps=None):
    """Build a changegroup for ``outgoing`` and wrap it in an unbundler.

    The chunk stream from makestream() is buffered so the result can be
    consumed like a received changegroup.
    """
    cgstream = makestream(repo, outgoing, version, source,
                          fastpath=fastpath, bundlecaps=bundlecaps)
    return getunbundler(version, util.chunkbuffer(cgstream), None,
                        {'clcount': len(outgoing.missing) })
1348 1374
def makestream(repo, outgoing, version, source, fastpath=False,
               bundlecaps=None, filematcher=None):
    """Return an iterator of raw changegroup chunks for ``outgoing``."""
    bundler = getbundler(version, repo, bundlecaps=bundlecaps,
                         filematcher=filematcher)

    repo = repo.unfiltered()
    commonrevs = outgoing.common
    csets = outgoing.missing
    heads = outgoing.missingheads
    # We go through the fast path if we get told to, or if all (unfiltered)
    # heads have been requested (since we then know there all linkrevs will
    # be pulled by the client).
    heads.sort()
    fastpathlinkrev = fastpath or (
        repo.filtername is None and heads == sorted(repo.heads()))

    repo.hook('preoutgoing', throw=True, source=source)
    _changegroupinfo(repo, csets, source)
    return bundler.generate(commonrevs, csets, fastpathlinkrev, source)
1368 1394
def _addchangegroupfiles(repo, source, revmap, trp, expectedfiles, needfiles):
    """Apply the file portion of an incoming changegroup.

    ``needfiles`` maps filename -> collection of file nodes required by
    the received manifests; nodes are checked off as they arrive and any
    leftovers are verified against the existing store at the end.

    Returns a ``(revisions added, files touched)`` tuple. Raises
    error.Abort on empty groups, censored delta bases, or spurious /
    missing file revisions.
    """
    revisions = 0
    files = 0
    progress = repo.ui.makeprogress(_('files'), unit=_('files'),
                                    total=expectedfiles)
    # filelogheader() returns {} at the end of the file chunks.
    for chunkdata in iter(source.filelogheader, {}):
        files += 1
        f = chunkdata["filename"]
        repo.ui.debug("adding %s revisions\n" % f)
        progress.increment()
        fl = repo.file(f)
        o = len(fl)
        try:
            deltas = source.deltaiter()
            if not fl.addgroup(deltas, revmap, trp):
                raise error.Abort(_("received file revlog group is empty"))
        except error.CensoredBaseError as e:
            raise error.Abort(_("received delta base is censored: %s") % e)
        revisions += len(fl) - o
        if f in needfiles:
            needs = needfiles[f]
            # Check off each newly added node against what the manifests
            # said we need.
            for new in pycompat.xrange(o, len(fl)):
                n = fl.node(new)
                if n in needs:
                    needs.remove(n)
                else:
                    raise error.Abort(
                        _("received spurious file revlog entry"))
            if not needs:
                del needfiles[f]
    progress.complete()

    # Anything still needed must already exist locally, or the bundle
    # was incomplete.
    for f, needs in needfiles.iteritems():
        fl = repo.file(f)
        for n in needs:
            try:
                fl.rev(n)
            except error.LookupError:
                raise error.Abort(
                    _('missing file data for %s:%s - run hg verify') %
                    (f, hex(n)))

    return revisions, files
General Comments 0
You need to be logged in to leave comments. Login now