##// END OF EJS Templates
changegroup: record changelogdone after fully consuming its data...
Gregory Szorc -
r39015:6d726d1b default
parent child Browse files
Show More
@@ -1,1437 +1,1438
1 1 # changegroup.py - Mercurial changegroup manipulation functions
2 2 #
3 3 # Copyright 2006 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import os
11 11 import struct
12 12 import weakref
13 13
14 14 from .i18n import _
15 15 from .node import (
16 16 hex,
17 17 nullid,
18 18 nullrev,
19 19 short,
20 20 )
21 21
22 22 from .thirdparty import (
23 23 attr,
24 24 )
25 25
26 26 from . import (
27 27 dagutil,
28 28 error,
29 29 manifest,
30 30 match as matchmod,
31 31 mdiff,
32 32 phases,
33 33 pycompat,
34 34 repository,
35 35 revlog,
36 36 util,
37 37 )
38 38
39 39 from .utils import (
40 40 stringutil,
41 41 )
42 42
# Pre-compiled struct formats for the per-revision delta headers of each
# changegroup wire version:
#   cg1: node, p1, p2, linknode (4 x 20-byte nodes)
#   cg2: adds an explicit delta base node
#   cg3: additionally appends a big-endian 16-bit flags field
_CHANGEGROUPV1_DELTA_HEADER = struct.Struct("20s20s20s20s")
_CHANGEGROUPV2_DELTA_HEADER = struct.Struct("20s20s20s20s20s")
_CHANGEGROUPV3_DELTA_HEADER = struct.Struct(">20s20s20s20s20sH")

# Repository requirement string advertised when LFS is in use.
LFS_REQUIREMENT = 'lfs'

# Local alias: read exactly N bytes from a stream or raise on short read.
readexactly = util.readexactly
def getchunk(stream):
    """return the next chunk from stream as a string"""
    header = readexactly(stream, 4)
    length = struct.unpack(">l", header)[0]
    # On the wire the length field counts its own 4 bytes, so payloads
    # start above 4.
    if length > 4:
        return readexactly(stream, length - 4)
    # A length of 0 is the group terminator; 1-4 (or negative) is corrupt
    # framing.
    if length:
        raise error.Abort(_("invalid chunk length %d") % length)
    return ""
60 60
def chunkheader(length):
    """return a changegroup chunk header (string)"""
    # The transmitted length includes the 4 bytes of the header itself.
    total = length + 4
    return struct.pack(">l", total)
64 64
def closechunk():
    """return a changegroup chunk header (string) for a zero-length chunk"""
    # A zero length field terminates a sequence of chunks.
    zero_length = 0
    return struct.pack(">l", zero_length)
68 68
def writechunks(ui, chunks, filename, vfs=None):
    """Write chunks to a file and return its filename.

    The stream is assumed to be a bundle file.
    Existing files will not be overwritten.
    If no filename is specified, a temporary file is created.
    """
    fh = None
    cleanup = None
    try:
        if not filename:
            # No name given: spill into a temp file and remember to delete
            # it if the write does not finish.
            fd, filename = pycompat.mkstemp(prefix="hg-bundle-", suffix=".hg")
            fh = os.fdopen(fd, r"wb")
            cleanup = filename
        elif vfs:
            fh = vfs.open(filename, "wb")
        else:
            # Increase default buffer size because default is usually
            # small (4k is common on Linux).
            fh = open(filename, "wb", 131072)

        for chunk in chunks:
            fh.write(chunk)
        # All chunks written successfully; keep the file.
        cleanup = None
        return filename
    finally:
        if fh is not None:
            fh.close()
        if cleanup is not None:
            if filename and vfs:
                vfs.unlink(cleanup)
            else:
                os.unlink(cleanup)
102 102
class cg1unpacker(object):
    """Unpacker for cg1 changegroup streams.

    A changegroup unpacker handles the framing of the revision data in
    the wire format. Most consumers will want to use the apply()
    method to add the changes from the changegroup to a repository.

    If you're forwarding a changegroup unmodified to another consumer,
    use getchunks(), which returns an iterator of changegroup
    chunks. This is mostly useful for cases where you need to know the
    data stream has ended by observing the end of the changegroup.

    deltachunk() is useful only if you're applying delta data. Most
    consumers should prefer apply() instead.

    A few other public methods exist. Those are used only for
    bundlerepo and some debug commands - their use is discouraged.
    """
    # Per-version wire format knobs; subclasses override these.
    deltaheader = _CHANGEGROUPV1_DELTA_HEADER
    deltaheadersize = deltaheader.size
    version = '01'
    _grouplistcount = 1 # One list of files after the manifests

    def __init__(self, fh, alg, extras=None):
        """Wrap stream ``fh`` compressed with bundle compression ``alg``.

        ``alg`` is a bundle compression type name ('UN', 'BZ', 'GZ', ...)
        or None, which is treated as uncompressed ('UN').
        """
        if alg is None:
            alg = 'UN'
        if alg not in util.compengines.supportedbundletypes:
            raise error.Abort(_('unknown stream compression type: %s')
                              % alg)
        if alg == 'BZ':
            # Changegroup payloads lack the usual 'BZ' magic prefix, so a
            # special truncated-header decompressor is used.
            alg = '_truncatedBZ'

        compengine = util.compengines.forbundletype(alg)
        self._stream = compengine.decompressorreader(fh)
        self._type = alg
        self.extras = extras or {}
        # Optional no-argument progress callback, invoked once per
        # non-empty chunk (see _chunklength()).
        self.callback = None

    # These methods (compressed, read, seek, tell) all appear to only
    # be used by bundlerepo, but it's a little hard to tell.
    def compressed(self):
        return self._type is not None and self._type != 'UN'
    def read(self, l):
        return self._stream.read(l)
    def seek(self, pos):
        return self._stream.seek(pos)
    def tell(self):
        return self._stream.tell()
    def close(self):
        return self._stream.close()

    def _chunklength(self):
        """Read a chunk's length prefix and return the payload size.

        Returns 0 at an empty (terminating) chunk; aborts on a corrupt
        length. Fires self.callback for every non-empty chunk.
        """
        d = readexactly(self._stream, 4)
        l = struct.unpack(">l", d)[0]
        if l <= 4:
            if l:
                raise error.Abort(_("invalid chunk length %d") % l)
            return 0
        if self.callback:
            self.callback()
        # The length on the wire includes the 4-byte prefix itself.
        return l - 4

    def changelogheader(self):
        """v10 does not have a changelog header chunk"""
        return {}

    def manifestheader(self):
        """v10 does not have a manifest header chunk"""
        return {}

    def filelogheader(self):
        """return the header of the filelogs chunk, v10 only has the filename"""
        l = self._chunklength()
        if not l:
            # Empty chunk terminates the filelog list.
            return {}
        fname = readexactly(self._stream, l)
        return {'filename': fname}

    def _deltaheader(self, headertuple, prevnode):
        """Decode a cg1 delta header tuple.

        cg1 has no explicit delta base: deltas chain against the previous
        node in the group, or against p1 for the first entry.
        """
        node, p1, p2, cs = headertuple
        if prevnode is None:
            deltabase = p1
        else:
            deltabase = prevnode
        flags = 0
        return node, p1, p2, deltabase, cs, flags

    def deltachunk(self, prevnode):
        """Read one delta entry; returns {} at the end of the group.

        On success returns a 7-tuple
        (node, p1, p2, linknode, deltabase, delta, flags).
        """
        l = self._chunklength()
        if not l:
            return {}
        headerdata = readexactly(self._stream, self.deltaheadersize)
        header = self.deltaheader.unpack(headerdata)
        delta = readexactly(self._stream, l - self.deltaheadersize)
        node, p1, p2, deltabase, cs, flags = self._deltaheader(header, prevnode)
        return (node, p1, p2, cs, deltabase, delta, flags)

    def getchunks(self):
        """returns all the chunks contains in the bundle

        Used when you need to forward the binary stream to a file or another
        network API. To do so, it parse the changegroup data, otherwise it will
        block in case of sshrepo because it don't know the end of the stream.
        """
        # For changegroup 1 and 2, we expect 3 parts: changelog, manifestlog,
        # and a list of filelogs. For changegroup 3, we expect 4 parts:
        # changelog, manifestlog, a list of tree manifestlogs, and a list of
        # filelogs.
        #
        # Changelog and manifestlog parts are terminated with empty chunks. The
        # tree and file parts are a list of entry sections. Each entry section
        # is a series of chunks terminating in an empty chunk. The list of these
        # entry sections is terminated in yet another empty chunk, so we know
        # we've reached the end of the tree/file list when we reach an empty
        # chunk that was proceeded by no non-empty chunks.

        parts = 0
        while parts < 2 + self._grouplistcount:
            noentries = True
            while True:
                chunk = getchunk(self)
                if not chunk:
                    # The first two empty chunks represent the end of the
                    # changelog and the manifestlog portions. The remaining
                    # empty chunks represent either A) the end of individual
                    # tree or file entries in the file list, or B) the end of
                    # the entire list. It's the end of the entire list if there
                    # were no entries (i.e. noentries is True).
                    if parts < 2:
                        parts += 1
                    elif noentries:
                        parts += 1
                    break
                noentries = False
                yield chunkheader(len(chunk))
                pos = 0
                # Re-emit the payload in at most 1MB slices to bound memory.
                while pos < len(chunk):
                    next = pos + 2**20
                    yield chunk[pos:next]
                    pos = next
            yield closechunk()

    def _unpackmanifests(self, repo, revmap, trp, prog):
        """Consume the manifest group and add it to the repository."""
        self.callback = prog.increment
        # no need to check for empty manifest group here:
        # if the result of the merge of 1 and 2 is the same in 3 and 4,
        # no new manifest will be created and the manifest group will
        # be empty during the pull
        self.manifestheader()
        deltas = self.deltaiter()
        repo.manifestlog.addgroup(deltas, revmap, trp)
        prog.complete()
        self.callback = None

    def apply(self, repo, tr, srctype, url, targetphase=phases.draft,
              expectedtotal=None):
        """Add the changegroup returned by source.read() to this repo.
        srctype is a string like 'push', 'pull', or 'unbundle'. url is
        the URL of the repo where this changegroup is coming from.

        Return an integer summarizing the change to this repo:
        - nothing changed or no source: 0
        - more heads than before: 1+added heads (2..n)
        - fewer heads than before: -1-removed heads (-2..-n)
        - number of heads stays the same: 1
        """
        repo = repo.unfiltered()
        def csmap(x):
            repo.ui.debug("add changeset %s\n" % short(x))
            return len(cl)

        def revmap(x):
            return cl.rev(x)

        changesets = files = revisions = 0

        try:
            # The transaction may already carry source information. In this
            # case we use the top level data. We overwrite the argument
            # because we need to use the top level value (if they exist)
            # in this function.
            srctype = tr.hookargs.setdefault('source', srctype)
            url = tr.hookargs.setdefault('url', url)
            repo.hook('prechangegroup',
                      throw=True, **pycompat.strkwargs(tr.hookargs))

            # write changelog data to temp files so concurrent readers
            # will not see an inconsistent view
            cl = repo.changelog
            cl.delayupdate(tr)
            oldheads = set(cl.heads())

            trp = weakref.proxy(tr)
            # pull off the changeset group
            repo.ui.status(_("adding changesets\n"))
            clstart = len(cl)
            progress = repo.ui.makeprogress(_('changesets'), unit=_('chunks'),
                                            total=expectedtotal)
            self.callback = progress.increment

            # Per-changeset callback collects the union of files touched,
            # used to size the file-adding progress bar below.
            efiles = set()
            def onchangelog(cl, node):
                efiles.update(cl.readfiles(node))

            self.changelogheader()
            deltas = self.deltaiter()
            cgnodes = cl.addgroup(deltas, csmap, trp, addrevisioncb=onchangelog)
            efiles = len(efiles)

            if not cgnodes:
                repo.ui.develwarn('applied empty changegroup',
                                  config='warn-empty-changegroup')
            clend = len(cl)
            changesets = clend - clstart
            progress.complete()
            self.callback = None

            # pull off the manifest group
            repo.ui.status(_("adding manifests\n"))
            # We know that we'll never have more manifests than we had
            # changesets.
            progress = repo.ui.makeprogress(_('manifests'), unit=_('chunks'),
                                            total=changesets)
            self._unpackmanifests(repo, revmap, trp, progress)

            needfiles = {}
            if repo.ui.configbool('server', 'validate'):
                cl = repo.changelog
                ml = repo.manifestlog
                # validate incoming csets have their manifests
                for cset in pycompat.xrange(clstart, clend):
                    mfnode = cl.changelogrevision(cset).manifest
                    mfest = ml[mfnode].readdelta()
                    # store file cgnodes we must see
                    for f, n in mfest.iteritems():
                        needfiles.setdefault(f, set()).add(n)

            # process the files
            repo.ui.status(_("adding file changes\n"))
            newrevs, newfiles = _addchangegroupfiles(
                repo, self, revmap, trp, efiles, needfiles)
            revisions += newrevs
            files += newfiles

            # Compute the head-count delta, ignoring new heads that close
            # a branch.
            deltaheads = 0
            if oldheads:
                heads = cl.heads()
                deltaheads = len(heads) - len(oldheads)
                for h in heads:
                    if h not in oldheads and repo[h].closesbranch():
                        deltaheads -= 1
            htext = ""
            if deltaheads:
                htext = _(" (%+d heads)") % deltaheads

            repo.ui.status(_("added %d changesets"
                             " with %d changes to %d files%s\n")
                           % (changesets, revisions, files, htext))
            repo.invalidatevolatilesets()

            if changesets > 0:
                # 'node'/'node_last' bound the incoming changeset range for
                # the hooks; respect values already set on the transaction.
                if 'node' not in tr.hookargs:
                    tr.hookargs['node'] = hex(cl.node(clstart))
                    tr.hookargs['node_last'] = hex(cl.node(clend - 1))
                    hookargs = dict(tr.hookargs)
                else:
                    hookargs = dict(tr.hookargs)
                    hookargs['node'] = hex(cl.node(clstart))
                    hookargs['node_last'] = hex(cl.node(clend - 1))
                repo.hook('pretxnchangegroup',
                          throw=True, **pycompat.strkwargs(hookargs))

            added = [cl.node(r) for r in pycompat.xrange(clstart, clend)]
            phaseall = None
            if srctype in ('push', 'serve'):
                # Old servers can not push the boundary themselves.
                # New servers won't push the boundary if changeset already
                # exists locally as secret
                #
                # We should not use added here but the list of all change in
                # the bundle
                if repo.publishing():
                    targetphase = phaseall = phases.public
                else:
                    # closer target phase computation

                    # Those changesets have been pushed from the
                    # outside, their phases are going to be pushed
                    # alongside. Therefor `targetphase` is
                    # ignored.
                    targetphase = phaseall = phases.draft
            if added:
                phases.registernew(repo, tr, targetphase, added)
            if phaseall is not None:
                phases.advanceboundary(repo, tr, phaseall, cgnodes)

            if changesets > 0:

                def runhooks():
                    # These hooks run when the lock releases, not when the
                    # transaction closes. So it's possible for the changelog
                    # to have changed since we last saw it.
                    if clstart >= len(repo):
                        return

                    repo.hook("changegroup", **pycompat.strkwargs(hookargs))

                    for n in added:
                        args = hookargs.copy()
                        args['node'] = hex(n)
                        del args['node_last']
                        repo.hook("incoming", **pycompat.strkwargs(args))

                    newheads = [h for h in repo.heads()
                                if h not in oldheads]
                    repo.ui.log("incoming",
                                "%d incoming changes - new heads: %s\n",
                                len(added),
                                ', '.join([hex(c[:6]) for c in newheads]))

                tr.addpostclose('changegroup-runhooks-%020i' % clstart,
                                lambda tr: repo._afterlock(runhooks))
        finally:
            repo.ui.flush()
        # never return 0 here:
        if deltaheads < 0:
            ret = deltaheads - 1
        else:
            ret = deltaheads + 1
        return ret

    def deltaiter(self):
        """
        returns an iterator of the deltas in this changegroup

        Useful for passing to the underlying storage system to be stored.
        """
        # Each chunk's implicit delta base (for cg1) is the previously
        # yielded node, threaded through via ``chain``.
        chain = None
        for chunkdata in iter(lambda: self.deltachunk(chain), {}):
            # Chunkdata: (node, p1, p2, cs, deltabase, delta, flags)
            yield chunkdata
            chain = chunkdata[0]
445 445
class cg2unpacker(cg1unpacker):
    """Unpacker for cg2 streams.

    cg2 streams add support for generaldelta, so the delta header
    format is slightly different. All other features about the data
    remain the same.
    """
    deltaheader = _CHANGEGROUPV2_DELTA_HEADER
    deltaheadersize = deltaheader.size
    version = '02'

    def _deltaheader(self, headertuple, prevnode):
        # The cg2 header names its delta base explicitly, so the previous
        # node is irrelevant; no flags are carried on the wire.
        node, p1, p2, deltabase, cs = headertuple
        return node, p1, p2, deltabase, cs, 0
461 461
class cg3unpacker(cg2unpacker):
    """Unpacker for cg3 streams.

    cg3 streams add support for exchanging treemanifests and revlog
    flags. It adds the revlog flags to the delta header and an empty chunk
    separating manifests and files.
    """
    deltaheader = _CHANGEGROUPV3_DELTA_HEADER
    deltaheadersize = deltaheader.size
    version = '03'
    _grouplistcount = 2 # One list of manifests and one list of files

    def _deltaheader(self, headertuple, prevnode):
        # The cg3 header already carries all six fields, flags included.
        node, p1, p2, deltabase, cs, flags = headertuple
        return node, p1, p2, deltabase, cs, flags

    def _unpackmanifests(self, repo, revmap, trp, prog):
        # Consume the root manifest group first...
        super(cg3unpacker, self)._unpackmanifests(repo, revmap, trp, prog)
        # ...then any tree (directory) manifest groups, each introduced by
        # a filelog-style header naming the directory. An empty header
        # ends the list.
        while True:
            chunkdata = self.filelogheader()
            if not chunkdata:
                break
            d = chunkdata["filename"]
            repo.ui.debug("adding %s revisions\n" % d)
            dirlog = repo.manifestlog._revlog.dirlog(d)
            deltas = self.deltaiter()
            if not dirlog.addgroup(deltas, revmap, trp):
                raise error.Abort(_("received dir revlog group is empty"))
488 488
class headerlessfixup(object):
    """File-like wrapper that replays already-consumed header bytes.

    Reads are served from the buffered header first; once it is
    exhausted, reads fall through to the underlying file handle.
    """
    def __init__(self, fh, h):
        self._h = h
        self._fh = fh

    def read(self, n):
        if not self._h:
            return readexactly(self._fh, n)
        d, self._h = self._h[:n], self._h[n:]
        if len(d) < n:
            # Header ran out mid-read; top up from the real stream.
            d += readexactly(self._fh, n - len(d))
        return d
500 500
@attr.s(slots=True, frozen=True)
class revisiondelta(object):
    """Describes a delta entry in a changegroup.

    Captured data is sufficient to serialize the delta into multiple
    formats.
    """
    # 20 byte node of this revision.
    node = attr.ib()
    # 20 byte nodes of parent revisions.
    p1node = attr.ib()
    p2node = attr.ib()
    # 20 byte node of the revision this delta is against.
    basenode = attr.ib()
    # 20 byte node of changeset revision this delta is associated with.
    linknode = attr.ib()
    # 2 bytes of flags to apply to revision data.
    flags = attr.ib()
    # Iterable of chunks holding raw delta data.
    deltachunks = attr.ib()
521 521
522 522 class cgpacker(object):
    def __init__(self, repo, filematcher, version, allowreorder,
                 deltaparentfn, builddeltaheader, manifestsend,
                 bundlecaps=None, ellipses=False,
                 shallow=False, ellipsisroots=None, fullnodes=None):
        """Given a source repo, construct a bundler.

        filematcher is a matcher that matches on files to include in the
        changegroup. Used to facilitate sparse changegroups.

        allowreorder controls whether reordering of revisions is allowed.
        This value is used when ``bundle.reorder`` is ``auto`` or isn't
        set.

        deltaparentfn is a callable that resolves the delta parent for
        a specific revision.

        builddeltaheader is a callable that constructs the header for a group
        delta.

        manifestsend is a chunk to send after manifests have been fully emitted.

        ellipses indicates whether ellipsis serving mode is enabled.

        bundlecaps is optional and can be used to specify the set of
        capabilities which can be used to build the bundle. While bundlecaps is
        unused in core Mercurial, extensions rely on this feature to communicate
        capabilities to customize the changegroup packer.

        shallow indicates whether shallow data might be sent. The packer may
        need to pack file contents not introduced by the changes being packed.

        ellipsisroots, when provided, maps ellipsis revisions to their roots
        at the changelog level (stored as ``_precomputedellipsis``).

        fullnodes is the list of nodes which should not be ellipsis nodes. We
        store this rather than the set of nodes that should be ellipsis because
        for very large histories we expect this to be significantly smaller.
        """
        assert filematcher
        self._filematcher = filematcher

        self.version = version
        self._deltaparentfn = deltaparentfn
        self._builddeltaheader = builddeltaheader
        self._manifestsend = manifestsend
        self._ellipses = ellipses

        # Set of capabilities we can use to build the bundle.
        if bundlecaps is None:
            bundlecaps = set()
        self._bundlecaps = bundlecaps
        self._isshallow = shallow
        self._fullnodes = fullnodes

        # Maps ellipsis revs to their roots at the changelog level.
        self._precomputedellipsis = ellipsisroots

        # experimental config: bundle.reorder
        reorder = repo.ui.config('bundle', 'reorder')
        if reorder == 'auto':
            self._reorder = allowreorder
        else:
            self._reorder = stringutil.parsebool(reorder)

        self._repo = repo

        if self._repo.ui.verbose and not self._repo.ui.debugflag:
            self._verbosenote = self._repo.ui.note
        else:
            self._verbosenote = lambda s: None

        # TODO the functionality keyed off of this should probably be
        # controlled via arguments to group() that influence behavior.
        self._changelogdone = False

        # Maps CL revs to per-revlog revisions. Cleared in close() at
        # the end of each group.
        self._clrevtolocalrev = {}
        self._nextclrevtolocalrev = {}

        # Maps changelog nodes to changelog revs. Filled in once
        # during changelog stage and then left unmodified.
        self._clnodetorev = {}
603 603
    def _close(self):
        """Emit a group-terminating empty chunk and rotate ellipsis state."""
        # Ellipses serving mode.
        self._clrevtolocalrev.clear()
        if self._nextclrevtolocalrev is not None:
            self._clrevtolocalrev = self._nextclrevtolocalrev
            self._nextclrevtolocalrev = None
        # NOTE: _changelogdone is deliberately NOT set here; generate()
        # flips it only after the changelog chunk stream has been fully
        # consumed, since group() produces its data lazily.

        return closechunk()
613 612
614 613 def _fileheader(self, fname):
615 614 return chunkheader(len(fname)) + fname
616 615
    # Extracted both for clarity and for overriding in extensions.
    def _sortgroup(self, store, nodelist, lookup):
        """Sort nodes for change group and turn them into revnums."""
        # Ellipses serving mode.
        #
        # In a perfect world, we'd generate better ellipsis-ified graphs
        # for non-changelog revlogs. In practice, we haven't started doing
        # that yet, so the resulting DAGs for the manifestlog and filelogs
        # are actually full of bogus parentage on all the ellipsis
        # nodes. This has the side effect that, while the contents are
        # correct, the individual DAGs might be completely out of whack in
        # a case like 882681bc3166 and its ancestors (back about 10
        # revisions or so) in the main hg repo.
        #
        # The one invariant we *know* holds is that the new (potentially
        # bogus) DAG shape will be valid if we order the nodes in the
        # order that they're introduced in dramatis personae by the
        # changelog, so what we do is we sort the non-changelog histories
        # by the order in which they are used by the changelog.
        if self._ellipses and self._changelogdone:
            key = lambda n: self._clnodetorev[lookup(n)]
            return [store.rev(n) for n in sorted(nodelist, key=key)]

        # for generaldelta revlogs, we linearize the revs; this will both be
        # much quicker and generate a much smaller bundle
        if (store._generaldelta and self._reorder is None) or self._reorder:
            dag = dagutil.revlogdag(store)
            return dag.linearize(set(store.rev(n) for n in nodelist))
        else:
            return sorted([store.rev(n) for n in nodelist])
647 646
    def group(self, nodelist, store, lookup, units=None):
        """Calculate a delta group, yielding a sequence of changegroup chunks
        (strings).

        Given a list of changeset revs, return a set of deltas and
        metadata corresponding to nodes. The first delta is
        first parent(nodelist[0]) -> nodelist[0], the receiver is
        guaranteed to have this parent as it has all history before
        these changesets. In the case firstparent is nullrev the
        changegroup starts with a full revision.

        If units is not None, progress detail will be generated, units specifies
        the type of revlog that is touched (changelog, manifest, etc.).
        """
        # if we don't have any revisions touched by these changesets, bail
        if len(nodelist) == 0:
            yield self._close()
            return

        revs = self._sortgroup(store, nodelist, lookup)

        # add the parent of the first rev
        p = store.parentrevs(revs[0])[0]
        revs.insert(0, p)

        # build deltas
        progress = None
        if units is not None:
            progress = self._repo.ui.makeprogress(_('bundling'), unit=units,
                                                  total=(len(revs) - 1))
        # Each revision is delta'd against its predecessor in the sorted
        # list (the parent prepended above seeds the first delta).
        for r in pycompat.xrange(len(revs) - 1):
            if progress:
                progress.update(r + 1)
            prev, curr = revs[r], revs[r + 1]
            linknode = lookup(store.node(curr))
            for c in self._revchunk(store, curr, prev, linknode):
                yield c

        if progress:
            progress.complete()
        yield self._close()
689 688
690 689 # filter any nodes that claim to be part of the known set
691 690 def _prune(self, store, missing, commonrevs):
692 691 # TODO this violates storage abstraction for manifests.
693 692 if isinstance(store, manifest.manifestrevlog):
694 693 if not self._filematcher.visitdir(store._dir[:-1] or '.'):
695 694 return []
696 695
697 696 rr, rl = store.rev, store.linkrev
698 697 return [n for n in missing if rl(rr(n)) not in commonrevs]
699 698
700 699 def _packmanifests(self, dir, mfnodes, lookuplinknode):
701 700 """Pack manifests into a changegroup stream.
702 701
703 702 Encodes the directory name in the output so multiple manifests
704 703 can be sent. Multiple manifests is not supported by cg1 and cg2.
705 704 """
706 705
707 706 if dir:
708 707 assert self.version == b'03'
709 708 yield self._fileheader(dir)
710 709
711 710 # TODO violates storage abstractions by assuming revlogs.
712 711 dirlog = self._repo.manifestlog._revlog.dirlog(dir)
713 712 for chunk in self.group(mfnodes, dirlog, lookuplinknode,
714 713 units=_('manifests')):
715 714 yield chunk
716 715
    def generate(self, commonrevs, clnodes, fastpathlinkrev, source):
        """Yield a sequence of changegroup byte chunks."""

        repo = self._repo
        cl = repo.changelog

        self._verbosenote(_('uncompressed size of bundle content:\n'))
        size = 0

        clstate, chunks = self._generatechangelog(cl, clnodes)
        for chunk in chunks:
            size += len(chunk)
            yield chunk

        self._verbosenote(_('%8.i (changelog)\n') % size)

        # Changelog chunks are produced lazily by group(); only now, after
        # they have been fully consumed, is clstate populated and the
        # changelog stage truly finished.
        self._changelogdone = True

        clrevorder = clstate['clrevorder']
        mfs = clstate['mfs']
        changedfiles = clstate['changedfiles']

        # We need to make sure that the linkrev in the changegroup refers to
        # the first changeset that introduced the manifest or file revision.
        # The fastpath is usually safer than the slowpath, because the filelogs
        # are walked in revlog order.
        #
        # When taking the slowpath with reorder=None and the manifest revlog
        # uses generaldelta, the manifest may be walked in the "wrong" order.
        # Without 'clrevorder', we would get an incorrect linkrev (see fix in
        # cc0ff93d0c0c).
        #
        # When taking the fastpath, we are only vulnerable to reordering
        # of the changelog itself. The changelog never uses generaldelta, so
        # it is only reordered when reorder=True. To handle this case, we
        # simply take the slowpath, which already has the 'clrevorder' logic.
        # This was also fixed in cc0ff93d0c0c.
        fastpathlinkrev = fastpathlinkrev and not self._reorder
        # Treemanifests don't work correctly with fastpathlinkrev
        # either, because we don't discover which directory nodes to
        # send along with files. This could probably be fixed.
        fastpathlinkrev = fastpathlinkrev and (
            'treemanifest' not in repo.requirements)

        fnodes = {} # needed file nodes

        for chunk in self.generatemanifests(commonrevs, clrevorder,
                fastpathlinkrev, mfs, fnodes, source):
            yield chunk

        if self._ellipses:
            mfdicts = None
            if self._isshallow:
                mfdicts = [(self._repo.manifestlog[n].read(), lr)
                           for (n, lr) in mfs.iteritems()]

        mfs.clear()
        clrevs = set(cl.rev(x) for x in clnodes)

        if not fastpathlinkrev:
            def linknodes(unused, fname):
                return fnodes.get(fname, {})
        else:
            cln = cl.node
            def linknodes(filerevlog, fname):
                llr = filerevlog.linkrev
                fln = filerevlog.node
                revs = ((r, llr(r)) for r in filerevlog)
                return dict((fln(r), cln(lr)) for r, lr in revs if lr in clrevs)

        if self._ellipses:
            # We need to pass the mfdicts variable down into
            # generatefiles(), but more than one command might have
            # wrapped generatefiles so we can't modify the function
            # signature. Instead, we pass the data to ourselves using an
            # instance attribute. I'm sorry.
            self._mfdicts = mfdicts

        for chunk in self.generatefiles(changedfiles, linknodes, commonrevs,
                                        source):
            yield chunk

        yield self._close()

        if clnodes:
            repo.hook('outgoing', node=hex(clnodes[0]), source=source)
801 802
    def _generatechangelog(self, cl, nodes):
        """Generate data for changelog chunks.

        Returns a 2-tuple of a dict containing state and an iterable of
        byte chunks. The state will not be fully populated until the
        chunk stream has been fully consumed.
        """
        clrevorder = {}
        mfs = {} # needed manifests
        mfl = self._repo.manifestlog
        # TODO violates storage abstraction.
        mfrevlog = mfl._revlog
        changedfiles = set()

        # Callback for the changelog, used to collect changed files and
        # manifest nodes.
        # Returns the linkrev node (identity in the changelog case).
        def lookupcl(x):
            c = cl.read(x)
            clrevorder[x] = len(clrevorder)

            if self._ellipses:
                # Only update mfs if x is going to be sent. Otherwise we
                # end up with bogus linkrevs specified for manifests and
                # we skip some manifest nodes that we should otherwise
                # have sent.
                if (x in self._fullnodes
                    or cl.rev(x) in self._precomputedellipsis):
                    n = c[0]
                    # Record the first changeset introducing this manifest
                    # version.
                    mfs.setdefault(n, x)
                    # Set this narrow-specific dict so we have the lowest
                    # manifest revnum to look up for this cl revnum. (Part of
                    # mapping changelog ellipsis parents to manifest ellipsis
                    # parents)
                    self._nextclrevtolocalrev.setdefault(cl.rev(x),
                                                         mfrevlog.rev(n))
                # We can't trust the changed files list in the changeset if the
                # client requested a shallow clone.
                if self._isshallow:
                    changedfiles.update(mfl[c[0]].read().keys())
                else:
                    changedfiles.update(c[3])
            else:

                n = c[0]
                # record the first changeset introducing this manifest version
                mfs.setdefault(n, x)
                # Record a complete list of potentially-changed files in
                # this manifest.
                changedfiles.update(c[3])

            return x

        state = {
            'clrevorder': clrevorder,
            'mfs': mfs,
            'changedfiles': changedfiles,
        }

        # group() is lazy: the state above fills in only as the caller
        # consumes these chunks.
        gen = self.group(nodes, cl, lookupcl, units=_('changesets'))

        return state, gen
866 867
    def generatemanifests(self, commonrevs, clrevorder, fastpathlinkrev, mfs,
                          fnodes, source):
        """Returns an iterator of changegroup chunks containing manifests.

        `source` is unused here, but is used by extensions like remotefilelog to
        change what is sent based in pulls vs pushes, etc.
        """
        repo = self._repo
        mfl = repo.manifestlog
        dirlog = mfl._revlog.dirlog
        # Maps manifest directory ('' is the root manifest) to a dict of
        # {manifest node: linknode}. Subdirectory entries are added as tree
        # manifests are walked by the lookup callback below.
        tmfnodes = {'': mfs}

        # Callback for the manifest, used to collect linkrevs for filelog
        # revisions.
        # Returns the linkrev node (collected in lookupcl).
        def makelookupmflinknode(dir, nodes):
            if fastpathlinkrev:
                # Fast path only applies to the root (flat) manifest.
                assert not dir
                return mfs.__getitem__

            def lookupmflinknode(x):
                """Callback for looking up the linknode for manifests.

                Returns the linkrev node for the specified manifest.

                SIDE EFFECT:

                1) fclnodes gets populated with the list of relevant
                   file nodes if we're not using fastpathlinkrev
                2) When treemanifests are in use, collects treemanifest nodes
                   to send

                Note that this means manifests must be completely sent to
                the client before you can trust the list of files and
                treemanifests to send.
                """
                clnode = nodes[x]
                mdata = mfl.get(dir, x).readfast(shallow=True)
                for p, n, fl in mdata.iterentries():
                    if fl == 't': # subdirectory manifest
                        subdir = dir + p + '/'
                        tmfclnodes = tmfnodes.setdefault(subdir, {})
                        tmfclnode = tmfclnodes.setdefault(n, clnode)
                        # Prefer the earliest-emitted changeset as linknode.
                        if clrevorder[clnode] < clrevorder[tmfclnode]:
                            tmfclnodes[n] = clnode
                    else:
                        f = dir + p
                        fclnodes = fnodes.setdefault(f, {})
                        fclnode = fclnodes.setdefault(n, clnode)
                        if clrevorder[clnode] < clrevorder[fclnode]:
                            fclnodes[n] = clnode
                return clnode
            return lookupmflinknode

        size = 0
        # Drain directories as they are discovered; consuming the packed
        # chunks may add new subdirectory entries to tmfnodes.
        while tmfnodes:
            dir, nodes = tmfnodes.popitem()
            prunednodes = self._prune(dirlog(dir), nodes, commonrevs)
            if not dir or prunednodes:
                for x in self._packmanifests(dir, prunednodes,
                                             makelookupmflinknode(dir, nodes)):
                    size += len(x)
                    yield x
        self._verbosenote(_('%8.i (manifests)\n') % size)
        yield self._manifestsend
932 933
    # The 'source' parameter is useful for extensions
    def generatefiles(self, changedfiles, linknodes, commonrevs, source):
        """Prepare file emission and delegate to ``_generatefiles``.

        ``linknodes`` is a callable (flog, fname) -> {filenode: linknode};
        in shallow mode it is wrapped to also account for file nodes that
        were carried by the manifests we sent.
        """
        changedfiles = list(filter(self._filematcher, changedfiles))

        if self._isshallow:
            # See comment in generate() for why this sadness is a thing.
            mfdicts = self._mfdicts
            del self._mfdicts
            # In a shallow clone, the linknodes callback needs to also include
            # those file nodes that are in the manifests we sent but weren't
            # introduced by those manifests.
            commonctxs = [self._repo[c] for c in commonrevs]
            oldlinknodes = linknodes
            clrev = self._repo.changelog.rev

            # Defining this function has a side-effect of overriding the
            # function of the same name that was passed in as an argument.
            # TODO have caller pass in appropriate function.
            def linknodes(flog, fname):
                for c in commonctxs:
                    try:
                        fnode = c.filenode(fname)
                        self._clrevtolocalrev[c.rev()] = flog.rev(fnode)
                    except error.ManifestLookupError:
                        # File absent from this common manifest; ignore.
                        pass
                links = oldlinknodes(flog, fname)
                if len(links) != len(mfdicts):
                    for mf, lr in mfdicts:
                        fnode = mf.get(fname, None)
                        if fnode in links:
                            # Keep the earliest (by changelog rev) linkrev.
                            links[fnode] = min(links[fnode], lr, key=clrev)
                        elif fnode:
                            links[fnode] = lr
                return links

        return self._generatefiles(changedfiles, linknodes, commonrevs, source)
969 970
    def _generatefiles(self, changedfiles, linknodes, commonrevs, source):
        """Yield changegroup chunks for the given files.

        For each file (in sorted order) that still has revisions to send
        after pruning against ``commonrevs``, emits a file header chunk
        followed by the revision group chunks.
        """
        repo = self._repo
        progress = repo.ui.makeprogress(_('bundling'), unit=_('files'),
                                        total=len(changedfiles))
        for i, fname in enumerate(sorted(changedfiles)):
            filerevlog = repo.file(fname)
            if not filerevlog:
                raise error.Abort(_("empty or missing file data for %s") %
                                  fname)

            linkrevnodes = linknodes(filerevlog, fname)
            # Lookup for filenodes, we collected the linkrev nodes above in the
            # fastpath case and with lookupmf in the slowpath case.
            def lookupfilelog(x):
                return linkrevnodes[x]

            filenodes = self._prune(filerevlog, linkrevnodes, commonrevs)
            if filenodes:
                progress.update(i + 1, item=fname)
                h = self._fileheader(fname)
                size = len(h)
                yield h
                for chunk in self.group(filenodes, filerevlog, lookupfilelog):
                    size += len(chunk)
                    yield chunk
                self._verbosenote(_('%8.i %s\n') % (size, fname))
        progress.complete()
997 998
998 999 def _revchunk(self, store, rev, prev, linknode):
999 1000 if self._ellipses:
1000 1001 fn = self._revisiondeltanarrow
1001 1002 else:
1002 1003 fn = self._revisiondeltanormal
1003 1004
1004 1005 delta = fn(store, rev, prev, linknode)
1005 1006 if not delta:
1006 1007 return
1007 1008
1008 1009 meta = self._builddeltaheader(delta)
1009 1010 l = len(meta) + sum(len(x) for x in delta.deltachunks)
1010 1011
1011 1012 yield chunkheader(l)
1012 1013 yield meta
1013 1014 for x in delta.deltachunks:
1014 1015 yield x
1015 1016
    def _revisiondeltanormal(self, store, rev, prev, linknode):
        """Build a revisiondelta for ``rev`` against a policy-chosen base.

        ``prev`` is the previously emitted revision, available as a
        fallback delta base. Censored revisions are sent as full texts
        (possibly a tombstone) with a synthesized diff header.
        """
        node = store.node(rev)
        p1, p2 = store.parentrevs(rev)
        # Delta base selection is delegated to the per-version policy
        # function configured on this packer.
        base = self._deltaparentfn(store, rev, p1, p2, prev)

        prefix = ''
        if store.iscensored(base) or store.iscensored(rev):
            try:
                delta = store.revision(node, raw=True)
            except error.CensoredNodeError as e:
                # The censored revision's "text" is its tombstone.
                delta = e.tombstone
            if base == nullrev:
                prefix = mdiff.trivialdiffheader(len(delta))
            else:
                # Full replacement diff against the (censored) base.
                baselen = store.rawsize(base)
                prefix = mdiff.replacediffheader(baselen, len(delta))
        elif base == nullrev:
            # No usable base: send the full raw text with a trivial header.
            delta = store.revision(node, raw=True)
            prefix = mdiff.trivialdiffheader(len(delta))
        else:
            delta = store.revdiff(base, rev)
        p1n, p2n = store.parents(node)

        return revisiondelta(
            node=node,
            p1node=p1n,
            p2node=p2n,
            basenode=store.node(base),
            linknode=linknode,
            flags=store.flags(rev),
            deltachunks=(prefix, delta),
        )
1048 1049
    def _revisiondeltanarrow(self, store, rev, prev, linknode):
        """Build a revisiondelta for ``rev`` when serving ellipsis nodes.

        Full nodes are delegated to ``_revisiondeltanormal``; non-ellipsis
        skipped nodes return None; ellipsis nodes are emitted as full
        texts flagged with REVIDX_ELLIPSIS and remapped parents.
        """
        # build up some mapping information that's useful later. See
        # the local() nested function below.
        if not self._changelogdone:
            # While sending the changelog, linknode identifies this very
            # revision, so rev doubles as the linkrev.
            self._clnodetorev[linknode] = rev
            linkrev = rev
            self._clrevtolocalrev[linkrev] = rev
        else:
            linkrev = self._clnodetorev[linknode]
            self._clrevtolocalrev[linkrev] = rev

        # This is a node to send in full, because the changeset it
        # corresponds to was a full changeset.
        if linknode in self._fullnodes:
            return self._revisiondeltanormal(store, rev, prev, linknode)

        # At this point, a node can either be one we should skip or an
        # ellipsis. If it's not an ellipsis, bail immediately.
        if linkrev not in self._precomputedellipsis:
            return

        linkparents = self._precomputedellipsis[linkrev]
        def local(clrev):
            """Turn a changelog revnum into a local revnum.

            The ellipsis dag is stored as revnums on the changelog,
            but when we're producing ellipsis entries for
            non-changelog revlogs, we need to turn those numbers into
            something local. This does that for us, and during the
            changelog sending phase will also expand the stored
            mappings as needed.
            """
            if clrev == nullrev:
                return nullrev

            if not self._changelogdone:
                # If we're doing the changelog, it's possible that we
                # have a parent that is already on the client, and we
                # need to store some extra mapping information so that
                # our contained ellipsis nodes will be able to resolve
                # their parents.
                if clrev not in self._clrevtolocalrev:
                    clnode = store.node(clrev)
                    self._clnodetorev[clnode] = clrev
                return clrev

            # Walk the ellipsis-ized changelog breadth-first looking for a
            # change that has been linked from the current revlog.
            #
            # For a flat manifest revlog only a single step should be necessary
            # as all relevant changelog entries are relevant to the flat
            # manifest.
            #
            # For a filelog or tree manifest dirlog however not every changelog
            # entry will have been relevant, so we need to skip some changelog
            # nodes even after ellipsis-izing.
            walk = [clrev]
            while walk:
                p = walk[0]
                walk = walk[1:]
                if p in self._clrevtolocalrev:
                    return self._clrevtolocalrev[p]
                elif p in self._fullnodes:
                    walk.extend([pp for pp in self._repo.changelog.parentrevs(p)
                                 if pp != nullrev])
                elif p in self._precomputedellipsis:
                    walk.extend([pp for pp in self._precomputedellipsis[p]
                                 if pp != nullrev])
                else:
                    # In this case, we've got an ellipsis with parents
                    # outside the current bundle (likely an
                    # incremental pull). We "know" that we can use the
                    # value of this same revlog at whatever revision
                    # is pointed to by linknode. "Know" is in scare
                    # quotes because I haven't done enough examination
                    # of edge cases to convince myself this is really
                    # a fact - it works for all the (admittedly
                    # thorough) cases in our testsuite, but I would be
                    # somewhat unsurprised to find a case in the wild
                    # where this breaks down a bit. That said, I don't
                    # know if it would hurt anything.
                    for i in pycompat.xrange(rev, 0, -1):
                        if store.linkrev(i) == clrev:
                            return i
                    # We failed to resolve a parent for this node, so
                    # we crash the changegroup construction.
                    raise error.Abort(
                        'unable to resolve parent while packing %r %r'
                        ' for changeset %r' % (store.indexfile, rev, clrev))

            return nullrev

        if not linkparents or (
            store.parentrevs(rev) == (nullrev, nullrev)):
            p1, p2 = nullrev, nullrev
        elif len(linkparents) == 1:
            p1, = sorted(local(p) for p in linkparents)
            p2 = nullrev
        else:
            p1, p2 = sorted(local(p) for p in linkparents)

        n = store.node(rev)
        p1n, p2n = store.node(p1), store.node(p2)
        flags = store.flags(rev)
        flags |= revlog.REVIDX_ELLIPSIS

        # TODO: try and actually send deltas for ellipsis data blocks
        data = store.revision(n)
        diffheader = mdiff.trivialdiffheader(len(data))

        return revisiondelta(
            node=n,
            p1node=p1n,
            p2node=p2n,
            basenode=nullid,
            linknode=linknode,
            flags=flags,
            deltachunks=(diffheader, data),
        )
1168 1169
1169 1170 def _deltaparentprev(store, rev, p1, p2, prev):
1170 1171 """Resolve a delta parent to the previous revision.
1171 1172
1172 1173 Used for version 1 changegroups, which don't support generaldelta.
1173 1174 """
1174 1175 return prev
1175 1176
def _deltaparentgeneraldelta(store, rev, p1, p2, prev):
    """Resolve a delta parent when general deltas are supported."""
    storagebase = store.deltaparent(rev)

    if storagebase == nullrev:
        if store.storedeltachains:
            # Avoid sending full revisions when delta parent is null. Pick
            # prev in that case. It's tempting to pick p1 in this case, as
            # p1 will be smaller in the common case. However, computing a
            # delta against p1 may require resolving the raw text of p1,
            # which could be expensive. The revlog caches should have prev
            # cached, meaning less CPU for changegroup generation. There is
            # likely room to add a flag and/or config option to control
            # this behavior.
            base = prev
        else:
            # revlog is configured to use full snapshot for a reason,
            # stick to full snapshot.
            base = nullrev
    elif storagebase in (p1, p2, prev):
        base = storagebase
    else:
        # Pick prev when we can't be sure remote has the base revision.
        return prev

    if base != nullrev and not store.candelta(base, rev):
        base = nullrev

    return base
1202 1203
1203 1204 def _deltaparentellipses(store, rev, p1, p2, prev):
1204 1205 """Resolve a delta parent when in ellipses mode."""
1205 1206 # TODO: send better deltas when in narrow mode.
1206 1207 #
1207 1208 # changegroup.group() loops over revisions to send,
1208 1209 # including revisions we'll skip. What this means is that
1209 1210 # `prev` will be a potentially useless delta base for all
1210 1211 # ellipsis nodes, as the client likely won't have it. In
1211 1212 # the future we should do bookkeeping about which nodes
1212 1213 # have been sent to the client, and try to be
1213 1214 # significantly smarter about delta bases. This is
1214 1215 # slightly tricky because this same code has to work for
1215 1216 # all revlogs, and we don't have the linkrev/linknode here.
1216 1217 return p1
1217 1218
def _makecg1packer(repo, filematcher, bundlecaps, ellipses=False,
                   shallow=False, ellipsisroots=None, fullnodes=None):
    """Create a changegroup packer producing version 01 streams."""
    def builddeltaheader(d):
        # cg1 headers carry no explicit delta base; the base is implicitly
        # the previously sent revision.
        return _CHANGEGROUPV1_DELTA_HEADER.pack(
            d.node, d.p1node, d.p2node, d.linknode)

    return cgpacker(repo, filematcher, b'01',
                    deltaparentfn=_deltaparentprev,
                    allowreorder=None,
                    builddeltaheader=builddeltaheader,
                    manifestsend=b'',
                    bundlecaps=bundlecaps,
                    ellipses=ellipses,
                    shallow=shallow,
                    ellipsisroots=ellipsisroots,
                    fullnodes=fullnodes)
1233 1234
def _makecg2packer(repo, filematcher, bundlecaps, ellipses=False,
                   shallow=False, ellipsisroots=None, fullnodes=None):
    """Create a changegroup packer producing version 02 streams."""
    def builddeltaheader(d):
        # cg2 extends cg1 by recording an explicit delta base node.
        return _CHANGEGROUPV2_DELTA_HEADER.pack(
            d.node, d.p1node, d.p2node, d.basenode, d.linknode)

    # Since generaldelta is directly supported by cg2, reordering
    # generally doesn't help, so we disable it by default (treating
    # bundle.reorder=auto just like bundle.reorder=False).
    return cgpacker(repo, filematcher, b'02',
                    deltaparentfn=_deltaparentgeneraldelta,
                    allowreorder=False,
                    builddeltaheader=builddeltaheader,
                    manifestsend=b'',
                    bundlecaps=bundlecaps,
                    ellipses=ellipses,
                    shallow=shallow,
                    ellipsisroots=ellipsisroots,
                    fullnodes=fullnodes)
1252 1253
def _makecg3packer(repo, filematcher, bundlecaps, ellipses=False,
                   shallow=False, ellipsisroots=None, fullnodes=None):
    """Create a changegroup packer producing version 03 streams."""
    def builddeltaheader(d):
        # cg3 extends cg2 with a 16-bit revision flags field.
        return _CHANGEGROUPV3_DELTA_HEADER.pack(
            d.node, d.p1node, d.p2node, d.basenode, d.linknode, d.flags)

    if ellipses:
        deltaparentfn = _deltaparentellipses
    else:
        deltaparentfn = _deltaparentgeneraldelta

    return cgpacker(repo, filematcher, b'03',
                    deltaparentfn=deltaparentfn,
                    allowreorder=False,
                    builddeltaheader=builddeltaheader,
                    manifestsend=closechunk(),
                    bundlecaps=bundlecaps,
                    ellipses=ellipses,
                    shallow=shallow,
                    ellipsisroots=ellipsisroots,
                    fullnodes=fullnodes)
1271 1272
# Maps changegroup version string to a 2-tuple of
# (packer factory function, unpacker class).
_packermap = {'01': (_makecg1packer, cg1unpacker),
             # cg2 adds support for exchanging generaldelta
             '02': (_makecg2packer, cg2unpacker),
             # cg3 adds support for exchanging revlog flags and treemanifests
             '03': (_makecg3packer, cg3unpacker),
}
1278 1279
def allsupportedversions(repo):
    """Return the set of changegroup versions this repo can work with."""
    versions = set(_packermap.keys())

    # cg3 is only offered when something actually wants it: one of the
    # experimental knobs is enabled, or the repo already uses tree
    # manifests.
    wantscg3 = (repo.ui.configbool('experimental', 'changegroup3')
                or repo.ui.configbool('experimental', 'treemanifest')
                or 'treemanifest' in repo.requirements)
    if not wantscg3:
        versions.discard('03')

    return versions
1286 1287
# Changegroup versions that can be applied to the repo
def supportedincomingversions(repo):
    """Return changegroup versions that ``repo`` can accept.

    Currently identical to the full supported set.
    """
    return allsupportedversions(repo)
1290 1291
# Changegroup versions that can be created from the repo
def supportedoutgoingversions(repo):
    """Return changegroup versions that can be produced from ``repo``."""
    versions = allsupportedversions(repo)

    # Versions 01 and 02 support neither tree manifests nor revlog flags,
    # so any of the following repo features forces cg3:
    #
    # - treemanifest: converting between flat and tree manifests on the fly
    #   is just too expensive. Since tree manifests are hashed differently,
    #   all of history would have to be converted. Instead, we simply don't
    #   even pretend to support versions 01 and 02.
    # - narrow: stripping and unbundling need revlog flags (ellipsis).
    # - lfs: LFS entries must be marked with REVIDX_EXTSTORED.
    needsflags = ('treemanifest' in repo.requirements
                  or repository.NARROW_REQUIREMENT in repo.requirements
                  or LFS_REQUIREMENT in repo.requirements)
    if needsflags:
        versions.discard('01')
        versions.discard('02')

    return versions
1314 1315
def localversion(repo):
    """Return the best version for locally-consumed bundles.

    Used for bundles meant to be used locally, such as those from strip
    and shelve, and temporary bundles. Local consumers understand
    everything we can produce, so take the richest available format.
    """
    versions = supportedoutgoingversions(repo)
    return max(versions)
1319 1320
def safeversion(repo):
    """Return the smallest version safe to assume clients will support.

    For example, all hg versions that support generaldelta also support
    changegroup 02.
    """
    candidates = supportedoutgoingversions(repo)
    if 'generaldelta' in repo.requirements:
        candidates.discard('01')
    assert candidates
    return min(candidates)
1329 1330
def getbundler(version, repo, bundlecaps=None, filematcher=None,
               ellipses=False, shallow=False, ellipsisroots=None,
               fullnodes=None):
    """Instantiate the changegroup packer for ``version``.

    Raises ProgrammingError when a sparse matcher is combined with cg1,
    and Abort when ellipsis serving is requested for a pre-cg3 version.
    """
    assert version in supportedoutgoingversions(repo)

    if filematcher is None:
        filematcher = matchmod.alwaysmatcher(repo.root, '')

    if version == '01' and not filematcher.always():
        raise error.ProgrammingError('version 01 changegroups do not support '
                                     'sparse file matchers')

    if ellipses and version in (b'01', b'02'):
        raise error.Abort(
            _('ellipsis nodes require at least cg3 on client and server, '
              'but negotiated version %s') % version)

    # Requested files could include files not in the local store. So
    # filter those out.
    filematcher = matchmod.intersectmatchers(repo.narrowmatch(),
                                             filematcher)

    fn = _packermap[version][0]
    return fn(repo, filematcher, bundlecaps, ellipses=ellipses,
              shallow=shallow, ellipsisroots=ellipsisroots,
              fullnodes=fullnodes)
1356 1357
def getunbundler(version, fh, alg, extras=None):
    """Instantiate the unpacker for ``version`` over stream ``fh``."""
    unpacker = _packermap[version][1]
    return unpacker(fh, alg, extras=extras)
1359 1360
def _changegroupinfo(repo, nodes, source):
    """Emit user-facing feedback about the changesets being bundled."""
    ui = repo.ui
    if ui.verbose or source == 'bundle':
        ui.status(_("%d changesets found\n") % len(nodes))
    if ui.debugflag:
        ui.debug("list of changesets:\n")
        for node in nodes:
            ui.debug("%s\n" % hex(node))
1367 1368
def makechangegroup(repo, outgoing, version, source, fastpath=False,
                    bundlecaps=None):
    """Return an unbundler wrapping a freshly generated changegroup."""
    stream = makestream(repo, outgoing, version, source,
                        fastpath=fastpath, bundlecaps=bundlecaps)
    extras = {'clcount': len(outgoing.missing)}
    return getunbundler(version, util.chunkbuffer(stream), None, extras)
1374 1375
def makestream(repo, outgoing, version, source, fastpath=False,
               bundlecaps=None, filematcher=None):
    """Return an iterable of raw changegroup bytes for ``outgoing``.

    ``outgoing`` carries the common/missing node sets; ``source`` names
    the operation (e.g. 'push', 'pull', 'bundle') and is passed to hooks.
    """
    bundler = getbundler(version, repo, bundlecaps=bundlecaps,
                         filematcher=filematcher)

    repo = repo.unfiltered()
    commonrevs = outgoing.common
    csets = outgoing.missing
    heads = outgoing.missingheads
    # We go through the fast path if we get told to, or if all unfiltered
    # heads have been requested (since we then know all linkrevs will
    # be pulled by the client).
    heads.sort()
    fastpathlinkrev = fastpath or (
        repo.filtername is None and heads == sorted(repo.heads()))

    repo.hook('preoutgoing', throw=True, source=source)
    _changegroupinfo(repo, csets, source)
    return bundler.generate(commonrevs, csets, fastpathlinkrev, source)
1394 1395
def _addchangegroupfiles(repo, source, revmap, trp, expectedfiles, needfiles):
    """Apply the filelog portion of an incoming changegroup.

    ``source`` is the unbundler positioned at the file data; ``revmap``
    maps linkrev nodes to local revisions; ``trp`` is the transaction
    proxy. ``needfiles`` maps filename -> set of expected nodes and is
    mutated as nodes arrive; anything left unsatisfied raises Abort.
    Returns a (revisions added, files touched) tuple.
    """
    revisions = 0
    files = 0
    progress = repo.ui.makeprogress(_('files'), unit=_('files'),
                                    total=expectedfiles)
    # An empty header dict is the sentinel for "no more file parts".
    for chunkdata in iter(source.filelogheader, {}):
        files += 1
        f = chunkdata["filename"]
        repo.ui.debug("adding %s revisions\n" % f)
        progress.increment()
        fl = repo.file(f)
        o = len(fl)
        try:
            deltas = source.deltaiter()
            if not fl.addgroup(deltas, revmap, trp):
                raise error.Abort(_("received file revlog group is empty"))
        except error.CensoredBaseError as e:
            raise error.Abort(_("received delta base is censored: %s") % e)
        revisions += len(fl) - o
        if f in needfiles:
            needs = needfiles[f]
            # Check every newly added node off the expected list; anything
            # unexpected is an error.
            for new in pycompat.xrange(o, len(fl)):
                n = fl.node(new)
                if n in needs:
                    needs.remove(n)
                else:
                    raise error.Abort(
                        _("received spurious file revlog entry"))
            if not needs:
                del needfiles[f]
    progress.complete()

    # Whatever remains in needfiles must already exist locally, or the
    # incoming data was incomplete.
    for f, needs in needfiles.iteritems():
        fl = repo.file(f)
        for n in needs:
            try:
                fl.rev(n)
            except error.LookupError:
                raise error.Abort(
                    _('missing file data for %s:%s - run hg verify') %
                    (f, hex(n)))

    return revisions, files
General Comments 0
You need to be logged in to leave comments. Login now