changegroup: control delta parent behavior via constructor...
Gregory Szorc
r38937:23ae0c07 default
@@ -1,1387 +1,1399 b''
1 1 # changegroup.py - Mercurial changegroup manipulation functions
2 2 #
3 3 # Copyright 2006 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import os
11 11 import struct
12 12 import weakref
13 13
14 14 from .i18n import _
15 15 from .node import (
16 16 hex,
17 17 nullid,
18 18 nullrev,
19 19 short,
20 20 )
21 21
22 22 from .thirdparty import (
23 23 attr,
24 24 )
25 25
26 26 from . import (
27 27 dagutil,
28 28 error,
29 29 manifest,
30 30 match as matchmod,
31 31 mdiff,
32 32 phases,
33 33 pycompat,
34 34 repository,
35 35 revlog,
36 36 util,
37 37 )
38 38
39 39 from .utils import (
40 40 stringutil,
41 41 )
42 42
43 43 _CHANGEGROUPV1_DELTA_HEADER = struct.Struct("20s20s20s20s")
44 44 _CHANGEGROUPV2_DELTA_HEADER = struct.Struct("20s20s20s20s20s")
45 45 _CHANGEGROUPV3_DELTA_HEADER = struct.Struct(">20s20s20s20s20sH")
46 46
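
These fixed-size structs define the per-delta headers for each changegroup version: cg1 carries node, p1, p2 and linknode; cg2 adds an explicit delta base; cg3 appends a 2-byte flags field. A minimal standalone sketch of how such a header round-trips (dummy 20-byte values, not real hashes):

    import struct

    # cg2 delta header layout: node, p1, p2, deltabase, linknode.
    cg2header = struct.Struct("20s20s20s20s20s")
    fields = (b'\x01' * 20, b'\x02' * 20, b'\x03' * 20,
              b'\x04' * 20, b'\x05' * 20)
    packed = cg2header.pack(*fields)
    assert cg2header.size == 100 and cg2header.unpack(packed) == fields
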
47 47 LFS_REQUIREMENT = 'lfs'
48 48
49 49 readexactly = util.readexactly
50 50
51 51 def getchunk(stream):
52 52 """return the next chunk from stream as a string"""
53 53 d = readexactly(stream, 4)
54 54 l = struct.unpack(">l", d)[0]
55 55 if l <= 4:
56 56 if l:
57 57 raise error.Abort(_("invalid chunk length %d") % l)
58 58 return ""
59 59 return readexactly(stream, l - 4)
60 60
61 61 def chunkheader(length):
62 62 """return a changegroup chunk header (string)"""
63 63 return struct.pack(">l", length + 4)
64 64
65 65 def closechunk():
66 66 """return a changegroup chunk header (string) for a zero-length chunk"""
67 67 return struct.pack(">l", 0)
68 68
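
getchunk(), chunkheader() and closechunk() together define the length-prefixed framing used by every changegroup stream: a 4-byte big-endian length that counts itself, with a bare zero length terminating a group. A self-contained sketch of that round trip (illustrative helper names, standard library only):

    import io
    import struct

    def write_chunk(out, data):
        # The length prefix counts the 4 length bytes themselves.
        out.write(struct.pack(">l", len(data) + 4))
        out.write(data)

    def iter_chunks(stream):
        while True:
            l = struct.unpack(">l", stream.read(4))[0]
            if l <= 4:
                return  # empty chunk: end of this group
            yield stream.read(l - 4)

    buf = io.BytesIO()
    write_chunk(buf, b'first')
    write_chunk(buf, b'second')
    buf.write(struct.pack(">l", 0))  # closechunk()
    buf.seek(0)
    assert list(iter_chunks(buf)) == [b'first', b'second']
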
69 69 def writechunks(ui, chunks, filename, vfs=None):
70 70 """Write chunks to a file and return its filename.
71 71
72 72 The stream is assumed to be a bundle file.
73 73 Existing files will not be overwritten.
74 74 If no filename is specified, a temporary file is created.
75 75 """
76 76 fh = None
77 77 cleanup = None
78 78 try:
79 79 if filename:
80 80 if vfs:
81 81 fh = vfs.open(filename, "wb")
82 82 else:
83 83 # Increase default buffer size because default is usually
84 84 # small (4k is common on Linux).
85 85 fh = open(filename, "wb", 131072)
86 86 else:
87 87 fd, filename = pycompat.mkstemp(prefix="hg-bundle-", suffix=".hg")
88 88 fh = os.fdopen(fd, r"wb")
89 89 cleanup = filename
90 90 for c in chunks:
91 91 fh.write(c)
92 92 cleanup = None
93 93 return filename
94 94 finally:
95 95 if fh is not None:
96 96 fh.close()
97 97 if cleanup is not None:
98 98 if filename and vfs:
99 99 vfs.unlink(cleanup)
100 100 else:
101 101 os.unlink(cleanup)
102 102
103 103 class cg1unpacker(object):
104 104 """Unpacker for cg1 changegroup streams.
105 105
106 106 A changegroup unpacker handles the framing of the revision data in
107 107 the wire format. Most consumers will want to use the apply()
108 108 method to add the changes from the changegroup to a repository.
109 109
110 110 If you're forwarding a changegroup unmodified to another consumer,
111 111 use getchunks(), which returns an iterator of changegroup
112 112 chunks. This is mostly useful for cases where you need to know the
113 113 data stream has ended by observing the end of the changegroup.
114 114
115 115 deltachunk() is useful only if you're applying delta data. Most
116 116 consumers should prefer apply() instead.
117 117
118 118 A few other public methods exist. Those are used only for
119 119 bundlerepo and some debug commands - their use is discouraged.
120 120 """
121 121 deltaheader = _CHANGEGROUPV1_DELTA_HEADER
122 122 deltaheadersize = deltaheader.size
123 123 version = '01'
124 124 _grouplistcount = 1 # One list of files after the manifests
125 125
126 126 def __init__(self, fh, alg, extras=None):
127 127 if alg is None:
128 128 alg = 'UN'
129 129 if alg not in util.compengines.supportedbundletypes:
130 130 raise error.Abort(_('unknown stream compression type: %s')
131 131 % alg)
132 132 if alg == 'BZ':
133 133 alg = '_truncatedBZ'
134 134
135 135 compengine = util.compengines.forbundletype(alg)
136 136 self._stream = compengine.decompressorreader(fh)
137 137 self._type = alg
138 138 self.extras = extras or {}
139 139 self.callback = None
140 140
141 141 # These methods (compressed, read, seek, tell) all appear to only
142 142 # be used by bundlerepo, but it's a little hard to tell.
143 143 def compressed(self):
144 144 return self._type is not None and self._type != 'UN'
145 145 def read(self, l):
146 146 return self._stream.read(l)
147 147 def seek(self, pos):
148 148 return self._stream.seek(pos)
149 149 def tell(self):
150 150 return self._stream.tell()
151 151 def close(self):
152 152 return self._stream.close()
153 153
154 154 def _chunklength(self):
155 155 d = readexactly(self._stream, 4)
156 156 l = struct.unpack(">l", d)[0]
157 157 if l <= 4:
158 158 if l:
159 159 raise error.Abort(_("invalid chunk length %d") % l)
160 160 return 0
161 161 if self.callback:
162 162 self.callback()
163 163 return l - 4
164 164
165 165 def changelogheader(self):
166 166 """v10 does not have a changelog header chunk"""
167 167 return {}
168 168
169 169 def manifestheader(self):
170 170 """v10 does not have a manifest header chunk"""
171 171 return {}
172 172
173 173 def filelogheader(self):
174 174 """return the header of the filelogs chunk; v10 only has the filename"""
175 175 l = self._chunklength()
176 176 if not l:
177 177 return {}
178 178 fname = readexactly(self._stream, l)
179 179 return {'filename': fname}
180 180
181 181 def _deltaheader(self, headertuple, prevnode):
182 182 node, p1, p2, cs = headertuple
183 183 if prevnode is None:
184 184 deltabase = p1
185 185 else:
186 186 deltabase = prevnode
187 187 flags = 0
188 188 return node, p1, p2, deltabase, cs, flags
189 189
190 190 def deltachunk(self, prevnode):
191 191 l = self._chunklength()
192 192 if not l:
193 193 return {}
194 194 headerdata = readexactly(self._stream, self.deltaheadersize)
195 195 header = self.deltaheader.unpack(headerdata)
196 196 delta = readexactly(self._stream, l - self.deltaheadersize)
197 197 node, p1, p2, deltabase, cs, flags = self._deltaheader(header, prevnode)
198 198 return (node, p1, p2, cs, deltabase, delta, flags)
199 199
200 200 def getchunks(self):
201 201 """returns all the chunks contained in the bundle
202 202
203 203 Used when you need to forward the binary stream to a file or another
204 204 network API. To do so, it parses the changegroup data; otherwise it would
205 205 block in the sshrepo case because it doesn't know where the stream ends.
206 206 """
207 207 # For changegroup 1 and 2, we expect 3 parts: changelog, manifestlog,
208 208 # and a list of filelogs. For changegroup 3, we expect 4 parts:
209 209 # changelog, manifestlog, a list of tree manifestlogs, and a list of
210 210 # filelogs.
211 211 #
212 212 # Changelog and manifestlog parts are terminated with empty chunks. The
213 213 # tree and file parts are a list of entry sections. Each entry section
214 214 # is a series of chunks terminating in an empty chunk. The list of these
215 215 # entry sections is terminated in yet another empty chunk, so we know
216 216 # we've reached the end of the tree/file list when we reach an empty
217 217 # chunk that was preceded by no non-empty chunks.
218 218
219 219 parts = 0
220 220 while parts < 2 + self._grouplistcount:
221 221 noentries = True
222 222 while True:
223 223 chunk = getchunk(self)
224 224 if not chunk:
225 225 # The first two empty chunks represent the end of the
226 226 # changelog and the manifestlog portions. The remaining
227 227 # empty chunks represent either A) the end of individual
228 228 # tree or file entries in the file list, or B) the end of
229 229 # the entire list. It's the end of the entire list if there
230 230 # were no entries (i.e. noentries is True).
231 231 if parts < 2:
232 232 parts += 1
233 233 elif noentries:
234 234 parts += 1
235 235 break
236 236 noentries = False
237 237 yield chunkheader(len(chunk))
238 238 pos = 0
239 239 while pos < len(chunk):
240 240 next = pos + 2**20
241 241 yield chunk[pos:next]
242 242 pos = next
243 243 yield closechunk()
244 244
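
To make the part-counting comments above concrete, here is the termination rule restated as a standalone function over an already-parsed chunk sequence (a sketch, not Mercurial API; cg1/cg2 have one trailing group list, cg3 has two):

    def stream_complete(chunks, grouplistcount=1):
        parts = 0
        noentries = True
        for chunk in chunks:
            if not chunk:
                # An empty chunk ends the changelog/manifest parts, an
                # entry section, or (if nothing preceded it) a whole list.
                if parts < 2 or noentries:
                    parts += 1
                noentries = True
            else:
                noentries = False
        return parts >= 2 + grouplistcount

    # changelog, manifests, then a file list with two entries:
    stream = [b'cl', b'', b'mf', b'', b'f1', b'', b'f2', b'', b'']
    assert stream_complete(stream, grouplistcount=1)
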
245 245 def _unpackmanifests(self, repo, revmap, trp, prog):
246 246 self.callback = prog.increment
247 247 # no need to check for empty manifest group here:
248 248 # if the result of the merge of 1 and 2 is the same in 3 and 4,
249 249 # no new manifest will be created and the manifest group will
250 250 # be empty during the pull
251 251 self.manifestheader()
252 252 deltas = self.deltaiter()
253 253 repo.manifestlog.addgroup(deltas, revmap, trp)
254 254 prog.complete()
255 255 self.callback = None
256 256
257 257 def apply(self, repo, tr, srctype, url, targetphase=phases.draft,
258 258 expectedtotal=None):
259 259 """Add the changegroup returned by source.read() to this repo.
260 260 srctype is a string like 'push', 'pull', or 'unbundle'. url is
261 261 the URL of the repo where this changegroup is coming from.
262 262
263 263 Return an integer summarizing the change to this repo:
264 264 - nothing changed or no source: 0
265 265 - more heads than before: 1+added heads (2..n)
266 266 - fewer heads than before: -1-removed heads (-2..-n)
267 267 - number of heads stays the same: 1
268 268 """
269 269 repo = repo.unfiltered()
270 270 def csmap(x):
271 271 repo.ui.debug("add changeset %s\n" % short(x))
272 272 return len(cl)
273 273
274 274 def revmap(x):
275 275 return cl.rev(x)
276 276
277 277 changesets = files = revisions = 0
278 278
279 279 try:
280 280 # The transaction may already carry source information. In this
281 281 # case we use the top level data. We overwrite the argument
282 282 # because we need to use the top level values (if they exist)
283 283 # in this function.
284 284 srctype = tr.hookargs.setdefault('source', srctype)
285 285 url = tr.hookargs.setdefault('url', url)
286 286 repo.hook('prechangegroup',
287 287 throw=True, **pycompat.strkwargs(tr.hookargs))
288 288
289 289 # write changelog data to temp files so concurrent readers
290 290 # will not see an inconsistent view
291 291 cl = repo.changelog
292 292 cl.delayupdate(tr)
293 293 oldheads = set(cl.heads())
294 294
295 295 trp = weakref.proxy(tr)
296 296 # pull off the changeset group
297 297 repo.ui.status(_("adding changesets\n"))
298 298 clstart = len(cl)
299 299 progress = repo.ui.makeprogress(_('changesets'), unit=_('chunks'),
300 300 total=expectedtotal)
301 301 self.callback = progress.increment
302 302
303 303 efiles = set()
304 304 def onchangelog(cl, node):
305 305 efiles.update(cl.readfiles(node))
306 306
307 307 self.changelogheader()
308 308 deltas = self.deltaiter()
309 309 cgnodes = cl.addgroup(deltas, csmap, trp, addrevisioncb=onchangelog)
310 310 efiles = len(efiles)
311 311
312 312 if not cgnodes:
313 313 repo.ui.develwarn('applied empty changegroup',
314 314 config='warn-empty-changegroup')
315 315 clend = len(cl)
316 316 changesets = clend - clstart
317 317 progress.complete()
318 318 self.callback = None
319 319
320 320 # pull off the manifest group
321 321 repo.ui.status(_("adding manifests\n"))
322 322 # We know that we'll never have more manifests than we had
323 323 # changesets.
324 324 progress = repo.ui.makeprogress(_('manifests'), unit=_('chunks'),
325 325 total=changesets)
326 326 self._unpackmanifests(repo, revmap, trp, progress)
327 327
328 328 needfiles = {}
329 329 if repo.ui.configbool('server', 'validate'):
330 330 cl = repo.changelog
331 331 ml = repo.manifestlog
332 332 # validate incoming csets have their manifests
333 333 for cset in pycompat.xrange(clstart, clend):
334 334 mfnode = cl.changelogrevision(cset).manifest
335 335 mfest = ml[mfnode].readdelta()
336 336 # store file cgnodes we must see
337 337 for f, n in mfest.iteritems():
338 338 needfiles.setdefault(f, set()).add(n)
339 339
340 340 # process the files
341 341 repo.ui.status(_("adding file changes\n"))
342 342 newrevs, newfiles = _addchangegroupfiles(
343 343 repo, self, revmap, trp, efiles, needfiles)
344 344 revisions += newrevs
345 345 files += newfiles
346 346
347 347 deltaheads = 0
348 348 if oldheads:
349 349 heads = cl.heads()
350 350 deltaheads = len(heads) - len(oldheads)
351 351 for h in heads:
352 352 if h not in oldheads and repo[h].closesbranch():
353 353 deltaheads -= 1
354 354 htext = ""
355 355 if deltaheads:
356 356 htext = _(" (%+d heads)") % deltaheads
357 357
358 358 repo.ui.status(_("added %d changesets"
359 359 " with %d changes to %d files%s\n")
360 360 % (changesets, revisions, files, htext))
361 361 repo.invalidatevolatilesets()
362 362
363 363 if changesets > 0:
364 364 if 'node' not in tr.hookargs:
365 365 tr.hookargs['node'] = hex(cl.node(clstart))
366 366 tr.hookargs['node_last'] = hex(cl.node(clend - 1))
367 367 hookargs = dict(tr.hookargs)
368 368 else:
369 369 hookargs = dict(tr.hookargs)
370 370 hookargs['node'] = hex(cl.node(clstart))
371 371 hookargs['node_last'] = hex(cl.node(clend - 1))
372 372 repo.hook('pretxnchangegroup',
373 373 throw=True, **pycompat.strkwargs(hookargs))
374 374
375 375 added = [cl.node(r) for r in pycompat.xrange(clstart, clend)]
376 376 phaseall = None
377 377 if srctype in ('push', 'serve'):
378 378 # Old servers cannot push the boundary themselves.
379 379 # New servers won't push the boundary if the changeset already
380 380 # exists locally as secret
381 381 #
382 382 # We should not use added here but the list of all changes in
383 383 # the bundle
384 384 if repo.publishing():
385 385 targetphase = phaseall = phases.public
386 386 else:
387 387 # closer target phase computation
388 388
389 389 # Those changesets have been pushed from the
390 390 # outside, their phases are going to be pushed
391 391 # alongside. Therefore `targetphase` is
392 392 # ignored.
393 393 targetphase = phaseall = phases.draft
394 394 if added:
395 395 phases.registernew(repo, tr, targetphase, added)
396 396 if phaseall is not None:
397 397 phases.advanceboundary(repo, tr, phaseall, cgnodes)
398 398
399 399 if changesets > 0:
400 400
401 401 def runhooks():
402 402 # These hooks run when the lock releases, not when the
403 403 # transaction closes. So it's possible for the changelog
404 404 # to have changed since we last saw it.
405 405 if clstart >= len(repo):
406 406 return
407 407
408 408 repo.hook("changegroup", **pycompat.strkwargs(hookargs))
409 409
410 410 for n in added:
411 411 args = hookargs.copy()
412 412 args['node'] = hex(n)
413 413 del args['node_last']
414 414 repo.hook("incoming", **pycompat.strkwargs(args))
415 415
416 416 newheads = [h for h in repo.heads()
417 417 if h not in oldheads]
418 418 repo.ui.log("incoming",
419 419 "%d incoming changes - new heads: %s\n",
420 420 len(added),
421 421 ', '.join([hex(c[:6]) for c in newheads]))
422 422
423 423 tr.addpostclose('changegroup-runhooks-%020i' % clstart,
424 424 lambda tr: repo._afterlock(runhooks))
425 425 finally:
426 426 repo.ui.flush()
427 427 # never return 0 here:
428 428 if deltaheads < 0:
429 429 ret = deltaheads - 1
430 430 else:
431 431 ret = deltaheads + 1
432 432 return ret
433 433
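
The return-value convention documented in apply()'s docstring is implemented by the small computation in the finally block above; isolated here for clarity:

    def headcount_summary(deltaheads):
        # Never return 0: 0 is reserved for "nothing changed / no source".
        return deltaheads - 1 if deltaheads < 0 else deltaheads + 1

    assert headcount_summary(0) == 1    # same number of heads
    assert headcount_summary(2) == 3    # two heads added
    assert headcount_summary(-1) == -2  # one head removed
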
434 434 def deltaiter(self):
435 435 """
436 436 returns an iterator of the deltas in this changegroup
437 437
438 438 Useful for passing to the underlying storage system to be stored.
439 439 """
440 440 chain = None
441 441 for chunkdata in iter(lambda: self.deltachunk(chain), {}):
442 442 # Chunkdata: (node, p1, p2, cs, deltabase, delta, flags)
443 443 yield chunkdata
444 444 chain = chunkdata[0]
445 445
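
deltaiter() feeds each yielded node back into deltachunk() as `chain`, which is how cg1's implicit delta base (the previous revision in the stream) gets resolved. A toy model of that chaining, using plain tuples instead of wire chunks:

    def chained(entries):
        # entries: (node, explicit_base_or_None); None means "delta
        # against whatever came before", as in cg1.
        chain = None
        for node, base in entries:
            yield node, base if base is not None else chain
            chain = node

    out = list(chained([(b'a', None), (b'b', None), (b'c', b'a')]))
    assert out == [(b'a', None), (b'b', b'a'), (b'c', b'a')]
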
446 446 class cg2unpacker(cg1unpacker):
447 447 """Unpacker for cg2 streams.
448 448
449 449 cg2 streams add support for generaldelta, so the delta header
450 450 format is slightly different. All other features about the data
451 451 remain the same.
452 452 """
453 453 deltaheader = _CHANGEGROUPV2_DELTA_HEADER
454 454 deltaheadersize = deltaheader.size
455 455 version = '02'
456 456
457 457 def _deltaheader(self, headertuple, prevnode):
458 458 node, p1, p2, deltabase, cs = headertuple
459 459 flags = 0
460 460 return node, p1, p2, deltabase, cs, flags
461 461
462 462 class cg3unpacker(cg2unpacker):
463 463 """Unpacker for cg3 streams.
464 464
465 465 cg3 streams add support for exchanging treemanifests and revlog
466 466 flags. It adds the revlog flags to the delta header and an empty chunk
467 467 separating manifests and files.
468 468 """
469 469 deltaheader = _CHANGEGROUPV3_DELTA_HEADER
470 470 deltaheadersize = deltaheader.size
471 471 version = '03'
472 472 _grouplistcount = 2 # One list of manifests and one list of files
473 473
474 474 def _deltaheader(self, headertuple, prevnode):
475 475 node, p1, p2, deltabase, cs, flags = headertuple
476 476 return node, p1, p2, deltabase, cs, flags
477 477
478 478 def _unpackmanifests(self, repo, revmap, trp, prog):
479 479 super(cg3unpacker, self)._unpackmanifests(repo, revmap, trp, prog)
480 480 for chunkdata in iter(self.filelogheader, {}):
481 481 # If we get here, there are directory manifests in the changegroup
482 482 d = chunkdata["filename"]
483 483 repo.ui.debug("adding %s revisions\n" % d)
484 484 dirlog = repo.manifestlog._revlog.dirlog(d)
485 485 deltas = self.deltaiter()
486 486 if not dirlog.addgroup(deltas, revmap, trp):
487 487 raise error.Abort(_("received dir revlog group is empty"))
488 488
489 489 class headerlessfixup(object):
490 490 def __init__(self, fh, h):
491 491 self._h = h
492 492 self._fh = fh
493 493 def read(self, n):
494 494 if self._h:
495 495 d, self._h = self._h[:n], self._h[n:]
496 496 if len(d) < n:
497 497 d += readexactly(self._fh, n - len(d))
498 498 return d
499 499 return readexactly(self._fh, n)
500 500
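
headerlessfixup re-serves a header that was already consumed (e.g. to sniff the stream type) before handing reads through to the underlying file. A quick standalone check of the same idea, using io.BytesIO as the underlying stream:

    import io

    class headerprepend(object):
        def __init__(self, fh, header):
            self._fh, self._h = fh, header
        def read(self, n):
            if self._h:
                d, self._h = self._h[:n], self._h[n:]
                if len(d) < n:
                    d += self._fh.read(n - len(d))
                return d
            return self._fh.read(n)

    fh = headerprepend(io.BytesIO(b'rest-of-stream'), b'HG20')
    assert fh.read(6) == b'HG20re'
    assert fh.read(4) == b'st-o'
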
501 501 @attr.s(slots=True, frozen=True)
502 502 class revisiondelta(object):
503 503 """Describes a delta entry in a changegroup.
504 504
505 505 Captured data is sufficient to serialize the delta into multiple
506 506 formats.
507 507 """
508 508 # 20 byte node of this revision.
509 509 node = attr.ib()
510 510 # 20 byte nodes of parent revisions.
511 511 p1node = attr.ib()
512 512 p2node = attr.ib()
513 513 # 20 byte node of node this delta is against.
514 514 basenode = attr.ib()
515 515 # 20 byte node of changeset revision this delta is associated with.
516 516 linknode = attr.ib()
517 517 # 2 bytes of flags to apply to revision data.
518 518 flags = attr.ib()
519 519 # Iterable of chunks holding raw delta data.
520 520 deltachunks = attr.ib()
521 521
522 522 class cg1packer(object):
523 523 def __init__(self, repo, filematcher, version, allowreorder,
524 builddeltaheader, manifestsend, sendtreemanifests,
525 bundlecaps=None):
524 useprevdelta, builddeltaheader, manifestsend,
525 sendtreemanifests, bundlecaps=None):
526 526 """Given a source repo, construct a bundler.
527 527
528 528 filematcher is a matcher that matches on files to include in the
529 529 changegroup. Used to facilitate sparse changegroups.
530 530
531 531 allowreorder controls whether reordering of revisions is allowed.
532 532 This value is used when ``bundle.reorder`` is ``auto`` or isn't
533 533 set.
534 534
535 useprevdelta controls whether revisions should always delta against
536 the previous revision in the changegroup.
537
535 538 builddeltaheader is a callable that constructs the header for a group
536 539 delta.
537 540
538 541 manifestsend is a chunk to send after manifests have been fully emitted.
539 542
540 543 sendtreemanifests indicates whether tree manifests should be emitted.
541 544
542 545 bundlecaps is optional and can be used to specify the set of
543 546 capabilities which can be used to build the bundle. While bundlecaps is
544 547 unused in core Mercurial, extensions rely on this feature to communicate
545 548 capabilities to customize the changegroup packer.
546 549 """
547 550 assert filematcher
548 551 self._filematcher = filematcher
549 552
550 553 self.version = version
554 self._useprevdelta = useprevdelta
551 555 self._builddeltaheader = builddeltaheader
552 556 self._manifestsend = manifestsend
553 557 self._sendtreemanifests = sendtreemanifests
554 558
555 559 # Set of capabilities we can use to build the bundle.
556 560 if bundlecaps is None:
557 561 bundlecaps = set()
558 562 self._bundlecaps = bundlecaps
559 563
560 564 # experimental config: bundle.reorder
561 565 reorder = repo.ui.config('bundle', 'reorder')
562 566 if reorder == 'auto':
563 567 self._reorder = allowreorder
564 568 else:
565 569 self._reorder = stringutil.parsebool(reorder)
566 570
567 571 self._repo = repo
568 572
569 573 if self._repo.ui.verbose and not self._repo.ui.debugflag:
570 574 self._verbosenote = self._repo.ui.note
571 575 else:
572 576 self._verbosenote = lambda s: None
573 577
574 578 def close(self):
575 579 # Ellipses serving mode.
576 580 getattr(self, 'clrev_to_localrev', {}).clear()
577 581 if getattr(self, 'next_clrev_to_localrev', {}):
578 582 self.clrev_to_localrev = self.next_clrev_to_localrev
579 583 del self.next_clrev_to_localrev
580 584 self.changelog_done = True
581 585
582 586 return closechunk()
583 587
584 588 def fileheader(self, fname):
585 589 return chunkheader(len(fname)) + fname
586 590
587 591 # Extracted both for clarity and for overriding in extensions.
588 592 def _sortgroup(self, store, nodelist, lookup):
589 593 """Sort nodes for change group and turn them into revnums."""
590 594 # Ellipses serving mode.
591 595 #
592 596 # In a perfect world, we'd generate better ellipsis-ified graphs
593 597 # for non-changelog revlogs. In practice, we haven't started doing
594 598 # that yet, so the resulting DAGs for the manifestlog and filelogs
595 599 # are actually full of bogus parentage on all the ellipsis
596 600 # nodes. This has the side effect that, while the contents are
597 601 # correct, the individual DAGs might be completely out of whack in
598 602 # a case like 882681bc3166 and its ancestors (back about 10
599 603 # revisions or so) in the main hg repo.
600 604 #
601 605 # The one invariant we *know* holds is that the new (potentially
602 606 # bogus) DAG shape will be valid if we order the nodes in the
603 607 # order that they're introduced in dramatis personae by the
604 608 # changelog, so what we do is we sort the non-changelog histories
605 609 # by the order in which they are used by the changelog.
606 610 if util.safehasattr(self, 'full_nodes') and self.clnode_to_rev:
607 611 key = lambda n: self.clnode_to_rev[lookup(n)]
608 612 return [store.rev(n) for n in sorted(nodelist, key=key)]
609 613
610 614 # for generaldelta revlogs, we linearize the revs; this will both be
611 615 # much quicker and generate a much smaller bundle
612 616 if (store._generaldelta and self._reorder is None) or self._reorder:
613 617 dag = dagutil.revlogdag(store)
614 618 return dag.linearize(set(store.rev(n) for n in nodelist))
615 619 else:
616 620 return sorted([store.rev(n) for n in nodelist])
617 621
618 622 def group(self, nodelist, store, lookup, units=None):
619 623 """Calculate a delta group, yielding a sequence of changegroup chunks
620 624 (strings).
621 625
622 626 Given a list of changeset revs, return a set of deltas and
623 627 metadata corresponding to nodes. The first delta is
624 628 first parent(nodelist[0]) -> nodelist[0], the receiver is
625 629 guaranteed to have this parent as it has all history before
626 630 these changesets. If the first parent is nullrev, the
627 631 changegroup starts with a full revision.
628 632
629 633 If units is not None, progress detail will be generated; units specifies
630 634 the type of revlog that is touched (changelog, manifest, etc.).
631 635 """
632 636 # if we don't have any revisions touched by these changesets, bail
633 637 if len(nodelist) == 0:
634 638 yield self.close()
635 639 return
636 640
637 641 revs = self._sortgroup(store, nodelist, lookup)
638 642
639 643 # add the parent of the first rev
640 644 p = store.parentrevs(revs[0])[0]
641 645 revs.insert(0, p)
642 646
643 647 # build deltas
644 648 progress = None
645 649 if units is not None:
646 650 progress = self._repo.ui.makeprogress(_('bundling'), unit=units,
647 651 total=(len(revs) - 1))
648 652 for r in pycompat.xrange(len(revs) - 1):
649 653 if progress:
650 654 progress.update(r + 1)
651 655 prev, curr = revs[r], revs[r + 1]
652 656 linknode = lookup(store.node(curr))
653 657 for c in self.revchunk(store, curr, prev, linknode):
654 658 yield c
655 659
656 660 if progress:
657 661 progress.complete()
658 662 yield self.close()
659 663
660 664 # filter any nodes that claim to be part of the known set
661 665 def prune(self, store, missing, commonrevs):
662 666 # TODO this violates storage abstraction for manifests.
663 667 if isinstance(store, manifest.manifestrevlog):
664 668 if not self._filematcher.visitdir(store._dir[:-1] or '.'):
665 669 return []
666 670
667 671 rr, rl = store.rev, store.linkrev
668 672 return [n for n in missing if rl(rr(n)) not in commonrevs]
669 673
670 674 def _packmanifests(self, dir, mfnodes, lookuplinknode):
671 675 """Pack flat manifests into a changegroup stream."""
672 676 assert not dir
673 677 for chunk in self.group(mfnodes, self._repo.manifestlog._revlog,
674 678 lookuplinknode, units=_('manifests')):
675 679 yield chunk
676 680
677 681 def _packtreemanifests(self, dir, mfnodes, lookuplinknode):
678 682 """Version of _packmanifests that operates on directory manifests.
679 683
680 684 Encodes the directory name in the output so multiple manifests
681 685 can be sent.
682 686 """
683 687 assert self.version == b'03'
684 688
685 689 if dir:
686 690 yield self.fileheader(dir)
687 691
688 692 # TODO violates storage abstractions by assuming revlogs.
689 693 dirlog = self._repo.manifestlog._revlog.dirlog(dir)
690 694 for chunk in self.group(mfnodes, dirlog, lookuplinknode,
691 695 units=_('manifests')):
692 696 yield chunk
693 697
694 698 def generate(self, commonrevs, clnodes, fastpathlinkrev, source):
695 699 '''yield a sequence of changegroup chunks (strings)'''
696 700 repo = self._repo
697 701 cl = repo.changelog
698 702
699 703 clrevorder = {}
700 704 mfs = {} # needed manifests
701 705 fnodes = {} # needed file nodes
702 706 mfl = repo.manifestlog
703 707 # TODO violates storage abstraction.
704 708 mfrevlog = mfl._revlog
705 709 changedfiles = set()
706 710
707 711 ellipsesmode = util.safehasattr(self, 'full_nodes')
708 712
709 713 # Callback for the changelog, used to collect changed files and
710 714 # manifest nodes.
711 715 # Returns the linkrev node (identity in the changelog case).
712 716 def lookupcl(x):
713 717 c = cl.read(x)
714 718 clrevorder[x] = len(clrevorder)
715 719
716 720 if ellipsesmode:
717 721 # Only update mfs if x is going to be sent. Otherwise we
718 722 # end up with bogus linkrevs specified for manifests and
719 723 # we skip some manifest nodes that we should otherwise
720 724 # have sent.
721 725 if (x in self.full_nodes
722 726 or cl.rev(x) in self.precomputed_ellipsis):
723 727 n = c[0]
724 728 # Record the first changeset introducing this manifest
725 729 # version.
726 730 mfs.setdefault(n, x)
727 731 # Set this narrow-specific dict so we have the lowest
728 732 # manifest revnum to look up for this cl revnum. (Part of
729 733 # mapping changelog ellipsis parents to manifest ellipsis
730 734 # parents)
731 735 self.next_clrev_to_localrev.setdefault(cl.rev(x),
732 736 mfrevlog.rev(n))
733 737 # We can't trust the changed files list in the changeset if the
734 738 # client requested a shallow clone.
735 739 if self.is_shallow:
736 740 changedfiles.update(mfl[c[0]].read().keys())
737 741 else:
738 742 changedfiles.update(c[3])
739 743 else:
740 744
741 745 n = c[0]
742 746 # record the first changeset introducing this manifest version
743 747 mfs.setdefault(n, x)
744 748 # Record a complete list of potentially-changed files in
745 749 # this manifest.
746 750 changedfiles.update(c[3])
747 751
748 752 return x
749 753
750 754 self._verbosenote(_('uncompressed size of bundle content:\n'))
751 755 size = 0
752 756 for chunk in self.group(clnodes, cl, lookupcl, units=_('changesets')):
753 757 size += len(chunk)
754 758 yield chunk
755 759 self._verbosenote(_('%8.i (changelog)\n') % size)
756 760
757 761 # We need to make sure that the linkrev in the changegroup refers to
758 762 # the first changeset that introduced the manifest or file revision.
759 763 # The fastpath is usually safer than the slowpath, because the filelogs
760 764 # are walked in revlog order.
761 765 #
762 766 # When taking the slowpath with reorder=None and the manifest revlog
763 767 # uses generaldelta, the manifest may be walked in the "wrong" order.
764 768 # Without 'clrevorder', we would get an incorrect linkrev (see fix in
765 769 # cc0ff93d0c0c).
766 770 #
767 771 # When taking the fastpath, we are only vulnerable to reordering
768 772 # of the changelog itself. The changelog never uses generaldelta, so
769 773 # it is only reordered when reorder=True. To handle this case, we
770 774 # simply take the slowpath, which already has the 'clrevorder' logic.
771 775 # This was also fixed in cc0ff93d0c0c.
772 776 fastpathlinkrev = fastpathlinkrev and not self._reorder
773 777 # Treemanifests don't work correctly with fastpathlinkrev
774 778 # either, because we don't discover which directory nodes to
775 779 # send along with files. This could probably be fixed.
776 780 fastpathlinkrev = fastpathlinkrev and (
777 781 'treemanifest' not in repo.requirements)
778 782
779 783 for chunk in self.generatemanifests(commonrevs, clrevorder,
780 784 fastpathlinkrev, mfs, fnodes, source):
781 785 yield chunk
782 786
783 787 if ellipsesmode:
784 788 mfdicts = None
785 789 if self.is_shallow:
786 790 mfdicts = [(self._repo.manifestlog[n].read(), lr)
787 791 for (n, lr) in mfs.iteritems()]
788 792
789 793 mfs.clear()
790 794 clrevs = set(cl.rev(x) for x in clnodes)
791 795
792 796 if not fastpathlinkrev:
793 797 def linknodes(unused, fname):
794 798 return fnodes.get(fname, {})
795 799 else:
796 800 cln = cl.node
797 801 def linknodes(filerevlog, fname):
798 802 llr = filerevlog.linkrev
799 803 fln = filerevlog.node
800 804 revs = ((r, llr(r)) for r in filerevlog)
801 805 return dict((fln(r), cln(lr)) for r, lr in revs if lr in clrevs)
802 806
803 807 if ellipsesmode:
804 808 # We need to pass the mfdicts variable down into
805 809 # generatefiles(), but more than one command might have
806 810 # wrapped generatefiles so we can't modify the function
807 811 # signature. Instead, we pass the data to ourselves using an
808 812 # instance attribute. I'm sorry.
809 813 self._mfdicts = mfdicts
810 814
811 815 for chunk in self.generatefiles(changedfiles, linknodes, commonrevs,
812 816 source):
813 817 yield chunk
814 818
815 819 yield self.close()
816 820
817 821 if clnodes:
818 822 repo.hook('outgoing', node=hex(clnodes[0]), source=source)
819 823
820 824 def generatemanifests(self, commonrevs, clrevorder, fastpathlinkrev, mfs,
821 825 fnodes, source):
822 826 """Returns an iterator of changegroup chunks containing manifests.
823 827
824 828 `source` is unused here, but is used by extensions like remotefilelog to
825 829 change what is sent based on pulls vs pushes, etc.
826 830 """
827 831 repo = self._repo
828 832 mfl = repo.manifestlog
829 833 dirlog = mfl._revlog.dirlog
830 834 tmfnodes = {'': mfs}
831 835
832 836 # Callback for the manifest, used to collect linkrevs for filelog
833 837 # revisions.
834 838 # Returns the linkrev node (collected in lookupcl).
835 839 def makelookupmflinknode(dir, nodes):
836 840 if fastpathlinkrev:
837 841 assert not dir
838 842 return mfs.__getitem__
839 843
840 844 def lookupmflinknode(x):
841 845 """Callback for looking up the linknode for manifests.
842 846
843 847 Returns the linkrev node for the specified manifest.
844 848
845 849 SIDE EFFECT:
846 850
847 851 1) fclnodes gets populated with the list of relevant
848 852 file nodes if we're not using fastpathlinkrev
849 853 2) When treemanifests are in use, collects treemanifest nodes
850 854 to send
851 855
852 856 Note that this means manifests must be completely sent to
853 857 the client before you can trust the list of files and
854 858 treemanifests to send.
855 859 """
856 860 clnode = nodes[x]
857 861 mdata = mfl.get(dir, x).readfast(shallow=True)
858 862 for p, n, fl in mdata.iterentries():
859 863 if fl == 't': # subdirectory manifest
860 864 subdir = dir + p + '/'
861 865 tmfclnodes = tmfnodes.setdefault(subdir, {})
862 866 tmfclnode = tmfclnodes.setdefault(n, clnode)
863 867 if clrevorder[clnode] < clrevorder[tmfclnode]:
864 868 tmfclnodes[n] = clnode
865 869 else:
866 870 f = dir + p
867 871 fclnodes = fnodes.setdefault(f, {})
868 872 fclnode = fclnodes.setdefault(n, clnode)
869 873 if clrevorder[clnode] < clrevorder[fclnode]:
870 874 fclnodes[n] = clnode
871 875 return clnode
872 876 return lookupmflinknode
873 877
874 878 fn = (self._packtreemanifests if self._sendtreemanifests
875 879 else self._packmanifests)
876 880 size = 0
877 881 while tmfnodes:
878 882 dir, nodes = tmfnodes.popitem()
879 883 prunednodes = self.prune(dirlog(dir), nodes, commonrevs)
880 884 if not dir or prunednodes:
881 885 for x in fn(dir, prunednodes, makelookupmflinknode(dir, nodes)):
882 886 size += len(x)
883 887 yield x
884 888 self._verbosenote(_('%8.i (manifests)\n') % size)
885 889 yield self._manifestsend
886 890
887 891 # The 'source' parameter is useful for extensions
888 892 def generatefiles(self, changedfiles, linknodes, commonrevs, source):
889 893 changedfiles = list(filter(self._filematcher, changedfiles))
890 894
891 895 if getattr(self, 'is_shallow', False):
892 896 # See comment in generate() for why this sadness is a thing.
893 897 mfdicts = self._mfdicts
894 898 del self._mfdicts
895 899 # In a shallow clone, the linknodes callback needs to also include
896 900 # those file nodes that are in the manifests we sent but weren't
897 901 # introduced by those manifests.
898 902 commonctxs = [self._repo[c] for c in commonrevs]
899 903 oldlinknodes = linknodes
900 904 clrev = self._repo.changelog.rev
901 905
902 906 # Defining this function has a side-effect of overriding the
903 907 # function of the same name that was passed in as an argument.
904 908 # TODO have caller pass in appropriate function.
905 909 def linknodes(flog, fname):
906 910 for c in commonctxs:
907 911 try:
908 912 fnode = c.filenode(fname)
909 913 self.clrev_to_localrev[c.rev()] = flog.rev(fnode)
910 914 except error.ManifestLookupError:
911 915 pass
912 916 links = oldlinknodes(flog, fname)
913 917 if len(links) != len(mfdicts):
914 918 for mf, lr in mfdicts:
915 919 fnode = mf.get(fname, None)
916 920 if fnode in links:
917 921 links[fnode] = min(links[fnode], lr, key=clrev)
918 922 elif fnode:
919 923 links[fnode] = lr
920 924 return links
921 925
922 926 return self._generatefiles(changedfiles, linknodes, commonrevs, source)
923 927
924 928 def _generatefiles(self, changedfiles, linknodes, commonrevs, source):
925 929 repo = self._repo
926 930 progress = repo.ui.makeprogress(_('bundling'), unit=_('files'),
927 931 total=len(changedfiles))
928 932 for i, fname in enumerate(sorted(changedfiles)):
929 933 filerevlog = repo.file(fname)
930 934 if not filerevlog:
931 935 raise error.Abort(_("empty or missing file data for %s") %
932 936 fname)
933 937
934 938 linkrevnodes = linknodes(filerevlog, fname)
935 939 # Lookup for filenodes, we collected the linkrev nodes above in the
936 940 # fastpath case and with lookupmf in the slowpath case.
937 941 def lookupfilelog(x):
938 942 return linkrevnodes[x]
939 943
940 944 filenodes = self.prune(filerevlog, linkrevnodes, commonrevs)
941 945 if filenodes:
942 946 progress.update(i + 1, item=fname)
943 947 h = self.fileheader(fname)
944 948 size = len(h)
945 949 yield h
946 950 for chunk in self.group(filenodes, filerevlog, lookupfilelog):
947 951 size += len(chunk)
948 952 yield chunk
949 953 self._verbosenote(_('%8.i %s\n') % (size, fname))
950 954 progress.complete()
951 955
952 956 def deltaparent(self, store, rev, p1, p2, prev):
953 if not store.candelta(prev, rev):
954 raise error.ProgrammingError('cg1 should not be used in this case')
955 return prev
957 if self._useprevdelta:
958 if not store.candelta(prev, rev):
959 raise error.ProgrammingError(
960 'cg1 should not be used in this case')
961 return prev
962
963 # Narrow ellipses mode.
964 if util.safehasattr(self, 'full_nodes'):
965 # TODO: send better deltas when in narrow mode.
966 #
967 # changegroup.group() loops over revisions to send,
968 # including revisions we'll skip. What this means is that
969 # `prev` will be a potentially useless delta base for all
970 # ellipsis nodes, as the client likely won't have it. In
971 # the future we should do bookkeeping about which nodes
972 # have been sent to the client, and try to be
973 # significantly smarter about delta bases. This is
974 # slightly tricky because this same code has to work for
975 # all revlogs, and we don't have the linkrev/linknode here.
976 return p1
977
978 dp = store.deltaparent(rev)
979 if dp == nullrev and store.storedeltachains:
980 # Avoid sending full revisions when delta parent is null. Pick prev
981 # in that case. It's tempting to pick p1 in this case, as p1 will
982 # be smaller in the common case. However, computing a delta against
983 # p1 may require resolving the raw text of p1, which could be
984 # expensive. The revlog caches should have prev cached, meaning
985 # less CPU for changegroup generation. There is likely room to add
986 # a flag and/or config option to control this behavior.
987 base = prev
988 elif dp == nullrev:
989 # revlog is configured to use full snapshot for a reason,
990 # stick to full snapshot.
991 base = nullrev
992 elif dp not in (p1, p2, prev):
993 # Pick prev when we can't be sure remote has the base revision.
994 return prev
995 else:
996 base = dp
997
998 if base != nullrev and not store.candelta(base, rev):
999 base = nullrev
1000
1001 return base
956 1002
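
The non-prev-delta branch of deltaparent() above can be summarized as a pure decision function (a sketch with the store interactions stubbed out; this is not Mercurial's API):

    nullrev = -1

    def choosebase(dp, p1, p2, prev, storedeltachains=True,
                   candelta=lambda base: True):
        # dp: the base the revlog itself stores a delta against.
        if dp == nullrev and storedeltachains:
            base = prev        # prev is usually cached: cheap to diff
        elif dp == nullrev:
            base = nullrev     # store insists on full snapshots
        elif dp not in (p1, p2, prev):
            return prev        # remote may not have dp
        else:
            base = dp
        if base != nullrev and not candelta(base):
            base = nullrev
        return base

    assert choosebase(dp=7, p1=7, p2=nullrev, prev=9) == 7
    assert choosebase(dp=3, p1=7, p2=nullrev, prev=9) == 9
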
957 1003 def revchunk(self, store, rev, prev, linknode):
958 1004 if util.safehasattr(self, 'full_nodes'):
959 1005 fn = self._revisiondeltanarrow
960 1006 else:
961 1007 fn = self._revisiondeltanormal
962 1008
963 1009 delta = fn(store, rev, prev, linknode)
964 1010 if not delta:
965 1011 return
966 1012
967 1013 meta = self._builddeltaheader(delta)
968 1014 l = len(meta) + sum(len(x) for x in delta.deltachunks)
969 1015
970 1016 yield chunkheader(l)
971 1017 yield meta
972 1018 for x in delta.deltachunks:
973 1019 yield x
974 1020
975 1021 def _revisiondeltanormal(self, store, rev, prev, linknode):
976 1022 node = store.node(rev)
977 1023 p1, p2 = store.parentrevs(rev)
978 1024 base = self.deltaparent(store, rev, p1, p2, prev)
979 1025
980 1026 prefix = ''
981 1027 if store.iscensored(base) or store.iscensored(rev):
982 1028 try:
983 1029 delta = store.revision(node, raw=True)
984 1030 except error.CensoredNodeError as e:
985 1031 delta = e.tombstone
986 1032 if base == nullrev:
987 1033 prefix = mdiff.trivialdiffheader(len(delta))
988 1034 else:
989 1035 baselen = store.rawsize(base)
990 1036 prefix = mdiff.replacediffheader(baselen, len(delta))
991 1037 elif base == nullrev:
992 1038 delta = store.revision(node, raw=True)
993 1039 prefix = mdiff.trivialdiffheader(len(delta))
994 1040 else:
995 1041 delta = store.revdiff(base, rev)
996 1042 p1n, p2n = store.parents(node)
997 1043
998 1044 return revisiondelta(
999 1045 node=node,
1000 1046 p1node=p1n,
1001 1047 p2node=p2n,
1002 1048 basenode=store.node(base),
1003 1049 linknode=linknode,
1004 1050 flags=store.flags(rev),
1005 1051 deltachunks=(prefix, delta),
1006 1052 )
1007 1053
1008 1054 def _revisiondeltanarrow(self, store, rev, prev, linknode):
1009 1055 # build up some mapping information that's useful later. See
1010 1056 # the local() nested function below.
1011 1057 if not self.changelog_done:
1012 1058 self.clnode_to_rev[linknode] = rev
1013 1059 linkrev = rev
1014 1060 self.clrev_to_localrev[linkrev] = rev
1015 1061 else:
1016 1062 linkrev = self.clnode_to_rev[linknode]
1017 1063 self.clrev_to_localrev[linkrev] = rev
1018 1064
1019 1065 # This is a node to send in full, because the changeset it
1020 1066 # corresponds to was a full changeset.
1021 1067 if linknode in self.full_nodes:
1022 1068 return self._revisiondeltanormal(store, rev, prev, linknode)
1023 1069
1024 1070 # At this point, a node can either be one we should skip or an
1025 1071 # ellipsis. If it's not an ellipsis, bail immediately.
1026 1072 if linkrev not in self.precomputed_ellipsis:
1027 1073 return
1028 1074
1029 1075 linkparents = self.precomputed_ellipsis[linkrev]
1030 1076 def local(clrev):
1031 1077 """Turn a changelog revnum into a local revnum.
1032 1078
1033 1079 The ellipsis dag is stored as revnums on the changelog,
1034 1080 but when we're producing ellipsis entries for
1035 1081 non-changelog revlogs, we need to turn those numbers into
1036 1082 something local. This does that for us, and during the
1037 1083 changelog sending phase will also expand the stored
1038 1084 mappings as needed.
1039 1085 """
1040 1086 if clrev == nullrev:
1041 1087 return nullrev
1042 1088
1043 1089 if not self.changelog_done:
1044 1090 # If we're doing the changelog, it's possible that we
1045 1091 # have a parent that is already on the client, and we
1046 1092 # need to store some extra mapping information so that
1047 1093 # our contained ellipsis nodes will be able to resolve
1048 1094 # their parents.
1049 1095 if clrev not in self.clrev_to_localrev:
1050 1096 clnode = store.node(clrev)
1051 1097 self.clnode_to_rev[clnode] = clrev
1052 1098 return clrev
1053 1099
1054 1100 # Walk the ellipsis-ized changelog breadth-first looking for a
1055 1101 # change that has been linked from the current revlog.
1056 1102 #
1057 1103 # For a flat manifest revlog only a single step should be necessary
1058 1104 # as all relevant changelog entries are relevant to the flat
1059 1105 # manifest.
1060 1106 #
1061 1107 # For a filelog or tree manifest dirlog however not every changelog
1062 1108 # entry will have been relevant, so we need to skip some changelog
1063 1109 # nodes even after ellipsis-izing.
1064 1110 walk = [clrev]
1065 1111 while walk:
1066 1112 p = walk[0]
1067 1113 walk = walk[1:]
1068 1114 if p in self.clrev_to_localrev:
1069 1115 return self.clrev_to_localrev[p]
1070 1116 elif p in self.full_nodes:
1071 1117 walk.extend([pp for pp in self._repo.changelog.parentrevs(p)
1072 1118 if pp != nullrev])
1073 1119 elif p in self.precomputed_ellipsis:
1074 1120 walk.extend([pp for pp in self.precomputed_ellipsis[p]
1075 1121 if pp != nullrev])
1076 1122 else:
1077 1123 # In this case, we've got an ellipsis with parents
1078 1124 # outside the current bundle (likely an
1079 1125 # incremental pull). We "know" that we can use the
1080 1126 # value of this same revlog at whatever revision
1081 1127 # is pointed to by linknode. "Know" is in scare
1082 1128 # quotes because I haven't done enough examination
1083 1129 # of edge cases to convince myself this is really
1084 1130 # a fact - it works for all the (admittedly
1085 1131 # thorough) cases in our testsuite, but I would be
1086 1132 # somewhat unsurprised to find a case in the wild
1087 1133 # where this breaks down a bit. That said, I don't
1088 1134 # know if it would hurt anything.
1089 1135 for i in pycompat.xrange(rev, 0, -1):
1090 1136 if store.linkrev(i) == clrev:
1091 1137 return i
1092 1138 # We failed to resolve a parent for this node, so
1093 1139 # we crash the changegroup construction.
1094 1140 raise error.Abort(
1095 1141 'unable to resolve parent while packing %r %r'
1096 1142 ' for changeset %r' % (store.indexfile, rev, clrev))
1097 1143
1098 1144 return nullrev
1099 1145
1100 1146 if not linkparents or (
1101 1147 store.parentrevs(rev) == (nullrev, nullrev)):
1102 1148 p1, p2 = nullrev, nullrev
1103 1149 elif len(linkparents) == 1:
1104 1150 p1, = sorted(local(p) for p in linkparents)
1105 1151 p2 = nullrev
1106 1152 else:
1107 1153 p1, p2 = sorted(local(p) for p in linkparents)
1108 1154
1109 1155 n = store.node(rev)
1110 1156 p1n, p2n = store.node(p1), store.node(p2)
1111 1157 flags = store.flags(rev)
1112 1158 flags |= revlog.REVIDX_ELLIPSIS
1113 1159
1114 1160 # TODO: try and actually send deltas for ellipsis data blocks
1115 1161 data = store.revision(n)
1116 1162 diffheader = mdiff.trivialdiffheader(len(data))
1117 1163
1118 1164 return revisiondelta(
1119 1165 node=n,
1120 1166 p1node=p1n,
1121 1167 p2node=p2n,
1122 1168 basenode=nullid,
1123 1169 linknode=linknode,
1124 1170 flags=flags,
1125 1171 deltachunks=(diffheader, data),
1126 1172 )
1127 1173
1128 class cg2packer(cg1packer):
1129 def deltaparent(self, store, rev, p1, p2, prev):
1130 # Narrow ellipses mode.
1131 if util.safehasattr(self, 'full_nodes'):
1132 # TODO: send better deltas when in narrow mode.
1133 #
1134 # changegroup.group() loops over revisions to send,
1135 # including revisions we'll skip. What this means is that
1136 # `prev` will be a potentially useless delta base for all
1137 # ellipsis nodes, as the client likely won't have it. In
1138 # the future we should do bookkeeping about which nodes
1139 # have been sent to the client, and try to be
1140 # significantly smarter about delta bases. This is
1141 # slightly tricky because this same code has to work for
1142 # all revlogs, and we don't have the linkrev/linknode here.
1143 return p1
1144
1145 dp = store.deltaparent(rev)
1146 if dp == nullrev and store.storedeltachains:
1147 # Avoid sending full revisions when delta parent is null. Pick prev
1148 # in that case. It's tempting to pick p1 in this case, as p1 will
1149 # be smaller in the common case. However, computing a delta against
1150 # p1 may require resolving the raw text of p1, which could be
1151 # expensive. The revlog caches should have prev cached, meaning
1152 # less CPU for changegroup generation. There is likely room to add
1153 # a flag and/or config option to control this behavior.
1154 base = prev
1155 elif dp == nullrev:
1156 # revlog is configured to use full snapshot for a reason,
1157 # stick to full snapshot.
1158 base = nullrev
1159 elif dp not in (p1, p2, prev):
1160 # Pick prev when we can't be sure remote has the base revision.
1161 return prev
1162 else:
1163 base = dp
1164 if base != nullrev and not store.candelta(base, rev):
1165 base = nullrev
1166 return base
1167
1168 1174 def _makecg1packer(repo, filematcher, bundlecaps):
1169 1175 builddeltaheader = lambda d: _CHANGEGROUPV1_DELTA_HEADER.pack(
1170 1176 d.node, d.p1node, d.p2node, d.linknode)
1171 1177
1172 return cg1packer(repo, filematcher, b'01', allowreorder=None,
1178 return cg1packer(repo, filematcher, b'01',
1179 useprevdelta=True,
1180 allowreorder=None,
1173 1181 builddeltaheader=builddeltaheader,
1174 1182 manifestsend=b'', sendtreemanifests=False,
1175 1183 bundlecaps=bundlecaps)
1176 1184
1177 1185 def _makecg2packer(repo, filematcher, bundlecaps):
1178 1186 builddeltaheader = lambda d: _CHANGEGROUPV2_DELTA_HEADER.pack(
1179 1187 d.node, d.p1node, d.p2node, d.basenode, d.linknode)
1180 1188
1181 1189 # Since generaldelta is directly supported by cg2, reordering
1182 1190 # generally doesn't help, so we disable it by default (treating
1183 1191 # bundle.reorder=auto just like bundle.reorder=False).
1184 return cg2packer(repo, filematcher, b'02', allowreorder=False,
1192 return cg1packer(repo, filematcher, b'02',
1193 useprevdelta=False,
1194 allowreorder=False,
1185 1195 builddeltaheader=builddeltaheader,
1186 1196 manifestsend=b'', sendtreemanifests=False,
1187 1197 bundlecaps=bundlecaps)
1188 1198
1189 1199 def _makecg3packer(repo, filematcher, bundlecaps):
1190 1200 builddeltaheader = lambda d: _CHANGEGROUPV3_DELTA_HEADER.pack(
1191 1201 d.node, d.p1node, d.p2node, d.basenode, d.linknode, d.flags)
1192 1202
1193 return cg2packer(repo, filematcher, b'03', allowreorder=False,
1203 return cg1packer(repo, filematcher, b'03',
1204 useprevdelta=False,
1205 allowreorder=False,
1194 1206 builddeltaheader=builddeltaheader,
1195 1207 manifestsend=closechunk(), sendtreemanifests=True,
1196 1208 bundlecaps=bundlecaps)
1197 1209
1198 1210 _packermap = {'01': (_makecg1packer, cg1unpacker),
1199 1211 # cg2 adds support for exchanging generaldelta
1200 1212 '02': (_makecg2packer, cg2unpacker),
1201 1213 # cg3 adds support for exchanging revlog flags and treemanifests
1202 1214 '03': (_makecg3packer, cg3unpacker),
1203 1215 }
1204 1216
1205 1217 def allsupportedversions(repo):
1206 1218 versions = set(_packermap.keys())
1207 1219 if not (repo.ui.configbool('experimental', 'changegroup3') or
1208 1220 repo.ui.configbool('experimental', 'treemanifest') or
1209 1221 'treemanifest' in repo.requirements):
1210 1222 versions.discard('03')
1211 1223 return versions
1212 1224
1213 1225 # Changegroup versions that can be applied to the repo
1214 1226 def supportedincomingversions(repo):
1215 1227 return allsupportedversions(repo)
1216 1228
1217 1229 # Changegroup versions that can be created from the repo
1218 1230 def supportedoutgoingversions(repo):
1219 1231 versions = allsupportedversions(repo)
1220 1232 if 'treemanifest' in repo.requirements:
1221 1233 # Versions 01 and 02 support only flat manifests and it's just too
1222 1234 # expensive to convert between the flat manifest and tree manifest on
1223 1235 # the fly. Since tree manifests are hashed differently, all of history
1224 1236 # would have to be converted. Instead, we simply don't even pretend to
1225 1237 # support versions 01 and 02.
1226 1238 versions.discard('01')
1227 1239 versions.discard('02')
1228 1240 if repository.NARROW_REQUIREMENT in repo.requirements:
1229 1241 # Versions 01 and 02 don't support revlog flags, and we need to
1230 1242 # support that for stripping and unbundling to work.
1231 1243 versions.discard('01')
1232 1244 versions.discard('02')
1233 1245 if LFS_REQUIREMENT in repo.requirements:
1234 1246 # Versions 01 and 02 don't support revlog flags, and we need to
1235 1247 # mark LFS entries with REVIDX_EXTSTORED.
1236 1248 versions.discard('01')
1237 1249 versions.discard('02')
1238 1250
1239 1251 return versions
1240 1252
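
The gating in supportedoutgoingversions() above reduces to: any requirement that needs revlog flags or tree manifests rules out cg1 and cg2. A compressed sketch (the requirement strings here are placeholders for the real constants, e.g. repository.NARROW_REQUIREMENT):

    def outgoingversions(requirements):
        # 'narrow' stands in for the real narrow requirement string.
        versions = {'01', '02', '03'}
        if {'treemanifest', 'narrow', 'lfs'} & set(requirements):
            versions -= {'01', '02'}
        return versions

    assert outgoingversions({'lfs'}) == {'03'}
    assert outgoingversions({'generaldelta'}) == {'01', '02', '03'}
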
1241 1253 def localversion(repo):
1242 1254 # Finds the best version to use for bundles that are meant to be used
1243 1255 # locally, such as those from strip and shelve, and temporary bundles.
1244 1256 return max(supportedoutgoingversions(repo))
1245 1257
1246 1258 def safeversion(repo):
1247 1259 # Finds the smallest version that it's safe to assume clients of the repo
1248 1260 # will support. For example, all hg versions that support generaldelta also
1249 1261 # support changegroup 02.
1250 1262 versions = supportedoutgoingversions(repo)
1251 1263 if 'generaldelta' in repo.requirements:
1252 1264 versions.discard('01')
1253 1265 assert versions
1254 1266 return min(versions)
1255 1267
1256 1268 def getbundler(version, repo, bundlecaps=None, filematcher=None):
1257 1269 assert version in supportedoutgoingversions(repo)
1258 1270
1259 1271 if filematcher is None:
1260 1272 filematcher = matchmod.alwaysmatcher(repo.root, '')
1261 1273
1262 1274 if version == '01' and not filematcher.always():
1263 1275 raise error.ProgrammingError('version 01 changegroups do not support '
1264 1276 'sparse file matchers')
1265 1277
1266 1278 # Requested files could include files not in the local store. So
1267 1279 # filter those out.
1268 1280 filematcher = matchmod.intersectmatchers(repo.narrowmatch(),
1269 1281 filematcher)
1270 1282
1271 1283 fn = _packermap[version][0]
1272 1284 return fn(repo, filematcher, bundlecaps)
1273 1285
1274 1286 def getunbundler(version, fh, alg, extras=None):
1275 1287 return _packermap[version][1](fh, alg, extras=extras)
1276 1288
1277 1289 def _changegroupinfo(repo, nodes, source):
1278 1290 if repo.ui.verbose or source == 'bundle':
1279 1291 repo.ui.status(_("%d changesets found\n") % len(nodes))
1280 1292 if repo.ui.debugflag:
1281 1293 repo.ui.debug("list of changesets:\n")
1282 1294 for node in nodes:
1283 1295 repo.ui.debug("%s\n" % hex(node))
1284 1296
1285 1297 def makechangegroup(repo, outgoing, version, source, fastpath=False,
1286 1298 bundlecaps=None):
1287 1299 cgstream = makestream(repo, outgoing, version, source,
1288 1300 fastpath=fastpath, bundlecaps=bundlecaps)
1289 1301 return getunbundler(version, util.chunkbuffer(cgstream), None,
1290 1302 {'clcount': len(outgoing.missing) })
1291 1303
1292 1304 def makestream(repo, outgoing, version, source, fastpath=False,
1293 1305 bundlecaps=None, filematcher=None):
1294 1306 bundler = getbundler(version, repo, bundlecaps=bundlecaps,
1295 1307 filematcher=filematcher)
1296 1308
1297 1309 repo = repo.unfiltered()
1298 1310 commonrevs = outgoing.common
1299 1311 csets = outgoing.missing
1300 1312 heads = outgoing.missingheads
1301 1313 # We go through the fast path if we get told to, or if all (unfiltered)
1302 1314 # heads have been requested (since we then know that all linkrevs will
1303 1315 # be pulled by the client).
1304 1316 heads.sort()
1305 1317 fastpathlinkrev = fastpath or (
1306 1318 repo.filtername is None and heads == sorted(repo.heads()))
1307 1319
1308 1320 repo.hook('preoutgoing', throw=True, source=source)
1309 1321 _changegroupinfo(repo, csets, source)
1310 1322 return bundler.generate(commonrevs, csets, fastpathlinkrev, source)
1311 1323
1312 1324 def _addchangegroupfiles(repo, source, revmap, trp, expectedfiles, needfiles):
1313 1325 revisions = 0
1314 1326 files = 0
1315 1327 progress = repo.ui.makeprogress(_('files'), unit=_('files'),
1316 1328 total=expectedfiles)
1317 1329 for chunkdata in iter(source.filelogheader, {}):
1318 1330 files += 1
1319 1331 f = chunkdata["filename"]
1320 1332 repo.ui.debug("adding %s revisions\n" % f)
1321 1333 progress.increment()
1322 1334 fl = repo.file(f)
1323 1335 o = len(fl)
1324 1336 try:
1325 1337 deltas = source.deltaiter()
1326 1338 if not fl.addgroup(deltas, revmap, trp):
1327 1339 raise error.Abort(_("received file revlog group is empty"))
1328 1340 except error.CensoredBaseError as e:
1329 1341 raise error.Abort(_("received delta base is censored: %s") % e)
1330 1342 revisions += len(fl) - o
1331 1343 if f in needfiles:
1332 1344 needs = needfiles[f]
1333 1345 for new in pycompat.xrange(o, len(fl)):
1334 1346 n = fl.node(new)
1335 1347 if n in needs:
1336 1348 needs.remove(n)
1337 1349 else:
1338 1350 raise error.Abort(
1339 1351 _("received spurious file revlog entry"))
1340 1352 if not needs:
1341 1353 del needfiles[f]
1342 1354 progress.complete()
1343 1355
1344 1356 for f, needs in needfiles.iteritems():
1345 1357 fl = repo.file(f)
1346 1358 for n in needs:
1347 1359 try:
1348 1360 fl.rev(n)
1349 1361 except error.LookupError:
1350 1362 raise error.Abort(
1351 1363 _('missing file data for %s:%s - run hg verify') %
1352 1364 (f, hex(n)))
1353 1365
1354 1366 return revisions, files
1355 1367
1356 1368 def _packellipsischangegroup(repo, common, match, relevant_nodes,
1357 1369 ellipsisroots, visitnodes, depth, source, version):
1358 1370 if version in ('01', '02'):
1359 1371 raise error.Abort(
1360 1372 'ellipsis nodes require at least cg3 on client and server, '
1361 1373 'but negotiated version %s' % version)
1362 1374 # We wrap cg1packer.revchunk, using a side channel to pass
1363 1375 # relevant_nodes into that area. Then if linknode isn't in the
1364 1376 # set, we know we have an ellipsis node and we should defer
1365 1377 # sending that node's data. We override close() to detect
1366 1378 # pending ellipsis nodes and flush them.
1367 1379 packer = getbundler(version, repo, filematcher=match)
1368 1380 # Give the packer the list of nodes which should not be
1369 1381 # ellipsis nodes. We store this rather than the set of nodes
1370 1382 # that should be an ellipsis because for very large histories
1371 1383 # we expect this to be significantly smaller.
1372 1384 packer.full_nodes = relevant_nodes
1373 1385 # Maps ellipsis revs to their roots at the changelog level.
1374 1386 packer.precomputed_ellipsis = ellipsisroots
1375 1387 # Maps CL revs to per-revlog revisions. Cleared in close() at
1376 1388 # the end of each group.
1377 1389 packer.clrev_to_localrev = {}
1378 1390 packer.next_clrev_to_localrev = {}
1379 1391 # Maps changelog nodes to changelog revs. Filled in once
1380 1392 # during changelog stage and then left unmodified.
1381 1393 packer.clnode_to_rev = {}
1382 1394 packer.changelog_done = False
1383 1395 # If true, informs the packer that it is serving shallow content and might
1384 1396 # need to pack file contents not introduced by the changes being packed.
1385 1397 packer.is_shallow = depth is not None
1386 1398
1387 1399 return packer.generate(common, visitnodes, False, source)