##// END OF EJS Templates
changegroup: emit revisiondelta instances from deltagroup()...
Gregory Szorc -
r39050:d662959d default
parent child Browse files
Show More
@@ -1,1455 +1,1456 b''
1 1 # changegroup.py - Mercurial changegroup manipulation functions
2 2 #
3 3 # Copyright 2006 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import os
11 11 import struct
12 12 import weakref
13 13
14 14 from .i18n import _
15 15 from .node import (
16 16 hex,
17 17 nullid,
18 18 nullrev,
19 19 short,
20 20 )
21 21
22 22 from .thirdparty import (
23 23 attr,
24 24 )
25 25
26 26 from . import (
27 27 dagutil,
28 28 error,
29 29 match as matchmod,
30 30 mdiff,
31 31 phases,
32 32 pycompat,
33 33 repository,
34 34 revlog,
35 35 util,
36 36 )
37 37
38 38 from .utils import (
39 39 stringutil,
40 40 )
41 41
# Per-revision delta header formats for each changegroup wire version.
# v1: node, p1, p2, changeset (link) node -- four 20-byte nodes; the
# delta base is implicit (first parent or previous node).
_CHANGEGROUPV1_DELTA_HEADER = struct.Struct("20s20s20s20s")
# v2: adds an explicit 20-byte delta base node.
_CHANGEGROUPV2_DELTA_HEADER = struct.Struct("20s20s20s20s20s")
# v3: explicitly big-endian and adds a 16-bit revlog flags field.
_CHANGEGROUPV3_DELTA_HEADER = struct.Struct(">20s20s20s20s20sH")

# Repository requirement advertised when LFS-tracked files are present.
LFS_REQUIREMENT = 'lfs'

# Local alias; reads exactly N bytes from a stream or aborts.
readexactly = util.readexactly
49 49
def getchunk(stream):
    """return the next chunk from stream as a string"""
    header = readexactly(stream, 4)
    length = struct.unpack(">l", header)[0]
    # The length field counts its own 4 bytes, so anything above 4 has
    # a payload to read.
    if length > 4:
        return readexactly(stream, length - 4)
    if length:
        # Lengths 1-4 (or negative) cannot describe a valid chunk.
        raise error.Abort(_("invalid chunk length %d") % length)
    # A zero-length chunk terminates a group.
    return ""
59 59
def chunkheader(length):
    """return a changegroup chunk header (string)"""
    # The on-wire length includes the 4 bytes of the header itself.
    return struct.pack(">l", 4 + length)
63 63
def closechunk():
    """return a changegroup chunk header (string) for a zero-length chunk"""
    # An all-zero length field marks the end of a group.
    return struct.pack(">l", 0)
67 67
def _fileheader(path):
    """Obtain a changegroup chunk header for a named path.

    A file/tree section opens with a chunk whose payload is the path
    name itself.
    """
    header = chunkheader(len(path))
    return header + path
71 71
def writechunks(ui, chunks, filename, vfs=None):
    """Write chunks to a file and return its filename.

    The stream is assumed to be a bundle file.
    Existing files will not be overwritten.
    If no filename is specified, a temporary file is created.
    """
    fh = None
    # ``cleanup`` holds the filename to delete if writing fails part-way.
    cleanup = None
    try:
        if filename:
            if vfs:
                fh = vfs.open(filename, "wb")
            else:
                # Increase default buffer size because default is usually
                # small (4k is common on Linux).
                fh = open(filename, "wb", 131072)
        else:
            fd, filename = pycompat.mkstemp(prefix="hg-bundle-", suffix=".hg")
            fh = os.fdopen(fd, r"wb")
        cleanup = filename
        for c in chunks:
            fh.write(c)
        # All chunks written successfully: keep the file.
        cleanup = None
        return filename
    finally:
        if fh is not None:
            fh.close()
        if cleanup is not None:
            # Writing failed; remove the partial file via the same layer
            # (vfs or os) that created it.
            if filename and vfs:
                vfs.unlink(cleanup)
            else:
                os.unlink(cleanup)
105 105
class cg1unpacker(object):
    """Unpacker for cg1 changegroup streams.

    A changegroup unpacker handles the framing of the revision data in
    the wire format. Most consumers will want to use the apply()
    method to add the changes from the changegroup to a repository.

    If you're forwarding a changegroup unmodified to another consumer,
    use getchunks(), which returns an iterator of changegroup
    chunks. This is mostly useful for cases where you need to know the
    data stream has ended by observing the end of the changegroup.

    deltachunk() is useful only if you're applying delta data. Most
    consumers should prefer apply() instead.

    A few other public methods exist. Those are used only for
    bundlerepo and some debug commands - their use is discouraged.
    """
    deltaheader = _CHANGEGROUPV1_DELTA_HEADER
    deltaheadersize = deltaheader.size
    version = '01'
    _grouplistcount = 1 # One list of files after the manifests

    def __init__(self, fh, alg, extras=None):
        """Wrap stream ``fh`` compressed with algorithm ``alg``.

        ``alg`` of None is treated as uncompressed ('UN').
        """
        if alg is None:
            alg = 'UN'
        if alg not in util.compengines.supportedbundletypes:
            raise error.Abort(_('unknown stream compression type: %s')
                              % alg)
        if alg == 'BZ':
            # NOTE(review): '_truncatedBZ' appears to compensate for
            # header bytes already consumed from the stream by the
            # caller -- confirm against bundle header detection.
            alg = '_truncatedBZ'

        compengine = util.compengines.forbundletype(alg)
        self._stream = compengine.decompressorreader(fh)
        self._type = alg
        self.extras = extras or {}
        # Invoked (when set) once per chunk read; used for progress.
        self.callback = None

    # These methods (compressed, read, seek, tell) all appear to only
    # be used by bundlerepo, but it's a little hard to tell.
    def compressed(self):
        return self._type is not None and self._type != 'UN'
    def read(self, l):
        return self._stream.read(l)
    def seek(self, pos):
        return self._stream.seek(pos)
    def tell(self):
        return self._stream.tell()
    def close(self):
        return self._stream.close()

    def _chunklength(self):
        """Read a chunk length prefix; 0 signals an empty (end) chunk."""
        d = readexactly(self._stream, 4)
        l = struct.unpack(">l", d)[0]
        if l <= 4:
            if l:
                raise error.Abort(_("invalid chunk length %d") % l)
            return 0
        if self.callback:
            self.callback()
        # Stored length includes the 4-byte prefix itself.
        return l - 4

    def changelogheader(self):
        """v10 does not have a changelog header chunk"""
        return {}

    def manifestheader(self):
        """v10 does not have a manifest header chunk"""
        return {}

    def filelogheader(self):
        """return the header of the filelogs chunk, v10 only has the filename"""
        l = self._chunklength()
        if not l:
            return {}
        fname = readexactly(self._stream, l)
        return {'filename': fname}

    def _deltaheader(self, headertuple, prevnode):
        """Decode a cg1 delta header.

        cg1 has no explicit delta base: the base is the previously sent
        node, or p1 for the first delta in a chain.
        """
        node, p1, p2, cs = headertuple
        if prevnode is None:
            deltabase = p1
        else:
            deltabase = prevnode
        flags = 0
        return node, p1, p2, deltabase, cs, flags

    def deltachunk(self, prevnode):
        """Read one delta; returns {} at the end of the current group."""
        l = self._chunklength()
        if not l:
            return {}
        headerdata = readexactly(self._stream, self.deltaheadersize)
        header = self.deltaheader.unpack(headerdata)
        delta = readexactly(self._stream, l - self.deltaheadersize)
        node, p1, p2, deltabase, cs, flags = self._deltaheader(header, prevnode)
        return (node, p1, p2, cs, deltabase, delta, flags)

    def getchunks(self):
        """returns all the chunks contains in the bundle

        Used when you need to forward the binary stream to a file or another
        network API. To do so, it parse the changegroup data, otherwise it will
        block in case of sshrepo because it don't know the end of the stream.
        """
        # For changegroup 1 and 2, we expect 3 parts: changelog, manifestlog,
        # and a list of filelogs. For changegroup 3, we expect 4 parts:
        # changelog, manifestlog, a list of tree manifestlogs, and a list of
        # filelogs.
        #
        # Changelog and manifestlog parts are terminated with empty chunks. The
        # tree and file parts are a list of entry sections. Each entry section
        # is a series of chunks terminating in an empty chunk. The list of these
        # entry sections is terminated in yet another empty chunk, so we know
        # we've reached the end of the tree/file list when we reach an empty
        # chunk that was proceeded by no non-empty chunks.

        parts = 0
        while parts < 2 + self._grouplistcount:
            noentries = True
            while True:
                chunk = getchunk(self)
                if not chunk:
                    # The first two empty chunks represent the end of the
                    # changelog and the manifestlog portions. The remaining
                    # empty chunks represent either A) the end of individual
                    # tree or file entries in the file list, or B) the end of
                    # the entire list. It's the end of the entire list if there
                    # were no entries (i.e. noentries is True).
                    if parts < 2:
                        parts += 1
                    elif noentries:
                        parts += 1
                    break
                noentries = False
                yield chunkheader(len(chunk))
                pos = 0
                # Re-emit payload in 1MB slices to bound memory per yield.
                while pos < len(chunk):
                    next = pos + 2**20
                    yield chunk[pos:next]
                    pos = next
            yield closechunk()

    def _unpackmanifests(self, repo, revmap, trp, prog):
        """Apply the manifest group from the stream to the repo."""
        self.callback = prog.increment
        # no need to check for empty manifest group here:
        # if the result of the merge of 1 and 2 is the same in 3 and 4,
        # no new manifest will be created and the manifest group will
        # be empty during the pull
        self.manifestheader()
        deltas = self.deltaiter()
        repo.manifestlog.addgroup(deltas, revmap, trp)
        prog.complete()
        self.callback = None

    def apply(self, repo, tr, srctype, url, targetphase=phases.draft,
              expectedtotal=None):
        """Add the changegroup returned by source.read() to this repo.
        srctype is a string like 'push', 'pull', or 'unbundle'. url is
        the URL of the repo where this changegroup is coming from.

        Return an integer summarizing the change to this repo:
        - nothing changed or no source: 0
        - more heads than before: 1+added heads (2..n)
        - fewer heads than before: -1-removed heads (-2..-n)
        - number of heads stays the same: 1
        """
        repo = repo.unfiltered()
        def csmap(x):
            repo.ui.debug("add changeset %s\n" % short(x))
            return len(cl)

        def revmap(x):
            return cl.rev(x)

        changesets = files = revisions = 0

        try:
            # The transaction may already carry source information. In this
            # case we use the top level data. We overwrite the argument
            # because we need to use the top level value (if they exist)
            # in this function.
            srctype = tr.hookargs.setdefault('source', srctype)
            url = tr.hookargs.setdefault('url', url)
            repo.hook('prechangegroup',
                      throw=True, **pycompat.strkwargs(tr.hookargs))

            # write changelog data to temp files so concurrent readers
            # will not see an inconsistent view
            cl = repo.changelog
            cl.delayupdate(tr)
            oldheads = set(cl.heads())

            trp = weakref.proxy(tr)
            # pull off the changeset group
            repo.ui.status(_("adding changesets\n"))
            clstart = len(cl)
            progress = repo.ui.makeprogress(_('changesets'), unit=_('chunks'),
                                            total=expectedtotal)
            self.callback = progress.increment

            # Collect the set of files touched, for the progress totals
            # used when applying filelogs below.
            efiles = set()
            def onchangelog(cl, node):
                efiles.update(cl.readfiles(node))

            self.changelogheader()
            deltas = self.deltaiter()
            cgnodes = cl.addgroup(deltas, csmap, trp, addrevisioncb=onchangelog)
            efiles = len(efiles)

            if not cgnodes:
                repo.ui.develwarn('applied empty changegroup',
                                  config='warn-empty-changegroup')
            clend = len(cl)
            changesets = clend - clstart
            progress.complete()
            self.callback = None

            # pull off the manifest group
            repo.ui.status(_("adding manifests\n"))
            # We know that we'll never have more manifests than we had
            # changesets.
            progress = repo.ui.makeprogress(_('manifests'), unit=_('chunks'),
                                            total=changesets)
            self._unpackmanifests(repo, revmap, trp, progress)

            needfiles = {}
            if repo.ui.configbool('server', 'validate'):
                cl = repo.changelog
                ml = repo.manifestlog
                # validate incoming csets have their manifests
                for cset in pycompat.xrange(clstart, clend):
                    mfnode = cl.changelogrevision(cset).manifest
                    mfest = ml[mfnode].readdelta()
                    # store file cgnodes we must see
                    for f, n in mfest.iteritems():
                        needfiles.setdefault(f, set()).add(n)

            # process the files
            repo.ui.status(_("adding file changes\n"))
            newrevs, newfiles = _addchangegroupfiles(
                repo, self, revmap, trp, efiles, needfiles)
            revisions += newrevs
            files += newfiles

            deltaheads = 0
            if oldheads:
                heads = cl.heads()
                deltaheads = len(heads) - len(oldheads)
                for h in heads:
                    # Heads that close a branch don't count toward the
                    # user-visible head delta.
                    if h not in oldheads and repo[h].closesbranch():
                        deltaheads -= 1
            htext = ""
            if deltaheads:
                htext = _(" (%+d heads)") % deltaheads

            repo.ui.status(_("added %d changesets"
                             " with %d changes to %d files%s\n")
                           % (changesets, revisions, files, htext))
            repo.invalidatevolatilesets()

            if changesets > 0:
                if 'node' not in tr.hookargs:
                    tr.hookargs['node'] = hex(cl.node(clstart))
                    tr.hookargs['node_last'] = hex(cl.node(clend - 1))
                    hookargs = dict(tr.hookargs)
                else:
                    hookargs = dict(tr.hookargs)
                    hookargs['node'] = hex(cl.node(clstart))
                    hookargs['node_last'] = hex(cl.node(clend - 1))
                repo.hook('pretxnchangegroup',
                          throw=True, **pycompat.strkwargs(hookargs))

            added = [cl.node(r) for r in pycompat.xrange(clstart, clend)]
            phaseall = None
            if srctype in ('push', 'serve'):
                # Old servers can not push the boundary themselves.
                # New servers won't push the boundary if changeset already
                # exists locally as secret
                #
                # We should not use added here but the list of all change in
                # the bundle
                if repo.publishing():
                    targetphase = phaseall = phases.public
                else:
                    # closer target phase computation

                    # Those changesets have been pushed from the
                    # outside, their phases are going to be pushed
                    # alongside. Therefor `targetphase` is
                    # ignored.
                    targetphase = phaseall = phases.draft
                if added:
                    phases.registernew(repo, tr, targetphase, added)
                if phaseall is not None:
                    phases.advanceboundary(repo, tr, phaseall, cgnodes)

            if changesets > 0:

                def runhooks():
                    # These hooks run when the lock releases, not when the
                    # transaction closes. So it's possible for the changelog
                    # to have changed since we last saw it.
                    if clstart >= len(repo):
                        return

                    repo.hook("changegroup", **pycompat.strkwargs(hookargs))

                    for n in added:
                        args = hookargs.copy()
                        args['node'] = hex(n)
                        del args['node_last']
                        repo.hook("incoming", **pycompat.strkwargs(args))

                    newheads = [h for h in repo.heads()
                                if h not in oldheads]
                    repo.ui.log("incoming",
                                "%d incoming changes - new heads: %s\n",
                                len(added),
                                ', '.join([hex(c[:6]) for c in newheads]))

                tr.addpostclose('changegroup-runhooks-%020i' % clstart,
                                lambda tr: repo._afterlock(runhooks))
        finally:
            repo.ui.flush()
        # never return 0 here:
        if deltaheads < 0:
            ret = deltaheads - 1
        else:
            ret = deltaheads + 1
        return ret

    def deltaiter(self):
        """
        returns an iterator of the deltas in this changegroup

        Useful for passing to the underlying storage system to be stored.
        """
        chain = None
        # deltachunk() returns {} (the iter() sentinel) at end of group.
        for chunkdata in iter(lambda: self.deltachunk(chain), {}):
            # Chunkdata: (node, p1, p2, cs, deltabase, delta, flags)
            yield chunkdata
            chain = chunkdata[0]
448 448
class cg2unpacker(cg1unpacker):
    """Unpacker for cg2 streams.

    cg2 streams add support for generaldelta, so the delta header
    format is slightly different. All other features about the data
    remain the same.
    """
    deltaheader = _CHANGEGROUPV2_DELTA_HEADER
    deltaheadersize = deltaheader.size
    version = '02'

    def _deltaheader(self, headertuple, prevnode):
        # cg2 transmits the delta base explicitly, so unlike cg1 the
        # previously seen node (prevnode) is not consulted.
        node, p1, p2, deltabase, cs = headertuple
        flags = 0
        return node, p1, p2, deltabase, cs, flags
464 464
class cg3unpacker(cg2unpacker):
    """Unpacker for cg3 streams.

    cg3 streams add support for exchanging treemanifests and revlog
    flags. It adds the revlog flags to the delta header and an empty chunk
    separating manifests and files.
    """
    deltaheader = _CHANGEGROUPV3_DELTA_HEADER
    deltaheadersize = deltaheader.size
    version = '03'
    _grouplistcount = 2 # One list of manifests and one list of files

    def _deltaheader(self, headertuple, prevnode):
        # cg3 carries the flags field directly in the header; prevnode
        # is not needed.
        node, p1, p2, deltabase, cs, flags = headertuple
        return node, p1, p2, deltabase, cs, flags

    def _unpackmanifests(self, repo, revmap, trp, prog):
        # Apply the root manifest group like cg2, then consume the extra
        # group list that holds tree (directory) manifests.
        super(cg3unpacker, self)._unpackmanifests(repo, revmap, trp, prog)
        for chunkdata in iter(self.filelogheader, {}):
            # If we get here, there are directory manifests in the changegroup
            d = chunkdata["filename"]
            repo.ui.debug("adding %s revisions\n" % d)
            dirlog = repo.manifestlog._revlog.dirlog(d)
            deltas = self.deltaiter()
            if not dirlog.addgroup(deltas, revmap, trp):
                raise error.Abort(_("received dir revlog group is empty"))
491 491
class headerlessfixup(object):
    """File-like shim that re-serves already-consumed header bytes.

    ``h`` holds bytes that were previously read off ``fh``; read() drains
    those first, then falls through to the underlying stream, so the
    consumer sees one contiguous stream.
    """
    def __init__(self, fh, h):
        self._h = h
        self._fh = fh

    def read(self, n):
        pending = self._h
        if not pending:
            # Buffered header exhausted: read straight from the stream.
            return readexactly(self._fh, n)
        out, self._h = pending[:n], pending[n:]
        if len(out) < n:
            # Header only partially satisfies the request; top up.
            out += readexactly(self._fh, n - len(out))
        return out
503 503
@attr.s(slots=True, frozen=True)
class revisiondelta(object):
    """Describes a delta entry in a changegroup.

    Captured data is sufficient to serialize the delta into multiple
    formats.
    """
    # 20 byte node of this revision.
    node = attr.ib()
    # 20 byte nodes of parent revisions.
    p1node = attr.ib()
    p2node = attr.ib()
    # 20 byte node of the revision this delta is against (nullid when the
    # delta chunks are a full snapshot rather than a true delta).
    basenode = attr.ib()
    # 20 byte node of changeset revision this delta is associated with.
    linknode = attr.ib()
    # 2 bytes of flags to apply to revision data.
    flags = attr.ib()
    # Iterable of chunks holding raw delta data.
    deltachunks = attr.ib()
524 524
def _revisiondeltatochunks(delta, headerfn):
    """Serialize a revisiondelta to changegroup chunks.

    ``headerfn`` renders the version-specific delta header; the emitted
    sequence is length prefix, header, then the raw delta chunks.
    """
    meta = headerfn(delta)
    total = len(meta)
    for piece in delta.deltachunks:
        total += len(piece)
    yield chunkheader(total)
    yield meta
    for piece in delta.deltachunks:
        yield piece
533
525 534 def _sortnodesnormal(store, nodes, reorder):
526 535 """Sort nodes for changegroup generation and turn into revnums."""
527 536 # for generaldelta revlogs, we linearize the revs; this will both be
528 537 # much quicker and generate a much smaller bundle
529 538 if (store._generaldelta and reorder is None) or reorder:
530 539 dag = dagutil.revlogdag(store)
531 540 return dag.linearize(set(store.rev(n) for n in nodes))
532 541 else:
533 542 return sorted([store.rev(n) for n in nodes])
534 543
535 544 def _sortnodesellipsis(store, nodes, cl, lookup):
536 545 """Sort nodes for changegroup generation and turn into revnums."""
537 546 # Ellipses serving mode.
538 547 #
539 548 # In a perfect world, we'd generate better ellipsis-ified graphs
540 549 # for non-changelog revlogs. In practice, we haven't started doing
541 550 # that yet, so the resulting DAGs for the manifestlog and filelogs
542 551 # are actually full of bogus parentage on all the ellipsis
543 552 # nodes. This has the side effect that, while the contents are
544 553 # correct, the individual DAGs might be completely out of whack in
545 554 # a case like 882681bc3166 and its ancestors (back about 10
546 555 # revisions or so) in the main hg repo.
547 556 #
548 557 # The one invariant we *know* holds is that the new (potentially
549 558 # bogus) DAG shape will be valid if we order the nodes in the
550 559 # order that they're introduced in dramatis personae by the
551 560 # changelog, so what we do is we sort the non-changelog histories
552 561 # by the order in which they are used by the changelog.
553 562 key = lambda n: cl.rev(lookup(n))
554 563 return [store.rev(n) for n in sorted(nodes, key=key)]
555 564
def _revisiondeltanormal(store, rev, prev, linknode, deltaparentfn):
    """Construct a revision delta for non-ellipses changegroup generation.

    ``deltaparentfn`` picks the delta base revision given the parents and
    the previously emitted revision. Returns a ``revisiondelta``.
    """
    node = store.node(rev)
    p1, p2 = store.parentrevs(rev)
    base = deltaparentfn(store, rev, p1, p2, prev)

    prefix = ''
    if store.iscensored(base) or store.iscensored(rev):
        # Censored revisions can't be delta'd normally: send the full
        # text (or the tombstone) with a synthesized diff header.
        try:
            delta = store.revision(node, raw=True)
        except error.CensoredNodeError as e:
            delta = e.tombstone
        if base == nullrev:
            prefix = mdiff.trivialdiffheader(len(delta))
        else:
            baselen = store.rawsize(base)
            prefix = mdiff.replacediffheader(baselen, len(delta))
    elif base == nullrev:
        # No base: emit the full revision as a trivial "diff".
        delta = store.revision(node, raw=True)
        prefix = mdiff.trivialdiffheader(len(delta))
    else:
        # Normal case: a real delta against the chosen base.
        delta = store.revdiff(base, rev)
    p1n, p2n = store.parents(node)

    return revisiondelta(
        node=node,
        p1node=p1n,
        p2node=p2n,
        basenode=store.node(base),
        linknode=linknode,
        flags=store.flags(rev),
        deltachunks=(prefix, delta),
    )
589 598
def _revisiondeltanarrow(cl, store, ischangelog, rev, linkrev,
                         linknode, clrevtolocalrev, fullclnodes,
                         precomputedellipsis):
    """Construct an ellipsis-flagged revisiondelta for narrow serving.

    Parents are remapped through the precomputed ellipsis graph and the
    revision is always emitted as a full snapshot (basenode=nullid) with
    the REVIDX_ELLIPSIS flag set.
    """
    linkparents = precomputedellipsis[linkrev]
    def local(clrev):
        """Turn a changelog revnum into a local revnum.

        The ellipsis dag is stored as revnums on the changelog,
        but when we're producing ellipsis entries for
        non-changelog revlogs, we need to turn those numbers into
        something local. This does that for us, and during the
        changelog sending phase will also expand the stored
        mappings as needed.
        """
        if clrev == nullrev:
            return nullrev

        if ischangelog:
            return clrev

        # Walk the ellipsis-ized changelog breadth-first looking for a
        # change that has been linked from the current revlog.
        #
        # For a flat manifest revlog only a single step should be necessary
        # as all relevant changelog entries are relevant to the flat
        # manifest.
        #
        # For a filelog or tree manifest dirlog however not every changelog
        # entry will have been relevant, so we need to skip some changelog
        # nodes even after ellipsis-izing.
        walk = [clrev]
        while walk:
            p = walk[0]
            walk = walk[1:]
            if p in clrevtolocalrev:
                return clrevtolocalrev[p]
            elif p in fullclnodes:
                walk.extend([pp for pp in cl.parentrevs(p)
                             if pp != nullrev])
            elif p in precomputedellipsis:
                walk.extend([pp for pp in precomputedellipsis[p]
                             if pp != nullrev])
            else:
                # In this case, we've got an ellipsis with parents
                # outside the current bundle (likely an
                # incremental pull). We "know" that we can use the
                # value of this same revlog at whatever revision
                # is pointed to by linknode. "Know" is in scare
                # quotes because I haven't done enough examination
                # of edge cases to convince myself this is really
                # a fact - it works for all the (admittedly
                # thorough) cases in our testsuite, but I would be
                # somewhat unsurprised to find a case in the wild
                # where this breaks down a bit. That said, I don't
                # know if it would hurt anything.
                for i in pycompat.xrange(rev, 0, -1):
                    if store.linkrev(i) == clrev:
                        return i
                # We failed to resolve a parent for this node, so
                # we crash the changegroup construction.
                raise error.Abort(
                    'unable to resolve parent while packing %r %r'
                    ' for changeset %r' % (store.indexfile, rev, clrev))

        return nullrev

    if not linkparents or (
        store.parentrevs(rev) == (nullrev, nullrev)):
        p1, p2 = nullrev, nullrev
    elif len(linkparents) == 1:
        p1, = sorted(local(p) for p in linkparents)
        p2 = nullrev
    else:
        p1, p2 = sorted(local(p) for p in linkparents)

    n = store.node(rev)
    p1n, p2n = store.node(p1), store.node(p2)
    flags = store.flags(rev)
    flags |= revlog.REVIDX_ELLIPSIS

    # TODO: try and actually send deltas for ellipsis data blocks
    data = store.revision(n)
    diffheader = mdiff.trivialdiffheader(len(data))

    return revisiondelta(
        node=n,
        p1node=p1n,
        p2node=p2n,
        basenode=nullid,
        linknode=linknode,
        flags=flags,
        deltachunks=(diffheader, data),
    )
683 692
def deltagroup(repo, revs, store, ischangelog, lookup, deltaparentfn,
               units=None,
               ellipses=False, clrevtolocalrev=None, fullclnodes=None,
               precomputedellipsis=None):
    """Calculate deltas for a set of revisions.

    Is a generator of ``revisiondelta`` instances.

    ``revs`` are revision numbers in ``store``; ``lookup`` maps a node to
    its linknode; ``deltaparentfn`` chooses the delta base. When
    ``ellipses`` is true, the narrow/ellipsis maps (``clrevtolocalrev``,
    ``fullclnodes``, ``precomputedellipsis``) must be provided.

    If units is not None, progress detail will be generated, units specifies
    the type of revlog that is touched (changelog, manifest, etc.).
    """
    # if we don't have any revisions touched by these changesets, bail
    if len(revs) == 0:
        return

    cl = repo.changelog

    # add the parent of the first rev so the first delta can be computed
    # against it; the receiver is guaranteed to already have it.
    p = store.parentrevs(revs[0])[0]
    revs.insert(0, p)

    # build deltas
    progress = None
    if units is not None:
        progress = repo.ui.makeprogress(_('bundling'), unit=units,
                                        total=(len(revs) - 1))
    for r in pycompat.xrange(len(revs) - 1):
        if progress:
            progress.update(r + 1)
        prev, curr = revs[r], revs[r + 1]
        linknode = lookup(store.node(curr))

        if ellipses:
            linkrev = cl.rev(linknode)
            clrevtolocalrev[linkrev] = curr

            # This is a node to send in full, because the changeset it
            # corresponds to was a full changeset.
            if linknode in fullclnodes:
                delta = _revisiondeltanormal(store, curr, prev, linknode,
                                             deltaparentfn)
            elif linkrev not in precomputedellipsis:
                # Not part of the ellipsis graph; nothing to send.
                delta = None
            else:
                delta = _revisiondeltanarrow(
                    cl, store, ischangelog, curr, linkrev, linknode,
                    clrevtolocalrev, fullclnodes,
                    precomputedellipsis)
        else:
            delta = _revisiondeltanormal(store, curr, prev, linknode,
                                         deltaparentfn)

        if delta:
            yield delta

    if progress:
        progress.complete()
754 750
755 751 class cgpacker(object):
    def __init__(self, repo, filematcher, version, allowreorder,
                 deltaparentfn, builddeltaheader, manifestsend,
                 bundlecaps=None, ellipses=False,
                 shallow=False, ellipsisroots=None, fullnodes=None):
        """Given a source repo, construct a bundler.

        filematcher is a matcher that matches on files to include in the
        changegroup. Used to facilitate sparse changegroups.

        allowreorder controls whether reordering of revisions is allowed.
        This value is used when ``bundle.reorder`` is ``auto`` or isn't
        set.

        deltaparentfn is a callable that resolves the delta parent for
        a specific revision.

        builddeltaheader is a callable that constructs the header for a group
        delta.

        manifestsend is a chunk to send after manifests have been fully emitted.

        ellipses indicates whether ellipsis serving mode is enabled.

        bundlecaps is optional and can be used to specify the set of
        capabilities which can be used to build the bundle. While bundlecaps is
        unused in core Mercurial, extensions rely on this feature to communicate
        capabilities to customize the changegroup packer.

        shallow indicates whether shallow data might be sent. The packer may
        need to pack file contents not introduced by the changes being packed.

        fullnodes is the set of changelog nodes which should not be ellipsis
        nodes. We store this rather than the set of nodes that should be
        ellipsis because for very large histories we expect this to be
        significantly smaller.
        """
        assert filematcher
        self._filematcher = filematcher

        self.version = version
        self._deltaparentfn = deltaparentfn
        self._builddeltaheader = builddeltaheader
        self._manifestsend = manifestsend
        self._ellipses = ellipses

        # Set of capabilities we can use to build the bundle.
        if bundlecaps is None:
            bundlecaps = set()
        self._bundlecaps = bundlecaps
        self._isshallow = shallow
        self._fullclnodes = fullnodes

        # Maps ellipsis revs to their roots at the changelog level.
        self._precomputedellipsis = ellipsisroots

        # experimental config: bundle.reorder
        reorder = repo.ui.config('bundle', 'reorder')
        if reorder == 'auto':
            self._reorder = allowreorder
        else:
            self._reorder = stringutil.parsebool(reorder)

        self._repo = repo

        # Size notes are only emitted in verbose (non-debug) mode;
        # otherwise _verbosenote is a no-op.
        if self._repo.ui.verbose and not self._repo.ui.debugflag:
            self._verbosenote = self._repo.ui.note
        else:
            self._verbosenote = lambda s: None
824 820
825 821 def generate(self, commonrevs, clnodes, fastpathlinkrev, source):
826 822 """Yield a sequence of changegroup byte chunks."""
827 823
828 824 repo = self._repo
829 825 cl = repo.changelog
830 826
831 827 self._verbosenote(_('uncompressed size of bundle content:\n'))
832 828 size = 0
833 829
834 clstate, chunks = self._generatechangelog(cl, clnodes)
835 for chunk in chunks:
836 size += len(chunk)
837 yield chunk
830 clstate, deltas = self._generatechangelog(cl, clnodes)
831 for delta in deltas:
832 for chunk in _revisiondeltatochunks(delta, self._builddeltaheader):
833 size += len(chunk)
834 yield chunk
838 835
839 836 close = closechunk()
840 837 size += len(close)
841 838 yield closechunk()
842 839
843 840 self._verbosenote(_('%8.i (changelog)\n') % size)
844 841
845 842 clrevorder = clstate['clrevorder']
846 843 mfs = clstate['mfs']
847 844 changedfiles = clstate['changedfiles']
848 845
849 846 # We need to make sure that the linkrev in the changegroup refers to
850 847 # the first changeset that introduced the manifest or file revision.
851 848 # The fastpath is usually safer than the slowpath, because the filelogs
852 849 # are walked in revlog order.
853 850 #
854 851 # When taking the slowpath with reorder=None and the manifest revlog
855 852 # uses generaldelta, the manifest may be walked in the "wrong" order.
856 853 # Without 'clrevorder', we would get an incorrect linkrev (see fix in
857 854 # cc0ff93d0c0c).
858 855 #
859 856 # When taking the fastpath, we are only vulnerable to reordering
860 857 # of the changelog itself. The changelog never uses generaldelta, so
861 858 # it is only reordered when reorder=True. To handle this case, we
862 859 # simply take the slowpath, which already has the 'clrevorder' logic.
863 860 # This was also fixed in cc0ff93d0c0c.
864 861 fastpathlinkrev = fastpathlinkrev and not self._reorder
865 862 # Treemanifests don't work correctly with fastpathlinkrev
866 863 # either, because we don't discover which directory nodes to
867 864 # send along with files. This could probably be fixed.
868 865 fastpathlinkrev = fastpathlinkrev and (
869 866 'treemanifest' not in repo.requirements)
870 867
871 868 fnodes = {} # needed file nodes
872 869
873 870 size = 0
874 871 it = self.generatemanifests(
875 872 commonrevs, clrevorder, fastpathlinkrev, mfs, fnodes, source,
876 873 clstate['clrevtomanifestrev'])
877 874
878 for dir, chunks in it:
875 for dir, deltas in it:
879 876 if dir:
880 877 assert self.version == b'03'
881 878 chunk = _fileheader(dir)
882 879 size += len(chunk)
883 880 yield chunk
884 881
885 for chunk in chunks:
886 size += len(chunk)
887 yield chunk
882 for delta in deltas:
883 chunks = _revisiondeltatochunks(delta, self._builddeltaheader)
884 for chunk in chunks:
885 size += len(chunk)
886 yield chunk
888 887
889 888 close = closechunk()
890 889 size += len(close)
891 890 yield close
892 891
893 892 self._verbosenote(_('%8.i (manifests)\n') % size)
894 893 yield self._manifestsend
895 894
896 895 mfdicts = None
897 896 if self._ellipses and self._isshallow:
898 897 mfdicts = [(self._repo.manifestlog[n].read(), lr)
899 898 for (n, lr) in mfs.iteritems()]
900 899
901 900 mfs.clear()
902 901 clrevs = set(cl.rev(x) for x in clnodes)
903 902
904 903 it = self.generatefiles(changedfiles, commonrevs,
905 904 source, mfdicts, fastpathlinkrev,
906 905 fnodes, clrevs)
907 906
908 for path, chunks in it:
907 for path, deltas in it:
909 908 h = _fileheader(path)
910 909 size = len(h)
911 910 yield h
912 911
913 for chunk in chunks:
914 size += len(chunk)
915 yield chunk
912 for delta in deltas:
913 chunks = _revisiondeltatochunks(delta, self._builddeltaheader)
914 for chunk in chunks:
915 size += len(chunk)
916 yield chunk
916 917
917 918 close = closechunk()
918 919 size += len(close)
919 920 yield close
920 921
921 922 self._verbosenote(_('%8.i %s\n') % (size, path))
922 923
923 924 yield closechunk()
924 925
925 926 if clnodes:
926 927 repo.hook('outgoing', node=hex(clnodes[0]), source=source)
927 928
928 929 def _generatechangelog(self, cl, nodes):
929 930 """Generate data for changelog chunks.
930 931
931 932 Returns a 2-tuple of a dict containing state and an iterable of
932 933 byte chunks. The state will not be fully populated until the
933 934 chunk stream has been fully consumed.
934 935 """
935 936 clrevorder = {}
936 937 mfs = {} # needed manifests
937 938 mfl = self._repo.manifestlog
938 939 # TODO violates storage abstraction.
939 940 mfrevlog = mfl._revlog
940 941 changedfiles = set()
941 942 clrevtomanifestrev = {}
942 943
943 944 # Callback for the changelog, used to collect changed files and
944 945 # manifest nodes.
945 946 # Returns the linkrev node (identity in the changelog case).
946 947 def lookupcl(x):
947 948 c = cl.read(x)
948 949 clrevorder[x] = len(clrevorder)
949 950
950 951 if self._ellipses:
951 952 # Only update mfs if x is going to be sent. Otherwise we
952 953 # end up with bogus linkrevs specified for manifests and
953 954 # we skip some manifest nodes that we should otherwise
954 955 # have sent.
955 956 if (x in self._fullclnodes
956 957 or cl.rev(x) in self._precomputedellipsis):
957 958 n = c[0]
958 959 # Record the first changeset introducing this manifest
959 960 # version.
960 961 mfs.setdefault(n, x)
961 962 # Set this narrow-specific dict so we have the lowest
962 963 # manifest revnum to look up for this cl revnum. (Part of
963 964 # mapping changelog ellipsis parents to manifest ellipsis
964 965 # parents)
965 966 clrevtomanifestrev.setdefault(cl.rev(x), mfrevlog.rev(n))
966 967 # We can't trust the changed files list in the changeset if the
967 968 # client requested a shallow clone.
968 969 if self._isshallow:
969 970 changedfiles.update(mfl[c[0]].read().keys())
970 971 else:
971 972 changedfiles.update(c[3])
972 973 else:
973 974
974 975 n = c[0]
975 976 # record the first changeset introducing this manifest version
976 977 mfs.setdefault(n, x)
977 978 # Record a complete list of potentially-changed files in
978 979 # this manifest.
979 980 changedfiles.update(c[3])
980 981
981 982 return x
982 983
983 984 # Changelog doesn't benefit from reordering revisions. So send out
984 985 # revisions in store order.
985 986 revs = sorted(cl.rev(n) for n in nodes)
986 987
987 988 state = {
988 989 'clrevorder': clrevorder,
989 990 'mfs': mfs,
990 991 'changedfiles': changedfiles,
991 992 'clrevtomanifestrev': clrevtomanifestrev,
992 993 }
993 994
994 995 gen = deltagroup(
995 996 self._repo, revs, cl, True, lookupcl,
996 self._deltaparentfn, self._builddeltaheader,
997 self._deltaparentfn,
997 998 ellipses=self._ellipses,
998 999 units=_('changesets'),
999 1000 clrevtolocalrev={},
1000 1001 fullclnodes=self._fullclnodes,
1001 1002 precomputedellipsis=self._precomputedellipsis)
1002 1003
1003 1004 return state, gen
1004 1005
1005 1006 def generatemanifests(self, commonrevs, clrevorder, fastpathlinkrev, mfs,
1006 1007 fnodes, source, clrevtolocalrev):
1007 1008 """Returns an iterator of changegroup chunks containing manifests.
1008 1009
1009 1010 `source` is unused here, but is used by extensions like remotefilelog to
1010 1011 change what is sent based in pulls vs pushes, etc.
1011 1012 """
1012 1013 repo = self._repo
1013 1014 cl = repo.changelog
1014 1015 mfl = repo.manifestlog
1015 1016 dirlog = mfl._revlog.dirlog
1016 1017 tmfnodes = {'': mfs}
1017 1018
1018 1019 # Callback for the manifest, used to collect linkrevs for filelog
1019 1020 # revisions.
1020 1021 # Returns the linkrev node (collected in lookupcl).
1021 1022 def makelookupmflinknode(dir, nodes):
1022 1023 if fastpathlinkrev:
1023 1024 assert not dir
1024 1025 return mfs.__getitem__
1025 1026
1026 1027 def lookupmflinknode(x):
1027 1028 """Callback for looking up the linknode for manifests.
1028 1029
1029 1030 Returns the linkrev node for the specified manifest.
1030 1031
1031 1032 SIDE EFFECT:
1032 1033
1033 1034 1) fclnodes gets populated with the list of relevant
1034 1035 file nodes if we're not using fastpathlinkrev
1035 1036 2) When treemanifests are in use, collects treemanifest nodes
1036 1037 to send
1037 1038
1038 1039 Note that this means manifests must be completely sent to
1039 1040 the client before you can trust the list of files and
1040 1041 treemanifests to send.
1041 1042 """
1042 1043 clnode = nodes[x]
1043 1044 mdata = mfl.get(dir, x).readfast(shallow=True)
1044 1045 for p, n, fl in mdata.iterentries():
1045 1046 if fl == 't': # subdirectory manifest
1046 1047 subdir = dir + p + '/'
1047 1048 tmfclnodes = tmfnodes.setdefault(subdir, {})
1048 1049 tmfclnode = tmfclnodes.setdefault(n, clnode)
1049 1050 if clrevorder[clnode] < clrevorder[tmfclnode]:
1050 1051 tmfclnodes[n] = clnode
1051 1052 else:
1052 1053 f = dir + p
1053 1054 fclnodes = fnodes.setdefault(f, {})
1054 1055 fclnode = fclnodes.setdefault(n, clnode)
1055 1056 if clrevorder[clnode] < clrevorder[fclnode]:
1056 1057 fclnodes[n] = clnode
1057 1058 return clnode
1058 1059 return lookupmflinknode
1059 1060
1060 1061 while tmfnodes:
1061 1062 dir, nodes = tmfnodes.popitem()
1062 1063 store = dirlog(dir)
1063 1064
1064 1065 if not self._filematcher.visitdir(store._dir[:-1] or '.'):
1065 1066 prunednodes = []
1066 1067 else:
1067 1068 frev, flr = store.rev, store.linkrev
1068 1069 prunednodes = [n for n in nodes
1069 1070 if flr(frev(n)) not in commonrevs]
1070 1071
1071 1072 if dir and not prunednodes:
1072 1073 continue
1073 1074
1074 1075 lookupfn = makelookupmflinknode(dir, nodes)
1075 1076
1076 1077 if self._ellipses:
1077 1078 revs = _sortnodesellipsis(store, prunednodes, cl,
1078 1079 lookupfn)
1079 1080 else:
1080 1081 revs = _sortnodesnormal(store, prunednodes,
1081 1082 self._reorder)
1082 1083
1083 it = deltagroup(
1084 deltas = deltagroup(
1084 1085 self._repo, revs, store, False, lookupfn,
1085 self._deltaparentfn, self._builddeltaheader,
1086 self._deltaparentfn,
1086 1087 ellipses=self._ellipses,
1087 1088 units=_('manifests'),
1088 1089 clrevtolocalrev=clrevtolocalrev,
1089 1090 fullclnodes=self._fullclnodes,
1090 1091 precomputedellipsis=self._precomputedellipsis)
1091 1092
1092 yield dir, it
1093 yield dir, deltas
1093 1094
1094 1095 # The 'source' parameter is useful for extensions
1095 1096 def generatefiles(self, changedfiles, commonrevs, source,
1096 1097 mfdicts, fastpathlinkrev, fnodes, clrevs):
1097 1098 changedfiles = list(filter(self._filematcher, changedfiles))
1098 1099
1099 1100 if not fastpathlinkrev:
1100 1101 def normallinknodes(unused, fname):
1101 1102 return fnodes.get(fname, {})
1102 1103 else:
1103 1104 cln = self._repo.changelog.node
1104 1105
1105 1106 def normallinknodes(store, fname):
1106 1107 flinkrev = store.linkrev
1107 1108 fnode = store.node
1108 1109 revs = ((r, flinkrev(r)) for r in store)
1109 1110 return dict((fnode(r), cln(lr))
1110 1111 for r, lr in revs if lr in clrevs)
1111 1112
1112 1113 clrevtolocalrev = {}
1113 1114
1114 1115 if self._isshallow:
1115 1116 # In a shallow clone, the linknodes callback needs to also include
1116 1117 # those file nodes that are in the manifests we sent but weren't
1117 1118 # introduced by those manifests.
1118 1119 commonctxs = [self._repo[c] for c in commonrevs]
1119 1120 clrev = self._repo.changelog.rev
1120 1121
1121 1122 # Defining this function has a side-effect of overriding the
1122 1123 # function of the same name that was passed in as an argument.
1123 1124 # TODO have caller pass in appropriate function.
1124 1125 def linknodes(flog, fname):
1125 1126 for c in commonctxs:
1126 1127 try:
1127 1128 fnode = c.filenode(fname)
1128 1129 clrevtolocalrev[c.rev()] = flog.rev(fnode)
1129 1130 except error.ManifestLookupError:
1130 1131 pass
1131 1132 links = normallinknodes(flog, fname)
1132 1133 if len(links) != len(mfdicts):
1133 1134 for mf, lr in mfdicts:
1134 1135 fnode = mf.get(fname, None)
1135 1136 if fnode in links:
1136 1137 links[fnode] = min(links[fnode], lr, key=clrev)
1137 1138 elif fnode:
1138 1139 links[fnode] = lr
1139 1140 return links
1140 1141 else:
1141 1142 linknodes = normallinknodes
1142 1143
1143 1144 repo = self._repo
1144 1145 cl = repo.changelog
1145 1146 progress = repo.ui.makeprogress(_('bundling'), unit=_('files'),
1146 1147 total=len(changedfiles))
1147 1148 for i, fname in enumerate(sorted(changedfiles)):
1148 1149 filerevlog = repo.file(fname)
1149 1150 if not filerevlog:
1150 1151 raise error.Abort(_("empty or missing file data for %s") %
1151 1152 fname)
1152 1153
1153 1154 clrevtolocalrev.clear()
1154 1155
1155 1156 linkrevnodes = linknodes(filerevlog, fname)
1156 1157 # Lookup for filenodes, we collected the linkrev nodes above in the
1157 1158 # fastpath case and with lookupmf in the slowpath case.
1158 1159 def lookupfilelog(x):
1159 1160 return linkrevnodes[x]
1160 1161
1161 1162 frev, flr = filerevlog.rev, filerevlog.linkrev
1162 1163 filenodes = [n for n in linkrevnodes
1163 1164 if flr(frev(n)) not in commonrevs]
1164 1165
1165 1166 if filenodes:
1166 1167 if self._ellipses:
1167 1168 revs = _sortnodesellipsis(filerevlog, filenodes,
1168 1169 cl, lookupfilelog)
1169 1170 else:
1170 1171 revs = _sortnodesnormal(filerevlog, filenodes,
1171 1172 self._reorder)
1172 1173
1173 1174 progress.update(i + 1, item=fname)
1174 1175
1175 it = deltagroup(
1176 deltas = deltagroup(
1176 1177 self._repo, revs, filerevlog, False, lookupfilelog,
1177 self._deltaparentfn, self._builddeltaheader,
1178 self._deltaparentfn,
1178 1179 ellipses=self._ellipses,
1179 1180 clrevtolocalrev=clrevtolocalrev,
1180 1181 fullclnodes=self._fullclnodes,
1181 1182 precomputedellipsis=self._precomputedellipsis)
1182 1183
1183 yield fname, it
1184 yield fname, deltas
1184 1185
1185 1186 progress.complete()
1186 1187
1187 1188 def _deltaparentprev(store, rev, p1, p2, prev):
1188 1189 """Resolve a delta parent to the previous revision.
1189 1190
1190 1191 Used for version 1 changegroups, which don't support generaldelta.
1191 1192 """
1192 1193 return prev
1193 1194
1194 1195 def _deltaparentgeneraldelta(store, rev, p1, p2, prev):
1195 1196 """Resolve a delta parent when general deltas are supported."""
1196 1197 dp = store.deltaparent(rev)
1197 1198 if dp == nullrev and store.storedeltachains:
1198 1199 # Avoid sending full revisions when delta parent is null. Pick prev
1199 1200 # in that case. It's tempting to pick p1 in this case, as p1 will
1200 1201 # be smaller in the common case. However, computing a delta against
1201 1202 # p1 may require resolving the raw text of p1, which could be
1202 1203 # expensive. The revlog caches should have prev cached, meaning
1203 1204 # less CPU for changegroup generation. There is likely room to add
1204 1205 # a flag and/or config option to control this behavior.
1205 1206 base = prev
1206 1207 elif dp == nullrev:
1207 1208 # revlog is configured to use full snapshot for a reason,
1208 1209 # stick to full snapshot.
1209 1210 base = nullrev
1210 1211 elif dp not in (p1, p2, prev):
1211 1212 # Pick prev when we can't be sure remote has the base revision.
1212 1213 return prev
1213 1214 else:
1214 1215 base = dp
1215 1216
1216 1217 if base != nullrev and not store.candelta(base, rev):
1217 1218 base = nullrev
1218 1219
1219 1220 return base
1220 1221
1221 1222 def _deltaparentellipses(store, rev, p1, p2, prev):
1222 1223 """Resolve a delta parent when in ellipses mode."""
1223 1224 # TODO: send better deltas when in narrow mode.
1224 1225 #
1225 1226 # changegroup.group() loops over revisions to send,
1226 1227 # including revisions we'll skip. What this means is that
1227 1228 # `prev` will be a potentially useless delta base for all
1228 1229 # ellipsis nodes, as the client likely won't have it. In
1229 1230 # the future we should do bookkeeping about which nodes
1230 1231 # have been sent to the client, and try to be
1231 1232 # significantly smarter about delta bases. This is
1232 1233 # slightly tricky because this same code has to work for
1233 1234 # all revlogs, and we don't have the linkrev/linknode here.
1234 1235 return p1
1235 1236
1236 1237 def _makecg1packer(repo, filematcher, bundlecaps, ellipses=False,
1237 1238 shallow=False, ellipsisroots=None, fullnodes=None):
1238 1239 builddeltaheader = lambda d: _CHANGEGROUPV1_DELTA_HEADER.pack(
1239 1240 d.node, d.p1node, d.p2node, d.linknode)
1240 1241
1241 1242 return cgpacker(repo, filematcher, b'01',
1242 1243 deltaparentfn=_deltaparentprev,
1243 1244 allowreorder=None,
1244 1245 builddeltaheader=builddeltaheader,
1245 1246 manifestsend=b'',
1246 1247 bundlecaps=bundlecaps,
1247 1248 ellipses=ellipses,
1248 1249 shallow=shallow,
1249 1250 ellipsisroots=ellipsisroots,
1250 1251 fullnodes=fullnodes)
1251 1252
1252 1253 def _makecg2packer(repo, filematcher, bundlecaps, ellipses=False,
1253 1254 shallow=False, ellipsisroots=None, fullnodes=None):
1254 1255 builddeltaheader = lambda d: _CHANGEGROUPV2_DELTA_HEADER.pack(
1255 1256 d.node, d.p1node, d.p2node, d.basenode, d.linknode)
1256 1257
1257 1258 # Since generaldelta is directly supported by cg2, reordering
1258 1259 # generally doesn't help, so we disable it by default (treating
1259 1260 # bundle.reorder=auto just like bundle.reorder=False).
1260 1261 return cgpacker(repo, filematcher, b'02',
1261 1262 deltaparentfn=_deltaparentgeneraldelta,
1262 1263 allowreorder=False,
1263 1264 builddeltaheader=builddeltaheader,
1264 1265 manifestsend=b'',
1265 1266 bundlecaps=bundlecaps,
1266 1267 ellipses=ellipses,
1267 1268 shallow=shallow,
1268 1269 ellipsisroots=ellipsisroots,
1269 1270 fullnodes=fullnodes)
1270 1271
1271 1272 def _makecg3packer(repo, filematcher, bundlecaps, ellipses=False,
1272 1273 shallow=False, ellipsisroots=None, fullnodes=None):
1273 1274 builddeltaheader = lambda d: _CHANGEGROUPV3_DELTA_HEADER.pack(
1274 1275 d.node, d.p1node, d.p2node, d.basenode, d.linknode, d.flags)
1275 1276
1276 1277 deltaparentfn = (_deltaparentellipses if ellipses
1277 1278 else _deltaparentgeneraldelta)
1278 1279
1279 1280 return cgpacker(repo, filematcher, b'03',
1280 1281 deltaparentfn=deltaparentfn,
1281 1282 allowreorder=False,
1282 1283 builddeltaheader=builddeltaheader,
1283 1284 manifestsend=closechunk(),
1284 1285 bundlecaps=bundlecaps,
1285 1286 ellipses=ellipses,
1286 1287 shallow=shallow,
1287 1288 ellipsisroots=ellipsisroots,
1288 1289 fullnodes=fullnodes)
1289 1290
1290 1291 _packermap = {'01': (_makecg1packer, cg1unpacker),
1291 1292 # cg2 adds support for exchanging generaldelta
1292 1293 '02': (_makecg2packer, cg2unpacker),
1293 1294 # cg3 adds support for exchanging revlog flags and treemanifests
1294 1295 '03': (_makecg3packer, cg3unpacker),
1295 1296 }
1296 1297
1297 1298 def allsupportedversions(repo):
1298 1299 versions = set(_packermap.keys())
1299 1300 if not (repo.ui.configbool('experimental', 'changegroup3') or
1300 1301 repo.ui.configbool('experimental', 'treemanifest') or
1301 1302 'treemanifest' in repo.requirements):
1302 1303 versions.discard('03')
1303 1304 return versions
1304 1305
1305 1306 # Changegroup versions that can be applied to the repo
1306 1307 def supportedincomingversions(repo):
1307 1308 return allsupportedversions(repo)
1308 1309
1309 1310 # Changegroup versions that can be created from the repo
1310 1311 def supportedoutgoingversions(repo):
1311 1312 versions = allsupportedversions(repo)
1312 1313 if 'treemanifest' in repo.requirements:
1313 1314 # Versions 01 and 02 support only flat manifests and it's just too
1314 1315 # expensive to convert between the flat manifest and tree manifest on
1315 1316 # the fly. Since tree manifests are hashed differently, all of history
1316 1317 # would have to be converted. Instead, we simply don't even pretend to
1317 1318 # support versions 01 and 02.
1318 1319 versions.discard('01')
1319 1320 versions.discard('02')
1320 1321 if repository.NARROW_REQUIREMENT in repo.requirements:
1321 1322 # Versions 01 and 02 don't support revlog flags, and we need to
1322 1323 # support that for stripping and unbundling to work.
1323 1324 versions.discard('01')
1324 1325 versions.discard('02')
1325 1326 if LFS_REQUIREMENT in repo.requirements:
1326 1327 # Versions 01 and 02 don't support revlog flags, and we need to
1327 1328 # mark LFS entries with REVIDX_EXTSTORED.
1328 1329 versions.discard('01')
1329 1330 versions.discard('02')
1330 1331
1331 1332 return versions
1332 1333
1333 1334 def localversion(repo):
1334 1335 # Finds the best version to use for bundles that are meant to be used
1335 1336 # locally, such as those from strip and shelve, and temporary bundles.
1336 1337 return max(supportedoutgoingversions(repo))
1337 1338
1338 1339 def safeversion(repo):
1339 1340 # Finds the smallest version that it's safe to assume clients of the repo
1340 1341 # will support. For example, all hg versions that support generaldelta also
1341 1342 # support changegroup 02.
1342 1343 versions = supportedoutgoingversions(repo)
1343 1344 if 'generaldelta' in repo.requirements:
1344 1345 versions.discard('01')
1345 1346 assert versions
1346 1347 return min(versions)
1347 1348
1348 1349 def getbundler(version, repo, bundlecaps=None, filematcher=None,
1349 1350 ellipses=False, shallow=False, ellipsisroots=None,
1350 1351 fullnodes=None):
1351 1352 assert version in supportedoutgoingversions(repo)
1352 1353
1353 1354 if filematcher is None:
1354 1355 filematcher = matchmod.alwaysmatcher(repo.root, '')
1355 1356
1356 1357 if version == '01' and not filematcher.always():
1357 1358 raise error.ProgrammingError('version 01 changegroups do not support '
1358 1359 'sparse file matchers')
1359 1360
1360 1361 if ellipses and version in (b'01', b'02'):
1361 1362 raise error.Abort(
1362 1363 _('ellipsis nodes require at least cg3 on client and server, '
1363 1364 'but negotiated version %s') % version)
1364 1365
1365 1366 # Requested files could include files not in the local store. So
1366 1367 # filter those out.
1367 1368 filematcher = matchmod.intersectmatchers(repo.narrowmatch(),
1368 1369 filematcher)
1369 1370
1370 1371 fn = _packermap[version][0]
1371 1372 return fn(repo, filematcher, bundlecaps, ellipses=ellipses,
1372 1373 shallow=shallow, ellipsisroots=ellipsisroots,
1373 1374 fullnodes=fullnodes)
1374 1375
1375 1376 def getunbundler(version, fh, alg, extras=None):
1376 1377 return _packermap[version][1](fh, alg, extras=extras)
1377 1378
1378 1379 def _changegroupinfo(repo, nodes, source):
1379 1380 if repo.ui.verbose or source == 'bundle':
1380 1381 repo.ui.status(_("%d changesets found\n") % len(nodes))
1381 1382 if repo.ui.debugflag:
1382 1383 repo.ui.debug("list of changesets:\n")
1383 1384 for node in nodes:
1384 1385 repo.ui.debug("%s\n" % hex(node))
1385 1386
1386 1387 def makechangegroup(repo, outgoing, version, source, fastpath=False,
1387 1388 bundlecaps=None):
1388 1389 cgstream = makestream(repo, outgoing, version, source,
1389 1390 fastpath=fastpath, bundlecaps=bundlecaps)
1390 1391 return getunbundler(version, util.chunkbuffer(cgstream), None,
1391 1392 {'clcount': len(outgoing.missing) })
1392 1393
1393 1394 def makestream(repo, outgoing, version, source, fastpath=False,
1394 1395 bundlecaps=None, filematcher=None):
1395 1396 bundler = getbundler(version, repo, bundlecaps=bundlecaps,
1396 1397 filematcher=filematcher)
1397 1398
1398 1399 repo = repo.unfiltered()
1399 1400 commonrevs = outgoing.common
1400 1401 csets = outgoing.missing
1401 1402 heads = outgoing.missingheads
1402 1403 # We go through the fast path if we get told to, or if all (unfiltered
1403 1404 # heads have been requested (since we then know there all linkrevs will
1404 1405 # be pulled by the client).
1405 1406 heads.sort()
1406 1407 fastpathlinkrev = fastpath or (
1407 1408 repo.filtername is None and heads == sorted(repo.heads()))
1408 1409
1409 1410 repo.hook('preoutgoing', throw=True, source=source)
1410 1411 _changegroupinfo(repo, csets, source)
1411 1412 return bundler.generate(commonrevs, csets, fastpathlinkrev, source)
1412 1413
1413 1414 def _addchangegroupfiles(repo, source, revmap, trp, expectedfiles, needfiles):
1414 1415 revisions = 0
1415 1416 files = 0
1416 1417 progress = repo.ui.makeprogress(_('files'), unit=_('files'),
1417 1418 total=expectedfiles)
1418 1419 for chunkdata in iter(source.filelogheader, {}):
1419 1420 files += 1
1420 1421 f = chunkdata["filename"]
1421 1422 repo.ui.debug("adding %s revisions\n" % f)
1422 1423 progress.increment()
1423 1424 fl = repo.file(f)
1424 1425 o = len(fl)
1425 1426 try:
1426 1427 deltas = source.deltaiter()
1427 1428 if not fl.addgroup(deltas, revmap, trp):
1428 1429 raise error.Abort(_("received file revlog group is empty"))
1429 1430 except error.CensoredBaseError as e:
1430 1431 raise error.Abort(_("received delta base is censored: %s") % e)
1431 1432 revisions += len(fl) - o
1432 1433 if f in needfiles:
1433 1434 needs = needfiles[f]
1434 1435 for new in pycompat.xrange(o, len(fl)):
1435 1436 n = fl.node(new)
1436 1437 if n in needs:
1437 1438 needs.remove(n)
1438 1439 else:
1439 1440 raise error.Abort(
1440 1441 _("received spurious file revlog entry"))
1441 1442 if not needs:
1442 1443 del needfiles[f]
1443 1444 progress.complete()
1444 1445
1445 1446 for f, needs in needfiles.iteritems():
1446 1447 fl = repo.file(f)
1447 1448 for n in needs:
1448 1449 try:
1449 1450 fl.rev(n)
1450 1451 except error.LookupError:
1451 1452 raise error.Abort(
1452 1453 _('missing file data for %s:%s - run hg verify') %
1453 1454 (f, hex(n)))
1454 1455
1455 1456 return revisions, files
General Comments 0
You need to be logged in to leave comments. Login now