##// END OF EJS Templates
changegroup: refactor delta parent code...
Gregory Szorc -
r39053:ef3d3a2f default
parent child Browse files
Show More
@@ -1,1487 +1,1461 b''
1 1 # changegroup.py - Mercurial changegroup manipulation functions
2 2 #
3 3 # Copyright 2006 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import os
11 11 import struct
12 12 import weakref
13 13
14 14 from .i18n import _
15 15 from .node import (
16 16 hex,
17 17 nullid,
18 18 nullrev,
19 19 short,
20 20 )
21 21
22 22 from .thirdparty import (
23 23 attr,
24 24 )
25 25
26 26 from . import (
27 27 dagutil,
28 28 error,
29 29 match as matchmod,
30 30 mdiff,
31 31 phases,
32 32 pycompat,
33 33 repository,
34 34 revlog,
35 35 util,
36 36 )
37 37
38 38 from .utils import (
39 39 stringutil,
40 40 )
41 41
42 42 _CHANGEGROUPV1_DELTA_HEADER = struct.Struct("20s20s20s20s")
43 43 _CHANGEGROUPV2_DELTA_HEADER = struct.Struct("20s20s20s20s20s")
44 44 _CHANGEGROUPV3_DELTA_HEADER = struct.Struct(">20s20s20s20s20sH")
45 45
46 46 LFS_REQUIREMENT = 'lfs'
47 47
48 48 readexactly = util.readexactly
49 49
50 50 def getchunk(stream):
51 51 """return the next chunk from stream as a string"""
52 52 d = readexactly(stream, 4)
53 53 l = struct.unpack(">l", d)[0]
54 54 if l <= 4:
55 55 if l:
56 56 raise error.Abort(_("invalid chunk length %d") % l)
57 57 return ""
58 58 return readexactly(stream, l - 4)
59 59
60 60 def chunkheader(length):
61 61 """return a changegroup chunk header (string)"""
62 62 return struct.pack(">l", length + 4)
63 63
64 64 def closechunk():
65 65 """return a changegroup chunk header (string) for a zero-length chunk"""
66 66 return struct.pack(">l", 0)
67 67
68 68 def _fileheader(path):
69 69 """Obtain a changegroup chunk header for a named path."""
70 70 return chunkheader(len(path)) + path
71 71
72 72 def writechunks(ui, chunks, filename, vfs=None):
73 73 """Write chunks to a file and return its filename.
74 74
75 75 The stream is assumed to be a bundle file.
76 76 Existing files will not be overwritten.
77 77 If no filename is specified, a temporary file is created.
78 78 """
79 79 fh = None
80 80 cleanup = None
81 81 try:
82 82 if filename:
83 83 if vfs:
84 84 fh = vfs.open(filename, "wb")
85 85 else:
86 86 # Increase default buffer size because default is usually
87 87 # small (4k is common on Linux).
88 88 fh = open(filename, "wb", 131072)
89 89 else:
90 90 fd, filename = pycompat.mkstemp(prefix="hg-bundle-", suffix=".hg")
91 91 fh = os.fdopen(fd, r"wb")
92 92 cleanup = filename
93 93 for c in chunks:
94 94 fh.write(c)
95 95 cleanup = None
96 96 return filename
97 97 finally:
98 98 if fh is not None:
99 99 fh.close()
100 100 if cleanup is not None:
101 101 if filename and vfs:
102 102 vfs.unlink(cleanup)
103 103 else:
104 104 os.unlink(cleanup)
105 105
106 106 class cg1unpacker(object):
107 107 """Unpacker for cg1 changegroup streams.
108 108
109 109 A changegroup unpacker handles the framing of the revision data in
110 110 the wire format. Most consumers will want to use the apply()
111 111 method to add the changes from the changegroup to a repository.
112 112
113 113 If you're forwarding a changegroup unmodified to another consumer,
114 114 use getchunks(), which returns an iterator of changegroup
115 115 chunks. This is mostly useful for cases where you need to know the
116 116 data stream has ended by observing the end of the changegroup.
117 117
118 118 deltachunk() is useful only if you're applying delta data. Most
119 119 consumers should prefer apply() instead.
120 120
121 121 A few other public methods exist. Those are used only for
122 122 bundlerepo and some debug commands - their use is discouraged.
123 123 """
124 124 deltaheader = _CHANGEGROUPV1_DELTA_HEADER
125 125 deltaheadersize = deltaheader.size
126 126 version = '01'
127 127 _grouplistcount = 1 # One list of files after the manifests
128 128
129 129 def __init__(self, fh, alg, extras=None):
130 130 if alg is None:
131 131 alg = 'UN'
132 132 if alg not in util.compengines.supportedbundletypes:
133 133 raise error.Abort(_('unknown stream compression type: %s')
134 134 % alg)
135 135 if alg == 'BZ':
136 136 alg = '_truncatedBZ'
137 137
138 138 compengine = util.compengines.forbundletype(alg)
139 139 self._stream = compengine.decompressorreader(fh)
140 140 self._type = alg
141 141 self.extras = extras or {}
142 142 self.callback = None
143 143
144 144 # These methods (compressed, read, seek, tell) all appear to only
145 145 # be used by bundlerepo, but it's a little hard to tell.
146 146 def compressed(self):
147 147 return self._type is not None and self._type != 'UN'
148 148 def read(self, l):
149 149 return self._stream.read(l)
150 150 def seek(self, pos):
151 151 return self._stream.seek(pos)
152 152 def tell(self):
153 153 return self._stream.tell()
154 154 def close(self):
155 155 return self._stream.close()
156 156
157 157 def _chunklength(self):
158 158 d = readexactly(self._stream, 4)
159 159 l = struct.unpack(">l", d)[0]
160 160 if l <= 4:
161 161 if l:
162 162 raise error.Abort(_("invalid chunk length %d") % l)
163 163 return 0
164 164 if self.callback:
165 165 self.callback()
166 166 return l - 4
167 167
168 168 def changelogheader(self):
169 169 """v10 does not have a changelog header chunk"""
170 170 return {}
171 171
172 172 def manifestheader(self):
173 173 """v10 does not have a manifest header chunk"""
174 174 return {}
175 175
176 176 def filelogheader(self):
177 177 """return the header of the filelogs chunk, v10 only has the filename"""
178 178 l = self._chunklength()
179 179 if not l:
180 180 return {}
181 181 fname = readexactly(self._stream, l)
182 182 return {'filename': fname}
183 183
184 184 def _deltaheader(self, headertuple, prevnode):
185 185 node, p1, p2, cs = headertuple
186 186 if prevnode is None:
187 187 deltabase = p1
188 188 else:
189 189 deltabase = prevnode
190 190 flags = 0
191 191 return node, p1, p2, deltabase, cs, flags
192 192
193 193 def deltachunk(self, prevnode):
194 194 l = self._chunklength()
195 195 if not l:
196 196 return {}
197 197 headerdata = readexactly(self._stream, self.deltaheadersize)
198 198 header = self.deltaheader.unpack(headerdata)
199 199 delta = readexactly(self._stream, l - self.deltaheadersize)
200 200 node, p1, p2, deltabase, cs, flags = self._deltaheader(header, prevnode)
201 201 return (node, p1, p2, cs, deltabase, delta, flags)
202 202
203 203 def getchunks(self):
204 204 """returns all the chunks contains in the bundle
205 205
206 206 Used when you need to forward the binary stream to a file or another
207 207 network API. To do so, it parses the changegroup data, otherwise it will
208 208 block in case of sshrepo because it doesn't know the end of the stream.
209 209 """
210 210 # For changegroup 1 and 2, we expect 3 parts: changelog, manifestlog,
211 211 # and a list of filelogs. For changegroup 3, we expect 4 parts:
212 212 # changelog, manifestlog, a list of tree manifestlogs, and a list of
213 213 # filelogs.
214 214 #
215 215 # Changelog and manifestlog parts are terminated with empty chunks. The
216 216 # tree and file parts are a list of entry sections. Each entry section
217 217 # is a series of chunks terminating in an empty chunk. The list of these
218 218 # entry sections is terminated in yet another empty chunk, so we know
219 219 # we've reached the end of the tree/file list when we reach an empty
220 220 # chunk that was preceded by no non-empty chunks.
221 221
222 222 parts = 0
223 223 while parts < 2 + self._grouplistcount:
224 224 noentries = True
225 225 while True:
226 226 chunk = getchunk(self)
227 227 if not chunk:
228 228 # The first two empty chunks represent the end of the
229 229 # changelog and the manifestlog portions. The remaining
230 230 # empty chunks represent either A) the end of individual
231 231 # tree or file entries in the file list, or B) the end of
232 232 # the entire list. It's the end of the entire list if there
233 233 # were no entries (i.e. noentries is True).
234 234 if parts < 2:
235 235 parts += 1
236 236 elif noentries:
237 237 parts += 1
238 238 break
239 239 noentries = False
240 240 yield chunkheader(len(chunk))
241 241 pos = 0
242 242 while pos < len(chunk):
243 243 next = pos + 2**20
244 244 yield chunk[pos:next]
245 245 pos = next
246 246 yield closechunk()
247 247
248 248 def _unpackmanifests(self, repo, revmap, trp, prog):
249 249 self.callback = prog.increment
250 250 # no need to check for empty manifest group here:
251 251 # if the result of the merge of 1 and 2 is the same in 3 and 4,
252 252 # no new manifest will be created and the manifest group will
253 253 # be empty during the pull
254 254 self.manifestheader()
255 255 deltas = self.deltaiter()
256 256 repo.manifestlog.addgroup(deltas, revmap, trp)
257 257 prog.complete()
258 258 self.callback = None
259 259
260 260 def apply(self, repo, tr, srctype, url, targetphase=phases.draft,
261 261 expectedtotal=None):
262 262 """Add the changegroup returned by source.read() to this repo.
263 263 srctype is a string like 'push', 'pull', or 'unbundle'. url is
264 264 the URL of the repo where this changegroup is coming from.
265 265
266 266 Return an integer summarizing the change to this repo:
267 267 - nothing changed or no source: 0
268 268 - more heads than before: 1+added heads (2..n)
269 269 - fewer heads than before: -1-removed heads (-2..-n)
270 270 - number of heads stays the same: 1
271 271 """
272 272 repo = repo.unfiltered()
273 273 def csmap(x):
274 274 repo.ui.debug("add changeset %s\n" % short(x))
275 275 return len(cl)
276 276
277 277 def revmap(x):
278 278 return cl.rev(x)
279 279
280 280 changesets = files = revisions = 0
281 281
282 282 try:
283 283 # The transaction may already carry source information. In this
284 284 # case we use the top level data. We overwrite the argument
285 285 # because we need to use the top level value (if they exist)
286 286 # in this function.
287 287 srctype = tr.hookargs.setdefault('source', srctype)
288 288 url = tr.hookargs.setdefault('url', url)
289 289 repo.hook('prechangegroup',
290 290 throw=True, **pycompat.strkwargs(tr.hookargs))
291 291
292 292 # write changelog data to temp files so concurrent readers
293 293 # will not see an inconsistent view
294 294 cl = repo.changelog
295 295 cl.delayupdate(tr)
296 296 oldheads = set(cl.heads())
297 297
298 298 trp = weakref.proxy(tr)
299 299 # pull off the changeset group
300 300 repo.ui.status(_("adding changesets\n"))
301 301 clstart = len(cl)
302 302 progress = repo.ui.makeprogress(_('changesets'), unit=_('chunks'),
303 303 total=expectedtotal)
304 304 self.callback = progress.increment
305 305
306 306 efiles = set()
307 307 def onchangelog(cl, node):
308 308 efiles.update(cl.readfiles(node))
309 309
310 310 self.changelogheader()
311 311 deltas = self.deltaiter()
312 312 cgnodes = cl.addgroup(deltas, csmap, trp, addrevisioncb=onchangelog)
313 313 efiles = len(efiles)
314 314
315 315 if not cgnodes:
316 316 repo.ui.develwarn('applied empty changegroup',
317 317 config='warn-empty-changegroup')
318 318 clend = len(cl)
319 319 changesets = clend - clstart
320 320 progress.complete()
321 321 self.callback = None
322 322
323 323 # pull off the manifest group
324 324 repo.ui.status(_("adding manifests\n"))
325 325 # We know that we'll never have more manifests than we had
326 326 # changesets.
327 327 progress = repo.ui.makeprogress(_('manifests'), unit=_('chunks'),
328 328 total=changesets)
329 329 self._unpackmanifests(repo, revmap, trp, progress)
330 330
331 331 needfiles = {}
332 332 if repo.ui.configbool('server', 'validate'):
333 333 cl = repo.changelog
334 334 ml = repo.manifestlog
335 335 # validate incoming csets have their manifests
336 336 for cset in pycompat.xrange(clstart, clend):
337 337 mfnode = cl.changelogrevision(cset).manifest
338 338 mfest = ml[mfnode].readdelta()
339 339 # store file cgnodes we must see
340 340 for f, n in mfest.iteritems():
341 341 needfiles.setdefault(f, set()).add(n)
342 342
343 343 # process the files
344 344 repo.ui.status(_("adding file changes\n"))
345 345 newrevs, newfiles = _addchangegroupfiles(
346 346 repo, self, revmap, trp, efiles, needfiles)
347 347 revisions += newrevs
348 348 files += newfiles
349 349
350 350 deltaheads = 0
351 351 if oldheads:
352 352 heads = cl.heads()
353 353 deltaheads = len(heads) - len(oldheads)
354 354 for h in heads:
355 355 if h not in oldheads and repo[h].closesbranch():
356 356 deltaheads -= 1
357 357 htext = ""
358 358 if deltaheads:
359 359 htext = _(" (%+d heads)") % deltaheads
360 360
361 361 repo.ui.status(_("added %d changesets"
362 362 " with %d changes to %d files%s\n")
363 363 % (changesets, revisions, files, htext))
364 364 repo.invalidatevolatilesets()
365 365
366 366 if changesets > 0:
367 367 if 'node' not in tr.hookargs:
368 368 tr.hookargs['node'] = hex(cl.node(clstart))
369 369 tr.hookargs['node_last'] = hex(cl.node(clend - 1))
370 370 hookargs = dict(tr.hookargs)
371 371 else:
372 372 hookargs = dict(tr.hookargs)
373 373 hookargs['node'] = hex(cl.node(clstart))
374 374 hookargs['node_last'] = hex(cl.node(clend - 1))
375 375 repo.hook('pretxnchangegroup',
376 376 throw=True, **pycompat.strkwargs(hookargs))
377 377
378 378 added = [cl.node(r) for r in pycompat.xrange(clstart, clend)]
379 379 phaseall = None
380 380 if srctype in ('push', 'serve'):
381 381 # Old servers can not push the boundary themselves.
382 382 # New servers won't push the boundary if changeset already
383 383 # exists locally as secret
384 384 #
385 385 # We should not use added here but the list of all changes in
386 386 # the bundle
387 387 if repo.publishing():
388 388 targetphase = phaseall = phases.public
389 389 else:
390 390 # closer target phase computation
391 391
392 392 # Those changesets have been pushed from the
393 393 # outside, their phases are going to be pushed
394 394 # alongside. Therefore `targetphase` is
395 395 # ignored.
396 396 targetphase = phaseall = phases.draft
397 397 if added:
398 398 phases.registernew(repo, tr, targetphase, added)
399 399 if phaseall is not None:
400 400 phases.advanceboundary(repo, tr, phaseall, cgnodes)
401 401
402 402 if changesets > 0:
403 403
404 404 def runhooks():
405 405 # These hooks run when the lock releases, not when the
406 406 # transaction closes. So it's possible for the changelog
407 407 # to have changed since we last saw it.
408 408 if clstart >= len(repo):
409 409 return
410 410
411 411 repo.hook("changegroup", **pycompat.strkwargs(hookargs))
412 412
413 413 for n in added:
414 414 args = hookargs.copy()
415 415 args['node'] = hex(n)
416 416 del args['node_last']
417 417 repo.hook("incoming", **pycompat.strkwargs(args))
418 418
419 419 newheads = [h for h in repo.heads()
420 420 if h not in oldheads]
421 421 repo.ui.log("incoming",
422 422 "%d incoming changes - new heads: %s\n",
423 423 len(added),
424 424 ', '.join([hex(c[:6]) for c in newheads]))
425 425
426 426 tr.addpostclose('changegroup-runhooks-%020i' % clstart,
427 427 lambda tr: repo._afterlock(runhooks))
428 428 finally:
429 429 repo.ui.flush()
430 430 # never return 0 here:
431 431 if deltaheads < 0:
432 432 ret = deltaheads - 1
433 433 else:
434 434 ret = deltaheads + 1
435 435 return ret
436 436
437 437 def deltaiter(self):
438 438 """
439 439 returns an iterator of the deltas in this changegroup
440 440
441 441 Useful for passing to the underlying storage system to be stored.
442 442 """
443 443 chain = None
444 444 for chunkdata in iter(lambda: self.deltachunk(chain), {}):
445 445 # Chunkdata: (node, p1, p2, cs, deltabase, delta, flags)
446 446 yield chunkdata
447 447 chain = chunkdata[0]
448 448
449 449 class cg2unpacker(cg1unpacker):
450 450 """Unpacker for cg2 streams.
451 451
452 452 cg2 streams add support for generaldelta, so the delta header
453 453 format is slightly different. All other features about the data
454 454 remain the same.
455 455 """
456 456 deltaheader = _CHANGEGROUPV2_DELTA_HEADER
457 457 deltaheadersize = deltaheader.size
458 458 version = '02'
459 459
460 460 def _deltaheader(self, headertuple, prevnode):
461 461 node, p1, p2, deltabase, cs = headertuple
462 462 flags = 0
463 463 return node, p1, p2, deltabase, cs, flags
464 464
465 465 class cg3unpacker(cg2unpacker):
466 466 """Unpacker for cg3 streams.
467 467
468 468 cg3 streams add support for exchanging treemanifests and revlog
469 469 flags. It adds the revlog flags to the delta header and an empty chunk
470 470 separating manifests and files.
471 471 """
472 472 deltaheader = _CHANGEGROUPV3_DELTA_HEADER
473 473 deltaheadersize = deltaheader.size
474 474 version = '03'
475 475 _grouplistcount = 2 # One list of manifests and one list of files
476 476
477 477 def _deltaheader(self, headertuple, prevnode):
478 478 node, p1, p2, deltabase, cs, flags = headertuple
479 479 return node, p1, p2, deltabase, cs, flags
480 480
481 481 def _unpackmanifests(self, repo, revmap, trp, prog):
482 482 super(cg3unpacker, self)._unpackmanifests(repo, revmap, trp, prog)
483 483 for chunkdata in iter(self.filelogheader, {}):
484 484 # If we get here, there are directory manifests in the changegroup
485 485 d = chunkdata["filename"]
486 486 repo.ui.debug("adding %s revisions\n" % d)
487 487 dirlog = repo.manifestlog._revlog.dirlog(d)
488 488 deltas = self.deltaiter()
489 489 if not dirlog.addgroup(deltas, revmap, trp):
490 490 raise error.Abort(_("received dir revlog group is empty"))
491 491
492 492 class headerlessfixup(object):
493 493 def __init__(self, fh, h):
494 494 self._h = h
495 495 self._fh = fh
496 496 def read(self, n):
497 497 if self._h:
498 498 d, self._h = self._h[:n], self._h[n:]
499 499 if len(d) < n:
500 500 d += readexactly(self._fh, n - len(d))
501 501 return d
502 502 return readexactly(self._fh, n)
503 503
504 504 @attr.s(slots=True, frozen=True)
505 505 class revisiondelta(object):
506 506 """Describes a delta entry in a changegroup.
507 507
508 508 Captured data is sufficient to serialize the delta into multiple
509 509 formats.
510 510
511 511 ``revision`` and ``delta`` are mutually exclusive.
512 512 """
513 513 # 20 byte node of this revision.
514 514 node = attr.ib()
515 515 # 20 byte nodes of parent revisions.
516 516 p1node = attr.ib()
517 517 p2node = attr.ib()
518 518 # 20 byte node of node this delta is against.
519 519 basenode = attr.ib()
520 520 # 20 byte node of changeset revision this delta is associated with.
521 521 linknode = attr.ib()
522 522 # 2 bytes of flags to apply to revision data.
523 523 flags = attr.ib()
524 524 # Size of base revision this delta is against. May be None if
525 525 # basenode is nullid.
526 526 baserevisionsize = attr.ib()
527 527 # Raw fulltext revision data.
528 528 revision = attr.ib()
529 529 # Delta between the basenode and node.
530 530 delta = attr.ib()
531 531
532 532 def _revisiondeltatochunks(delta, headerfn):
533 533 """Serialize a revisiondelta to changegroup chunks."""
534 534
535 535 # The captured revision delta may be encoded as a delta against
536 536 # a base revision or as a full revision. The changegroup format
537 537 # requires that everything on the wire be deltas. So for full
538 538 # revisions, we need to invent a header that says to rewrite
539 539 # data.
540 540
541 541 if delta.delta is not None:
542 542 prefix, data = b'', delta.delta
543 543 elif delta.basenode == nullid:
544 544 data = delta.revision
545 545 prefix = mdiff.trivialdiffheader(len(data))
546 546 else:
547 547 data = delta.revision
548 548 prefix = mdiff.replacediffheader(delta.baserevisionsize,
549 549 len(data))
550 550
551 551 meta = headerfn(delta)
552 552
553 553 yield chunkheader(len(meta) + len(prefix) + len(data))
554 554 yield meta
555 555 if prefix:
556 556 yield prefix
557 557 yield data
558 558
559 559 def _sortnodesnormal(store, nodes, reorder):
560 560 """Sort nodes for changegroup generation and turn into revnums."""
561 561 # for generaldelta revlogs, we linearize the revs; this will both be
562 562 # much quicker and generate a much smaller bundle
563 563 if (store._generaldelta and reorder is None) or reorder:
564 564 dag = dagutil.revlogdag(store)
565 565 return dag.linearize(set(store.rev(n) for n in nodes))
566 566 else:
567 567 return sorted([store.rev(n) for n in nodes])
568 568
569 569 def _sortnodesellipsis(store, nodes, cl, lookup):
570 570 """Sort nodes for changegroup generation and turn into revnums."""
571 571 # Ellipses serving mode.
572 572 #
573 573 # In a perfect world, we'd generate better ellipsis-ified graphs
574 574 # for non-changelog revlogs. In practice, we haven't started doing
575 575 # that yet, so the resulting DAGs for the manifestlog and filelogs
576 576 # are actually full of bogus parentage on all the ellipsis
577 577 # nodes. This has the side effect that, while the contents are
578 578 # correct, the individual DAGs might be completely out of whack in
579 579 # a case like 882681bc3166 and its ancestors (back about 10
580 580 # revisions or so) in the main hg repo.
581 581 #
582 582 # The one invariant we *know* holds is that the new (potentially
583 583 # bogus) DAG shape will be valid if we order the nodes in the
584 584 # order that they're introduced in dramatis personae by the
585 585 # changelog, so what we do is we sort the non-changelog histories
586 586 # by the order in which they are used by the changelog.
587 587 key = lambda n: cl.rev(lookup(n))
588 588 return [store.rev(n) for n in sorted(nodes, key=key)]
589 589
590 def _revisiondeltanormal(store, rev, prev, linknode, deltaparentfn):
590 def _revisiondeltanormal(store, rev, prev, linknode, forcedeltaparentprev):
591 591 """Construct a revision delta for non-ellipses changegroup generation."""
592 592 node = store.node(rev)
593 593 p1, p2 = store.parentrevs(rev)
594 base = deltaparentfn(store, rev, p1, p2, prev)
594
595 if forcedeltaparentprev:
596 base = prev
597 else:
598 dp = store.deltaparent(rev)
599
600 if dp == nullrev and store.storedeltachains:
601 # Avoid sending full revisions when delta parent is null. Pick prev
602 # in that case. It's tempting to pick p1 in this case, as p1 will
603 # be smaller in the common case. However, computing a delta against
604 # p1 may require resolving the raw text of p1, which could be
605 # expensive. The revlog caches should have prev cached, meaning
606 # less CPU for changegroup generation. There is likely room to add
607 # a flag and/or config option to control this behavior.
608 base = prev
609 elif dp == nullrev:
610 # revlog is configured to use full snapshot for a reason,
611 # stick to full snapshot.
612 base = nullrev
613 elif dp not in (p1, p2, prev):
614 # Pick prev when we can't be sure remote has the base revision.
615 base = prev
616 else:
617 base = dp
618
619 if base != nullrev and not store.candelta(base, rev):
620 base = nullrev
595 621
596 622 revision = None
597 623 delta = None
598 624 baserevisionsize = None
599 625
600 626 if store.iscensored(base) or store.iscensored(rev):
601 627 try:
602 628 revision = store.revision(node, raw=True)
603 629 except error.CensoredNodeError as e:
604 630 revision = e.tombstone
605 631
606 632 if base != nullrev:
607 633 baserevisionsize = store.rawsize(base)
608 634
609 635 elif base == nullrev:
610 636 revision = store.revision(node, raw=True)
611 637 else:
612 638 delta = store.revdiff(base, rev)
613 639
614 640 p1n, p2n = store.parents(node)
615 641
616 642 return revisiondelta(
617 643 node=node,
618 644 p1node=p1n,
619 645 p2node=p2n,
620 646 basenode=store.node(base),
621 647 linknode=linknode,
622 648 flags=store.flags(rev),
623 649 baserevisionsize=baserevisionsize,
624 650 revision=revision,
625 651 delta=delta,
626 652 )
627 653
628 654 def _revisiondeltanarrow(cl, store, ischangelog, rev, linkrev,
629 655 linknode, clrevtolocalrev, fullclnodes,
630 656 precomputedellipsis):
631 657 linkparents = precomputedellipsis[linkrev]
632 658 def local(clrev):
633 659 """Turn a changelog revnum into a local revnum.
634 660
635 661 The ellipsis dag is stored as revnums on the changelog,
636 662 but when we're producing ellipsis entries for
637 663 non-changelog revlogs, we need to turn those numbers into
638 664 something local. This does that for us, and during the
639 665 changelog sending phase will also expand the stored
640 666 mappings as needed.
641 667 """
642 668 if clrev == nullrev:
643 669 return nullrev
644 670
645 671 if ischangelog:
646 672 return clrev
647 673
648 674 # Walk the ellipsis-ized changelog breadth-first looking for a
649 675 # change that has been linked from the current revlog.
650 676 #
651 677 # For a flat manifest revlog only a single step should be necessary
652 678 # as all relevant changelog entries are relevant to the flat
653 679 # manifest.
654 680 #
655 681 # For a filelog or tree manifest dirlog however not every changelog
656 682 # entry will have been relevant, so we need to skip some changelog
657 683 # nodes even after ellipsis-izing.
658 684 walk = [clrev]
659 685 while walk:
660 686 p = walk[0]
661 687 walk = walk[1:]
662 688 if p in clrevtolocalrev:
663 689 return clrevtolocalrev[p]
664 690 elif p in fullclnodes:
665 691 walk.extend([pp for pp in cl.parentrevs(p)
666 692 if pp != nullrev])
667 693 elif p in precomputedellipsis:
668 694 walk.extend([pp for pp in precomputedellipsis[p]
669 695 if pp != nullrev])
670 696 else:
671 697 # In this case, we've got an ellipsis with parents
672 698 # outside the current bundle (likely an
673 699 # incremental pull). We "know" that we can use the
674 700 # value of this same revlog at whatever revision
675 701 # is pointed to by linknode. "Know" is in scare
676 702 # quotes because I haven't done enough examination
677 703 # of edge cases to convince myself this is really
678 704 # a fact - it works for all the (admittedly
679 705 # thorough) cases in our testsuite, but I would be
680 706 # somewhat unsurprised to find a case in the wild
681 707 # where this breaks down a bit. That said, I don't
682 708 # know if it would hurt anything.
683 709 for i in pycompat.xrange(rev, 0, -1):
684 710 if store.linkrev(i) == clrev:
685 711 return i
686 712 # We failed to resolve a parent for this node, so
687 713 # we crash the changegroup construction.
688 714 raise error.Abort(
689 715 'unable to resolve parent while packing %r %r'
690 716 ' for changeset %r' % (store.indexfile, rev, clrev))
691 717
692 718 return nullrev
693 719
694 720 if not linkparents or (
695 721 store.parentrevs(rev) == (nullrev, nullrev)):
696 722 p1, p2 = nullrev, nullrev
697 723 elif len(linkparents) == 1:
698 724 p1, = sorted(local(p) for p in linkparents)
699 725 p2 = nullrev
700 726 else:
701 727 p1, p2 = sorted(local(p) for p in linkparents)
702 728
703 729 n = store.node(rev)
704 730 p1n, p2n = store.node(p1), store.node(p2)
705 731 flags = store.flags(rev)
706 732 flags |= revlog.REVIDX_ELLIPSIS
707 733
708 734 # TODO: try and actually send deltas for ellipsis data blocks
709 735
710 736 return revisiondelta(
711 737 node=n,
712 738 p1node=p1n,
713 739 p2node=p2n,
714 740 basenode=nullid,
715 741 linknode=linknode,
716 742 flags=flags,
717 743 baserevisionsize=None,
718 744 revision=store.revision(n),
719 745 delta=None,
720 746 )
721 747
722 def deltagroup(repo, revs, store, ischangelog, lookup, deltaparentfn,
748 def deltagroup(repo, revs, store, ischangelog, lookup, forcedeltaparentprev,
723 749 units=None,
724 750 ellipses=False, clrevtolocalrev=None, fullclnodes=None,
725 751 precomputedellipsis=None):
726 752 """Calculate deltas for a set of revisions.
727 753
728 754 Is a generator of ``revisiondelta`` instances.
729 755
730 756 If units is not None, progress detail will be generated, units specifies
731 757 the type of revlog that is touched (changelog, manifest, etc.).
732 758 """
733 759 if not revs:
734 760 return
735 761
736 762 cl = repo.changelog
737 763
738 764 # Add the parent of the first rev.
739 765 revs.insert(0, store.parentrevs(revs[0])[0])
740 766
741 767 # build deltas
742 768 progress = None
743 769 if units is not None:
744 770 progress = repo.ui.makeprogress(_('bundling'), unit=units,
745 771 total=(len(revs) - 1))
746 772
747 773 for i in pycompat.xrange(len(revs) - 1):
748 774 if progress:
749 775 progress.update(i + 1)
750 776
751 777 prev = revs[i]
752 778 curr = revs[i + 1]
753 779
754 780 linknode = lookup(store.node(curr))
755 781
756 782 if ellipses:
757 783 linkrev = cl.rev(linknode)
758 784 clrevtolocalrev[linkrev] = curr
759 785
760 786 # This is a node to send in full, because the changeset it
761 787 # corresponds to was a full changeset.
762 788 if linknode in fullclnodes:
763 789 delta = _revisiondeltanormal(store, curr, prev, linknode,
764 deltaparentfn)
790 forcedeltaparentprev)
765 791 elif linkrev not in precomputedellipsis:
766 792 delta = None
767 793 else:
768 794 delta = _revisiondeltanarrow(
769 795 cl, store, ischangelog, curr, linkrev, linknode,
770 796 clrevtolocalrev, fullclnodes,
771 797 precomputedellipsis)
772 798 else:
773 799 delta = _revisiondeltanormal(store, curr, prev, linknode,
774 deltaparentfn)
800 forcedeltaparentprev)
775 801
776 802 if delta:
777 803 yield delta
778 804
779 805 if progress:
780 806 progress.complete()
781 807
782 808 class cgpacker(object):
783 809 def __init__(self, repo, filematcher, version, allowreorder,
784 deltaparentfn, builddeltaheader, manifestsend,
810 builddeltaheader, manifestsend,
811 forcedeltaparentprev=False,
785 812 bundlecaps=None, ellipses=False,
786 813 shallow=False, ellipsisroots=None, fullnodes=None):
787 814 """Given a source repo, construct a bundler.
788 815
789 816 filematcher is a matcher that matches on files to include in the
790 817 changegroup. Used to facilitate sparse changegroups.
791 818
792 819 allowreorder controls whether reordering of revisions is allowed.
793 820 This value is used when ``bundle.reorder`` is ``auto`` or isn't
794 821 set.
795 822
796 deltaparentfn is a callable that resolves the delta parent for
797 a specific revision.
823 forcedeltaparentprev indicates whether delta parents must be against
824 the previous revision in a delta group. This should only be used for
825 compatibility with changegroup version 1.
798 826
799 827 builddeltaheader is a callable that constructs the header for a group
800 828 delta.
801 829
802 830 manifestsend is a chunk to send after manifests have been fully emitted.
803 831
804 832 ellipses indicates whether ellipsis serving mode is enabled.
805 833
806 834 bundlecaps is optional and can be used to specify the set of
807 835 capabilities which can be used to build the bundle. While bundlecaps is
808 836 unused in core Mercurial, extensions rely on this feature to communicate
809 837 capabilities to customize the changegroup packer.
810 838
811 839 shallow indicates whether shallow data might be sent. The packer may
812 840 need to pack file contents not introduced by the changes being packed.
813 841
814 842 fullnodes is the set of changelog nodes which should not be ellipsis
815 843 nodes. We store this rather than the set of nodes that should be
816 844 ellipsis because for very large histories we expect this to be
817 845 significantly smaller.
818 846 """
819 847 assert filematcher
820 848 self._filematcher = filematcher
821 849
822 850 self.version = version
823 self._deltaparentfn = deltaparentfn
851 self._forcedeltaparentprev = forcedeltaparentprev
824 852 self._builddeltaheader = builddeltaheader
825 853 self._manifestsend = manifestsend
826 854 self._ellipses = ellipses
827 855
828 856 # Set of capabilities we can use to build the bundle.
829 857 if bundlecaps is None:
830 858 bundlecaps = set()
831 859 self._bundlecaps = bundlecaps
832 860 self._isshallow = shallow
833 861 self._fullclnodes = fullnodes
834 862
835 863 # Maps ellipsis revs to their roots at the changelog level.
836 864 self._precomputedellipsis = ellipsisroots
837 865
838 866 # experimental config: bundle.reorder
839 867 reorder = repo.ui.config('bundle', 'reorder')
840 868 if reorder == 'auto':
841 869 self._reorder = allowreorder
842 870 else:
843 871 self._reorder = stringutil.parsebool(reorder)
844 872
845 873 self._repo = repo
846 874
847 875 if self._repo.ui.verbose and not self._repo.ui.debugflag:
848 876 self._verbosenote = self._repo.ui.note
849 877 else:
850 878 self._verbosenote = lambda s: None
851 879
852 880 def generate(self, commonrevs, clnodes, fastpathlinkrev, source):
853 881 """Yield a sequence of changegroup byte chunks."""
854 882
855 883 repo = self._repo
856 884 cl = repo.changelog
857 885
858 886 self._verbosenote(_('uncompressed size of bundle content:\n'))
859 887 size = 0
860 888
861 889 clstate, deltas = self._generatechangelog(cl, clnodes)
862 890 for delta in deltas:
863 891 for chunk in _revisiondeltatochunks(delta, self._builddeltaheader):
864 892 size += len(chunk)
865 893 yield chunk
866 894
867 895 close = closechunk()
868 896 size += len(close)
869 897 yield closechunk()
870 898
871 899 self._verbosenote(_('%8.i (changelog)\n') % size)
872 900
873 901 clrevorder = clstate['clrevorder']
874 902 mfs = clstate['mfs']
875 903 changedfiles = clstate['changedfiles']
876 904
877 905 # We need to make sure that the linkrev in the changegroup refers to
878 906 # the first changeset that introduced the manifest or file revision.
879 907 # The fastpath is usually safer than the slowpath, because the filelogs
880 908 # are walked in revlog order.
881 909 #
882 910 # When taking the slowpath with reorder=None and the manifest revlog
883 911 # uses generaldelta, the manifest may be walked in the "wrong" order.
884 912 # Without 'clrevorder', we would get an incorrect linkrev (see fix in
885 913 # cc0ff93d0c0c).
886 914 #
887 915 # When taking the fastpath, we are only vulnerable to reordering
888 916 # of the changelog itself. The changelog never uses generaldelta, so
889 917 # it is only reordered when reorder=True. To handle this case, we
890 918 # simply take the slowpath, which already has the 'clrevorder' logic.
891 919 # This was also fixed in cc0ff93d0c0c.
892 920 fastpathlinkrev = fastpathlinkrev and not self._reorder
893 921 # Treemanifests don't work correctly with fastpathlinkrev
894 922 # either, because we don't discover which directory nodes to
895 923 # send along with files. This could probably be fixed.
896 924 fastpathlinkrev = fastpathlinkrev and (
897 925 'treemanifest' not in repo.requirements)
898 926
899 927 fnodes = {} # needed file nodes
900 928
901 929 size = 0
902 930 it = self.generatemanifests(
903 931 commonrevs, clrevorder, fastpathlinkrev, mfs, fnodes, source,
904 932 clstate['clrevtomanifestrev'])
905 933
906 934 for dir, deltas in it:
907 935 if dir:
908 936 assert self.version == b'03'
909 937 chunk = _fileheader(dir)
910 938 size += len(chunk)
911 939 yield chunk
912 940
913 941 for delta in deltas:
914 942 chunks = _revisiondeltatochunks(delta, self._builddeltaheader)
915 943 for chunk in chunks:
916 944 size += len(chunk)
917 945 yield chunk
918 946
919 947 close = closechunk()
920 948 size += len(close)
921 949 yield close
922 950
923 951 self._verbosenote(_('%8.i (manifests)\n') % size)
924 952 yield self._manifestsend
925 953
926 954 mfdicts = None
927 955 if self._ellipses and self._isshallow:
928 956 mfdicts = [(self._repo.manifestlog[n].read(), lr)
929 957 for (n, lr) in mfs.iteritems()]
930 958
931 959 mfs.clear()
932 960 clrevs = set(cl.rev(x) for x in clnodes)
933 961
934 962 it = self.generatefiles(changedfiles, commonrevs,
935 963 source, mfdicts, fastpathlinkrev,
936 964 fnodes, clrevs)
937 965
938 966 for path, deltas in it:
939 967 h = _fileheader(path)
940 968 size = len(h)
941 969 yield h
942 970
943 971 for delta in deltas:
944 972 chunks = _revisiondeltatochunks(delta, self._builddeltaheader)
945 973 for chunk in chunks:
946 974 size += len(chunk)
947 975 yield chunk
948 976
949 977 close = closechunk()
950 978 size += len(close)
951 979 yield close
952 980
953 981 self._verbosenote(_('%8.i %s\n') % (size, path))
954 982
955 983 yield closechunk()
956 984
957 985 if clnodes:
958 986 repo.hook('outgoing', node=hex(clnodes[0]), source=source)
959 987
960 988 def _generatechangelog(self, cl, nodes):
961 989 """Generate data for changelog chunks.
962 990
963 991 Returns a 2-tuple of a dict containing state and an iterable of
964 992 byte chunks. The state will not be fully populated until the
965 993 chunk stream has been fully consumed.
966 994 """
967 995 clrevorder = {}
968 996 mfs = {} # needed manifests
969 997 mfl = self._repo.manifestlog
970 998 # TODO violates storage abstraction.
971 999 mfrevlog = mfl._revlog
972 1000 changedfiles = set()
973 1001 clrevtomanifestrev = {}
974 1002
975 1003 # Callback for the changelog, used to collect changed files and
976 1004 # manifest nodes.
977 1005 # Returns the linkrev node (identity in the changelog case).
978 1006 def lookupcl(x):
979 1007 c = cl.read(x)
980 1008 clrevorder[x] = len(clrevorder)
981 1009
982 1010 if self._ellipses:
983 1011 # Only update mfs if x is going to be sent. Otherwise we
984 1012 # end up with bogus linkrevs specified for manifests and
985 1013 # we skip some manifest nodes that we should otherwise
986 1014 # have sent.
987 1015 if (x in self._fullclnodes
988 1016 or cl.rev(x) in self._precomputedellipsis):
989 1017 n = c[0]
990 1018 # Record the first changeset introducing this manifest
991 1019 # version.
992 1020 mfs.setdefault(n, x)
993 1021 # Set this narrow-specific dict so we have the lowest
994 1022 # manifest revnum to look up for this cl revnum. (Part of
995 1023 # mapping changelog ellipsis parents to manifest ellipsis
996 1024 # parents)
997 1025 clrevtomanifestrev.setdefault(cl.rev(x), mfrevlog.rev(n))
998 1026 # We can't trust the changed files list in the changeset if the
999 1027 # client requested a shallow clone.
1000 1028 if self._isshallow:
1001 1029 changedfiles.update(mfl[c[0]].read().keys())
1002 1030 else:
1003 1031 changedfiles.update(c[3])
1004 1032 else:
1005 1033
1006 1034 n = c[0]
1007 1035 # record the first changeset introducing this manifest version
1008 1036 mfs.setdefault(n, x)
1009 1037 # Record a complete list of potentially-changed files in
1010 1038 # this manifest.
1011 1039 changedfiles.update(c[3])
1012 1040
1013 1041 return x
1014 1042
1015 1043 # Changelog doesn't benefit from reordering revisions. So send out
1016 1044 # revisions in store order.
1017 1045 revs = sorted(cl.rev(n) for n in nodes)
1018 1046
1019 1047 state = {
1020 1048 'clrevorder': clrevorder,
1021 1049 'mfs': mfs,
1022 1050 'changedfiles': changedfiles,
1023 1051 'clrevtomanifestrev': clrevtomanifestrev,
1024 1052 }
1025 1053
1026 1054 gen = deltagroup(
1027 1055 self._repo, revs, cl, True, lookupcl,
1028 self._deltaparentfn,
1056 self._forcedeltaparentprev,
1029 1057 ellipses=self._ellipses,
1030 1058 units=_('changesets'),
1031 1059 clrevtolocalrev={},
1032 1060 fullclnodes=self._fullclnodes,
1033 1061 precomputedellipsis=self._precomputedellipsis)
1034 1062
1035 1063 return state, gen
1036 1064
1037 1065 def generatemanifests(self, commonrevs, clrevorder, fastpathlinkrev, mfs,
1038 1066 fnodes, source, clrevtolocalrev):
1039 1067 """Returns an iterator of changegroup chunks containing manifests.
1040 1068
1041 1069 `source` is unused here, but is used by extensions like remotefilelog to
1042 1070 change what is sent based in pulls vs pushes, etc.
1043 1071 """
1044 1072 repo = self._repo
1045 1073 cl = repo.changelog
1046 1074 mfl = repo.manifestlog
1047 1075 dirlog = mfl._revlog.dirlog
1048 1076 tmfnodes = {'': mfs}
1049 1077
1050 1078 # Callback for the manifest, used to collect linkrevs for filelog
1051 1079 # revisions.
1052 1080 # Returns the linkrev node (collected in lookupcl).
1053 1081 def makelookupmflinknode(dir, nodes):
1054 1082 if fastpathlinkrev:
1055 1083 assert not dir
1056 1084 return mfs.__getitem__
1057 1085
1058 1086 def lookupmflinknode(x):
1059 1087 """Callback for looking up the linknode for manifests.
1060 1088
1061 1089 Returns the linkrev node for the specified manifest.
1062 1090
1063 1091 SIDE EFFECT:
1064 1092
1065 1093 1) fclnodes gets populated with the list of relevant
1066 1094 file nodes if we're not using fastpathlinkrev
1067 1095 2) When treemanifests are in use, collects treemanifest nodes
1068 1096 to send
1069 1097
1070 1098 Note that this means manifests must be completely sent to
1071 1099 the client before you can trust the list of files and
1072 1100 treemanifests to send.
1073 1101 """
1074 1102 clnode = nodes[x]
1075 1103 mdata = mfl.get(dir, x).readfast(shallow=True)
1076 1104 for p, n, fl in mdata.iterentries():
1077 1105 if fl == 't': # subdirectory manifest
1078 1106 subdir = dir + p + '/'
1079 1107 tmfclnodes = tmfnodes.setdefault(subdir, {})
1080 1108 tmfclnode = tmfclnodes.setdefault(n, clnode)
1081 1109 if clrevorder[clnode] < clrevorder[tmfclnode]:
1082 1110 tmfclnodes[n] = clnode
1083 1111 else:
1084 1112 f = dir + p
1085 1113 fclnodes = fnodes.setdefault(f, {})
1086 1114 fclnode = fclnodes.setdefault(n, clnode)
1087 1115 if clrevorder[clnode] < clrevorder[fclnode]:
1088 1116 fclnodes[n] = clnode
1089 1117 return clnode
1090 1118 return lookupmflinknode
1091 1119
1092 1120 while tmfnodes:
1093 1121 dir, nodes = tmfnodes.popitem()
1094 1122 store = dirlog(dir)
1095 1123
1096 1124 if not self._filematcher.visitdir(store._dir[:-1] or '.'):
1097 1125 prunednodes = []
1098 1126 else:
1099 1127 frev, flr = store.rev, store.linkrev
1100 1128 prunednodes = [n for n in nodes
1101 1129 if flr(frev(n)) not in commonrevs]
1102 1130
1103 1131 if dir and not prunednodes:
1104 1132 continue
1105 1133
1106 1134 lookupfn = makelookupmflinknode(dir, nodes)
1107 1135
1108 1136 if self._ellipses:
1109 1137 revs = _sortnodesellipsis(store, prunednodes, cl,
1110 1138 lookupfn)
1111 1139 else:
1112 1140 revs = _sortnodesnormal(store, prunednodes,
1113 1141 self._reorder)
1114 1142
1115 1143 deltas = deltagroup(
1116 1144 self._repo, revs, store, False, lookupfn,
1117 self._deltaparentfn,
1145 self._forcedeltaparentprev,
1118 1146 ellipses=self._ellipses,
1119 1147 units=_('manifests'),
1120 1148 clrevtolocalrev=clrevtolocalrev,
1121 1149 fullclnodes=self._fullclnodes,
1122 1150 precomputedellipsis=self._precomputedellipsis)
1123 1151
1124 1152 yield dir, deltas
1125 1153
1126 1154 # The 'source' parameter is useful for extensions
1127 1155 def generatefiles(self, changedfiles, commonrevs, source,
1128 1156 mfdicts, fastpathlinkrev, fnodes, clrevs):
1129 1157 changedfiles = list(filter(self._filematcher, changedfiles))
1130 1158
1131 1159 if not fastpathlinkrev:
1132 1160 def normallinknodes(unused, fname):
1133 1161 return fnodes.get(fname, {})
1134 1162 else:
1135 1163 cln = self._repo.changelog.node
1136 1164
1137 1165 def normallinknodes(store, fname):
1138 1166 flinkrev = store.linkrev
1139 1167 fnode = store.node
1140 1168 revs = ((r, flinkrev(r)) for r in store)
1141 1169 return dict((fnode(r), cln(lr))
1142 1170 for r, lr in revs if lr in clrevs)
1143 1171
1144 1172 clrevtolocalrev = {}
1145 1173
1146 1174 if self._isshallow:
1147 1175 # In a shallow clone, the linknodes callback needs to also include
1148 1176 # those file nodes that are in the manifests we sent but weren't
1149 1177 # introduced by those manifests.
1150 1178 commonctxs = [self._repo[c] for c in commonrevs]
1151 1179 clrev = self._repo.changelog.rev
1152 1180
1153 1181 # Defining this function has a side-effect of overriding the
1154 1182 # function of the same name that was passed in as an argument.
1155 1183 # TODO have caller pass in appropriate function.
1156 1184 def linknodes(flog, fname):
1157 1185 for c in commonctxs:
1158 1186 try:
1159 1187 fnode = c.filenode(fname)
1160 1188 clrevtolocalrev[c.rev()] = flog.rev(fnode)
1161 1189 except error.ManifestLookupError:
1162 1190 pass
1163 1191 links = normallinknodes(flog, fname)
1164 1192 if len(links) != len(mfdicts):
1165 1193 for mf, lr in mfdicts:
1166 1194 fnode = mf.get(fname, None)
1167 1195 if fnode in links:
1168 1196 links[fnode] = min(links[fnode], lr, key=clrev)
1169 1197 elif fnode:
1170 1198 links[fnode] = lr
1171 1199 return links
1172 1200 else:
1173 1201 linknodes = normallinknodes
1174 1202
1175 1203 repo = self._repo
1176 1204 cl = repo.changelog
1177 1205 progress = repo.ui.makeprogress(_('bundling'), unit=_('files'),
1178 1206 total=len(changedfiles))
1179 1207 for i, fname in enumerate(sorted(changedfiles)):
1180 1208 filerevlog = repo.file(fname)
1181 1209 if not filerevlog:
1182 1210 raise error.Abort(_("empty or missing file data for %s") %
1183 1211 fname)
1184 1212
1185 1213 clrevtolocalrev.clear()
1186 1214
1187 1215 linkrevnodes = linknodes(filerevlog, fname)
1188 1216 # Lookup for filenodes, we collected the linkrev nodes above in the
1189 1217 # fastpath case and with lookupmf in the slowpath case.
1190 1218 def lookupfilelog(x):
1191 1219 return linkrevnodes[x]
1192 1220
1193 1221 frev, flr = filerevlog.rev, filerevlog.linkrev
1194 1222 filenodes = [n for n in linkrevnodes
1195 1223 if flr(frev(n)) not in commonrevs]
1196 1224
1197 1225 if filenodes:
1198 1226 if self._ellipses:
1199 1227 revs = _sortnodesellipsis(filerevlog, filenodes,
1200 1228 cl, lookupfilelog)
1201 1229 else:
1202 1230 revs = _sortnodesnormal(filerevlog, filenodes,
1203 1231 self._reorder)
1204 1232
1205 1233 progress.update(i + 1, item=fname)
1206 1234
1207 1235 deltas = deltagroup(
1208 1236 self._repo, revs, filerevlog, False, lookupfilelog,
1209 self._deltaparentfn,
1237 self._forcedeltaparentprev,
1210 1238 ellipses=self._ellipses,
1211 1239 clrevtolocalrev=clrevtolocalrev,
1212 1240 fullclnodes=self._fullclnodes,
1213 1241 precomputedellipsis=self._precomputedellipsis)
1214 1242
1215 1243 yield fname, deltas
1216 1244
1217 1245 progress.complete()
1218 1246
1219 def _deltaparentprev(store, rev, p1, p2, prev):
1220 """Resolve a delta parent to the previous revision.
1221
1222 Used for version 1 changegroups, which don't support generaldelta.
1223 """
1224 return prev
1225
def _deltaparentgeneraldelta(store, rev, p1, p2, prev):
    """Resolve a delta parent when general deltas are supported."""
    dp = store.deltaparent(rev)

    if dp == nullrev:
        # When the stored delta parent is null: if the revlog stores delta
        # chains, avoid sending full revisions by deltaing against prev.
        # p1 might yield smaller deltas, but resolving p1's raw text could
        # be expensive, while prev should be in the revlog caches. A flag
        # and/or config option could control this in the future. If the
        # revlog deliberately stores full snapshots, stick to a snapshot.
        base = prev if store.storedeltachains else nullrev
    elif dp not in (p1, p2, prev):
        # We can't be sure the remote has the stored base revision, so
        # fall back to prev unconditionally (note: no candelta() check on
        # this path, mirroring the original early return).
        return prev
    else:
        base = dp

    # A non-null base that cannot legally serve as a delta base (e.g.
    # censored data) degrades to a full snapshot.
    if base != nullrev and not store.candelta(base, rev):
        base = nullrev

    return base
1252
1253 def _deltaparentellipses(store, rev, p1, p2, prev):
1254 """Resolve a delta parent when in ellipses mode."""
1255 # TODO: send better deltas when in narrow mode.
1256 #
1257 # changegroup.group() loops over revisions to send,
1258 # including revisions we'll skip. What this means is that
1259 # `prev` will be a potentially useless delta base for all
1260 # ellipsis nodes, as the client likely won't have it. In
1261 # the future we should do bookkeeping about which nodes
1262 # have been sent to the client, and try to be
1263 # significantly smarter about delta bases. This is
1264 # slightly tricky because this same code has to work for
1265 # all revlogs, and we don't have the linkrev/linknode here.
1266 return p1
1267
def _makecg1packer(repo, filematcher, bundlecaps, ellipses=False,
                   shallow=False, ellipsisroots=None, fullnodes=None):
    """Create a changegroup version 01 packer.

    cg1 headers carry no delta base node, so every delta must be against
    the previously sent revision (forcedeltaparentprev=True). The stale
    pre-refactor keyword (deltaparentfn=...) left in the captured diff is
    dropped: cgpacker no longer accepts it.
    """
    builddeltaheader = lambda d: _CHANGEGROUPV1_DELTA_HEADER.pack(
        d.node, d.p1node, d.p2node, d.linknode)

    return cgpacker(repo, filematcher, b'01',
                    allowreorder=None,
                    builddeltaheader=builddeltaheader,
                    manifestsend=b'',
                    forcedeltaparentprev=True,
                    bundlecaps=bundlecaps,
                    ellipses=ellipses,
                    shallow=shallow,
                    ellipsisroots=ellipsisroots,
                    fullnodes=fullnodes)
1283 1262
def _makecg2packer(repo, filematcher, bundlecaps, ellipses=False,
                   shallow=False, ellipsisroots=None, fullnodes=None):
    """Create a changegroup version 02 packer.

    The stale pre-refactor keyword (deltaparentfn=...) left in the
    captured diff is dropped: cgpacker no longer accepts it.
    """
    builddeltaheader = lambda d: _CHANGEGROUPV2_DELTA_HEADER.pack(
        d.node, d.p1node, d.p2node, d.basenode, d.linknode)

    # Since generaldelta is directly supported by cg2, reordering
    # generally doesn't help, so we disable it by default (treating
    # bundle.reorder=auto just like bundle.reorder=False).
    return cgpacker(repo, filematcher, b'02',
                    allowreorder=False,
                    builddeltaheader=builddeltaheader,
                    manifestsend=b'',
                    bundlecaps=bundlecaps,
                    ellipses=ellipses,
                    shallow=shallow,
                    ellipsisroots=ellipsisroots,
                    fullnodes=fullnodes)
1302 1280
def _makecg3packer(repo, filematcher, bundlecaps, ellipses=False,
                   shallow=False, ellipsisroots=None, fullnodes=None):
    """Create a changegroup version 03 packer.

    cg3 headers carry a delta base node and flags, and manifests are
    closed with an extra empty chunk (manifestsend) to terminate the
    per-directory tree manifest groups. The stale pre-refactor
    deltaparentfn selection left in the captured diff is dropped:
    cgpacker no longer accepts that keyword.
    """
    builddeltaheader = lambda d: _CHANGEGROUPV3_DELTA_HEADER.pack(
        d.node, d.p1node, d.p2node, d.basenode, d.linknode, d.flags)

    return cgpacker(repo, filematcher, b'03',
                    allowreorder=False,
                    builddeltaheader=builddeltaheader,
                    manifestsend=closechunk(),
                    bundlecaps=bundlecaps,
                    ellipses=ellipses,
                    shallow=shallow,
                    ellipsisroots=ellipsisroots,
                    fullnodes=fullnodes)
1321 1295
# Maps changegroup version identifier -> (packer factory, unpacker class).
_packermap = {'01': (_makecg1packer, cg1unpacker),
             # cg2 adds support for exchanging generaldelta
             '02': (_makecg2packer, cg2unpacker),
             # cg3 adds support for exchanging revlog flags and treemanifests
             '03': (_makecg3packer, cg3unpacker),
}
1328 1302
def allsupportedversions(repo):
    """Return the set of all changegroup versions this repo understands."""
    versions = set(_packermap)
    # cg3 is only advertised when something actually needs it: either an
    # experimental opt-in or an existing treemanifest requirement.
    wantcg3 = (repo.ui.configbool('experimental', 'changegroup3')
               or repo.ui.configbool('experimental', 'treemanifest')
               or 'treemanifest' in repo.requirements)
    if not wantcg3:
        versions.discard('03')
    return versions
1336 1310
# Changegroup versions that can be applied to the repo
def supportedincomingversions(repo):
    """Return changegroup versions that can be applied to this repo.

    Currently identical to the full supported set; outgoing support
    additionally excludes versions based on repo requirements.
    """
    return allsupportedversions(repo)
1340 1314
# Changegroup versions that can be created from the repo
def supportedoutgoingversions(repo):
    """Return changegroup versions that can be created from this repo.

    Versions 01 and 02 are dropped whenever a repo requirement depends
    on features they lack:

    * treemanifest: 01/02 support only flat manifests, and converting
      between flat and tree manifests on the fly is too expensive (tree
      manifests hash differently, so all of history would need
      converting); we simply don't even pretend to support them.
    * narrow: 01/02 don't support revlog flags, which stripping and
      unbundling need.
    * lfs: 01/02 don't support revlog flags, needed to mark LFS entries
      with REVIDX_EXTSTORED.
    """
    versions = allsupportedversions(repo)
    reqs = repo.requirements
    needsflags = ('treemanifest' in reqs
                  or repository.NARROW_REQUIREMENT in reqs
                  or LFS_REQUIREMENT in reqs)
    if needsflags:
        versions.difference_update(['01', '02'])
    return versions
1364 1338
def localversion(repo):
    """Return the best changegroup version for local-only bundles."""
    # Finds the best version to use for bundles that are meant to be used
    # locally, such as those from strip and shelve, and temporary bundles.
    return max(supportedoutgoingversions(repo))
1369 1343
def safeversion(repo):
    """Return the smallest version safe to assume clients support.

    For example, all hg versions that support generaldelta also support
    changegroup 02, so a generaldelta repo can safely assume at least 02.
    """
    versions = supportedoutgoingversions(repo)
    if 'generaldelta' in repo.requirements:
        versions -= {'01'}
    # Internal invariant: at least one outgoing version always remains.
    assert versions
    return min(versions)
1379 1353
def getbundler(version, repo, bundlecaps=None, filematcher=None,
               ellipses=False, shallow=False, ellipsisroots=None,
               fullnodes=None):
    """Return a packer instance for changegroup ``version``.

    Validates that the requested version / file matcher / ellipses
    combination is supported, then delegates to the per-version factory
    registered in _packermap.
    """
    assert version in supportedoutgoingversions(repo)

    if filematcher is None:
        # Default to matching every file in the repo.
        filematcher = matchmod.alwaysmatcher(repo.root, '')

    if version == '01' and not filematcher.always():
        raise error.ProgrammingError('version 01 changegroups do not support '
                                     'sparse file matchers')

    if ellipses and version in (b'01', b'02'):
        raise error.Abort(
            _('ellipsis nodes require at least cg3 on client and server, '
              'but negotiated version %s') % version)

    # Requested files could include files not in the local store. So
    # filter those out.
    filematcher = matchmod.intersectmatchers(repo.narrowmatch(),
                                             filematcher)

    fn = _packermap[version][0]
    return fn(repo, filematcher, bundlecaps, ellipses=ellipses,
              shallow=shallow, ellipsisroots=ellipsisroots,
              fullnodes=fullnodes)
1406 1380
def getunbundler(version, fh, alg, extras=None):
    """Return an unpacker for changegroup ``version`` reading from ``fh``.

    ``alg`` names the compression algorithm used on the stream;
    ``extras`` is passed through to the unpacker.
    """
    return _packermap[version][1](fh, alg, extras=extras)
1409 1383
def _changegroupinfo(repo, nodes, source):
    """Report a summary (and, when debugging, the full list) of changesets."""
    ui = repo.ui
    if ui.verbose or source == 'bundle':
        ui.status(_("%d changesets found\n") % len(nodes))
        if ui.debugflag:
            ui.debug("list of changesets:\n")
            for node in nodes:
                ui.debug("%s\n" % hex(node))
1417 1391
def makechangegroup(repo, outgoing, version, source, fastpath=False,
                    bundlecaps=None):
    """Build a changegroup and wrap it in the matching unbundler.

    The byte-chunk stream from makestream() is buffered and handed to
    the version's unpacker so callers can apply it directly.
    """
    cgstream = makestream(repo, outgoing, version, source,
                          fastpath=fastpath, bundlecaps=bundlecaps)
    return getunbundler(version, util.chunkbuffer(cgstream), None,
                        {'clcount': len(outgoing.missing) })
1424 1398
def makestream(repo, outgoing, version, source, fastpath=False,
               bundlecaps=None, filematcher=None):
    """Return a changegroup byte-chunk stream for ``outgoing`` revisions."""
    bundler = getbundler(version, repo, bundlecaps=bundlecaps,
                         filematcher=filematcher)

    repo = repo.unfiltered()
    heads = outgoing.missingheads
    # We go through the fast path if we get told to, or if all (unfiltered
    # heads have been requested (since we then know there all linkrevs will
    # be pulled by the client). Sorting in place gives the canonical order
    # the comparison below relies on.
    heads.sort()
    fastpathlinkrev = fastpath or (
        repo.filtername is None and heads == sorted(repo.heads()))

    repo.hook('preoutgoing', throw=True, source=source)
    _changegroupinfo(repo, outgoing.missing, source)
    return bundler.generate(outgoing.common, outgoing.missing,
                            fastpathlinkrev, source)
1444 1418
def _addchangegroupfiles(repo, source, revmap, trp, expectedfiles, needfiles):
    """Apply the filelog portion of an incoming changegroup.

    ``source`` yields one delta group per file; each group is added to
    the matching filelog. ``needfiles`` maps filename -> set of nodes
    that the already-applied changesets require; entries are checked off
    as they arrive, and any node still missing at the end aborts.

    Returns a (revisions, files) tuple of counts added.
    """
    revisions = 0
    files = 0
    progress = repo.ui.makeprogress(_('files'), unit=_('files'),
                                    total=expectedfiles)
    # An empty filelog header ({}) terminates the stream of file groups.
    for chunkdata in iter(source.filelogheader, {}):
        files += 1
        f = chunkdata["filename"]
        repo.ui.debug("adding %s revisions\n" % f)
        progress.increment()
        fl = repo.file(f)
        o = len(fl)
        try:
            deltas = source.deltaiter()
            if not fl.addgroup(deltas, revmap, trp):
                raise error.Abort(_("received file revlog group is empty"))
        except error.CensoredBaseError as e:
            raise error.Abort(_("received delta base is censored: %s") % e)
        revisions += len(fl) - o
        if f in needfiles:
            needs = needfiles[f]
            # Every newly added node must be one we were expecting;
            # anything else is a spurious entry from the sender.
            for new in pycompat.xrange(o, len(fl)):
                n = fl.node(new)
                if n in needs:
                    needs.remove(n)
                else:
                    raise error.Abort(
                        _("received spurious file revlog entry"))
            if not needs:
                del needfiles[f]
    progress.complete()

    # Anything still listed in needfiles must already exist locally;
    # otherwise the repository would be left inconsistent.
    for f, needs in needfiles.iteritems():
        fl = repo.file(f)
        for n in needs:
            try:
                fl.rev(n)
            except error.LookupError:
                raise error.Abort(
                    _('missing file data for %s:%s - run hg verify') %
                    (f, hex(n)))

    return revisions, files
General Comments 0
You need to be logged in to leave comments. Login now