##// END OF EJS Templates
changegroup: move manifest chunk emission to generate()...
Gregory Szorc -
r39048:c921ad9c default
parent child Browse files
Show More
@@ -1,1447 +1,1451 b''
1 1 # changegroup.py - Mercurial changegroup manipulation functions
2 2 #
3 3 # Copyright 2006 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import os
11 11 import struct
12 12 import weakref
13 13
14 14 from .i18n import _
15 15 from .node import (
16 16 hex,
17 17 nullid,
18 18 nullrev,
19 19 short,
20 20 )
21 21
22 22 from .thirdparty import (
23 23 attr,
24 24 )
25 25
26 26 from . import (
27 27 dagutil,
28 28 error,
29 29 match as matchmod,
30 30 mdiff,
31 31 phases,
32 32 pycompat,
33 33 repository,
34 34 revlog,
35 35 util,
36 36 )
37 37
38 38 from .utils import (
39 39 stringutil,
40 40 )
41 41
# Per-revision delta header formats for each changegroup version.
# cg1: node, p1, p2, linknode (see cg1unpacker._deltaheader).
# cg2 adds an explicit delta-base node (see cg2unpacker._deltaheader).
# cg3 (explicitly big-endian) additionally carries a 16-bit flags field.
_CHANGEGROUPV1_DELTA_HEADER = struct.Struct("20s20s20s20s")
_CHANGEGROUPV2_DELTA_HEADER = struct.Struct("20s20s20s20s20s")
_CHANGEGROUPV3_DELTA_HEADER = struct.Struct(">20s20s20s20s20sH")

# Repository requirement string for LFS support.
LFS_REQUIREMENT = 'lfs'

# Module-level alias; reads exactly N bytes from a stream
# (presumably aborting on short reads — see util.readexactly).
readexactly = util.readexactly
49 49
def getchunk(stream):
    """return the next chunk from stream as a string"""
    # Each chunk is framed by a 4-byte big-endian length that counts
    # the length field itself.
    header = readexactly(stream, 4)
    length = struct.unpack(">l", header)[0]
    if length <= 4:
        if length:
            raise error.Abort(_("invalid chunk length %d") % length)
        # A zero length marks the end of a group.
        return ""
    return readexactly(stream, length - 4)
59 59
def chunkheader(length):
    """return a changegroup chunk header (string)"""
    # The on-the-wire length includes the 4-byte header itself.
    return struct.pack(">l", 4 + length)
63 63
def closechunk():
    """return a changegroup chunk header (string) for a zero-length chunk"""
    # Equivalent to struct.pack(">l", 0): four zero bytes terminate a group.
    return b"\x00\x00\x00\x00"
67 67
def _fileheader(path):
    """Obtain a changegroup chunk header for a named path.

    The path bytes ride inside the chunk framing, so the receiver
    recovers the name as the chunk payload.
    """
    return chunkheader(len(path)) + path
71 71
def writechunks(ui, chunks, filename, vfs=None):
    """Write chunks to a file and return its filename.

    The stream is assumed to be a bundle file.
    Existing files will not be overwritten.
    If no filename is specified, a temporary file is created.
    """
    fh = None
    cleanup = None
    try:
        if filename:
            if vfs:
                fh = vfs.open(filename, "wb")
            else:
                # Increase default buffer size because default is usually
                # small (4k is common on Linux).
                fh = open(filename, "wb", 131072)
        else:
            fd, filename = pycompat.mkstemp(prefix="hg-bundle-", suffix=".hg")
            fh = os.fdopen(fd, r"wb")
        cleanup = filename
        for c in chunks:
            fh.write(c)
        # All chunks were written; disarm the partial-file cleanup below.
        cleanup = None
        return filename
    finally:
        if fh is not None:
            fh.close()
        if cleanup is not None:
            # An exception escaped mid-write: remove the partial file.
            if filename and vfs:
                vfs.unlink(cleanup)
            else:
                os.unlink(cleanup)
105 105
class cg1unpacker(object):
    """Unpacker for cg1 changegroup streams.

    A changegroup unpacker handles the framing of the revision data in
    the wire format. Most consumers will want to use the apply()
    method to add the changes from the changegroup to a repository.

    If you're forwarding a changegroup unmodified to another consumer,
    use getchunks(), which returns an iterator of changegroup
    chunks. This is mostly useful for cases where you need to know the
    data stream has ended by observing the end of the changegroup.

    deltachunk() is useful only if you're applying delta data. Most
    consumers should prefer apply() instead.

    A few other public methods exist. Those are used only for
    bundlerepo and some debug commands - their use is discouraged.
    """
    deltaheader = _CHANGEGROUPV1_DELTA_HEADER
    deltaheadersize = deltaheader.size
    version = '01'
    _grouplistcount = 1 # One list of files after the manifests

    def __init__(self, fh, alg, extras=None):
        # ``alg`` names the bundle compression engine; None means
        # uncompressed ('UN').
        if alg is None:
            alg = 'UN'
        if alg not in util.compengines.supportedbundletypes:
            raise error.Abort(_('unknown stream compression type: %s')
                              % alg)
        if alg == 'BZ':
            alg = '_truncatedBZ'

        compengine = util.compengines.forbundletype(alg)
        self._stream = compengine.decompressorreader(fh)
        self._type = alg
        self.extras = extras or {}
        # Optional zero-argument progress callback invoked once per chunk
        # read (see _chunklength).
        self.callback = None

    # These methods (compressed, read, seek, tell) all appear to only
    # be used by bundlerepo, but it's a little hard to tell.
    def compressed(self):
        return self._type is not None and self._type != 'UN'
    def read(self, l):
        return self._stream.read(l)
    def seek(self, pos):
        return self._stream.seek(pos)
    def tell(self):
        return self._stream.tell()
    def close(self):
        return self._stream.close()

    def _chunklength(self):
        """Read a chunk framing header and return the payload length.

        Returns 0 at the end of a group. The stored length counts the
        4-byte length field itself, hence the ``l - 4``.
        """
        d = readexactly(self._stream, 4)
        l = struct.unpack(">l", d)[0]
        if l <= 4:
            if l:
                raise error.Abort(_("invalid chunk length %d") % l)
            return 0
        if self.callback:
            self.callback()
        return l - 4

    def changelogheader(self):
        """v10 does not have a changelog header chunk"""
        return {}

    def manifestheader(self):
        """v10 does not have a manifest header chunk"""
        return {}

    def filelogheader(self):
        """return the header of the filelogs chunk, v10 only has the filename"""
        l = self._chunklength()
        if not l:
            return {}
        fname = readexactly(self._stream, l)
        return {'filename': fname}

    def _deltaheader(self, headertuple, prevnode):
        # cg1 has no explicit delta base: deltas chain against the
        # previously received revision, or against p1 for the first one.
        node, p1, p2, cs = headertuple
        if prevnode is None:
            deltabase = p1
        else:
            deltabase = prevnode
        flags = 0
        return node, p1, p2, deltabase, cs, flags

    def deltachunk(self, prevnode):
        """Read one delta chunk; returns {} at the end of a group."""
        l = self._chunklength()
        if not l:
            return {}
        headerdata = readexactly(self._stream, self.deltaheadersize)
        header = self.deltaheader.unpack(headerdata)
        delta = readexactly(self._stream, l - self.deltaheadersize)
        node, p1, p2, deltabase, cs, flags = self._deltaheader(header, prevnode)
        return (node, p1, p2, cs, deltabase, delta, flags)

    def getchunks(self):
        """returns all the chunks contains in the bundle

        Used when you need to forward the binary stream to a file or another
        network API. To do so, it parse the changegroup data, otherwise it will
        block in case of sshrepo because it don't know the end of the stream.
        """
        # For changegroup 1 and 2, we expect 3 parts: changelog, manifestlog,
        # and a list of filelogs. For changegroup 3, we expect 4 parts:
        # changelog, manifestlog, a list of tree manifestlogs, and a list of
        # filelogs.
        #
        # Changelog and manifestlog parts are terminated with empty chunks. The
        # tree and file parts are a list of entry sections. Each entry section
        # is a series of chunks terminating in an empty chunk. The list of these
        # entry sections is terminated in yet another empty chunk, so we know
        # we've reached the end of the tree/file list when we reach an empty
        # chunk that was proceeded by no non-empty chunks.

        parts = 0
        while parts < 2 + self._grouplistcount:
            noentries = True
            while True:
                chunk = getchunk(self)
                if not chunk:
                    # The first two empty chunks represent the end of the
                    # changelog and the manifestlog portions. The remaining
                    # empty chunks represent either A) the end of individual
                    # tree or file entries in the file list, or B) the end of
                    # the entire list. It's the end of the entire list if there
                    # were no entries (i.e. noentries is True).
                    if parts < 2:
                        parts += 1
                    elif noentries:
                        parts += 1
                    break
                noentries = False
                yield chunkheader(len(chunk))
                pos = 0
                # Re-emit the payload in at most 1MB slices.
                while pos < len(chunk):
                    next = pos + 2**20
                    yield chunk[pos:next]
                    pos = next
            yield closechunk()

    def _unpackmanifests(self, repo, revmap, trp, prog):
        # Report progress through the shared chunk-read callback.
        self.callback = prog.increment
        # no need to check for empty manifest group here:
        # if the result of the merge of 1 and 2 is the same in 3 and 4,
        # no new manifest will be created and the manifest group will
        # be empty during the pull
        self.manifestheader()
        deltas = self.deltaiter()
        repo.manifestlog.addgroup(deltas, revmap, trp)
        prog.complete()
        self.callback = None

    def apply(self, repo, tr, srctype, url, targetphase=phases.draft,
              expectedtotal=None):
        """Add the changegroup returned by source.read() to this repo.
        srctype is a string like 'push', 'pull', or 'unbundle'. url is
        the URL of the repo where this changegroup is coming from.

        Return an integer summarizing the change to this repo:
        - nothing changed or no source: 0
        - more heads than before: 1+added heads (2..n)
        - fewer heads than before: -1-removed heads (-2..-n)
        - number of heads stays the same: 1
        """
        repo = repo.unfiltered()
        def csmap(x):
            repo.ui.debug("add changeset %s\n" % short(x))
            return len(cl)

        def revmap(x):
            return cl.rev(x)

        changesets = files = revisions = 0

        try:
            # The transaction may already carry source information. In this
            # case we use the top level data. We overwrite the argument
            # because we need to use the top level value (if they exist)
            # in this function.
            srctype = tr.hookargs.setdefault('source', srctype)
            url = tr.hookargs.setdefault('url', url)
            repo.hook('prechangegroup',
                      throw=True, **pycompat.strkwargs(tr.hookargs))

            # write changelog data to temp files so concurrent readers
            # will not see an inconsistent view
            cl = repo.changelog
            cl.delayupdate(tr)
            oldheads = set(cl.heads())

            trp = weakref.proxy(tr)
            # pull off the changeset group
            repo.ui.status(_("adding changesets\n"))
            clstart = len(cl)
            progress = repo.ui.makeprogress(_('changesets'), unit=_('chunks'),
                                            total=expectedtotal)
            self.callback = progress.increment

            # Accumulate the set of files touched, reported by the
            # changelog as revisions are added.
            efiles = set()
            def onchangelog(cl, node):
                efiles.update(cl.readfiles(node))

            self.changelogheader()
            deltas = self.deltaiter()
            cgnodes = cl.addgroup(deltas, csmap, trp, addrevisioncb=onchangelog)
            efiles = len(efiles)

            if not cgnodes:
                repo.ui.develwarn('applied empty changegroup',
                                  config='warn-empty-changegroup')
            clend = len(cl)
            changesets = clend - clstart
            progress.complete()
            self.callback = None

            # pull off the manifest group
            repo.ui.status(_("adding manifests\n"))
            # We know that we'll never have more manifests than we had
            # changesets.
            progress = repo.ui.makeprogress(_('manifests'), unit=_('chunks'),
                                            total=changesets)
            self._unpackmanifests(repo, revmap, trp, progress)

            needfiles = {}
            if repo.ui.configbool('server', 'validate'):
                cl = repo.changelog
                ml = repo.manifestlog
                # validate incoming csets have their manifests
                for cset in pycompat.xrange(clstart, clend):
                    mfnode = cl.changelogrevision(cset).manifest
                    mfest = ml[mfnode].readdelta()
                    # store file cgnodes we must see
                    for f, n in mfest.iteritems():
                        needfiles.setdefault(f, set()).add(n)

            # process the files
            repo.ui.status(_("adding file changes\n"))
            newrevs, newfiles = _addchangegroupfiles(
                repo, self, revmap, trp, efiles, needfiles)
            revisions += newrevs
            files += newfiles

            # Compute the head-count delta, ignoring new heads that
            # close a branch.
            deltaheads = 0
            if oldheads:
                heads = cl.heads()
                deltaheads = len(heads) - len(oldheads)
                for h in heads:
                    if h not in oldheads and repo[h].closesbranch():
                        deltaheads -= 1
            htext = ""
            if deltaheads:
                htext = _(" (%+d heads)") % deltaheads

            repo.ui.status(_("added %d changesets"
                             " with %d changes to %d files%s\n")
                           % (changesets, revisions, files, htext))
            repo.invalidatevolatilesets()

            if changesets > 0:
                if 'node' not in tr.hookargs:
                    tr.hookargs['node'] = hex(cl.node(clstart))
                    tr.hookargs['node_last'] = hex(cl.node(clend - 1))
                    hookargs = dict(tr.hookargs)
                else:
                    # Another changegroup already set 'node'; keep the
                    # transaction-level values and only override our copy.
                    hookargs = dict(tr.hookargs)
                    hookargs['node'] = hex(cl.node(clstart))
                    hookargs['node_last'] = hex(cl.node(clend - 1))
                repo.hook('pretxnchangegroup',
                          throw=True, **pycompat.strkwargs(hookargs))

            added = [cl.node(r) for r in pycompat.xrange(clstart, clend)]
            phaseall = None
            if srctype in ('push', 'serve'):
                # Old servers can not push the boundary themselves.
                # New servers won't push the boundary if changeset already
                # exists locally as secret
                #
                # We should not use added here but the list of all change in
                # the bundle
                if repo.publishing():
                    targetphase = phaseall = phases.public
                else:
                    # closer target phase computation

                    # Those changesets have been pushed from the
                    # outside, their phases are going to be pushed
                    # alongside. Therefor `targetphase` is
                    # ignored.
                    targetphase = phaseall = phases.draft
            if added:
                phases.registernew(repo, tr, targetphase, added)
            if phaseall is not None:
                phases.advanceboundary(repo, tr, phaseall, cgnodes)

            if changesets > 0:

                def runhooks():
                    # These hooks run when the lock releases, not when the
                    # transaction closes. So it's possible for the changelog
                    # to have changed since we last saw it.
                    if clstart >= len(repo):
                        return

                    repo.hook("changegroup", **pycompat.strkwargs(hookargs))

                    for n in added:
                        args = hookargs.copy()
                        args['node'] = hex(n)
                        del args['node_last']
                        repo.hook("incoming", **pycompat.strkwargs(args))

                    newheads = [h for h in repo.heads()
                                if h not in oldheads]
                    repo.ui.log("incoming",
                                "%d incoming changes - new heads: %s\n",
                                len(added),
                                ', '.join([hex(c[:6]) for c in newheads]))

                tr.addpostclose('changegroup-runhooks-%020i' % clstart,
                                lambda tr: repo._afterlock(runhooks))
        finally:
            repo.ui.flush()
        # never return 0 here:
        if deltaheads < 0:
            ret = deltaheads - 1
        else:
            ret = deltaheads + 1
        return ret

    def deltaiter(self):
        """
        returns an iterator of the deltas in this changegroup

        Useful for passing to the underlying storage system to be stored.
        """
        # Each chunk's delta base chains off the previously yielded node.
        chain = None
        for chunkdata in iter(lambda: self.deltachunk(chain), {}):
            # Chunkdata: (node, p1, p2, cs, deltabase, delta, flags)
            yield chunkdata
            chain = chunkdata[0]
448 448
class cg2unpacker(cg1unpacker):
    """Unpacker for cg2 streams.

    cg2 streams add support for generaldelta, so the delta header
    format is slightly different. All other features about the data
    remain the same.
    """
    deltaheader = _CHANGEGROUPV2_DELTA_HEADER
    deltaheadersize = deltaheader.size
    version = '02'

    def _deltaheader(self, headertuple, prevnode):
        # cg2 carries the delta base explicitly, so ``prevnode`` is
        # ignored; flags are not part of this format.
        node, p1, p2, deltabase, cs = headertuple
        return node, p1, p2, deltabase, cs, 0
464 464
class cg3unpacker(cg2unpacker):
    """Unpacker for cg3 streams.

    cg3 streams add support for exchanging treemanifests and revlog
    flags. It adds the revlog flags to the delta header and an empty chunk
    separating manifests and files.
    """
    deltaheader = _CHANGEGROUPV3_DELTA_HEADER
    deltaheadersize = deltaheader.size
    version = '03'
    _grouplistcount = 2 # One list of manifests and one list of files

    def _deltaheader(self, headertuple, prevnode):
        # cg3 carries both an explicit delta base and a flags field.
        node, p1, p2, deltabase, cs, flags = headertuple
        return node, p1, p2, deltabase, cs, flags

    def _unpackmanifests(self, repo, revmap, trp, prog):
        # Consume the flat manifest group first, then any trailing
        # per-directory tree manifest groups.
        super(cg3unpacker, self)._unpackmanifests(repo, revmap, trp, prog)
        for chunkdata in iter(self.filelogheader, {}):
            # If we get here, there are directory manifests in the changegroup
            d = chunkdata["filename"]
            repo.ui.debug("adding %s revisions\n" % d)
            dirlog = repo.manifestlog._revlog.dirlog(d)
            deltas = self.deltaiter()
            if not dirlog.addgroup(deltas, revmap, trp):
                raise error.Abort(_("received dir revlog group is empty"))
491 491
class headerlessfixup(object):
    """File-like wrapper replaying already-consumed header bytes.

    Reads are satisfied from the buffered header first and fall through
    to the wrapped stream once the buffer is exhausted.
    """
    def __init__(self, fh, h):
        self._h = h
        self._fh = fh

    def read(self, n):
        buffered = self._h
        if not buffered:
            return readexactly(self._fh, n)
        d, self._h = buffered[:n], buffered[n:]
        if len(d) < n:
            # Header ran out mid-read; top up from the stream.
            d += readexactly(self._fh, n - len(d))
        return d
503 503
@attr.s(slots=True, frozen=True)
class revisiondelta(object):
    """Describes a delta entry in a changegroup.

    Captured data is sufficient to serialize the delta into multiple
    formats. Instances are immutable (frozen) value objects.
    """
    # 20 byte node of this revision.
    node = attr.ib()
    # 20 byte nodes of parent revisions.
    p1node = attr.ib()
    p2node = attr.ib()
    # 20 byte node of node this delta is against.
    basenode = attr.ib()
    # 20 byte node of changeset revision this delta is associated with.
    linknode = attr.ib()
    # 2 bytes of flags to apply to revision data.
    flags = attr.ib()
    # Iterable of chunks holding raw delta data.
    deltachunks = attr.ib()
524 524
525 525 def _sortnodesnormal(store, nodes, reorder):
526 526 """Sort nodes for changegroup generation and turn into revnums."""
527 527 # for generaldelta revlogs, we linearize the revs; this will both be
528 528 # much quicker and generate a much smaller bundle
529 529 if (store._generaldelta and reorder is None) or reorder:
530 530 dag = dagutil.revlogdag(store)
531 531 return dag.linearize(set(store.rev(n) for n in nodes))
532 532 else:
533 533 return sorted([store.rev(n) for n in nodes])
534 534
535 535 def _sortnodesellipsis(store, nodes, cl, lookup):
536 536 """Sort nodes for changegroup generation and turn into revnums."""
537 537 # Ellipses serving mode.
538 538 #
539 539 # In a perfect world, we'd generate better ellipsis-ified graphs
540 540 # for non-changelog revlogs. In practice, we haven't started doing
541 541 # that yet, so the resulting DAGs for the manifestlog and filelogs
542 542 # are actually full of bogus parentage on all the ellipsis
543 543 # nodes. This has the side effect that, while the contents are
544 544 # correct, the individual DAGs might be completely out of whack in
545 545 # a case like 882681bc3166 and its ancestors (back about 10
546 546 # revisions or so) in the main hg repo.
547 547 #
548 548 # The one invariant we *know* holds is that the new (potentially
549 549 # bogus) DAG shape will be valid if we order the nodes in the
550 550 # order that they're introduced in dramatis personae by the
551 551 # changelog, so what we do is we sort the non-changelog histories
552 552 # by the order in which they are used by the changelog.
553 553 key = lambda n: cl.rev(lookup(n))
554 554 return [store.rev(n) for n in sorted(nodes, key=key)]
555 555
def _revisiondeltanormal(store, rev, prev, linknode, deltaparentfn):
    """Construct a revision delta for non-ellipses changegroup generation.

    ``store`` is the revlog being emitted, ``rev`` the revision to emit,
    ``prev`` the previously emitted revision, and ``deltaparentfn`` the
    policy callback that picks the revision to delta against.
    """
    node = store.node(rev)
    p1, p2 = store.parentrevs(rev)
    base = deltaparentfn(store, rev, p1, p2, prev)

    prefix = ''
    if store.iscensored(base) or store.iscensored(rev):
        # Censored revisions are transmitted as their tombstone text,
        # framed as a full replacement of the base.
        try:
            delta = store.revision(node, raw=True)
        except error.CensoredNodeError as e:
            delta = e.tombstone
        if base == nullrev:
            prefix = mdiff.trivialdiffheader(len(delta))
        else:
            baselen = store.rawsize(base)
            prefix = mdiff.replacediffheader(baselen, len(delta))
    elif base == nullrev:
        # No delta parent: send the full raw text with a trivial header.
        delta = store.revision(node, raw=True)
        prefix = mdiff.trivialdiffheader(len(delta))
    else:
        delta = store.revdiff(base, rev)
    p1n, p2n = store.parents(node)

    return revisiondelta(
        node=node,
        p1node=p1n,
        p2node=p2n,
        basenode=store.node(base),
        linknode=linknode,
        flags=store.flags(rev),
        deltachunks=(prefix, delta),
    )
589 589
def _revisiondeltanarrow(cl, store, ischangelog, rev, linkrev,
                         linknode, clrevtolocalrev, fullclnodes,
                         precomputedellipsis):
    """Construct a revision delta for an ellipsis (narrow) changegroup.

    The revision is emitted as a full snapshot (basenode == nullid) with
    the REVIDX_ELLIPSIS flag set; its parents are remapped via ``local``
    to the nearest ancestors present in the ellipsis DAG.
    """
    linkparents = precomputedellipsis[linkrev]
    def local(clrev):
        """Turn a changelog revnum into a local revnum.

        The ellipsis dag is stored as revnums on the changelog,
        but when we're producing ellipsis entries for
        non-changelog revlogs, we need to turn those numbers into
        something local. This does that for us, and during the
        changelog sending phase will also expand the stored
        mappings as needed.
        """
        if clrev == nullrev:
            return nullrev

        if ischangelog:
            return clrev

        # Walk the ellipsis-ized changelog breadth-first looking for a
        # change that has been linked from the current revlog.
        #
        # For a flat manifest revlog only a single step should be necessary
        # as all relevant changelog entries are relevant to the flat
        # manifest.
        #
        # For a filelog or tree manifest dirlog however not every changelog
        # entry will have been relevant, so we need to skip some changelog
        # nodes even after ellipsis-izing.
        walk = [clrev]
        while walk:
            p = walk[0]
            walk = walk[1:]
            if p in clrevtolocalrev:
                return clrevtolocalrev[p]
            elif p in fullclnodes:
                walk.extend([pp for pp in cl.parentrevs(p)
                             if pp != nullrev])
            elif p in precomputedellipsis:
                walk.extend([pp for pp in precomputedellipsis[p]
                             if pp != nullrev])
            else:
                # In this case, we've got an ellipsis with parents
                # outside the current bundle (likely an
                # incremental pull). We "know" that we can use the
                # value of this same revlog at whatever revision
                # is pointed to by linknode. "Know" is in scare
                # quotes because I haven't done enough examination
                # of edge cases to convince myself this is really
                # a fact - it works for all the (admittedly
                # thorough) cases in our testsuite, but I would be
                # somewhat unsurprised to find a case in the wild
                # where this breaks down a bit. That said, I don't
                # know if it would hurt anything.
                for i in pycompat.xrange(rev, 0, -1):
                    if store.linkrev(i) == clrev:
                        return i
                # We failed to resolve a parent for this node, so
                # we crash the changegroup construction.
                raise error.Abort(
                    'unable to resolve parent while packing %r %r'
                    ' for changeset %r' % (store.indexfile, rev, clrev))

        return nullrev

    if not linkparents or (
        store.parentrevs(rev) == (nullrev, nullrev)):
        p1, p2 = nullrev, nullrev
    elif len(linkparents) == 1:
        p1, = sorted(local(p) for p in linkparents)
        p2 = nullrev
    else:
        p1, p2 = sorted(local(p) for p in linkparents)

    n = store.node(rev)
    p1n, p2n = store.node(p1), store.node(p2)
    flags = store.flags(rev)
    flags |= revlog.REVIDX_ELLIPSIS

    # TODO: try and actually send deltas for ellipsis data blocks
    data = store.revision(n)
    diffheader = mdiff.trivialdiffheader(len(data))

    return revisiondelta(
        node=n,
        p1node=p1n,
        p2node=p2n,
        basenode=nullid,
        linknode=linknode,
        flags=flags,
        deltachunks=(diffheader, data),
    )
683 683
def deltagroup(repo, revs, store, ischangelog, lookup, deltaparentfn,
               deltaheaderfn, units=None,
               ellipses=False, clrevtolocalrev=None, fullclnodes=None,
               precomputedellipsis=None):
    """Calculate a delta group, yielding a sequence of changegroup chunks
    (strings).

    Given a list of changeset revs, return a set of deltas and
    metadata corresponding to nodes. The first delta is
    first parent(nodelist[0]) -> nodelist[0], the receiver is
    guaranteed to have this parent as it has all history before
    these changesets. In the case firstparent is nullrev the
    changegroup starts with a full revision.

    If units is not None, progress detail will be generated, units specifies
    the type of revlog that is touched (changelog, manifest, etc.).
    """
    # if we don't have any revisions touched by these changesets, bail
    if len(revs) == 0:
        return

    cl = repo.changelog

    # add the parent of the first rev
    p = store.parentrevs(revs[0])[0]
    revs.insert(0, p)

    # build deltas
    progress = None
    if units is not None:
        progress = repo.ui.makeprogress(_('bundling'), unit=units,
                                        total=(len(revs) - 1))
    for r in pycompat.xrange(len(revs) - 1):
        if progress:
            progress.update(r + 1)
        prev, curr = revs[r], revs[r + 1]
        linknode = lookup(store.node(curr))

        if ellipses:
            linkrev = cl.rev(linknode)
            clrevtolocalrev[linkrev] = curr

            # This is a node to send in full, because the changeset it
            # corresponds to was a full changeset.
            if linknode in fullclnodes:
                delta = _revisiondeltanormal(store, curr, prev, linknode,
                                             deltaparentfn)
            elif linkrev not in precomputedellipsis:
                # Linked changeset was not sampled; skip this revision.
                delta = None
            else:
                delta = _revisiondeltanarrow(
                    cl, store, ischangelog, curr, linkrev, linknode,
                    clrevtolocalrev, fullclnodes,
                    precomputedellipsis)
        else:
            delta = _revisiondeltanormal(store, curr, prev, linknode,
                                         deltaparentfn)

        if not delta:
            continue

        # Emit framing: length header, delta header, then the delta data.
        meta = deltaheaderfn(delta)
        l = len(meta) + sum(len(x) for x in delta.deltachunks)
        yield chunkheader(l)
        yield meta
        for x in delta.deltachunks:
            yield x

    if progress:
        progress.complete()
754 754
755 755 class cgpacker(object):
    def __init__(self, repo, filematcher, version, allowreorder,
                 deltaparentfn, builddeltaheader, manifestsend,
                 bundlecaps=None, ellipses=False,
                 shallow=False, ellipsisroots=None, fullnodes=None):
        """Given a source repo, construct a bundler.

        filematcher is a matcher that matches on files to include in the
        changegroup. Used to facilitate sparse changegroups.

        allowreorder controls whether reordering of revisions is allowed.
        This value is used when ``bundle.reorder`` is ``auto`` or isn't
        set.

        deltaparentfn is a callable that resolves the delta parent for
        a specific revision.

        builddeltaheader is a callable that constructs the header for a group
        delta.

        manifestsend is a chunk to send after manifests have been fully emitted.

        ellipses indicates whether ellipsis serving mode is enabled.

        bundlecaps is optional and can be used to specify the set of
        capabilities which can be used to build the bundle. While bundlecaps is
        unused in core Mercurial, extensions rely on this feature to communicate
        capabilities to customize the changegroup packer.

        shallow indicates whether shallow data might be sent. The packer may
        need to pack file contents not introduced by the changes being packed.

        fullnodes is the set of changelog nodes which should not be ellipsis
        nodes. We store this rather than the set of nodes that should be
        ellipsis because for very large histories we expect this to be
        significantly smaller.
        """
        assert filematcher
        self._filematcher = filematcher

        self.version = version
        self._deltaparentfn = deltaparentfn
        self._builddeltaheader = builddeltaheader
        self._manifestsend = manifestsend
        self._ellipses = ellipses

        # Set of capabilities we can use to build the bundle.
        if bundlecaps is None:
            bundlecaps = set()
        self._bundlecaps = bundlecaps
        self._isshallow = shallow
        self._fullclnodes = fullnodes

        # Maps ellipsis revs to their roots at the changelog level.
        self._precomputedellipsis = ellipsisroots

        # experimental config: bundle.reorder
        reorder = repo.ui.config('bundle', 'reorder')
        if reorder == 'auto':
            self._reorder = allowreorder
        else:
            self._reorder = stringutil.parsebool(reorder)

        self._repo = repo

        # Only emit the per-part size notes when they will be shown
        # (verbose but not debug).
        if self._repo.ui.verbose and not self._repo.ui.debugflag:
            self._verbosenote = self._repo.ui.note
        else:
            self._verbosenote = lambda s: None
824 824
825 825 def generate(self, commonrevs, clnodes, fastpathlinkrev, source):
826 826 """Yield a sequence of changegroup byte chunks."""
827 827
828 828 repo = self._repo
829 829 cl = repo.changelog
830 830
831 831 self._verbosenote(_('uncompressed size of bundle content:\n'))
832 832 size = 0
833 833
834 834 clstate, chunks = self._generatechangelog(cl, clnodes)
835 835 for chunk in chunks:
836 836 size += len(chunk)
837 837 yield chunk
838 838
839 839 close = closechunk()
840 840 size += len(close)
841 841 yield closechunk()
842 842
843 843 self._verbosenote(_('%8.i (changelog)\n') % size)
844 844
845 845 clrevorder = clstate['clrevorder']
846 846 mfs = clstate['mfs']
847 847 changedfiles = clstate['changedfiles']
848 848
849 849 # We need to make sure that the linkrev in the changegroup refers to
850 850 # the first changeset that introduced the manifest or file revision.
851 851 # The fastpath is usually safer than the slowpath, because the filelogs
852 852 # are walked in revlog order.
853 853 #
854 854 # When taking the slowpath with reorder=None and the manifest revlog
855 855 # uses generaldelta, the manifest may be walked in the "wrong" order.
856 856 # Without 'clrevorder', we would get an incorrect linkrev (see fix in
857 857 # cc0ff93d0c0c).
858 858 #
859 859 # When taking the fastpath, we are only vulnerable to reordering
860 860 # of the changelog itself. The changelog never uses generaldelta, so
861 861 # it is only reordered when reorder=True. To handle this case, we
862 862 # simply take the slowpath, which already has the 'clrevorder' logic.
863 863 # This was also fixed in cc0ff93d0c0c.
864 864 fastpathlinkrev = fastpathlinkrev and not self._reorder
865 865 # Treemanifests don't work correctly with fastpathlinkrev
866 866 # either, because we don't discover which directory nodes to
867 867 # send along with files. This could probably be fixed.
868 868 fastpathlinkrev = fastpathlinkrev and (
869 869 'treemanifest' not in repo.requirements)
870 870
871 871 fnodes = {} # needed file nodes
872 872
873 873 size = 0
874 for chunk in self.generatemanifests(commonrevs, clrevorder,
875 fastpathlinkrev, mfs, fnodes, source,
876 clstate['clrevtomanifestrev']):
877 size += len(chunk)
878 yield chunk
874 it = self.generatemanifests(
875 commonrevs, clrevorder, fastpathlinkrev, mfs, fnodes, source,
876 clstate['clrevtomanifestrev'])
877
878 for dir, chunks in it:
879 if dir:
880 assert self.version == b'03'
881 chunk = _fileheader(dir)
882 size += len(chunk)
883 yield chunk
884
885 for chunk in chunks:
886 size += len(chunk)
887 yield chunk
888
889 close = closechunk()
890 size += len(close)
891 yield close
879 892
880 893 self._verbosenote(_('%8.i (manifests)\n') % size)
881 894 yield self._manifestsend
882 895
883 896 mfdicts = None
884 897 if self._ellipses and self._isshallow:
885 898 mfdicts = [(self._repo.manifestlog[n].read(), lr)
886 899 for (n, lr) in mfs.iteritems()]
887 900
888 901 mfs.clear()
889 902 clrevs = set(cl.rev(x) for x in clnodes)
890 903
891 904 for chunk in self.generatefiles(changedfiles, commonrevs,
892 905 source, mfdicts, fastpathlinkrev,
893 906 fnodes, clrevs):
894 907 yield chunk
895 908
896 909 yield closechunk()
897 910
898 911 if clnodes:
899 912 repo.hook('outgoing', node=hex(clnodes[0]), source=source)
900 913
    def _generatechangelog(self, cl, nodes):
        """Generate data for changelog chunks.

        Returns a 2-tuple of a dict containing state and an iterable of
        byte chunks. The state will not be fully populated until the
        chunk stream has been fully consumed.

        State keys: 'clrevorder' (node -> emission order), 'mfs'
        (manifest node -> first introducing changeset), 'changedfiles'
        (set of touched paths) and 'clrevtomanifestrev' (ellipsis-mode
        changelog rev -> manifest rev mapping).
        """
        clrevorder = {}
        mfs = {} # needed manifests
        mfl = self._repo.manifestlog
        # TODO violates storage abstraction.
        mfrevlog = mfl._revlog
        changedfiles = set()
        clrevtomanifestrev = {}

        # Callback for the changelog, used to collect changed files and
        # manifest nodes.
        # Returns the linkrev node (identity in the changelog case).
        def lookupcl(x):
            c = cl.read(x)
            clrevorder[x] = len(clrevorder)

            if self._ellipses:
                # Only update mfs if x is going to be sent. Otherwise we
                # end up with bogus linkrevs specified for manifests and
                # we skip some manifest nodes that we should otherwise
                # have sent.
                if (x in self._fullclnodes
                    or cl.rev(x) in self._precomputedellipsis):
                    n = c[0]
                    # Record the first changeset introducing this manifest
                    # version.
                    mfs.setdefault(n, x)
                    # Set this narrow-specific dict so we have the lowest
                    # manifest revnum to look up for this cl revnum. (Part of
                    # mapping changelog ellipsis parents to manifest ellipsis
                    # parents)
                    clrevtomanifestrev.setdefault(cl.rev(x), mfrevlog.rev(n))
                # We can't trust the changed files list in the changeset if the
                # client requested a shallow clone.
                if self._isshallow:
                    changedfiles.update(mfl[c[0]].read().keys())
                else:
                    changedfiles.update(c[3])
            else:

                n = c[0]
                # record the first changeset introducing this manifest version
                mfs.setdefault(n, x)
                # Record a complete list of potentially-changed files in
                # this manifest.
                changedfiles.update(c[3])

            return x

        # Changelog doesn't benefit from reordering revisions. So send out
        # revisions in store order.
        revs = sorted(cl.rev(n) for n in nodes)

        state = {
            'clrevorder': clrevorder,
            'mfs': mfs,
            'changedfiles': changedfiles,
            'clrevtomanifestrev': clrevtomanifestrev,
        }

        gen = deltagroup(
            self._repo, revs, cl, True, lookupcl,
            self._deltaparentfn, self._builddeltaheader,
            ellipses=self._ellipses,
            units=_('changesets'),
            clrevtolocalrev={},
            fullclnodes=self._fullclnodes,
            precomputedellipsis=self._precomputedellipsis)

        return state, gen
977 990
    def generatemanifests(self, commonrevs, clrevorder, fastpathlinkrev, mfs,
                          fnodes, source, clrevtolocalrev):
        """Returns an iterator of (dir, chunk iterator) pairs for manifests.

        Each yielded ``chunks`` iterator emits the delta chunks for one
        (tree)manifest revlog; the root manifest is yielded with an empty
        ``dir``.  Callers must fully consume each chunk iterator before
        advancing: consuming it drives the lookup callback, which
        populates ``tmfnodes`` (subdirectory manifests still to send) and
        ``fnodes`` as a side effect.

        `source` is unused here, but is used by extensions like remotefilelog to
        change what is sent based in pulls vs pushes, etc.
        """
        repo = self._repo
        cl = repo.changelog
        mfl = repo.manifestlog
        dirlog = mfl._revlog.dirlog
        # Worklist of directory -> {manifest node: changelog node}; seeded
        # with the root manifests and grown as tree entries are discovered.
        tmfnodes = {'': mfs}

        # Callback for the manifest, used to collect linkrevs for filelog
        # revisions.
        # Returns the linkrev node (collected in lookupcl).
        def makelookupmflinknode(dir, nodes):
            if fastpathlinkrev:
                assert not dir
                return mfs.__getitem__

            def lookupmflinknode(x):
                """Callback for looking up the linknode for manifests.

                Returns the linkrev node for the specified manifest.

                SIDE EFFECT:

                1) fclnodes gets populated with the list of relevant
                file nodes if we're not using fastpathlinkrev
                2) When treemanifests are in use, collects treemanifest nodes
                to send

                Note that this means manifests must be completely sent to
                the client before you can trust the list of files and
                treemanifests to send.
                """
                clnode = nodes[x]
                mdata = mfl.get(dir, x).readfast(shallow=True)
                for p, n, fl in mdata.iterentries():
                    if fl == 't': # subdirectory manifest
                        subdir = dir + p + '/'
                        tmfclnodes = tmfnodes.setdefault(subdir, {})
                        tmfclnode = tmfclnodes.setdefault(n, clnode)
                        if clrevorder[clnode] < clrevorder[tmfclnode]:
                            tmfclnodes[n] = clnode
                    else:
                        f = dir + p
                        fclnodes = fnodes.setdefault(f, {})
                        fclnode = fclnodes.setdefault(n, clnode)
                        if clrevorder[clnode] < clrevorder[fclnode]:
                            fclnodes[n] = clnode
                return clnode
            return lookupmflinknode

        while tmfnodes:
            dir, nodes = tmfnodes.popitem()
            store = dirlog(dir)

            if not self._filematcher.visitdir(store._dir[:-1] or '.'):
                # Directory is filtered out by the narrow matcher; send
                # nothing for it.
                prunednodes = []
            else:
                # Drop nodes whose linkrev the receiver already has.
                frev, flr = store.rev, store.linkrev
                prunednodes = [n for n in nodes
                               if flr(frev(n)) not in commonrevs]

            if dir and not prunednodes:
                continue

            lookupfn = makelookupmflinknode(dir, nodes)

            if self._ellipses:
                revs = _sortnodesellipsis(store, prunednodes, cl,
                                          lookupfn)
            else:
                revs = _sortnodesnormal(store, prunednodes,
                                        self._reorder)

            it = deltagroup(
                self._repo, revs, store, False, lookupfn,
                self._deltaparentfn, self._builddeltaheader,
                ellipses=self._ellipses,
                units=_('manifests'),
                clrevtolocalrev=clrevtolocalrev,
                fullclnodes=self._fullclnodes,
                precomputedellipsis=self._precomputedellipsis)

            yield dir, it
1075 1079
    # The 'source' parameter is useful for extensions
    def generatefiles(self, changedfiles, commonrevs, source,
                      mfdicts, fastpathlinkrev, fnodes, clrevs):
        """Yield changegroup chunks for filelog revisions.

        Emits, per changed file, a file header chunk followed by the
        file's delta chunks and a close chunk.  ``fnodes`` holds the
        linknodes collected while generating manifests; ``mfdicts`` is
        only set for shallow ellipsis clones (see below).
        """
        changedfiles = list(filter(self._filematcher, changedfiles))

        if not fastpathlinkrev:
            def normallinknodes(unused, fname):
                return fnodes.get(fname, {})
        else:
            cln = self._repo.changelog.node

            def normallinknodes(store, fname):
                flinkrev = store.linkrev
                fnode = store.node
                revs = ((r, flinkrev(r)) for r in store)
                return dict((fnode(r), cln(lr))
                            for r, lr in revs if lr in clrevs)

        clrevtolocalrev = {}

        if self._isshallow:
            # In a shallow clone, the linknodes callback needs to also include
            # those file nodes that are in the manifests we sent but weren't
            # introduced by those manifests.
            commonctxs = [self._repo[c] for c in commonrevs]
            clrev = self._repo.changelog.rev

            # Defining this function has a side-effect of overriding the
            # function of the same name that was passed in as an argument.
            # TODO have caller pass in appropriate function.
            def linknodes(flog, fname):
                for c in commonctxs:
                    try:
                        fnode = c.filenode(fname)
                        clrevtolocalrev[c.rev()] = flog.rev(fnode)
                    except error.ManifestLookupError:
                        pass
                links = normallinknodes(flog, fname)
                if len(links) != len(mfdicts):
                    for mf, lr in mfdicts:
                        fnode = mf.get(fname, None)
                        if fnode in links:
                            links[fnode] = min(links[fnode], lr, key=clrev)
                        elif fnode:
                            links[fnode] = lr
                return links
        else:
            linknodes = normallinknodes

        repo = self._repo
        cl = repo.changelog
        progress = repo.ui.makeprogress(_('bundling'), unit=_('files'),
                                        total=len(changedfiles))
        for i, fname in enumerate(sorted(changedfiles)):
            filerevlog = repo.file(fname)
            if not filerevlog:
                raise error.Abort(_("empty or missing file data for %s") %
                                  fname)

            clrevtolocalrev.clear()

            linkrevnodes = linknodes(filerevlog, fname)
            # Lookup for filenodes, we collected the linkrev nodes above in the
            # fastpath case and with lookupmf in the slowpath case.
            def lookupfilelog(x):
                return linkrevnodes[x]

            # Drop revisions the receiver already has.
            frev, flr = filerevlog.rev, filerevlog.linkrev
            filenodes = [n for n in linkrevnodes
                         if flr(frev(n)) not in commonrevs]

            if filenodes:
                if self._ellipses:
                    revs = _sortnodesellipsis(filerevlog, filenodes,
                                              cl, lookupfilelog)
                else:
                    revs = _sortnodesnormal(filerevlog, filenodes,
                                            self._reorder)

                progress.update(i + 1, item=fname)
                h = _fileheader(fname)
                size = len(h)
                yield h

                it = deltagroup(
                    self._repo, revs, filerevlog, False, lookupfilelog,
                    self._deltaparentfn, self._builddeltaheader,
                    ellipses=self._ellipses,
                    clrevtolocalrev=clrevtolocalrev,
                    fullclnodes=self._fullclnodes,
                    precomputedellipsis=self._precomputedellipsis)

                for chunk in it:
                    size += len(chunk)
                    yield chunk

                close = closechunk()
                size += len(close)
                yield close

                self._verbosenote(_('%8.i %s\n') % (size, fname))
        progress.complete()
1178 1182
1179 1183 def _deltaparentprev(store, rev, p1, p2, prev):
1180 1184 """Resolve a delta parent to the previous revision.
1181 1185
1182 1186 Used for version 1 changegroups, which don't support generaldelta.
1183 1187 """
1184 1188 return prev
1185 1189
def _deltaparentgeneraldelta(store, rev, p1, p2, prev):
    """Resolve a delta parent when general deltas are supported."""
    dp = store.deltaparent(rev)

    if dp == nullrev:
        # Avoid sending full revisions when delta parent is null. Pick prev
        # in that case. It's tempting to pick p1 in this case, as p1 will
        # be smaller in the common case. However, computing a delta against
        # p1 may require resolving the raw text of p1, which could be
        # expensive. The revlog caches should have prev cached, meaning
        # less CPU for changegroup generation. There is likely room to add
        # a flag and/or config option to control this behavior.
        #
        # When the revlog is configured to use full snapshots for a
        # reason, stick to the full snapshot instead.
        base = prev if store.storedeltachains else nullrev
    elif dp not in (p1, p2, prev):
        # Pick prev when we can't be sure remote has the base revision.
        return prev
    else:
        base = dp

    # Fall back to a full snapshot if the chosen base can't serve as a
    # delta base for this revision.
    if base != nullrev and not store.candelta(base, rev):
        base = nullrev

    return base
1212 1216
1213 1217 def _deltaparentellipses(store, rev, p1, p2, prev):
1214 1218 """Resolve a delta parent when in ellipses mode."""
1215 1219 # TODO: send better deltas when in narrow mode.
1216 1220 #
1217 1221 # changegroup.group() loops over revisions to send,
1218 1222 # including revisions we'll skip. What this means is that
1219 1223 # `prev` will be a potentially useless delta base for all
1220 1224 # ellipsis nodes, as the client likely won't have it. In
1221 1225 # the future we should do bookkeeping about which nodes
1222 1226 # have been sent to the client, and try to be
1223 1227 # significantly smarter about delta bases. This is
1224 1228 # slightly tricky because this same code has to work for
1225 1229 # all revlogs, and we don't have the linkrev/linknode here.
1226 1230 return p1
1227 1231
def _makecg1packer(repo, filematcher, bundlecaps, ellipses=False,
                   shallow=False, ellipsisroots=None, fullnodes=None):
    """Construct a changegroup version 01 packer."""
    def builddeltaheader(d):
        # cg1 carries no explicit delta base in the header; the base is
        # implicitly the previously emitted revision.
        return _CHANGEGROUPV1_DELTA_HEADER.pack(
            d.node, d.p1node, d.p2node, d.linknode)

    return cgpacker(repo, filematcher, b'01',
                    deltaparentfn=_deltaparentprev,
                    allowreorder=None,
                    builddeltaheader=builddeltaheader,
                    manifestsend=b'',
                    bundlecaps=bundlecaps,
                    ellipses=ellipses,
                    shallow=shallow,
                    ellipsisroots=ellipsisroots,
                    fullnodes=fullnodes)
1243 1247
def _makecg2packer(repo, filematcher, bundlecaps, ellipses=False,
                   shallow=False, ellipsisroots=None, fullnodes=None):
    """Construct a changegroup version 02 packer."""
    def builddeltaheader(d):
        # cg2 headers carry an explicit delta base node.
        return _CHANGEGROUPV2_DELTA_HEADER.pack(
            d.node, d.p1node, d.p2node, d.basenode, d.linknode)

    # Since generaldelta is directly supported by cg2, reordering
    # generally doesn't help, so we disable it by default (treating
    # bundle.reorder=auto just like bundle.reorder=False).
    return cgpacker(repo, filematcher, b'02',
                    deltaparentfn=_deltaparentgeneraldelta,
                    allowreorder=False,
                    builddeltaheader=builddeltaheader,
                    manifestsend=b'',
                    bundlecaps=bundlecaps,
                    ellipses=ellipses,
                    shallow=shallow,
                    ellipsisroots=ellipsisroots,
                    fullnodes=fullnodes)
1262 1266
def _makecg3packer(repo, filematcher, bundlecaps, ellipses=False,
                   shallow=False, ellipsisroots=None, fullnodes=None):
    """Construct a changegroup version 03 packer."""
    def builddeltaheader(d):
        # cg3 adds revision flags to the delta header.
        return _CHANGEGROUPV3_DELTA_HEADER.pack(
            d.node, d.p1node, d.p2node, d.basenode, d.linknode, d.flags)

    # Ellipsis serving requires the narrow-specific delta parent policy.
    deltaparentfn = (_deltaparentellipses if ellipses
                     else _deltaparentgeneraldelta)

    return cgpacker(repo, filematcher, b'03',
                    deltaparentfn=deltaparentfn,
                    allowreorder=False,
                    builddeltaheader=builddeltaheader,
                    manifestsend=closechunk(),
                    bundlecaps=bundlecaps,
                    ellipses=ellipses,
                    shallow=shallow,
                    ellipsisroots=ellipsisroots,
                    fullnodes=fullnodes)
1281 1285
# Maps changegroup version identifier to (packer factory, unpacker class).
_packermap = {'01': (_makecg1packer, cg1unpacker),
              # cg2 adds support for exchanging generaldelta
              '02': (_makecg2packer, cg2unpacker),
              # cg3 adds support for exchanging revlog flags and treemanifests
              '03': (_makecg3packer, cg3unpacker),
}
1288 1292
def allsupportedversions(repo):
    """Return the set of changegroup versions this code can handle.

    Version 03 is only advertised when treemanifests are in use or one
    of the experimental knobs enabling it is set.
    """
    versions = set(_packermap)
    wantv03 = (repo.ui.configbool('experimental', 'changegroup3')
               or repo.ui.configbool('experimental', 'treemanifest')
               or 'treemanifest' in repo.requirements)
    if not wantv03:
        versions.discard('03')
    return versions
1296 1300
# Changegroup versions that can be applied to the repo
def supportedincomingversions(repo):
    """Return changegroup versions ``repo`` can receive.

    Currently identical to allsupportedversions(); kept as a separate
    entry point so incoming/outgoing policies can diverge.
    """
    return allsupportedversions(repo)
1300 1304
# Changegroup versions that can be created from the repo
def supportedoutgoingversions(repo):
    """Return changegroup versions ``repo`` can produce.

    Versions 01 and 02 are withheld when any repo requirement makes
    them unusable:

    - treemanifest: 01/02 support only flat manifests, and converting
      between flat and tree manifests on the fly is too expensive
      (tree manifests hash differently, so all of history would need
      converting);
    - narrow: 01/02 don't support revlog flags, needed for stripping
      and unbundling to work;
    - lfs: 01/02 can't mark LFS entries with REVIDX_EXTSTORED.
    """
    versions = allsupportedversions(repo)

    flagless = ('treemanifest' in repo.requirements
                or repository.NARROW_REQUIREMENT in repo.requirements
                or LFS_REQUIREMENT in repo.requirements)
    if flagless:
        versions -= {'01', '02'}

    return versions
1324 1328
def localversion(repo):
    """Return the best changegroup version for local-only bundles."""
    # Finds the best version to use for bundles that are meant to be used
    # locally, such as those from strip and shelve, and temporary bundles.
    return max(supportedoutgoingversions(repo))
1329 1333
def safeversion(repo):
    """Return the smallest changegroup version safe for all repo clients.

    Any client able to use this repo is assumed to support the returned
    version; for example, every hg that supports generaldelta also
    supports changegroup 02.
    """
    candidates = supportedoutgoingversions(repo)
    if 'generaldelta' in repo.requirements:
        # generaldelta clients always understand cg02, so drop cg01.
        candidates.discard('01')
    assert candidates
    return min(candidates)
1339 1343
def getbundler(version, repo, bundlecaps=None, filematcher=None,
               ellipses=False, shallow=False, ellipsisroots=None,
               fullnodes=None):
    """Return a changegroup packer for ``version``.

    Validates that the requested version supports the requested
    features (sparse matchers, ellipsis nodes) before delegating to the
    per-version factory registered in ``_packermap``.

    Raises ProgrammingError for a sparse matcher on cg01 and Abort when
    ellipsis nodes are requested with a pre-cg3 version.
    """
    assert version in supportedoutgoingversions(repo)

    if filematcher is None:
        filematcher = matchmod.alwaysmatcher(repo.root, '')

    if version == '01' and not filematcher.always():
        raise error.ProgrammingError('version 01 changegroups do not support '
                                     'sparse file matchers')

    if ellipses and version in (b'01', b'02'):
        raise error.Abort(
            _('ellipsis nodes require at least cg3 on client and server, '
              'but negotiated version %s') % version)

    # Requested files could include files not in the local store. So
    # filter those out.
    filematcher = matchmod.intersectmatchers(repo.narrowmatch(),
                                             filematcher)

    fn = _packermap[version][0]
    return fn(repo, filematcher, bundlecaps, ellipses=ellipses,
              shallow=shallow, ellipsisroots=ellipsisroots,
              fullnodes=fullnodes)
1366 1370
def getunbundler(version, fh, alg, extras=None):
    """Return an unpacker for ``version`` reading from stream ``fh``.

    ``alg`` is the compression algorithm of the stream.
    """
    return _packermap[version][1](fh, alg, extras=extras)
1369 1373
def _changegroupinfo(repo, nodes, source):
    """Report outgoing changeset counts (and, when debugging, nodes)."""
    ui = repo.ui
    if ui.verbose or source == 'bundle':
        ui.status(_("%d changesets found\n") % len(nodes))
    if ui.debugflag:
        ui.debug("list of changesets:\n")
        for node in nodes:
            ui.debug("%s\n" % hex(node))
1377 1381
def makechangegroup(repo, outgoing, version, source, fastpath=False,
                    bundlecaps=None):
    """Build a changegroup and wrap it in an unbundler for local reuse."""
    cgstream = makestream(repo, outgoing, version, source,
                          fastpath=fastpath, bundlecaps=bundlecaps)
    return getunbundler(version, util.chunkbuffer(cgstream), None,
                        {'clcount': len(outgoing.missing) })
1384 1388
def makestream(repo, outgoing, version, source, fastpath=False,
               bundlecaps=None, filematcher=None):
    """Return a stream of changegroup chunks for ``outgoing`` changesets.

    Fires the 'preoutgoing' hook (which may abort) before generation
    begins.
    """
    bundler = getbundler(version, repo, bundlecaps=bundlecaps,
                         filematcher=filematcher)

    repo = repo.unfiltered()
    commonrevs = outgoing.common
    csets = outgoing.missing
    heads = outgoing.missingheads
    # We go through the fast path if we get told to, or if all (unfiltered
    # heads have been requested (since we then know there all linkrevs will
    # be pulled by the client).
    heads.sort()
    fastpathlinkrev = fastpath or (
        repo.filtername is None and heads == sorted(repo.heads()))

    repo.hook('preoutgoing', throw=True, source=source)
    _changegroupinfo(repo, csets, source)
    return bundler.generate(commonrevs, csets, fastpathlinkrev, source)
1404 1408
def _addchangegroupfiles(repo, source, revmap, trp, expectedfiles, needfiles):
    """Apply the filelog portion of an incoming changegroup to ``repo``.

    ``source`` is the unbundler positioned at the file section; ``revmap``
    and ``trp`` are forwarded to filelog.addgroup (linkrev mapping and
    transaction, respectively — confirm against callers).  ``needfiles``
    maps filename -> set of nodes that must be delivered; anything extra
    or missing triggers an Abort.

    Returns a (revisions added, files touched) tuple.
    """
    revisions = 0
    files = 0
    progress = repo.ui.makeprogress(_('files'), unit=_('files'),
                                    total=expectedfiles)
    # iter(..., {}) stops at the empty header that terminates the section.
    for chunkdata in iter(source.filelogheader, {}):
        files += 1
        f = chunkdata["filename"]
        repo.ui.debug("adding %s revisions\n" % f)
        progress.increment()
        fl = repo.file(f)
        o = len(fl)
        try:
            deltas = source.deltaiter()
            if not fl.addgroup(deltas, revmap, trp):
                raise error.Abort(_("received file revlog group is empty"))
        except error.CensoredBaseError as e:
            raise error.Abort(_("received delta base is censored: %s") % e)
        revisions += len(fl) - o
        if f in needfiles:
            needs = needfiles[f]
            # Tick off every newly added node; anything we didn't ask for
            # is an error.
            for new in pycompat.xrange(o, len(fl)):
                n = fl.node(new)
                if n in needs:
                    needs.remove(n)
                else:
                    raise error.Abort(
                        _("received spurious file revlog entry"))
            if not needs:
                del needfiles[f]
    progress.complete()

    # Anything still listed in needfiles must already exist locally.
    for f, needs in needfiles.iteritems():
        fl = repo.file(f)
        for n in needs:
            try:
                fl.rev(n)
            except error.LookupError:
                raise error.Abort(
                    _('missing file data for %s:%s - run hg verify') %
                    (f, hex(n)))

    return revisions, files
General Comments 0
You need to be logged in to leave comments. Login now