changegroup: reintroduce some comments that have gotten lost over the years...
Augie Fackler
r39769:5adc5fe4 default
@@ -1,1401 +1,1411 b''
1 1 # changegroup.py - Mercurial changegroup manipulation functions
2 2 #
3 3 # Copyright 2006 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import os
11 11 import struct
12 12 import weakref
13 13
14 14 from .i18n import _
15 15 from .node import (
16 16 hex,
17 17 nullid,
18 18 nullrev,
19 19 short,
20 20 )
21 21
22 22 from .thirdparty import (
23 23 attr,
24 24 )
25 25
26 26 from . import (
27 27 dagop,
28 28 error,
29 29 match as matchmod,
30 30 mdiff,
31 31 phases,
32 32 pycompat,
33 33 repository,
34 34 util,
35 35 )
36 36
37 37 from .utils import (
38 38 interfaceutil,
39 39 stringutil,
40 40 )
41 41
42 42 _CHANGEGROUPV1_DELTA_HEADER = struct.Struct("20s20s20s20s")
43 43 _CHANGEGROUPV2_DELTA_HEADER = struct.Struct("20s20s20s20s20s")
44 44 _CHANGEGROUPV3_DELTA_HEADER = struct.Struct(">20s20s20s20s20sH")
45 45
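The three delta-header structs above define what each changegroup version carries per revision: cg1 packs four raw 20-byte nodes (node, p1, p2, linknode), cg2 adds an explicit delta-base node, and cg3 appends a big-endian 16-bit flags field. As a minimal standalone sketch (not the unpacker API), decoding a cg2 header looks like this:

    import struct

    CG2_HEADER = struct.Struct("20s20s20s20s20s")

    def parse_cg2_header(buf):
        # Five raw 20-byte SHA-1 nodes, in wire order.
        node, p1, p2, deltabase, linknode = CG2_HEADER.unpack_from(buf)
        return {'node': node, 'p1': p1, 'p2': p2,
                'deltabase': deltabase, 'linknode': linknode}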
46 46 LFS_REQUIREMENT = 'lfs'
47 47
48 48 readexactly = util.readexactly
49 49
50 50 def getchunk(stream):
51 51 """return the next chunk from stream as a string"""
52 52 d = readexactly(stream, 4)
53 53 l = struct.unpack(">l", d)[0]
54 54 if l <= 4:
55 55 if l:
56 56 raise error.Abort(_("invalid chunk length %d") % l)
57 57 return ""
58 58 return readexactly(stream, l - 4)
59 59
60 60 def chunkheader(length):
61 61 """return a changegroup chunk header (string)"""
62 62 return struct.pack(">l", length + 4)
63 63
64 64 def closechunk():
65 65 """return a changegroup chunk header (string) for a zero-length chunk"""
66 66 return struct.pack(">l", 0)
67 67
68 68 def _fileheader(path):
69 69 """Obtain a changegroup chunk header for a named path."""
70 70 return chunkheader(len(path)) + path
71 71
72 72 def writechunks(ui, chunks, filename, vfs=None):
73 73 """Write chunks to a file and return its filename.
74 74
75 75 The stream is assumed to be a bundle file.
76 76 Existing files will not be overwritten.
77 77 If no filename is specified, a temporary file is created.
78 78 """
79 79 fh = None
80 80 cleanup = None
81 81 try:
82 82 if filename:
83 83 if vfs:
84 84 fh = vfs.open(filename, "wb")
85 85 else:
86 86 # Increase default buffer size because default is usually
87 87 # small (4k is common on Linux).
88 88 fh = open(filename, "wb", 131072)
89 89 else:
90 90 fd, filename = pycompat.mkstemp(prefix="hg-bundle-", suffix=".hg")
91 91 fh = os.fdopen(fd, r"wb")
92 92 cleanup = filename
93 93 for c in chunks:
94 94 fh.write(c)
95 95 cleanup = None
96 96 return filename
97 97 finally:
98 98 if fh is not None:
99 99 fh.close()
100 100 if cleanup is not None:
101 101 if filename and vfs:
102 102 vfs.unlink(cleanup)
103 103 else:
104 104 os.unlink(cleanup)
105 105
106 106 class cg1unpacker(object):
107 107 """Unpacker for cg1 changegroup streams.
108 108
109 109 A changegroup unpacker handles the framing of the revision data in
110 110 the wire format. Most consumers will want to use the apply()
111 111 method to add the changes from the changegroup to a repository.
112 112
113 113 If you're forwarding a changegroup unmodified to another consumer,
114 114 use getchunks(), which returns an iterator of changegroup
115 115 chunks. This is mostly useful for cases where you need to know the
116 116 data stream has ended by observing the end of the changegroup.
117 117
118 118 deltachunk() is useful only if you're applying delta data. Most
119 119 consumers should prefer apply() instead.
120 120
121 121 A few other public methods exist. Those are used only for
122 122 bundlerepo and some debug commands - their use is discouraged.
123 123 """
124 124 deltaheader = _CHANGEGROUPV1_DELTA_HEADER
125 125 deltaheadersize = deltaheader.size
126 126 version = '01'
127 127 _grouplistcount = 1 # One list of files after the manifests
128 128
129 129 def __init__(self, fh, alg, extras=None):
130 130 if alg is None:
131 131 alg = 'UN'
132 132 if alg not in util.compengines.supportedbundletypes:
133 133 raise error.Abort(_('unknown stream compression type: %s')
134 134 % alg)
135 135 if alg == 'BZ':
136 136 alg = '_truncatedBZ'
137 137
138 138 compengine = util.compengines.forbundletype(alg)
139 139 self._stream = compengine.decompressorreader(fh)
140 140 self._type = alg
141 141 self.extras = extras or {}
142 142 self.callback = None
143 143
144 144 # These methods (compressed, read, seek, tell) all appear to only
145 145 # be used by bundlerepo, but it's a little hard to tell.
146 146 def compressed(self):
147 147 return self._type is not None and self._type != 'UN'
148 148 def read(self, l):
149 149 return self._stream.read(l)
150 150 def seek(self, pos):
151 151 return self._stream.seek(pos)
152 152 def tell(self):
153 153 return self._stream.tell()
154 154 def close(self):
155 155 return self._stream.close()
156 156
157 157 def _chunklength(self):
158 158 d = readexactly(self._stream, 4)
159 159 l = struct.unpack(">l", d)[0]
160 160 if l <= 4:
161 161 if l:
162 162 raise error.Abort(_("invalid chunk length %d") % l)
163 163 return 0
164 164 if self.callback:
165 165 self.callback()
166 166 return l - 4
167 167
168 168 def changelogheader(self):
169 169 """v10 does not have a changelog header chunk"""
170 170 return {}
171 171
172 172 def manifestheader(self):
173 173 """v10 does not have a manifest header chunk"""
174 174 return {}
175 175
176 176 def filelogheader(self):
177 177 """return the header of the filelogs chunk, v10 only has the filename"""
178 178 l = self._chunklength()
179 179 if not l:
180 180 return {}
181 181 fname = readexactly(self._stream, l)
182 182 return {'filename': fname}
183 183
184 184 def _deltaheader(self, headertuple, prevnode):
185 185 node, p1, p2, cs = headertuple
186 186 if prevnode is None:
187 187 deltabase = p1
188 188 else:
189 189 deltabase = prevnode
190 190 flags = 0
191 191 return node, p1, p2, deltabase, cs, flags
192 192
193 193 def deltachunk(self, prevnode):
194 194 l = self._chunklength()
195 195 if not l:
196 196 return {}
197 197 headerdata = readexactly(self._stream, self.deltaheadersize)
198 198 header = self.deltaheader.unpack(headerdata)
199 199 delta = readexactly(self._stream, l - self.deltaheadersize)
200 200 node, p1, p2, deltabase, cs, flags = self._deltaheader(header, prevnode)
201 201 return (node, p1, p2, cs, deltabase, delta, flags)
202 202
203 203 def getchunks(self):
204 204 """returns all the chunks contained in the bundle
205 205
206 206 Used when you need to forward the binary stream to a file or another
207 207 network API. To do so, it parses the changegroup data; otherwise it would
208 208 block in case of sshrepo because it doesn't know the end of the stream.
209 209 """
210 210 # For changegroup 1 and 2, we expect 3 parts: changelog, manifestlog,
211 211 # and a list of filelogs. For changegroup 3, we expect 4 parts:
212 212 # changelog, manifestlog, a list of tree manifestlogs, and a list of
213 213 # filelogs.
214 214 #
215 215 # Changelog and manifestlog parts are terminated with empty chunks. The
216 216 # tree and file parts are a list of entry sections. Each entry section
217 217 # is a series of chunks terminating in an empty chunk. The list of these
218 218 # entry sections is terminated in yet another empty chunk, so we know
219 219 # we've reached the end of the tree/file list when we reach an empty
220 220 # chunk that was preceded by no non-empty chunks.
221 221
222 222 parts = 0
223 223 while parts < 2 + self._grouplistcount:
224 224 noentries = True
225 225 while True:
226 226 chunk = getchunk(self)
227 227 if not chunk:
228 228 # The first two empty chunks represent the end of the
229 229 # changelog and the manifestlog portions. The remaining
230 230 # empty chunks represent either A) the end of individual
231 231 # tree or file entries in the file list, or B) the end of
232 232 # the entire list. It's the end of the entire list if there
233 233 # were no entries (i.e. noentries is True).
234 234 if parts < 2:
235 235 parts += 1
236 236 elif noentries:
237 237 parts += 1
238 238 break
239 239 noentries = False
240 240 yield chunkheader(len(chunk))
241 241 pos = 0
242 242 while pos < len(chunk):
243 243 next = pos + 2**20
244 244 yield chunk[pos:next]
245 245 pos = next
246 246 yield closechunk()
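To make the part counting above concrete: a cg1/cg2 stream has 2 + 1 empty-chunk-terminated parts (changelog, manifests, one list of filelogs), and cg3 has 2 + 2 (plus a list of tree manifests). An illustrative walker applying the same termination rule, reusing the deframe() sketch from earlier:

    def countparts(stream, grouplistcount=1):
        # grouplistcount mirrors _grouplistcount: 1 for cg1/cg2, 2 for cg3.
        parts = 0
        while parts < 2 + grouplistcount:
            noentries = True
            while True:
                chunk = deframe(stream)
                if not chunk:
                    # An empty chunk ends the changelog/manifest part, one
                    # entry section, or (if nothing preceded it) the list.
                    if parts < 2 or noentries:
                        parts += 1
                    break
                noentries = False
        return parts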
247 247
248 248 def _unpackmanifests(self, repo, revmap, trp, prog):
249 249 self.callback = prog.increment
250 250 # no need to check for empty manifest group here:
251 251 # if the result of the merge of 1 and 2 is the same in 3 and 4,
252 252 # no new manifest will be created and the manifest group will
253 253 # be empty during the pull
254 254 self.manifestheader()
255 255 deltas = self.deltaiter()
256 256 repo.manifestlog.getstorage(b'').addgroup(deltas, revmap, trp)
257 257 prog.complete()
258 258 self.callback = None
259 259
260 260 def apply(self, repo, tr, srctype, url, targetphase=phases.draft,
261 261 expectedtotal=None):
262 262 """Add the changegroup returned by source.read() to this repo.
263 263 srctype is a string like 'push', 'pull', or 'unbundle'. url is
264 264 the URL of the repo where this changegroup is coming from.
265 265
266 266 Return an integer summarizing the change to this repo:
267 267 - nothing changed or no source: 0
268 268 - more heads than before: 1+added heads (2..n)
269 269 - fewer heads than before: -1-removed heads (-2..-n)
270 270 - number of heads stays the same: 1
271 271 """
272 272 repo = repo.unfiltered()
273 273 def csmap(x):
274 274 repo.ui.debug("add changeset %s\n" % short(x))
275 275 return len(cl)
276 276
277 277 def revmap(x):
278 278 return cl.rev(x)
279 279
280 280 changesets = files = revisions = 0
281 281
282 282 try:
283 283 # The transaction may already carry source information. In this
284 284 # case we use the top level data. We overwrite the argument
285 285 # because we need to use the top level value (if they exist)
286 286 # in this function.
287 287 srctype = tr.hookargs.setdefault('source', srctype)
288 288 url = tr.hookargs.setdefault('url', url)
289 289 repo.hook('prechangegroup',
290 290 throw=True, **pycompat.strkwargs(tr.hookargs))
291 291
292 292 # write changelog data to temp files so concurrent readers
293 293 # will not see an inconsistent view
294 294 cl = repo.changelog
295 295 cl.delayupdate(tr)
296 296 oldheads = set(cl.heads())
297 297
298 298 trp = weakref.proxy(tr)
299 299 # pull off the changeset group
300 300 repo.ui.status(_("adding changesets\n"))
301 301 clstart = len(cl)
302 302 progress = repo.ui.makeprogress(_('changesets'), unit=_('chunks'),
303 303 total=expectedtotal)
304 304 self.callback = progress.increment
305 305
306 306 efiles = set()
307 307 def onchangelog(cl, node):
308 308 efiles.update(cl.readfiles(node))
309 309
310 310 self.changelogheader()
311 311 deltas = self.deltaiter()
312 312 cgnodes = cl.addgroup(deltas, csmap, trp, addrevisioncb=onchangelog)
313 313 efiles = len(efiles)
314 314
315 315 if not cgnodes:
316 316 repo.ui.develwarn('applied empty changelog from changegroup',
317 317 config='warn-empty-changegroup')
318 318 clend = len(cl)
319 319 changesets = clend - clstart
320 320 progress.complete()
321 321 self.callback = None
322 322
323 323 # pull off the manifest group
324 324 repo.ui.status(_("adding manifests\n"))
325 325 # We know that we'll never have more manifests than we had
326 326 # changesets.
327 327 progress = repo.ui.makeprogress(_('manifests'), unit=_('chunks'),
328 328 total=changesets)
329 329 self._unpackmanifests(repo, revmap, trp, progress)
330 330
331 331 needfiles = {}
332 332 if repo.ui.configbool('server', 'validate'):
333 333 cl = repo.changelog
334 334 ml = repo.manifestlog
335 335 # validate incoming csets have their manifests
336 336 for cset in pycompat.xrange(clstart, clend):
337 337 mfnode = cl.changelogrevision(cset).manifest
338 338 mfest = ml[mfnode].readdelta()
339 339 # store file nodes we must see
340 340 for f, n in mfest.iteritems():
341 341 needfiles.setdefault(f, set()).add(n)
342 342
343 343 # process the files
344 344 repo.ui.status(_("adding file changes\n"))
345 345 newrevs, newfiles = _addchangegroupfiles(
346 346 repo, self, revmap, trp, efiles, needfiles)
347 347 revisions += newrevs
348 348 files += newfiles
349 349
350 350 deltaheads = 0
351 351 if oldheads:
352 352 heads = cl.heads()
353 353 deltaheads = len(heads) - len(oldheads)
354 354 for h in heads:
355 355 if h not in oldheads and repo[h].closesbranch():
356 356 deltaheads -= 1
357 357 htext = ""
358 358 if deltaheads:
359 359 htext = _(" (%+d heads)") % deltaheads
360 360
361 361 repo.ui.status(_("added %d changesets"
362 362 " with %d changes to %d files%s\n")
363 363 % (changesets, revisions, files, htext))
364 364 repo.invalidatevolatilesets()
365 365
366 366 if changesets > 0:
367 367 if 'node' not in tr.hookargs:
368 368 tr.hookargs['node'] = hex(cl.node(clstart))
369 369 tr.hookargs['node_last'] = hex(cl.node(clend - 1))
370 370 hookargs = dict(tr.hookargs)
371 371 else:
372 372 hookargs = dict(tr.hookargs)
373 373 hookargs['node'] = hex(cl.node(clstart))
374 374 hookargs['node_last'] = hex(cl.node(clend - 1))
375 375 repo.hook('pretxnchangegroup',
376 376 throw=True, **pycompat.strkwargs(hookargs))
377 377
378 378 added = [cl.node(r) for r in pycompat.xrange(clstart, clend)]
379 379 phaseall = None
380 380 if srctype in ('push', 'serve'):
381 381 # Old servers cannot push the boundary themselves.
382 382 # New servers won't push the boundary if the changeset already
383 383 # exists locally as secret
384 384 #
385 385 # We should not use `added` here but the list of all changes in
386 386 # the bundle
387 387 if repo.publishing():
388 388 targetphase = phaseall = phases.public
389 389 else:
390 390 # closer target phase computation
391 391
392 392 # Those changesets have been pushed from the
393 393 # outside, their phases are going to be pushed
394 394 # alongside. Therefore `targetphase` is
395 395 # ignored.
396 396 targetphase = phaseall = phases.draft
397 397 if added:
398 398 phases.registernew(repo, tr, targetphase, added)
399 399 if phaseall is not None:
400 400 phases.advanceboundary(repo, tr, phaseall, cgnodes)
401 401
402 402 if changesets > 0:
403 403
404 404 def runhooks():
405 405 # These hooks run when the lock releases, not when the
406 406 # transaction closes. So it's possible for the changelog
407 407 # to have changed since we last saw it.
408 408 if clstart >= len(repo):
409 409 return
410 410
411 411 repo.hook("changegroup", **pycompat.strkwargs(hookargs))
412 412
413 413 for n in added:
414 414 args = hookargs.copy()
415 415 args['node'] = hex(n)
416 416 del args['node_last']
417 417 repo.hook("incoming", **pycompat.strkwargs(args))
418 418
419 419 newheads = [h for h in repo.heads()
420 420 if h not in oldheads]
421 421 repo.ui.log("incoming",
422 422 "%d incoming changes - new heads: %s\n",
423 423 len(added),
424 424 ', '.join([hex(c[:6]) for c in newheads]))
425 425
426 426 tr.addpostclose('changegroup-runhooks-%020i' % clstart,
427 427 lambda tr: repo._afterlock(runhooks))
428 428 finally:
429 429 repo.ui.flush()
430 430 # never return 0 here:
431 431 if deltaheads < 0:
432 432 ret = deltaheads - 1
433 433 else:
434 434 ret = deltaheads + 1
435 435 return ret
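A small hypothetical helper (not part of this module) showing how callers can decode the return-value convention documented in apply():

    def describeapply(ret):
        if ret == 0:
            return 'nothing changed'
        if ret == 1:
            return 'head count unchanged'
        if ret > 1:
            return '%d head(s) added' % (ret - 1)
        return '%d head(s) removed' % (-ret - 1)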
436 436
437 437 def deltaiter(self):
438 438 """
439 439 returns an iterator of the deltas in this changegroup
440 440
441 441 Useful for passing to the underlying storage system to be stored.
442 442 """
443 443 chain = None
444 444 for chunkdata in iter(lambda: self.deltachunk(chain), {}):
445 445 # Chunkdata: (node, p1, p2, cs, deltabase, delta, flags)
446 446 yield chunkdata
447 447 chain = chunkdata[0]
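Note how deltaiter() threads `chain` back into deltachunk(): cg1 carries no explicit delta base, so each delta applies against the previous node in the group (or p1 for the first entry). A hedged consumption sketch, where `unpacker` and `storedelta` are illustrative names:

    for node, p1, p2, linknode, deltabase, delta, flags in unpacker.deltaiter():
        # For cg1 the deltabase was inferred by _deltaheader(); for
        # cg2/cg3 it came explicitly off the wire.
        storedelta(node, deltabase, delta)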
448 448
449 449 class cg2unpacker(cg1unpacker):
450 450 """Unpacker for cg2 streams.
451 451
452 452 cg2 streams add support for generaldelta, so the delta header
453 453 format is slightly different. All other features about the data
454 454 remain the same.
455 455 """
456 456 deltaheader = _CHANGEGROUPV2_DELTA_HEADER
457 457 deltaheadersize = deltaheader.size
458 458 version = '02'
459 459
460 460 def _deltaheader(self, headertuple, prevnode):
461 461 node, p1, p2, deltabase, cs = headertuple
462 462 flags = 0
463 463 return node, p1, p2, deltabase, cs, flags
464 464
465 465 class cg3unpacker(cg2unpacker):
466 466 """Unpacker for cg3 streams.
467 467
468 468 cg3 streams add support for exchanging treemanifests and revlog
469 469 flags. It adds the revlog flags to the delta header and an empty chunk
470 470 separating manifests and files.
471 471 """
472 472 deltaheader = _CHANGEGROUPV3_DELTA_HEADER
473 473 deltaheadersize = deltaheader.size
474 474 version = '03'
475 475 _grouplistcount = 2 # One list of manifests and one list of files
476 476
477 477 def _deltaheader(self, headertuple, prevnode):
478 478 node, p1, p2, deltabase, cs, flags = headertuple
479 479 return node, p1, p2, deltabase, cs, flags
480 480
481 481 def _unpackmanifests(self, repo, revmap, trp, prog):
482 482 super(cg3unpacker, self)._unpackmanifests(repo, revmap, trp, prog)
483 483 for chunkdata in iter(self.filelogheader, {}):
484 484 # If we get here, there are directory manifests in the changegroup
485 485 d = chunkdata["filename"]
486 486 repo.ui.debug("adding %s revisions\n" % d)
487 487 deltas = self.deltaiter()
488 488 if not repo.manifestlog.getstorage(d).addgroup(deltas, revmap, trp):
489 489 raise error.Abort(_("received dir revlog group is empty"))
490 490
491 491 class headerlessfixup(object):
492 492 def __init__(self, fh, h):
493 493 self._h = h
494 494 self._fh = fh
495 495 def read(self, n):
496 496 if self._h:
497 497 d, self._h = self._h[:n], self._h[n:]
498 498 if len(d) < n:
499 499 d += readexactly(self._fh, n - len(d))
500 500 return d
501 501 return readexactly(self._fh, n)
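headerlessfixup glues back bytes that were consumed while sniffing a stream's type, so downstream readers still see the whole stream. Illustrative use (the six-byte probe is an assumption, not this module's API):

    magic = fh.read(6)               # bytes consumed while probing the format
    fh = headerlessfixup(fh, magic)  # read() now replays them first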
502 502
503 503 @interfaceutil.implementer(repository.irevisiondeltarequest)
504 504 @attr.s(slots=True, frozen=True)
505 505 class revisiondeltarequest(object):
506 506 node = attr.ib()
507 507 linknode = attr.ib()
508 508 p1node = attr.ib()
509 509 p2node = attr.ib()
510 510 basenode = attr.ib()
511 511 ellipsis = attr.ib(default=False)
512 512
513 513 def _revisiondeltatochunks(delta, headerfn):
514 514 """Serialize a revisiondelta to changegroup chunks."""
515 515
516 516 # The captured revision delta may be encoded as a delta against
517 517 # a base revision or as a full revision. The changegroup format
518 518 # requires that everything on the wire be deltas. So for full
519 519 # revisions, we need to invent a header that says to rewrite
520 520 # data.
521 521
522 522 if delta.delta is not None:
523 523 prefix, data = b'', delta.delta
524 524 elif delta.basenode == nullid:
525 525 data = delta.revision
526 526 prefix = mdiff.trivialdiffheader(len(data))
527 527 else:
528 528 data = delta.revision
529 529 prefix = mdiff.replacediffheader(delta.baserevisionsize,
530 530 len(data))
531 531
532 532 meta = headerfn(delta)
533 533
534 534 yield chunkheader(len(meta) + len(prefix) + len(data))
535 535 yield meta
536 536 if prefix:
537 537 yield prefix
538 538 yield data
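The "invented header" described above comes from mdiff: a bdiff hunk is three big-endian 32-bit integers (start, end, new length) followed by the replacement text, so a full revision can travel as a single hunk rewriting the entire base. A sketch under that assumption:

    import struct

    def fulltextasdelta(baselen, data):
        # One hunk replacing bytes [0, baselen) of the base with `data`;
        # baselen == 0 gives the trivial 'delta against nothing' header.
        return struct.pack(">lll", 0, baselen, len(data)) + data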
539 539
540 540 def _sortnodesnormal(store, nodes, reorder):
541 541 """Sort nodes for changegroup generation and turn into revnums."""
542 542 # for generaldelta revlogs, we linearize the revs; this will both be
543 543 # much quicker and generate a much smaller bundle
544 544 if (store._generaldelta and reorder is None) or reorder:
545 545 revs = set(store.rev(n) for n in nodes)
546 546 return dagop.linearize(revs, store.parentrevs)
547 547 else:
548 548 return sorted([store.rev(n) for n in nodes])
549 549
550 550 def _sortnodesellipsis(store, nodes, cl, lookup):
551 551 """Sort nodes for changegroup generation and turn into revnums."""
552 552 # Ellipses serving mode.
553 553 #
554 554 # In a perfect world, we'd generate better ellipsis-ified graphs
555 555 # for non-changelog revlogs. In practice, we haven't started doing
556 556 # that yet, so the resulting DAGs for the manifestlog and filelogs
557 557 # are actually full of bogus parentage on all the ellipsis
558 558 # nodes. This has the side effect that, while the contents are
559 559 # correct, the individual DAGs might be completely out of whack in
560 560 # a case like 882681bc3166 and its ancestors (back about 10
561 561 # revisions or so) in the main hg repo.
562 562 #
563 563 # The one invariant we *know* holds is that the new (potentially
564 564 # bogus) DAG shape will be valid if we order the nodes in the
565 565 # order that they're introduced in dramatis personae by the
566 566 # changelog, so what we do is we sort the non-changelog histories
567 567 # by the order in which they are used by the changelog.
568 568 key = lambda n: cl.rev(lookup(n))
569 569 return [store.rev(n) for n in sorted(nodes, key=key)]
570 570
571 571 def _makenarrowdeltarequest(cl, store, ischangelog, rev, node, linkrev,
572 572 linknode, clrevtolocalrev, fullclnodes,
573 573 precomputedellipsis):
574 574 linkparents = precomputedellipsis[linkrev]
575 575 def local(clrev):
576 576 """Turn a changelog revnum into a local revnum.
577 577
578 578 The ellipsis dag is stored as revnums on the changelog,
579 579 but when we're producing ellipsis entries for
580 580 non-changelog revlogs, we need to turn those numbers into
581 581 something local. This does that for us, and during the
582 582 changelog sending phase will also expand the stored
583 583 mappings as needed.
584 584 """
585 585 if clrev == nullrev:
586 586 return nullrev
587 587
588 588 if ischangelog:
589 589 return clrev
590 590
591 591 # Walk the ellipsis-ized changelog breadth-first looking for a
592 592 # change that has been linked from the current revlog.
593 593 #
594 594 # For a flat manifest revlog only a single step should be necessary
595 595 # as all relevant changelog entries are relevant to the flat
596 596 # manifest.
597 597 #
598 598 # For a filelog or tree manifest dirlog, however, not every changelog
599 599 # entry will have been relevant, so we need to skip some changelog
600 600 # nodes even after ellipsis-izing.
601 601 walk = [clrev]
602 602 while walk:
603 603 p = walk[0]
604 604 walk = walk[1:]
605 605 if p in clrevtolocalrev:
606 606 return clrevtolocalrev[p]
607 607 elif p in fullclnodes:
608 608 walk.extend([pp for pp in cl.parentrevs(p)
609 609 if pp != nullrev])
610 610 elif p in precomputedellipsis:
611 611 walk.extend([pp for pp in precomputedellipsis[p]
612 612 if pp != nullrev])
613 613 else:
614 614 # In this case, we've got an ellipsis with parents
615 615 # outside the current bundle (likely an
616 616 # incremental pull). We "know" that we can use the
617 617 # value of this same revlog at whatever revision
618 618 # is pointed to by linknode. "Know" is in scare
619 619 # quotes because I haven't done enough examination
620 620 # of edge cases to convince myself this is really
621 621 # a fact - it works for all the (admittedly
622 622 # thorough) cases in our testsuite, but I would be
623 623 # somewhat unsurprised to find a case in the wild
624 624 # where this breaks down a bit. That said, I don't
625 625 # know if it would hurt anything.
626 626 for i in pycompat.xrange(rev, 0, -1):
627 627 if store.linkrev(i) == clrev:
628 628 return i
629 629 # We failed to resolve a parent for this node, so
630 630 # we crash the changegroup construction.
631 631 raise error.Abort(
632 632 'unable to resolve parent while packing %r %r'
633 633 ' for changeset %r' % (store.indexfile, rev, clrev))
634 634
635 635 return nullrev
636 636
637 637 if not linkparents or (
638 638 store.parentrevs(rev) == (nullrev, nullrev)):
639 639 p1, p2 = nullrev, nullrev
640 640 elif len(linkparents) == 1:
641 641 p1, = sorted(local(p) for p in linkparents)
642 642 p2 = nullrev
643 643 else:
644 644 p1, p2 = sorted(local(p) for p in linkparents)
645 645
646 646 p1node, p2node = store.node(p1), store.node(p2)
647 647
648 648 # TODO: try and actually send deltas for ellipsis data blocks
649 649 return revisiondeltarequest(
650 650 node=node,
651 651 p1node=p1node,
652 652 p2node=p2node,
653 653 linknode=linknode,
654 654 basenode=nullid,
655 655 ellipsis=True,
656 656 )
657 657
658 658 def deltagroup(repo, store, nodes, ischangelog, lookup, forcedeltaparentprev,
659 659 allowreorder,
660 660 topic=None,
661 661 ellipses=False, clrevtolocalrev=None, fullclnodes=None,
662 662 precomputedellipsis=None):
663 663 """Calculate deltas for a set of revisions.
664 664
665 665 Is a generator of ``revisiondelta`` instances.
666 666
667 667 If topic is not None, progress detail will be generated using this
668 668 topic name (e.g. changesets, manifests, etc).
669 669 """
670 670 if not nodes:
671 671 return
672 672
673 673 # We perform two passes over the revisions whose data we will emit.
674 674 #
675 675 # In the first pass, we obtain information about the deltas that will
676 676 # be generated. This involves computing linknodes and adjusting the
677 677 # request to take shallow fetching into account. The end result of
678 678 # this pass is a list of "request" objects stating which deltas
679 679 # to obtain.
680 680 #
681 681 # The second pass is simply resolving the requested deltas.
682 682
683 683 cl = repo.changelog
684 684
685 685 if ischangelog:
686 686 # Changelog doesn't benefit from reordering revisions. So send
687 687 # out revisions in store order.
688 688 # TODO the API would be cleaner if this were controlled by the
689 689 # store producing the deltas.
690 690 revs = sorted(cl.rev(n) for n in nodes)
691 691 elif ellipses:
692 692 revs = _sortnodesellipsis(store, nodes, cl, lookup)
693 693 else:
694 694 revs = _sortnodesnormal(store, nodes, allowreorder)
695 695
696 696 # In the first pass, collect info about the deltas we'll be
697 697 # generating.
698 698 requests = []
699 699
700 700 # Add the parent of the first rev.
701 701 revs.insert(0, store.parentrevs(revs[0])[0])
702 702
703 703 for i in pycompat.xrange(len(revs) - 1):
704 704 prev = revs[i]
705 705 curr = revs[i + 1]
706 706
707 707 node = store.node(curr)
708 708 linknode = lookup(node)
709 709 p1node, p2node = store.parents(node)
710 710
711 711 if ellipses:
712 712 linkrev = cl.rev(linknode)
713 713 clrevtolocalrev[linkrev] = curr
714 714
715 715 # This is a node to send in full, because the changeset it
716 716 # corresponds to was a full changeset.
717 717 if linknode in fullclnodes:
718 718 requests.append(revisiondeltarequest(
719 719 node=node,
720 720 p1node=p1node,
721 721 p2node=p2node,
722 722 linknode=linknode,
723 723 basenode=None,
724 724 ))
725 725
726 726 elif linkrev not in precomputedellipsis:
727 727 pass
728 728 else:
729 729 requests.append(_makenarrowdeltarequest(
730 730 cl, store, ischangelog, curr, node, linkrev, linknode,
731 731 clrevtolocalrev, fullclnodes,
732 732 precomputedellipsis))
733 733 else:
734 734 requests.append(revisiondeltarequest(
735 735 node=node,
736 736 p1node=p1node,
737 737 p2node=p2node,
738 738 linknode=linknode,
739 739 basenode=store.node(prev) if forcedeltaparentprev else None,
740 740 ))
741 741
742 742 # We expect the first pass to be fast, so we only engage the progress
743 743 # meter for constructing the revision deltas.
744 744 progress = None
745 745 if topic is not None:
746 746 progress = repo.ui.makeprogress(topic, unit=_('chunks'),
747 747 total=len(requests))
748 748
749 749 for i, delta in enumerate(store.emitrevisiondeltas(requests)):
750 750 if progress:
751 751 progress.update(i + 1)
752 752
753 753 yield delta
754 754
755 755 if progress:
756 756 progress.complete()
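Condensing the two-pass structure described at the top of deltagroup(): pass one builds lightweight request objects, pass two lets the store resolve them in bulk. A sketch where `plan` and `consume` are illustrative stand-ins:

    requests = [revisiondeltarequest(node=node, p1node=p1, p2node=p2,
                                     linknode=lookup(node), basenode=None)
                for node, p1, p2 in plan]
    for delta in store.emitrevisiondeltas(requests):
        consume(delta)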
757 757
758 758 class cgpacker(object):
759 759 def __init__(self, repo, filematcher, version, allowreorder,
760 760 builddeltaheader, manifestsend,
761 761 forcedeltaparentprev=False,
762 762 bundlecaps=None, ellipses=False,
763 763 shallow=False, ellipsisroots=None, fullnodes=None):
764 764 """Given a source repo, construct a bundler.
765 765
766 766 filematcher is a matcher that matches on files to include in the
767 767 changegroup. Used to facilitate sparse changegroups.
768 768
769 769 allowreorder controls whether reordering of revisions is allowed.
770 770 This value is used when ``bundle.reorder`` is ``auto`` or isn't
771 771 set.
772 772
773 773 forcedeltaparentprev indicates whether delta parents must be against
774 774 the previous revision in a delta group. This should only be used for
775 775 compatibility with changegroup version 1.
776 776
777 777 builddeltaheader is a callable that constructs the header for a group
778 778 delta.
779 779
780 780 manifestsend is a chunk to send after manifests have been fully emitted.
781 781
782 782 ellipses indicates whether ellipsis serving mode is enabled.
783 783
784 784 bundlecaps is optional and can be used to specify the set of
785 785 capabilities which can be used to build the bundle. While bundlecaps is
786 786 unused in core Mercurial, extensions rely on this feature to communicate
787 787 capabilities to customize the changegroup packer.
788 788
789 789 shallow indicates whether shallow data might be sent. The packer may
790 790 need to pack file contents not introduced by the changes being packed.
791 791
792 792 fullnodes is the set of changelog nodes which should not be ellipsis
793 793 nodes. We store this rather than the set of nodes that should be
794 794 ellipsis because for very large histories we expect this to be
795 795 significantly smaller.
796 796 """
797 797 assert filematcher
798 798 self._filematcher = filematcher
799 799
800 800 self.version = version
801 801 self._forcedeltaparentprev = forcedeltaparentprev
802 802 self._builddeltaheader = builddeltaheader
803 803 self._manifestsend = manifestsend
804 804 self._ellipses = ellipses
805 805
806 806 # Set of capabilities we can use to build the bundle.
807 807 if bundlecaps is None:
808 808 bundlecaps = set()
809 809 self._bundlecaps = bundlecaps
810 810 self._isshallow = shallow
811 811 self._fullclnodes = fullnodes
812 812
813 813 # Maps ellipsis revs to their roots at the changelog level.
814 814 self._precomputedellipsis = ellipsisroots
815 815
816 816 # experimental config: bundle.reorder
817 817 reorder = repo.ui.config('bundle', 'reorder')
818 818 if reorder == 'auto':
819 819 self._reorder = allowreorder
820 820 else:
821 821 self._reorder = stringutil.parsebool(reorder)
822 822
823 823 self._repo = repo
824 824
825 825 if self._repo.ui.verbose and not self._repo.ui.debugflag:
826 826 self._verbosenote = self._repo.ui.note
827 827 else:
828 828 self._verbosenote = lambda s: None
829 829
830 830 def generate(self, commonrevs, clnodes, fastpathlinkrev, source,
831 831 changelog=True):
832 832 """Yield a sequence of changegroup byte chunks.
833 833 If changelog is False, changelog data won't be added to the changegroup
834 834 """
835 835
836 836 repo = self._repo
837 837 cl = repo.changelog
838 838
839 839 self._verbosenote(_('uncompressed size of bundle content:\n'))
840 840 size = 0
841 841
842 842 clstate, deltas = self._generatechangelog(cl, clnodes)
843 843 for delta in deltas:
844 844 if changelog:
845 845 for chunk in _revisiondeltatochunks(delta,
846 846 self._builddeltaheader):
847 847 size += len(chunk)
848 848 yield chunk
849 849
850 850 close = closechunk()
851 851 size += len(close)
852 852 yield closechunk()
853 853
854 854 self._verbosenote(_('%8.i (changelog)\n') % size)
855 855
856 856 clrevorder = clstate['clrevorder']
857 857 manifests = clstate['manifests']
858 858 changedfiles = clstate['changedfiles']
859 859
860 860 # We need to make sure that the linkrev in the changegroup refers to
861 861 # the first changeset that introduced the manifest or file revision.
862 862 # The fastpath is usually safer than the slowpath, because the filelogs
863 863 # are walked in revlog order.
864 864 #
865 865 # When taking the slowpath with reorder=None and the manifest revlog
866 866 # uses generaldelta, the manifest may be walked in the "wrong" order.
867 867 # Without 'clrevorder', we would get an incorrect linkrev (see fix in
868 868 # cc0ff93d0c0c).
869 869 #
870 870 # When taking the fastpath, we are only vulnerable to reordering
871 871 # of the changelog itself. The changelog never uses generaldelta, so
872 872 # it is only reordered when reorder=True. To handle this case, we
873 873 # simply take the slowpath, which already has the 'clrevorder' logic.
874 874 # This was also fixed in cc0ff93d0c0c.
875 875 fastpathlinkrev = fastpathlinkrev and not self._reorder
876 876 # Treemanifests don't work correctly with fastpathlinkrev
877 877 # either, because we don't discover which directory nodes to
878 878 # send along with files. This could probably be fixed.
879 879 fastpathlinkrev = fastpathlinkrev and (
880 880 'treemanifest' not in repo.requirements)
881 881
882 882 fnodes = {} # needed file nodes
883 883
884 884 size = 0
885 885 it = self.generatemanifests(
886 886 commonrevs, clrevorder, fastpathlinkrev, manifests, fnodes, source,
887 887 clstate['clrevtomanifestrev'])
888 888
889 889 for tree, deltas in it:
890 890 if tree:
891 891 assert self.version == b'03'
892 892 chunk = _fileheader(tree)
893 893 size += len(chunk)
894 894 yield chunk
895 895
896 896 for delta in deltas:
897 897 chunks = _revisiondeltatochunks(delta, self._builddeltaheader)
898 898 for chunk in chunks:
899 899 size += len(chunk)
900 900 yield chunk
901 901
902 902 close = closechunk()
903 903 size += len(close)
904 904 yield close
905 905
906 906 self._verbosenote(_('%8.i (manifests)\n') % size)
907 907 yield self._manifestsend
908 908
909 909 mfdicts = None
910 910 if self._ellipses and self._isshallow:
911 911 mfdicts = [(self._repo.manifestlog[n].read(), lr)
912 912 for (n, lr) in manifests.iteritems()]
913 913
914 914 manifests.clear()
915 915 clrevs = set(cl.rev(x) for x in clnodes)
916 916
917 917 it = self.generatefiles(changedfiles, commonrevs,
918 918 source, mfdicts, fastpathlinkrev,
919 919 fnodes, clrevs)
920 920
921 921 for path, deltas in it:
922 922 h = _fileheader(path)
923 923 size = len(h)
924 924 yield h
925 925
926 926 for delta in deltas:
927 927 chunks = _revisiondeltatochunks(delta, self._builddeltaheader)
928 928 for chunk in chunks:
929 929 size += len(chunk)
930 930 yield chunk
931 931
932 932 close = closechunk()
933 933 size += len(close)
934 934 yield close
935 935
936 936 self._verbosenote(_('%8.i %s\n') % (size, path))
937 937
938 938 yield closechunk()
939 939
940 940 if clnodes:
941 941 repo.hook('outgoing', node=hex(clnodes[0]), source=source)
942 942
943 943 def _generatechangelog(self, cl, nodes):
944 944 """Generate data for changelog chunks.
945 945
946 946 Returns a 2-tuple of a dict containing state and an iterable of
947 947 byte chunks. The state will not be fully populated until the
948 948 chunk stream has been fully consumed.
949 949 """
950 950 clrevorder = {}
951 951 manifests = {}
952 952 mfl = self._repo.manifestlog
953 953 changedfiles = set()
954 954 clrevtomanifestrev = {}
955 955
956 956 # Callback for the changelog, used to collect changed files and
957 957 # manifest nodes.
958 958 # Returns the linkrev node (identity in the changelog case).
959 959 def lookupcl(x):
960 960 c = cl.changelogrevision(x)
961 961 clrevorder[x] = len(clrevorder)
962 962
963 963 if self._ellipses:
964 964 # Only update manifests if x is going to be sent. Otherwise we
965 965 # end up with bogus linkrevs specified for manifests and
966 966 # we skip some manifest nodes that we should otherwise
967 967 # have sent.
968 968 if (x in self._fullclnodes
969 969 or cl.rev(x) in self._precomputedellipsis):
970 970
971 971 manifestnode = c.manifest
972 972 # Record the first changeset introducing this manifest
973 973 # version.
974 974 manifests.setdefault(manifestnode, x)
975 975 # Set this narrow-specific dict so we have the lowest
976 976 # manifest revnum to look up for this cl revnum. (Part of
977 977 # mapping changelog ellipsis parents to manifest ellipsis
978 978 # parents)
979 979 clrevtomanifestrev.setdefault(
980 980 cl.rev(x), mfl.rev(manifestnode))
981 981 # We can't trust the changed files list in the changeset if the
982 982 # client requested a shallow clone.
983 983 if self._isshallow:
984 984 changedfiles.update(mfl[c.manifest].read().keys())
985 985 else:
986 986 changedfiles.update(c.files)
987 987 else:
988 988 # record the first changeset introducing this manifest version
989 989 manifests.setdefault(c.manifest, x)
990 990 # Record a complete list of potentially-changed files in
991 991 # this manifest.
992 992 changedfiles.update(c.files)
993 993
994 994 return x
995 995
996 996 state = {
997 997 'clrevorder': clrevorder,
998 998 'manifests': manifests,
999 999 'changedfiles': changedfiles,
1000 1000 'clrevtomanifestrev': clrevtomanifestrev,
1001 1001 }
1002 1002
1003 1003 gen = deltagroup(
1004 1004 self._repo, cl, nodes, True, lookupcl,
1005 1005 self._forcedeltaparentprev,
1006 1006 # Reorder settings are currently ignored for changelog.
1007 1007 True,
1008 1008 ellipses=self._ellipses,
1009 1009 topic=_('changesets'),
1010 1010 clrevtolocalrev={},
1011 1011 fullclnodes=self._fullclnodes,
1012 1012 precomputedellipsis=self._precomputedellipsis)
1013 1013
1014 1014 return state, gen
1015 1015
1016 1016 def generatemanifests(self, commonrevs, clrevorder, fastpathlinkrev,
1017 1017 manifests, fnodes, source, clrevtolocalrev):
1018 1018 """Returns an iterator of changegroup chunks containing manifests.
1019 1019
1020 1020 `source` is unused here, but is used by extensions like remotefilelog to
1021 1021 change what is sent based in pulls vs pushes, etc.
1022 1022 """
1023 1023 repo = self._repo
1024 1024 mfl = repo.manifestlog
1025 1025 tmfnodes = {'': manifests}
1026 1026
1027 1027 # Callback for the manifest, used to collect linkrevs for filelog
1028 1028 # revisions.
1029 1029 # Returns the linkrev node (collected in lookupcl).
1030 1030 def makelookupmflinknode(tree, nodes):
1031 1031 if fastpathlinkrev:
1032 1032 assert not tree
1033 1033 return manifests.__getitem__
1034 1034
1035 1035 def lookupmflinknode(x):
1036 1036 """Callback for looking up the linknode for manifests.
1037 1037
1038 1038 Returns the linkrev node for the specified manifest.
1039 1039
1040 1040 SIDE EFFECT:
1041 1041
1042 1042 1) fclnodes gets populated with the list of relevant
1043 1043 file nodes if we're not using fastpathlinkrev
1044 1044 2) When treemanifests are in use, collects treemanifest nodes
1045 1045 to send
1046 1046
1047 1047 Note that this means manifests must be completely sent to
1048 1048 the client before you can trust the list of files and
1049 1049 treemanifests to send.
1050 1050 """
1051 1051 clnode = nodes[x]
1052 1052 mdata = mfl.get(tree, x).readfast(shallow=True)
1053 1053 for p, n, fl in mdata.iterentries():
1054 1054 if fl == 't': # subdirectory manifest
1055 1055 subtree = tree + p + '/'
1056 1056 tmfclnodes = tmfnodes.setdefault(subtree, {})
1057 1057 tmfclnode = tmfclnodes.setdefault(n, clnode)
1058 1058 if clrevorder[clnode] < clrevorder[tmfclnode]:
1059 1059 tmfclnodes[n] = clnode
1060 1060 else:
1061 1061 f = tree + p
1062 1062 fclnodes = fnodes.setdefault(f, {})
1063 1063 fclnode = fclnodes.setdefault(n, clnode)
1064 1064 if clrevorder[clnode] < clrevorder[fclnode]:
1065 1065 fclnodes[n] = clnode
1066 1066 return clnode
1067 1067 return lookupmflinknode
1068 1068
1069 1069 while tmfnodes:
1070 1070 tree, nodes = tmfnodes.popitem()
1071 1071 store = mfl.getstorage(tree)
1072 1072
1073 1073 if not self._filematcher.visitdir(store.tree[:-1] or '.'):
1074 # No nodes to send because this directory is out of
1075 # the client's view of the repository (probably
1076 # because of narrow clones).
1074 1077 prunednodes = []
1075 1078 else:
1079 # Avoid sending any manifest nodes we can prove the
1080 # client already has by checking linkrevs. See the
1081 # related comment in generatefiles().
1076 1082 prunednodes = self._prunemanifests(store, nodes, commonrevs)
1077 1083 if tree and not prunednodes:
1078 1084 continue
1079 1085
1080 1086 lookupfn = makelookupmflinknode(tree, nodes)
1081 1087
1082 1088 deltas = deltagroup(
1083 1089 self._repo, store, prunednodes, False, lookupfn,
1084 1090 self._forcedeltaparentprev, self._reorder,
1085 1091 ellipses=self._ellipses,
1086 1092 topic=_('manifests'),
1087 1093 clrevtolocalrev=clrevtolocalrev,
1088 1094 fullclnodes=self._fullclnodes,
1089 1095 precomputedellipsis=self._precomputedellipsis)
1090 1096
1091 1097 yield tree, deltas
1092 1098
1093 1099 def _prunemanifests(self, store, nodes, commonrevs):
1094 1100 # This is split out as a separate method to allow filtering
1095 1101 # commonrevs in extension code.
1096 1102 #
1097 1103 # TODO(augie): this shouldn't be required, instead we should
1098 1104 # make filtering of revisions to send delegated to the store
1099 1105 # layer.
1100 1106 frev, flr = store.rev, store.linkrev
1101 1107 return [n for n in nodes if flr(frev(n)) not in commonrevs]
1102 1108
1103 1109 # The 'source' parameter is useful for extensions
1104 1110 def generatefiles(self, changedfiles, commonrevs, source,
1105 1111 mfdicts, fastpathlinkrev, fnodes, clrevs):
1106 1112 changedfiles = list(filter(self._filematcher, changedfiles))
1107 1113
1108 1114 if not fastpathlinkrev:
1109 1115 def normallinknodes(unused, fname):
1110 1116 return fnodes.get(fname, {})
1111 1117 else:
1112 1118 cln = self._repo.changelog.node
1113 1119
1114 1120 def normallinknodes(store, fname):
1115 1121 flinkrev = store.linkrev
1116 1122 fnode = store.node
1117 1123 revs = ((r, flinkrev(r)) for r in store)
1118 1124 return dict((fnode(r), cln(lr))
1119 1125 for r, lr in revs if lr in clrevs)
1120 1126
1121 1127 clrevtolocalrev = {}
1122 1128
1123 1129 if self._isshallow:
1124 1130 # In a shallow clone, the linknodes callback needs to also include
1125 1131 # those file nodes that are in the manifests we sent but weren't
1126 1132 # introduced by those manifests.
1127 1133 commonctxs = [self._repo[c] for c in commonrevs]
1128 1134 clrev = self._repo.changelog.rev
1129 1135
1130 1136 def linknodes(flog, fname):
1131 1137 for c in commonctxs:
1132 1138 try:
1133 1139 fnode = c.filenode(fname)
1134 1140 clrevtolocalrev[c.rev()] = flog.rev(fnode)
1135 1141 except error.ManifestLookupError:
1136 1142 pass
1137 1143 links = normallinknodes(flog, fname)
1138 1144 if len(links) != len(mfdicts):
1139 1145 for mf, lr in mfdicts:
1140 1146 fnode = mf.get(fname, None)
1141 1147 if fnode in links:
1142 1148 links[fnode] = min(links[fnode], lr, key=clrev)
1143 1149 elif fnode:
1144 1150 links[fnode] = lr
1145 1151 return links
1146 1152 else:
1147 1153 linknodes = normallinknodes
1148 1154
1149 1155 repo = self._repo
1150 1156 progress = repo.ui.makeprogress(_('files'), unit=_('files'),
1151 1157 total=len(changedfiles))
1152 1158 for i, fname in enumerate(sorted(changedfiles)):
1153 1159 filerevlog = repo.file(fname)
1154 1160 if not filerevlog:
1155 1161 raise error.Abort(_("empty or missing file data for %s") %
1156 1162 fname)
1157 1163
1158 1164 clrevtolocalrev.clear()
1159 1165
1160 1166 linkrevnodes = linknodes(filerevlog, fname)
1161 1167 # Lookup for filenodes, we collected the linkrev nodes above in the
1162 1168 # fastpath case and with lookupmf in the slowpath case.
1163 1169 def lookupfilelog(x):
1164 1170 return linkrevnodes[x]
1165 1171
1166 1172 frev, flr = filerevlog.rev, filerevlog.linkrev
1173 # Skip sending any filenode we know the client already
1174 # has. This avoids over-sending files relatively
1175 # inexpensively, so it's not a problem if we under-filter
1176 # here.
1167 1177 filenodes = [n for n in linkrevnodes
1168 1178 if flr(frev(n)) not in commonrevs]
1169 1179
1170 1180 if not filenodes:
1171 1181 continue
1172 1182
1173 1183 progress.update(i + 1, item=fname)
1174 1184
1175 1185 deltas = deltagroup(
1176 1186 self._repo, filerevlog, filenodes, False, lookupfilelog,
1177 1187 self._forcedeltaparentprev, self._reorder,
1178 1188 ellipses=self._ellipses,
1179 1189 clrevtolocalrev=clrevtolocalrev,
1180 1190 fullclnodes=self._fullclnodes,
1181 1191 precomputedellipsis=self._precomputedellipsis)
1182 1192
1183 1193 yield fname, deltas
1184 1194
1185 1195 progress.complete()
1186 1196
1187 1197 def _makecg1packer(repo, filematcher, bundlecaps, ellipses=False,
1188 1198 shallow=False, ellipsisroots=None, fullnodes=None):
1189 1199 builddeltaheader = lambda d: _CHANGEGROUPV1_DELTA_HEADER.pack(
1190 1200 d.node, d.p1node, d.p2node, d.linknode)
1191 1201
1192 1202 return cgpacker(repo, filematcher, b'01',
1193 1203 allowreorder=None,
1194 1204 builddeltaheader=builddeltaheader,
1195 1205 manifestsend=b'',
1196 1206 forcedeltaparentprev=True,
1197 1207 bundlecaps=bundlecaps,
1198 1208 ellipses=ellipses,
1199 1209 shallow=shallow,
1200 1210 ellipsisroots=ellipsisroots,
1201 1211 fullnodes=fullnodes)
1202 1212
1203 1213 def _makecg2packer(repo, filematcher, bundlecaps, ellipses=False,
1204 1214 shallow=False, ellipsisroots=None, fullnodes=None):
1205 1215 builddeltaheader = lambda d: _CHANGEGROUPV2_DELTA_HEADER.pack(
1206 1216 d.node, d.p1node, d.p2node, d.basenode, d.linknode)
1207 1217
1208 1218 # Since generaldelta is directly supported by cg2, reordering
1209 1219 # generally doesn't help, so we disable it by default (treating
1210 1220 # bundle.reorder=auto just like bundle.reorder=False).
1211 1221 return cgpacker(repo, filematcher, b'02',
1212 1222 allowreorder=False,
1213 1223 builddeltaheader=builddeltaheader,
1214 1224 manifestsend=b'',
1215 1225 bundlecaps=bundlecaps,
1216 1226 ellipses=ellipses,
1217 1227 shallow=shallow,
1218 1228 ellipsisroots=ellipsisroots,
1219 1229 fullnodes=fullnodes)
1220 1230
1221 1231 def _makecg3packer(repo, filematcher, bundlecaps, ellipses=False,
1222 1232 shallow=False, ellipsisroots=None, fullnodes=None):
1223 1233 builddeltaheader = lambda d: _CHANGEGROUPV3_DELTA_HEADER.pack(
1224 1234 d.node, d.p1node, d.p2node, d.basenode, d.linknode, d.flags)
1225 1235
1226 1236 return cgpacker(repo, filematcher, b'03',
1227 1237 allowreorder=False,
1228 1238 builddeltaheader=builddeltaheader,
1229 1239 manifestsend=closechunk(),
1230 1240 bundlecaps=bundlecaps,
1231 1241 ellipses=ellipses,
1232 1242 shallow=shallow,
1233 1243 ellipsisroots=ellipsisroots,
1234 1244 fullnodes=fullnodes)
1235 1245
1236 1246 _packermap = {'01': (_makecg1packer, cg1unpacker),
1237 1247 # cg2 adds support for exchanging generaldelta
1238 1248 '02': (_makecg2packer, cg2unpacker),
1239 1249 # cg3 adds support for exchanging revlog flags and treemanifests
1240 1250 '03': (_makecg3packer, cg3unpacker),
1241 1251 }
1242 1252
1243 1253 def allsupportedversions(repo):
1244 1254 versions = set(_packermap.keys())
1245 1255 if not (repo.ui.configbool('experimental', 'changegroup3') or
1246 1256 repo.ui.configbool('experimental', 'treemanifest') or
1247 1257 'treemanifest' in repo.requirements):
1248 1258 versions.discard('03')
1249 1259 return versions
1250 1260
1251 1261 # Changegroup versions that can be applied to the repo
1252 1262 def supportedincomingversions(repo):
1253 1263 return allsupportedversions(repo)
1254 1264
1255 1265 # Changegroup versions that can be created from the repo
1256 1266 def supportedoutgoingversions(repo):
1257 1267 versions = allsupportedversions(repo)
1258 1268 if 'treemanifest' in repo.requirements:
1259 1269 # Versions 01 and 02 support only flat manifests and it's just too
1260 1270 # expensive to convert between the flat manifest and tree manifest on
1261 1271 # the fly. Since tree manifests are hashed differently, all of history
1262 1272 # would have to be converted. Instead, we simply don't even pretend to
1263 1273 # support versions 01 and 02.
1264 1274 versions.discard('01')
1265 1275 versions.discard('02')
1266 1276 if repository.NARROW_REQUIREMENT in repo.requirements:
1267 1277 # Versions 01 and 02 don't support revlog flags, and we need to
1268 1278 # support that for stripping and unbundling to work.
1269 1279 versions.discard('01')
1270 1280 versions.discard('02')
1271 1281 if LFS_REQUIREMENT in repo.requirements:
1272 1282 # Versions 01 and 02 don't support revlog flags, and we need to
1273 1283 # mark LFS entries with REVIDX_EXTSTORED.
1274 1284 versions.discard('01')
1275 1285 versions.discard('02')
1276 1286
1277 1287 return versions
1278 1288
1279 1289 def localversion(repo):
1280 1290 # Finds the best version to use for bundles that are meant to be used
1281 1291 # locally, such as those from strip and shelve, and temporary bundles.
1282 1292 return max(supportedoutgoingversions(repo))
1283 1293
1284 1294 def safeversion(repo):
1285 1295 # Finds the smallest version that it's safe to assume clients of the repo
1286 1296 # will support. For example, all hg versions that support generaldelta also
1287 1297 # support changegroup 02.
1288 1298 versions = supportedoutgoingversions(repo)
1289 1299 if 'generaldelta' in repo.requirements:
1290 1300 versions.discard('01')
1291 1301 assert versions
1292 1302 return min(versions)
1293 1303
1294 1304 def getbundler(version, repo, bundlecaps=None, filematcher=None,
1295 1305 ellipses=False, shallow=False, ellipsisroots=None,
1296 1306 fullnodes=None):
1297 1307 assert version in supportedoutgoingversions(repo)
1298 1308
1299 1309 if filematcher is None:
1300 1310 filematcher = matchmod.alwaysmatcher(repo.root, '')
1301 1311
1302 1312 if version == '01' and not filematcher.always():
1303 1313 raise error.ProgrammingError('version 01 changegroups do not support '
1304 1314 'sparse file matchers')
1305 1315
1306 1316 if ellipses and version in (b'01', b'02'):
1307 1317 raise error.Abort(
1308 1318 _('ellipsis nodes require at least cg3 on client and server, '
1309 1319 'but negotiated version %s') % version)
1310 1320
1311 1321 # Requested files could include files not in the local store. So
1312 1322 # filter those out.
1313 1323 filematcher = matchmod.intersectmatchers(repo.narrowmatch(),
1314 1324 filematcher)
1315 1325
1316 1326 fn = _packermap[version][0]
1317 1327 return fn(repo, filematcher, bundlecaps, ellipses=ellipses,
1318 1328 shallow=shallow, ellipsisroots=ellipsisroots,
1319 1329 fullnodes=fullnodes)
1320 1330
1321 1331 def getunbundler(version, fh, alg, extras=None):
1322 1332 return _packermap[version][1](fh, alg, extras=extras)
1323 1333
1324 1334 def _changegroupinfo(repo, nodes, source):
1325 1335 if repo.ui.verbose or source == 'bundle':
1326 1336 repo.ui.status(_("%d changesets found\n") % len(nodes))
1327 1337 if repo.ui.debugflag:
1328 1338 repo.ui.debug("list of changesets:\n")
1329 1339 for node in nodes:
1330 1340 repo.ui.debug("%s\n" % hex(node))
1331 1341
1332 1342 def makechangegroup(repo, outgoing, version, source, fastpath=False,
1333 1343 bundlecaps=None):
1334 1344 cgstream = makestream(repo, outgoing, version, source,
1335 1345 fastpath=fastpath, bundlecaps=bundlecaps)
1336 1346 return getunbundler(version, util.chunkbuffer(cgstream), None,
1337 1347 {'clcount': len(outgoing.missing) })
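Putting the producer and consumer halves of this module together, a hedged end-to-end sketch (assumes '02' is a supported outgoing version, an `outgoing` set, and an open transaction `tr`):

    cg = makechangegroup(repo, outgoing, '02', 'push')
    ret = cg.apply(repo, tr, 'push', 'ssh://example.com/repo')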
1338 1348
1339 1349 def makestream(repo, outgoing, version, source, fastpath=False,
1340 1350 bundlecaps=None, filematcher=None):
1341 1351 bundler = getbundler(version, repo, bundlecaps=bundlecaps,
1342 1352 filematcher=filematcher)
1343 1353
1344 1354 repo = repo.unfiltered()
1345 1355 commonrevs = outgoing.common
1346 1356 csets = outgoing.missing
1347 1357 heads = outgoing.missingheads
1348 1358 # We go through the fast path if we get told to, or if all (unfiltered)
1349 1359 # heads have been requested (since we then know that all linkrevs will
1350 1360 # be pulled by the client).
1351 1361 heads.sort()
1352 1362 fastpathlinkrev = fastpath or (
1353 1363 repo.filtername is None and heads == sorted(repo.heads()))
1354 1364
1355 1365 repo.hook('preoutgoing', throw=True, source=source)
1356 1366 _changegroupinfo(repo, csets, source)
1357 1367 return bundler.generate(commonrevs, csets, fastpathlinkrev, source)
1358 1368
1359 1369 def _addchangegroupfiles(repo, source, revmap, trp, expectedfiles, needfiles):
1360 1370 revisions = 0
1361 1371 files = 0
1362 1372 progress = repo.ui.makeprogress(_('files'), unit=_('files'),
1363 1373 total=expectedfiles)
1364 1374 for chunkdata in iter(source.filelogheader, {}):
1365 1375 files += 1
1366 1376 f = chunkdata["filename"]
1367 1377 repo.ui.debug("adding %s revisions\n" % f)
1368 1378 progress.increment()
1369 1379 fl = repo.file(f)
1370 1380 o = len(fl)
1371 1381 try:
1372 1382 deltas = source.deltaiter()
1373 1383 if not fl.addgroup(deltas, revmap, trp):
1374 1384 raise error.Abort(_("received file revlog group is empty"))
1375 1385 except error.CensoredBaseError as e:
1376 1386 raise error.Abort(_("received delta base is censored: %s") % e)
1377 1387 revisions += len(fl) - o
1378 1388 if f in needfiles:
1379 1389 needs = needfiles[f]
1380 1390 for new in pycompat.xrange(o, len(fl)):
1381 1391 n = fl.node(new)
1382 1392 if n in needs:
1383 1393 needs.remove(n)
1384 1394 else:
1385 1395 raise error.Abort(
1386 1396 _("received spurious file revlog entry"))
1387 1397 if not needs:
1388 1398 del needfiles[f]
1389 1399 progress.complete()
1390 1400
1391 1401 for f, needs in needfiles.iteritems():
1392 1402 fl = repo.file(f)
1393 1403 for n in needs:
1394 1404 try:
1395 1405 fl.rev(n)
1396 1406 except error.LookupError:
1397 1407 raise error.Abort(
1398 1408 _('missing file data for %s:%s - run hg verify') %
1399 1409 (f, hex(n)))
1400 1410
1401 1411 return revisions, files