##// END OF EJS Templates
changegroup: move non-pruning of non-ellipsis manifests to _prunemanifests()...
Martin von Zweigbergk -
r41933:1c1c4ef8 default
parent child Browse files
Show More
@@ -1,1423 +1,1423 b''
1 1 # changegroup.py - Mercurial changegroup manipulation functions
2 2 #
3 3 # Copyright 2006 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import os
11 11 import struct
12 12 import weakref
13 13
14 14 from .i18n import _
15 15 from .node import (
16 16 hex,
17 17 nullid,
18 18 nullrev,
19 19 short,
20 20 )
21 21
22 22 from . import (
23 23 error,
24 24 match as matchmod,
25 25 mdiff,
26 26 phases,
27 27 pycompat,
28 28 repository,
29 29 util,
30 30 )
31 31
# Wire-format delta header layouts, one per changegroup version.
# cg1: node, p1, p2, linknode (four 20-byte nodes; delta base is implicit).
# cg2: adds an explicit delta-base node (generaldelta support).
# cg3: adds a 16-bit flags field; the ">" makes the big-endian byte order
# explicit for that H field.
_CHANGEGROUPV1_DELTA_HEADER = struct.Struct("20s20s20s20s")
_CHANGEGROUPV2_DELTA_HEADER = struct.Struct("20s20s20s20s20s")
_CHANGEGROUPV3_DELTA_HEADER = struct.Struct(">20s20s20s20s20sH")

# Repository requirement advertising large-file-storage usage.
LFS_REQUIREMENT = 'lfs'

# Convenience alias; reads exactly N bytes or raises.
readexactly = util.readexactly
39 39
def getchunk(stream):
    """Read and return the next length-prefixed chunk from ``stream``.

    The zero-length terminator chunk is returned as an empty string;
    a corrupt length prefix raises Abort.
    """
    header = readexactly(stream, 4)
    length = struct.unpack(">l", header)[0]
    if length > 4:
        # The 4-byte prefix counts itself; the payload is the remainder.
        return readexactly(stream, length - 4)
    if length:
        raise error.Abort(_("invalid chunk length %d") % length)
    return ""
49 49
def chunkheader(length):
    """Return the 4-byte big-endian header for a chunk of ``length`` payload bytes.

    The on-wire length includes the header itself, hence the +4.
    """
    total = length + 4
    return struct.pack(">l", total)
53 53
def closechunk():
    """Return the zero-length chunk header that terminates a group."""
    # A raw length of 0 (not 4) marks the terminator on the wire.
    return struct.pack(">l", 0)
57 57
def _fileheader(path):
    """Build the chunk introducing the changegroup section for ``path``."""
    header = chunkheader(len(path))
    return header + path
61 61
def writechunks(ui, chunks, filename, vfs=None):
    """Write chunks to a file and return its filename.

    The stream is assumed to be a bundle file.
    Existing files will not be overwritten.
    If no filename is specified, a temporary file is created.
    """
    fileobj = None
    unlinkpath = None
    try:
        if not filename:
            fd, filename = pycompat.mkstemp(prefix="hg-bundle-", suffix=".hg")
            fileobj = os.fdopen(fd, r"wb")
            # Remove the temp file if we fail before finishing the writes.
            unlinkpath = filename
        elif vfs:
            fileobj = vfs.open(filename, "wb")
        else:
            # Increase default buffer size because default is usually
            # small (4k is common on Linux).
            fileobj = open(filename, "wb", 131072)

        for chunk in chunks:
            fileobj.write(chunk)
        # All chunks written successfully: keep the file.
        unlinkpath = None
        return filename
    finally:
        if fileobj is not None:
            fileobj.close()
        if unlinkpath is not None:
            if filename and vfs:
                vfs.unlink(unlinkpath)
            else:
                os.unlink(unlinkpath)
95 95
class cg1unpacker(object):
    """Unpacker for cg1 changegroup streams.

    A changegroup unpacker handles the framing of the revision data in
    the wire format. Most consumers will want to use the apply()
    method to add the changes from the changegroup to a repository.

    If you're forwarding a changegroup unmodified to another consumer,
    use getchunks(), which returns an iterator of changegroup
    chunks. This is mostly useful for cases where you need to know the
    data stream has ended by observing the end of the changegroup.

    deltachunk() is useful only if you're applying delta data. Most
    consumers should prefer apply() instead.

    A few other public methods exist. Those are used only for
    bundlerepo and some debug commands - their use is discouraged.
    """
    deltaheader = _CHANGEGROUPV1_DELTA_HEADER
    deltaheadersize = deltaheader.size
    version = '01'
    _grouplistcount = 1 # One list of files after the manifests

    def __init__(self, fh, alg, extras=None):
        """Wrap stream ``fh``, decompressing with bundle compression ``alg``.

        ``alg`` of None is treated as 'UN' (uncompressed); an unknown
        algorithm name raises Abort.
        """
        if alg is None:
            alg = 'UN'
        if alg not in util.compengines.supportedbundletypes:
            raise error.Abort(_('unknown stream compression type: %s')
                              % alg)
        if alg == 'BZ':
            # 'BZ' bundles are handled by a dedicated engine variant
            # registered under this name (see util.compengines).
            alg = '_truncatedBZ'

        compengine = util.compengines.forbundletype(alg)
        self._stream = compengine.decompressorreader(fh)
        self._type = alg
        self.extras = extras or {}
        # Invoked (when set) each time a non-empty chunk length is read;
        # used by apply() to drive progress meters.
        self.callback = None

    # These methods (compressed, read, seek, tell) all appear to only
    # be used by bundlerepo, but it's a little hard to tell.
    def compressed(self):
        return self._type is not None and self._type != 'UN'
    def read(self, l):
        return self._stream.read(l)
    def seek(self, pos):
        return self._stream.seek(pos)
    def tell(self):
        return self._stream.tell()
    def close(self):
        return self._stream.close()

    def _chunklength(self):
        """Read one length prefix and return the payload size.

        Returns 0 for the terminating empty chunk; aborts on a bogus
        length. Fires the progress callback for non-terminator chunks.
        """
        d = readexactly(self._stream, 4)
        l = struct.unpack(">l", d)[0]
        if l <= 4:
            if l:
                raise error.Abort(_("invalid chunk length %d") % l)
            return 0
        if self.callback:
            self.callback()
        return l - 4

    def changelogheader(self):
        """v10 does not have a changelog header chunk"""
        return {}

    def manifestheader(self):
        """v10 does not have a manifest header chunk"""
        return {}

    def filelogheader(self):
        """return the header of the filelogs chunk, v10 only has the filename"""
        l = self._chunklength()
        if not l:
            return {}
        fname = readexactly(self._stream, l)
        return {'filename': fname}

    def _deltaheader(self, headertuple, prevnode):
        """Decode a cg1 delta header into its six logical fields.

        cg1 carries no explicit delta base: the base is the previous
        chunk's node, or p1 for the first chunk of a group. cg1 has no
        revlog flags either, so flags is always 0.
        """
        node, p1, p2, cs = headertuple
        if prevnode is None:
            deltabase = p1
        else:
            deltabase = prevnode
        flags = 0
        return node, p1, p2, deltabase, cs, flags

    def deltachunk(self, prevnode):
        """Read one delta chunk; returns {} at the end of a group."""
        l = self._chunklength()
        if not l:
            return {}
        headerdata = readexactly(self._stream, self.deltaheadersize)
        header = self.deltaheader.unpack(headerdata)
        delta = readexactly(self._stream, l - self.deltaheadersize)
        node, p1, p2, deltabase, cs, flags = self._deltaheader(header, prevnode)
        return (node, p1, p2, cs, deltabase, delta, flags)

    def getchunks(self):
        """returns all the chunks contained in the bundle

        Used when you need to forward the binary stream to a file or another
        network API. To do so, it parses the changegroup data, otherwise it
        would block in case of sshrepo because it doesn't know the end of
        the stream.
        """
        # For changegroup 1 and 2, we expect 3 parts: changelog, manifestlog,
        # and a list of filelogs. For changegroup 3, we expect 4 parts:
        # changelog, manifestlog, a list of tree manifestlogs, and a list of
        # filelogs.
        #
        # Changelog and manifestlog parts are terminated with empty chunks. The
        # tree and file parts are a list of entry sections. Each entry section
        # is a series of chunks terminating in an empty chunk. The list of these
        # entry sections is terminated in yet another empty chunk, so we know
        # we've reached the end of the tree/file list when we reach an empty
        # chunk that was preceded by no non-empty chunks.

        parts = 0
        while parts < 2 + self._grouplistcount:
            noentries = True
            while True:
                chunk = getchunk(self)
                if not chunk:
                    # The first two empty chunks represent the end of the
                    # changelog and the manifestlog portions. The remaining
                    # empty chunks represent either A) the end of individual
                    # tree or file entries in the file list, or B) the end of
                    # the entire list. It's the end of the entire list if there
                    # were no entries (i.e. noentries is True).
                    if parts < 2:
                        parts += 1
                    elif noentries:
                        parts += 1
                    break
                noentries = False
                yield chunkheader(len(chunk))
                pos = 0
                # Re-emit the payload in at most 1MB slices.
                while pos < len(chunk):
                    next = pos + 2**20
                    yield chunk[pos:next]
                    pos = next
            yield closechunk()

    def _unpackmanifests(self, repo, revmap, trp, prog):
        """Read the manifest group and add it to the repo's manifest log."""
        self.callback = prog.increment
        # no need to check for empty manifest group here:
        # if the result of the merge of 1 and 2 is the same in 3 and 4,
        # no new manifest will be created and the manifest group will
        # be empty during the pull
        self.manifestheader()
        deltas = self.deltaiter()
        repo.manifestlog.getstorage(b'').addgroup(deltas, revmap, trp)
        prog.complete()
        self.callback = None

    def apply(self, repo, tr, srctype, url, targetphase=phases.draft,
              expectedtotal=None):
        """Add the changegroup returned by source.read() to this repo.
        srctype is a string like 'push', 'pull', or 'unbundle'. url is
        the URL of the repo where this changegroup is coming from.

        Return an integer summarizing the change to this repo:
        - nothing changed or no source: 0
        - more heads than before: 1+added heads (2..n)
        - fewer heads than before: -1-removed heads (-2..-n)
        - number of heads stays the same: 1
        """
        repo = repo.unfiltered()
        def csmap(x):
            repo.ui.debug("add changeset %s\n" % short(x))
            return len(cl)

        def revmap(x):
            return cl.rev(x)

        changesets = files = revisions = 0

        try:
            # The transaction may already carry source information. In this
            # case we use the top level data. We overwrite the argument
            # because we need to use the top level value (if they exist)
            # in this function.
            srctype = tr.hookargs.setdefault('source', srctype)
            tr.hookargs.setdefault('url', url)
            repo.hook('prechangegroup',
                      throw=True, **pycompat.strkwargs(tr.hookargs))

            # write changelog data to temp files so concurrent readers
            # will not see an inconsistent view
            cl = repo.changelog
            cl.delayupdate(tr)
            oldheads = set(cl.heads())

            trp = weakref.proxy(tr)
            # pull off the changeset group
            repo.ui.status(_("adding changesets\n"))
            clstart = len(cl)
            progress = repo.ui.makeprogress(_('changesets'), unit=_('chunks'),
                                            total=expectedtotal)
            self.callback = progress.increment

            efiles = set()
            def onchangelog(cl, node):
                efiles.update(cl.readfiles(node))

            self.changelogheader()
            deltas = self.deltaiter()
            cgnodes = cl.addgroup(deltas, csmap, trp, addrevisioncb=onchangelog)
            # Collapse the set of touched files into its cardinality; only
            # the count is needed below (for file progress reporting).
            efiles = len(efiles)

            if not cgnodes:
                repo.ui.develwarn('applied empty changelog from changegroup',
                                  config='warn-empty-changegroup')
            clend = len(cl)
            changesets = clend - clstart
            progress.complete()
            self.callback = None

            # pull off the manifest group
            repo.ui.status(_("adding manifests\n"))
            # We know that we'll never have more manifests than we had
            # changesets.
            progress = repo.ui.makeprogress(_('manifests'), unit=_('chunks'),
                                            total=changesets)
            self._unpackmanifests(repo, revmap, trp, progress)

            needfiles = {}
            if repo.ui.configbool('server', 'validate'):
                cl = repo.changelog
                ml = repo.manifestlog
                # validate incoming csets have their manifests
                for cset in pycompat.xrange(clstart, clend):
                    mfnode = cl.changelogrevision(cset).manifest
                    mfest = ml[mfnode].readdelta()
                    # store file cgnodes we must see
                    for f, n in mfest.iteritems():
                        needfiles.setdefault(f, set()).add(n)

            # process the files
            repo.ui.status(_("adding file changes\n"))
            newrevs, newfiles = _addchangegroupfiles(
                repo, self, revmap, trp, efiles, needfiles)
            revisions += newrevs
            files += newfiles

            deltaheads = 0
            if oldheads:
                heads = cl.heads()
                deltaheads = len(heads) - len(oldheads)
                for h in heads:
                    # A new head that closes a branch doesn't count.
                    if h not in oldheads and repo[h].closesbranch():
                        deltaheads -= 1
            htext = ""
            if deltaheads:
                htext = _(" (%+d heads)") % deltaheads

            repo.ui.status(_("added %d changesets"
                             " with %d changes to %d files%s\n")
                           % (changesets, revisions, files, htext))
            repo.invalidatevolatilesets()

            if changesets > 0:
                if 'node' not in tr.hookargs:
                    tr.hookargs['node'] = hex(cl.node(clstart))
                    tr.hookargs['node_last'] = hex(cl.node(clend - 1))
                    hookargs = dict(tr.hookargs)
                else:
                    hookargs = dict(tr.hookargs)
                    hookargs['node'] = hex(cl.node(clstart))
                    hookargs['node_last'] = hex(cl.node(clend - 1))
                repo.hook('pretxnchangegroup',
                          throw=True, **pycompat.strkwargs(hookargs))

            added = [cl.node(r) for r in pycompat.xrange(clstart, clend)]
            phaseall = None
            if srctype in ('push', 'serve'):
                # Old servers can not push the boundary themselves.
                # New servers won't push the boundary if changeset already
                # exists locally as secret
                #
                # We should not use added here but the list of all change in
                # the bundle
                if repo.publishing():
                    targetphase = phaseall = phases.public
                else:
                    # closer target phase computation

                    # Those changesets have been pushed from the
                    # outside, their phases are going to be pushed
                    # alongside. Therefor `targetphase` is
                    # ignored.
                    targetphase = phaseall = phases.draft
            if added:
                phases.registernew(repo, tr, targetphase, added)
            if phaseall is not None:
                phases.advanceboundary(repo, tr, phaseall, cgnodes)

            if changesets > 0:

                def runhooks():
                    # These hooks run when the lock releases, not when the
                    # transaction closes. So it's possible for the changelog
                    # to have changed since we last saw it.
                    if clstart >= len(repo):
                        return

                    repo.hook("changegroup", **pycompat.strkwargs(hookargs))

                    for n in added:
                        args = hookargs.copy()
                        args['node'] = hex(n)
                        del args['node_last']
                        repo.hook("incoming", **pycompat.strkwargs(args))

                    newheads = [h for h in repo.heads()
                                if h not in oldheads]
                    repo.ui.log("incoming",
                                "%d incoming changes - new heads: %s\n",
                                len(added),
                                ', '.join([hex(c[:6]) for c in newheads]))

                tr.addpostclose('changegroup-runhooks-%020i' % clstart,
                                lambda tr: repo._afterlock(runhooks))
        finally:
            repo.ui.flush()
        # never return 0 here:
        if deltaheads < 0:
            ret = deltaheads - 1
        else:
            ret = deltaheads + 1
        return ret

    def deltaiter(self):
        """
        returns an iterator of the deltas in this changegroup

        Useful for passing to the underlying storage system to be stored.
        """
        # Each chunk's node becomes the implicit delta base of the next.
        chain = None
        for chunkdata in iter(lambda: self.deltachunk(chain), {}):
            # Chunkdata: (node, p1, p2, cs, deltabase, delta, flags)
            yield chunkdata
            chain = chunkdata[0]
438 438
class cg2unpacker(cg1unpacker):
    """Unpacker for version 02 changegroup streams.

    The cg2 wire format differs from cg1 only in its delta header: to
    support generaldelta, the delta base node is transmitted explicitly
    rather than inferred from the previous chunk or p1. Everything else
    about the data is unchanged.
    """
    deltaheader = _CHANGEGROUPV2_DELTA_HEADER
    deltaheadersize = deltaheader.size
    version = '02'

    def _deltaheader(self, headertuple, prevnode):
        # The base is spelled out on the wire, so ``prevnode`` is unused,
        # and this version still has no revlog flags.
        node, p1, p2, deltabase, cs = headertuple
        return node, p1, p2, deltabase, cs, 0
454 454
class cg3unpacker(cg2unpacker):
    """Unpacker for version 03 changegroup streams.

    cg3 extends cg2 with revlog flags in the delta header and with tree
    manifest exchange: an extra empty chunk separates the (flat)
    manifests from the files, and per-directory manifest groups may
    appear in between.
    """
    deltaheader = _CHANGEGROUPV3_DELTA_HEADER
    deltaheadersize = deltaheader.size
    version = '03'
    _grouplistcount = 2 # One list of manifests and one list of files

    def _deltaheader(self, headertuple, prevnode):
        # The wire header already carries all six fields, flags included.
        return headertuple

    def _unpackmanifests(self, repo, revmap, trp, prog):
        # Flat manifests first, exactly as cg2 handles them.
        super(cg3unpacker, self)._unpackmanifests(repo, revmap, trp, prog)
        # Then any directory manifest groups, one per tree, terminated by
        # an empty (header-less) chunk.
        for hdr in iter(self.filelogheader, {}):
            # If we get here, there are directory manifests in the changegroup
            dirname = hdr["filename"]
            repo.ui.debug("adding %s revisions\n" % dirname)
            deltas = self.deltaiter()
            store = repo.manifestlog.getstorage(dirname)
            if not store.addgroup(deltas, revmap, trp):
                raise error.Abort(_("received dir revlog group is empty"))
480 480
class headerlessfixup(object):
    """File-like wrapper that re-attaches an already-consumed prefix.

    ``h`` holds bytes that were read off ``fh`` before it was handed to
    us (e.g. a sniffed header); read() drains that prefix first and then
    falls through to the underlying stream.
    """
    def __init__(self, fh, h):
        self._fh = fh
        self._h = h

    def read(self, n):
        buffered = self._h
        if not buffered:
            return readexactly(self._fh, n)
        d = buffered[:n]
        self._h = buffered[n:]
        if len(d) < n:
            # Prefix exhausted mid-request; top up from the real stream.
            d += readexactly(self._fh, n - len(d))
        return d
492 492
def _revisiondeltatochunks(delta, headerfn):
    """Serialize a revisiondelta into a sequence of changegroup chunks.

    The changegroup wire format only carries deltas, so a revision that
    was captured in full is prefixed with a synthetic diff header telling
    the receiver to replace its base wholesale.
    """
    if delta.delta is not None:
        # Already delta-encoded: ship it untouched.
        data = delta.delta
        prefix = b''
    else:
        data = delta.revision
        if delta.basenode == nullid:
            prefix = mdiff.trivialdiffheader(len(data))
        else:
            prefix = mdiff.replacediffheader(delta.baserevisionsize,
                                             len(data))

    meta = headerfn(delta)

    yield chunkheader(len(meta) + len(prefix) + len(data))
    yield meta
    if prefix:
        yield prefix
    yield data
519 519
520 520 def _sortnodesellipsis(store, nodes, cl, lookup):
521 521 """Sort nodes for changegroup generation."""
522 522 # Ellipses serving mode.
523 523 #
524 524 # In a perfect world, we'd generate better ellipsis-ified graphs
525 525 # for non-changelog revlogs. In practice, we haven't started doing
526 526 # that yet, so the resulting DAGs for the manifestlog and filelogs
527 527 # are actually full of bogus parentage on all the ellipsis
528 528 # nodes. This has the side effect that, while the contents are
529 529 # correct, the individual DAGs might be completely out of whack in
530 530 # a case like 882681bc3166 and its ancestors (back about 10
531 531 # revisions or so) in the main hg repo.
532 532 #
533 533 # The one invariant we *know* holds is that the new (potentially
534 534 # bogus) DAG shape will be valid if we order the nodes in the
535 535 # order that they're introduced in dramatis personae by the
536 536 # changelog, so what we do is we sort the non-changelog histories
537 537 # by the order in which they are used by the changelog.
538 538 key = lambda n: cl.rev(lookup(n))
539 539 return sorted(nodes, key=key)
540 540
def _resolvenarrowrevisioninfo(cl, store, ischangelog, rev, linkrev,
                               linknode, clrevtolocalrev, fullclnodes,
                               precomputedellipsis):
    """Compute (p1node, p2node, linknode) for ``rev`` in ellipsis mode.

    The parents recorded in ``store`` may be absent from the narrow
    bundle being produced, so the effective parents are recomputed from
    the precomputed ellipsis DAG, which is keyed by changelog revision
    numbers (``precomputedellipsis[linkrev]`` gives the link parents).
    """
    linkparents = precomputedellipsis[linkrev]
    def local(clrev):
        """Turn a changelog revnum into a local revnum.

        The ellipsis dag is stored as revnums on the changelog,
        but when we're producing ellipsis entries for
        non-changelog revlogs, we need to turn those numbers into
        something local. This does that for us, and during the
        changelog sending phase will also expand the stored
        mappings as needed.
        """
        if clrev == nullrev:
            return nullrev

        if ischangelog:
            return clrev

        # Walk the ellipsis-ized changelog breadth-first looking for a
        # change that has been linked from the current revlog.
        #
        # For a flat manifest revlog only a single step should be necessary
        # as all relevant changelog entries are relevant to the flat
        # manifest.
        #
        # For a filelog or tree manifest dirlog however not every changelog
        # entry will have been relevant, so we need to skip some changelog
        # nodes even after ellipsis-izing.
        walk = [clrev]
        while walk:
            p = walk[0]
            walk = walk[1:]
            if p in clrevtolocalrev:
                return clrevtolocalrev[p]
            elif p in fullclnodes:
                # NOTE(review): ``fullclnodes`` is documented elsewhere as a
                # set of changelog *nodes*, but ``p`` here is a changelog
                # rev — confirm which the callers actually populate.
                walk.extend([pp for pp in cl.parentrevs(p)
                             if pp != nullrev])
            elif p in precomputedellipsis:
                walk.extend([pp for pp in precomputedellipsis[p]
                             if pp != nullrev])
            else:
                # In this case, we've got an ellipsis with parents
                # outside the current bundle (likely an
                # incremental pull). We "know" that we can use the
                # value of this same revlog at whatever revision
                # is pointed to by linknode. "Know" is in scare
                # quotes because I haven't done enough examination
                # of edge cases to convince myself this is really
                # a fact - it works for all the (admittedly
                # thorough) cases in our testsuite, but I would be
                # somewhat unsurprised to find a case in the wild
                # where this breaks down a bit. That said, I don't
                # know if it would hurt anything.
                for i in pycompat.xrange(rev, 0, -1):
                    if store.linkrev(i) == clrev:
                        return i
                # We failed to resolve a parent for this node, so
                # we crash the changegroup construction.
                raise error.Abort(
                    'unable to resolve parent while packing %r %r'
                    ' for changeset %r' % (store.indexfile, rev, clrev))

        return nullrev

    if not linkparents or (
        store.parentrevs(rev) == (nullrev, nullrev)):
        p1, p2 = nullrev, nullrev
    elif len(linkparents) == 1:
        p1, = sorted(local(p) for p in linkparents)
        p2 = nullrev
    else:
        p1, p2 = sorted(local(p) for p in linkparents)

    p1node, p2node = store.node(p1), store.node(p2)

    return p1node, p2node, linknode
619 619
def deltagroup(repo, store, nodes, ischangelog, lookup, forcedeltaparentprev,
               topic=None,
               ellipses=False, clrevtolocalrev=None, fullclnodes=None,
               precomputedellipsis=None):
    """Calculate deltas for a set of revisions.

    Is a generator of ``revisiondelta`` instances.

    If topic is not None, progress detail will be generated using this
    topic name (e.g. changesets, manifests, etc).
    """
    if not nodes:
        return

    cl = repo.changelog

    if ischangelog:
        # `hg log` shows changesets in storage order. To preserve order
        # across clones, send out changesets in storage order.
        nodesorder = 'storage'
    elif ellipses:
        nodes = _sortnodesellipsis(store, nodes, cl, lookup)
        nodesorder = 'nodes'
    else:
        nodesorder = None

    # Perform ellipses filtering and revision massaging. We do this before
    # emitrevisions() because a) filtering out revisions creates less work
    # for emitrevisions() b) dropping revisions would break emitrevisions()'s
    # assumptions about delta choices and we would possibly send a delta
    # referencing a missing base revision.
    #
    # Also, calling lookup() has side-effects with regards to populating
    # data structures. If we don't call lookup() for each node or if we call
    # lookup() after the first pass through each node, things can break -
    # possibly intermittently depending on the python hash seed! For that
    # reason, we store a mapping of all linknodes during the initial node
    # pass rather than use lookup() on the output side.
    if ellipses:
        filtered = []
        adjustedparents = {}
        linknodes = {}

        for node in nodes:
            rev = store.rev(node)
            linknode = lookup(node)
            linkrev = cl.rev(linknode)
            clrevtolocalrev[linkrev] = rev

            # If linknode is in fullclnodes, it means the corresponding
            # changeset was a full changeset and is being sent unaltered.
            if linknode in fullclnodes:
                linknodes[node] = linknode

            # If the corresponding changeset wasn't in the set computed
            # as relevant to us, it should be dropped outright.
            elif linkrev not in precomputedellipsis:
                continue

            else:
                # We could probably do this later and avoid the dict
                # holding state. But it likely doesn't matter.
                p1node, p2node, linknode = _resolvenarrowrevisioninfo(
                    cl, store, ischangelog, rev, linkrev, linknode,
                    clrevtolocalrev, fullclnodes, precomputedellipsis)

                adjustedparents[node] = (p1node, p2node)
                linknodes[node] = linknode

            filtered.append(node)

        nodes = filtered

    # We expect the first pass to be fast, so we only engage the progress
    # meter for constructing the revision deltas.
    progress = None
    if topic is not None:
        progress = repo.ui.makeprogress(topic, unit=_('chunks'),
                                        total=len(nodes))

    configtarget = repo.ui.config('devel', 'bundle.delta')
    if configtarget not in ('', 'p1', 'full'):
        # Fixed warning text: "as unknown value" -> "has unknown value",
        # and added the trailing newline ui.warn output conventions expect
        # so the warning doesn't run into subsequent output.
        msg = _('config "devel.bundle.delta" has unknown value: %s\n')
        repo.ui.warn(msg % configtarget)

    deltamode = repository.CG_DELTAMODE_STD
    if forcedeltaparentprev:
        deltamode = repository.CG_DELTAMODE_PREV
    elif configtarget == 'p1':
        deltamode = repository.CG_DELTAMODE_P1
    elif configtarget == 'full':
        deltamode = repository.CG_DELTAMODE_FULL

    revisions = store.emitrevisions(
        nodes,
        nodesorder=nodesorder,
        revisiondata=True,
        assumehaveparentrevisions=not ellipses,
        deltamode=deltamode)

    for i, revision in enumerate(revisions):
        if progress:
            progress.update(i + 1)

        if ellipses:
            linknode = linknodes[revision.node]

            if revision.node in adjustedparents:
                p1node, p2node = adjustedparents[revision.node]
                revision.p1node = p1node
                revision.p2node = p2node
                revision.flags |= repository.REVISION_FLAG_ELLIPSIS

        else:
            linknode = lookup(revision.node)

        revision.linknode = linknode
        yield revision

    if progress:
        progress.complete()
741 741
742 742 class cgpacker(object):
743 743 def __init__(self, repo, oldmatcher, matcher, version,
744 744 builddeltaheader, manifestsend,
745 745 forcedeltaparentprev=False,
746 746 bundlecaps=None, ellipses=False,
747 747 shallow=False, ellipsisroots=None, fullnodes=None):
748 748 """Given a source repo, construct a bundler.
749 749
750 750 oldmatcher is a matcher that matches on files the client already has.
751 751 These will not be included in the changegroup.
752 752
753 753 matcher is a matcher that matches on files to include in the
754 754 changegroup. Used to facilitate sparse changegroups.
755 755
756 756 forcedeltaparentprev indicates whether delta parents must be against
757 757 the previous revision in a delta group. This should only be used for
758 758 compatibility with changegroup version 1.
759 759
760 760 builddeltaheader is a callable that constructs the header for a group
761 761 delta.
762 762
763 763 manifestsend is a chunk to send after manifests have been fully emitted.
764 764
765 765 ellipses indicates whether ellipsis serving mode is enabled.
766 766
767 767 bundlecaps is optional and can be used to specify the set of
768 768 capabilities which can be used to build the bundle. While bundlecaps is
769 769 unused in core Mercurial, extensions rely on this feature to communicate
770 770 capabilities to customize the changegroup packer.
771 771
772 772 shallow indicates whether shallow data might be sent. The packer may
773 773 need to pack file contents not introduced by the changes being packed.
774 774
775 775 fullnodes is the set of changelog nodes which should not be ellipsis
776 776 nodes. We store this rather than the set of nodes that should be
777 777 ellipsis because for very large histories we expect this to be
778 778 significantly smaller.
779 779 """
780 780 assert oldmatcher
781 781 assert matcher
782 782 self._oldmatcher = oldmatcher
783 783 self._matcher = matcher
784 784
785 785 self.version = version
786 786 self._forcedeltaparentprev = forcedeltaparentprev
787 787 self._builddeltaheader = builddeltaheader
788 788 self._manifestsend = manifestsend
789 789 self._ellipses = ellipses
790 790
791 791 # Set of capabilities we can use to build the bundle.
792 792 if bundlecaps is None:
793 793 bundlecaps = set()
794 794 self._bundlecaps = bundlecaps
795 795 self._isshallow = shallow
796 796 self._fullclnodes = fullnodes
797 797
798 798 # Maps ellipsis revs to their roots at the changelog level.
799 799 self._precomputedellipsis = ellipsisroots
800 800
801 801 self._repo = repo
802 802
803 803 if self._repo.ui.verbose and not self._repo.ui.debugflag:
804 804 self._verbosenote = self._repo.ui.note
805 805 else:
806 806 self._verbosenote = lambda s: None
807 807
    def generate(self, commonrevs, clnodes, fastpathlinkrev, source,
                 changelog=True):
        """Yield a sequence of changegroup byte chunks.

        If changelog is False, changelog data won't be added to changegroup.

        The stream is emitted in three sections — changelog deltas, manifest
        deltas, then one (file header, deltas) group per changed file — with
        each section terminated by an empty chunk (closechunk()).
        """

        repo = self._repo
        cl = repo.changelog

        self._verbosenote(_('uncompressed size of bundle content:\n'))
        size = 0

        # Section 1: changelog. clstate is populated as a side effect of
        # consuming the delta stream (see _generatechangelog docstring).
        clstate, deltas = self._generatechangelog(cl, clnodes,
                                                  generate=changelog)
        for delta in deltas:
            for chunk in _revisiondeltatochunks(delta,
                                                self._builddeltaheader):
                size += len(chunk)
                yield chunk

        # NOTE(review): 'close' is computed but a fresh closechunk() is
        # yielded; harmless since closechunk() always returns the same bytes.
        close = closechunk()
        size += len(close)
        yield closechunk()

        self._verbosenote(_('%8.i (changelog)\n') % size)

        clrevorder = clstate['clrevorder']
        manifests = clstate['manifests']
        changedfiles = clstate['changedfiles']

        # We need to make sure that the linkrev in the changegroup refers to
        # the first changeset that introduced the manifest or file revision.
        # The fastpath is usually safer than the slowpath, because the filelogs
        # are walked in revlog order.
        #
        # When taking the slowpath when the manifest revlog uses generaldelta,
        # the manifest may be walked in the "wrong" order. Without 'clrevorder',
        # we would get an incorrect linkrev (see fix in cc0ff93d0c0c).
        #
        # When taking the fastpath, we are only vulnerable to reordering
        # of the changelog itself. The changelog never uses generaldelta and is
        # never reordered. To handle this case, we simply take the slowpath,
        # which already has the 'clrevorder' logic. This was also fixed in
        # cc0ff93d0c0c.

        # Treemanifests don't work correctly with fastpathlinkrev
        # either, because we don't discover which directory nodes to
        # send along with files. This could probably be fixed.
        fastpathlinkrev = fastpathlinkrev and (
            'treemanifest' not in repo.requirements)

        fnodes = {} # needed file nodes

        # Section 2: manifests (and, for cg3, subdirectory tree manifests,
        # each preceded by a directory-name header chunk).
        size = 0
        it = self.generatemanifests(
            commonrevs, clrevorder, fastpathlinkrev, manifests, fnodes, source,
            clstate['clrevtomanifestrev'])

        for tree, deltas in it:
            if tree:
                # Only cg3 supports sending named subdirectory manifests.
                assert self.version == b'03'
                chunk = _fileheader(tree)
                size += len(chunk)
                yield chunk

            for delta in deltas:
                chunks = _revisiondeltatochunks(delta, self._builddeltaheader)
                for chunk in chunks:
                    size += len(chunk)
                    yield chunk

            close = closechunk()
            size += len(close)
            yield close

        self._verbosenote(_('%8.i (manifests)\n') % size)
        yield self._manifestsend

        mfdicts = None
        if self._ellipses and self._isshallow:
            mfdicts = [(self._repo.manifestlog[n].read(), lr)
                       for (n, lr) in manifests.iteritems()]

        # 'manifests' has served its purpose; free the memory before the
        # (potentially large) file section.
        manifests.clear()
        clrevs = set(cl.rev(x) for x in clnodes)

        # Section 3: one header + delta group per changed file.
        it = self.generatefiles(changedfiles, commonrevs,
                                source, mfdicts, fastpathlinkrev,
                                fnodes, clrevs)

        for path, deltas in it:
            h = _fileheader(path)
            size = len(h)
            yield h

            for delta in deltas:
                chunks = _revisiondeltatochunks(delta, self._builddeltaheader)
                for chunk in chunks:
                    size += len(chunk)
                    yield chunk

            close = closechunk()
            size += len(close)
            yield close

            self._verbosenote(_('%8.i %s\n') % (size, path))

        # Empty chunk signals the end of the file section / whole group.
        yield closechunk()

        if clnodes:
            repo.hook('outgoing', node=hex(clnodes[0]), source=source)
919 919
    def _generatechangelog(self, cl, nodes, generate=True):
        """Generate data for changelog chunks.

        Returns a 2-tuple of a dict containing state and an iterable of
        byte chunks. The state will not be fully populated until the
        chunk stream has been fully consumed.

        if generate is False, the state will be fully populated and no chunk
        stream will be yielded
        """
        # Shared mutable state, filled in by lookupcl() as deltas are
        # consumed (or eagerly below when generate is False).
        clrevorder = {}
        manifests = {}
        mfl = self._repo.manifestlog
        changedfiles = set()
        clrevtomanifestrev = {}

        state = {
            'clrevorder': clrevorder,
            'manifests': manifests,
            'changedfiles': changedfiles,
            'clrevtomanifestrev': clrevtomanifestrev,
        }

        if not (generate or self._ellipses):
            # sort the nodes in storage order
            nodes = sorted(nodes, key=cl.rev)
            for node in nodes:
                c = cl.changelogrevision(node)
                clrevorder[node] = len(clrevorder)
                # record the first changeset introducing this manifest version
                manifests.setdefault(c.manifest, node)
                # Record a complete list of potentially-changed files in
                # this manifest.
                changedfiles.update(c.files)

            # No chunk stream requested: state is complete, nothing to yield.
            return state, ()

        # Callback for the changelog, used to collect changed files and
        # manifest nodes.
        # Returns the linkrev node (identity in the changelog case).
        def lookupcl(x):
            c = cl.changelogrevision(x)
            clrevorder[x] = len(clrevorder)

            if self._ellipses:
                # Only update manifests if x is going to be sent. Otherwise we
                # end up with bogus linkrevs specified for manifests and
                # we skip some manifest nodes that we should otherwise
                # have sent.
                if (x in self._fullclnodes
                    or cl.rev(x) in self._precomputedellipsis):

                    manifestnode = c.manifest
                    # Record the first changeset introducing this manifest
                    # version.
                    manifests.setdefault(manifestnode, x)
                    # Set this narrow-specific dict so we have the lowest
                    # manifest revnum to look up for this cl revnum. (Part of
                    # mapping changelog ellipsis parents to manifest ellipsis
                    # parents)
                    clrevtomanifestrev.setdefault(
                        cl.rev(x), mfl.rev(manifestnode))
                # We can't trust the changed files list in the changeset if the
                # client requested a shallow clone.
                if self._isshallow:
                    changedfiles.update(mfl[c.manifest].read().keys())
                else:
                    changedfiles.update(c.files)
            else:
                # record the first changeset introducing this manifest version
                manifests.setdefault(c.manifest, x)
                # Record a complete list of potentially-changed files in
                # this manifest.
                changedfiles.update(c.files)

            return x

        gen = deltagroup(
            self._repo, cl, nodes, True, lookupcl,
            self._forcedeltaparentprev,
            ellipses=self._ellipses,
            topic=_('changesets'),
            clrevtolocalrev={},
            fullclnodes=self._fullclnodes,
            precomputedellipsis=self._precomputedellipsis)

        return state, gen
1007 1007
    def generatemanifests(self, commonrevs, clrevorder, fastpathlinkrev,
                          manifests, fnodes, source, clrevtolocalrev):
        """Returns an iterator of changegroup chunks containing manifests.

        `source` is unused here, but is used by extensions like remotefilelog to
        change what is sent based in pulls vs pushes, etc.

        Yields (tree, deltas) pairs; tree is '' for the root manifest and a
        '/'-terminated directory path for subdirectory (tree) manifests.
        Side effect: populates ``fnodes`` with the file nodes to send (via
        the lookup callback below), so callers must fully consume each
        yielded delta generator.
        """
        repo = self._repo
        mfl = repo.manifestlog
        # Worklist of pending manifests keyed by directory; subdirectory
        # entries are added as root/tree manifests are read.
        tmfnodes = {'': manifests}

        # Callback for the manifest, used to collect linkrevs for filelog
        # revisions.
        # Returns the linkrev node (collected in lookupcl).
        def makelookupmflinknode(tree, nodes):
            if fastpathlinkrev:
                assert not tree
                return manifests.__getitem__

            def lookupmflinknode(x):
                """Callback for looking up the linknode for manifests.

                Returns the linkrev node for the specified manifest.

                SIDE EFFECT:

                1) fclnodes gets populated with the list of relevant
                   file nodes if we're not using fastpathlinkrev
                2) When treemanifests are in use, collects treemanifest nodes
                   to send

                Note that this means manifests must be completely sent to
                the client before you can trust the list of files and
                treemanifests to send.
                """
                clnode = nodes[x]
                mdata = mfl.get(tree, x).readfast(shallow=True)
                for p, n, fl in mdata.iterentries():
                    if fl == 't': # subdirectory manifest
                        subtree = tree + p + '/'
                        tmfclnodes = tmfnodes.setdefault(subtree, {})
                        tmfclnode = tmfclnodes.setdefault(n, clnode)
                        # Keep the earliest (lowest clrevorder) linknode.
                        if clrevorder[clnode] < clrevorder[tmfclnode]:
                            tmfclnodes[n] = clnode
                    else:
                        f = tree + p
                        fclnodes = fnodes.setdefault(f, {})
                        fclnode = fclnodes.setdefault(n, clnode)
                        if clrevorder[clnode] < clrevorder[fclnode]:
                            fclnodes[n] = clnode
                return clnode
            return lookupmflinknode

        while tmfnodes:
            tree, nodes = tmfnodes.popitem()

            should_visit = self._matcher.visitdir(tree[:-1] or '.')
            if tree and not should_visit:
                continue

            store = mfl.getstorage(tree)

            if not should_visit:
                # No nodes to send because this directory is out of
                # the client's view of the repository (probably
                # because of narrow clones). Do this even for the root
                # directory (tree=='')
                prunednodes = []
            else:
                # Avoid sending any manifest nodes we can prove the
                # client already has by checking linkrevs. See the
                # related comment in generatefiles().
                prunednodes = self._prunemanifests(store, nodes, commonrevs)

            if tree and not prunednodes:
                continue

            lookupfn = makelookupmflinknode(tree, nodes)

            deltas = deltagroup(
                self._repo, store, prunednodes, False, lookupfn,
                self._forcedeltaparentprev,
                ellipses=self._ellipses,
                topic=_('manifests'),
                clrevtolocalrev=clrevtolocalrev,
                fullclnodes=self._fullclnodes,
                precomputedellipsis=self._precomputedellipsis)

            if not self._oldmatcher.visitdir(store.tree[:-1] or '.'):
                yield tree, deltas
            else:
                # 'deltas' is a generator and we need to consume it even if
                # we are not going to send it because a side-effect is that
                # it updates tmfnodes (via lookupfn)
                for d in deltas:
                    pass
                if not tree:
                    yield tree, []
1111 1106
1112 1107 def _prunemanifests(self, store, nodes, commonrevs):
1108 if not self._ellipses:
1109 # In non-ellipses case and large repositories, it is better to
1110 # prevent calling of store.rev and store.linkrev on a lot of
1111 # nodes as compared to sending some extra data
1112 return nodes.copy()
1113 1113 # This is split out as a separate method to allow filtering
1114 1114 # commonrevs in extension code.
1115 1115 #
1116 1116 # TODO(augie): this shouldn't be required, instead we should
1117 1117 # make filtering of revisions to send delegated to the store
1118 1118 # layer.
1119 1119 frev, flr = store.rev, store.linkrev
1120 1120 return [n for n in nodes if flr(frev(n)) not in commonrevs]
1121 1121
    # The 'source' parameter is useful for extensions
    def generatefiles(self, changedfiles, commonrevs, source,
                      mfdicts, fastpathlinkrev, fnodes, clrevs):
        """Yield (path, deltas) pairs for the filelog revisions to send.

        Only files matched by the new matcher but not the old one are
        considered; nodes whose linkrevs are in ``commonrevs`` are skipped.
        """
        changedfiles = [f for f in changedfiles
                        if self._matcher(f) and not self._oldmatcher(f)]

        if not fastpathlinkrev:
            def normallinknodes(unused, fname):
                # Slow path: fnodes was populated while sending manifests.
                return fnodes.get(fname, {})
        else:
            cln = self._repo.changelog.node

            def normallinknodes(store, fname):
                # Fast path: derive linknodes directly from filelog linkrevs.
                flinkrev = store.linkrev
                fnode = store.node
                revs = ((r, flinkrev(r)) for r in store)
                return dict((fnode(r), cln(lr))
                            for r, lr in revs if lr in clrevs)

        clrevtolocalrev = {}

        if self._isshallow:
            # In a shallow clone, the linknodes callback needs to also include
            # those file nodes that are in the manifests we sent but weren't
            # introduced by those manifests.
            commonctxs = [self._repo[c] for c in commonrevs]
            clrev = self._repo.changelog.rev

            def linknodes(flog, fname):
                for c in commonctxs:
                    try:
                        fnode = c.filenode(fname)
                        clrevtolocalrev[c.rev()] = flog.rev(fnode)
                    except error.ManifestLookupError:
                        pass
                links = normallinknodes(flog, fname)
                if len(links) != len(mfdicts):
                    for mf, lr in mfdicts:
                        fnode = mf.get(fname, None)
                        if fnode in links:
                            # Prefer the earliest changelog rev as linkrev.
                            links[fnode] = min(links[fnode], lr, key=clrev)
                        elif fnode:
                            links[fnode] = lr
                return links
        else:
            linknodes = normallinknodes

        repo = self._repo
        progress = repo.ui.makeprogress(_('files'), unit=_('files'),
                                        total=len(changedfiles))
        for i, fname in enumerate(sorted(changedfiles)):
            filerevlog = repo.file(fname)
            if not filerevlog:
                raise error.Abort(_("empty or missing file data for %s") %
                                  fname)

            # Reset per-file state shared with the linknodes closure above.
            clrevtolocalrev.clear()

            linkrevnodes = linknodes(filerevlog, fname)
            # Lookup for filenodes, we collected the linkrev nodes above in the
            # fastpath case and with lookupmf in the slowpath case.
            def lookupfilelog(x):
                return linkrevnodes[x]

            frev, flr = filerevlog.rev, filerevlog.linkrev
            # Skip sending any filenode we know the client already
            # has. This avoids over-sending files relatively
            # inexpensively, so it's not a problem if we under-filter
            # here.
            filenodes = [n for n in linkrevnodes
                         if flr(frev(n)) not in commonrevs]

            if not filenodes:
                continue

            progress.update(i + 1, item=fname)

            deltas = deltagroup(
                self._repo, filerevlog, filenodes, False, lookupfilelog,
                self._forcedeltaparentprev,
                ellipses=self._ellipses,
                clrevtolocalrev=clrevtolocalrev,
                fullclnodes=self._fullclnodes,
                precomputedellipsis=self._precomputedellipsis)

            yield fname, deltas

        progress.complete()
1210 1210
def _makecg1packer(repo, oldmatcher, matcher, bundlecaps,
                   ellipses=False, shallow=False, ellipsisroots=None,
                   fullnodes=None):
    """Build a cgpacker emitting version '01' changegroups."""
    def builddeltaheader(d):
        # cg1 headers carry no explicit delta base node (compare the
        # 5-field cg2 header), so deltas are forced against the previous
        # parent via forcedeltaparentprev below.
        return _CHANGEGROUPV1_DELTA_HEADER.pack(
            d.node, d.p1node, d.p2node, d.linknode)

    return cgpacker(repo, oldmatcher, matcher, b'01',
                    builddeltaheader=builddeltaheader,
                    manifestsend=b'',
                    forcedeltaparentprev=True,
                    bundlecaps=bundlecaps,
                    ellipses=ellipses,
                    shallow=shallow,
                    ellipsisroots=ellipsisroots,
                    fullnodes=fullnodes)
1226 1226
def _makecg2packer(repo, oldmatcher, matcher, bundlecaps,
                   ellipses=False, shallow=False, ellipsisroots=None,
                   fullnodes=None):
    """Build a cgpacker emitting version '02' changegroups."""
    def builddeltaheader(d):
        # cg2 adds an explicit delta base node to the header.
        return _CHANGEGROUPV2_DELTA_HEADER.pack(
            d.node, d.p1node, d.p2node, d.basenode, d.linknode)

    return cgpacker(repo, oldmatcher, matcher, b'02',
                    builddeltaheader=builddeltaheader,
                    manifestsend=b'',
                    bundlecaps=bundlecaps,
                    ellipses=ellipses,
                    shallow=shallow,
                    ellipsisroots=ellipsisroots,
                    fullnodes=fullnodes)
1241 1241
def _makecg3packer(repo, oldmatcher, matcher, bundlecaps,
                   ellipses=False, shallow=False, ellipsisroots=None,
                   fullnodes=None):
    """Build a cgpacker emitting version '03' changegroups."""
    def builddeltaheader(d):
        # cg3 adds a 16-bit flags field on top of the cg2 header.
        return _CHANGEGROUPV3_DELTA_HEADER.pack(
            d.node, d.p1node, d.p2node, d.basenode, d.linknode, d.flags)

    # cg3 terminates the manifest section with an extra empty chunk.
    return cgpacker(repo, oldmatcher, matcher, b'03',
                    builddeltaheader=builddeltaheader,
                    manifestsend=closechunk(),
                    bundlecaps=bundlecaps,
                    ellipses=ellipses,
                    shallow=shallow,
                    ellipsisroots=ellipsisroots,
                    fullnodes=fullnodes)
1256 1256
# Maps changegroup version string -> (packer factory, unpacker class).
_packermap = {'01': (_makecg1packer, cg1unpacker),
             # cg2 adds support for exchanging generaldelta
             '02': (_makecg2packer, cg2unpacker),
             # cg3 adds support for exchanging revlog flags and treemanifests
             '03': (_makecg3packer, cg3unpacker),
}
1263 1263
def allsupportedversions(repo):
    """Return the set of changegroup versions known to this repository."""
    versions = set(_packermap)
    # '03' is only offered when explicitly enabled or when the repo
    # already uses tree manifests.
    wants03 = (repo.ui.configbool('experimental', 'changegroup3') or
               repo.ui.configbool('experimental', 'treemanifest') or
               'treemanifest' in repo.requirements)
    if not wants03:
        versions.discard('03')
    return versions
1271 1271
# Changegroup versions that can be applied to the repo
def supportedincomingversions(repo):
    # Any version we know how to create we can also apply.
    return allsupportedversions(repo)
1275 1275
# Changegroup versions that can be created from the repo
def supportedoutgoingversions(repo):
    """Return the set of changegroup versions this repo can produce."""
    versions = allsupportedversions(repo)
    if 'treemanifest' in repo.requirements:
        # Versions 01 and 02 support only flat manifests and it's just too
        # expensive to convert between the flat manifest and tree manifest on
        # the fly. Since tree manifests are hashed differently, all of history
        # would have to be converted. Instead, we simply don't even pretend to
        # support versions 01 and 02.
        versions.difference_update(('01', '02'))
    if repository.NARROW_REQUIREMENT in repo.requirements:
        # Versions 01 and 02 don't support revlog flags, and we need to
        # support that for stripping and unbundling to work.
        versions.difference_update(('01', '02'))
    if LFS_REQUIREMENT in repo.requirements:
        # Versions 01 and 02 don't support revlog flags, and we need to
        # mark LFS entries with REVIDX_EXTSTORED.
        versions.difference_update(('01', '02'))

    return versions
1299 1299
def localversion(repo):
    """Find the best version to use for bundles that are meant to be used
    locally, such as those from strip and shelve, and temporary bundles.
    """
    return max(supportedoutgoingversions(repo))
1304 1304
def safeversion(repo):
    """Find the smallest version that it's safe to assume clients of the
    repo will support.

    For example, all hg versions that support generaldelta also support
    changegroup 02.
    """
    versions = supportedoutgoingversions(repo)
    if 'generaldelta' in repo.requirements:
        versions -= {'01'}
    assert versions
    return min(versions)
1314 1314
def getbundler(version, repo, bundlecaps=None, oldmatcher=None,
               matcher=None, ellipses=False, shallow=False,
               ellipsisroots=None, fullnodes=None):
    """Validate arguments and build the packer registered for ``version``."""
    assert version in supportedoutgoingversions(repo)

    # Default to sending everything new and assuming the client had nothing.
    matcher = matchmod.always() if matcher is None else matcher
    oldmatcher = matchmod.never() if oldmatcher is None else oldmatcher

    if version == '01' and not matcher.always():
        raise error.ProgrammingError('version 01 changegroups do not support '
                                     'sparse file matchers')

    if ellipses and version in (b'01', b'02'):
        raise error.Abort(
            _('ellipsis nodes require at least cg3 on client and server, '
              'but negotiated version %s') % version)

    # Requested files could include files not in the local store. So
    # filter those out.
    matcher = repo.narrowmatch(matcher)

    makepacker = _packermap[version][0]
    return makepacker(repo, oldmatcher, matcher, bundlecaps,
                      ellipses=ellipses, shallow=shallow,
                      ellipsisroots=ellipsisroots, fullnodes=fullnodes)
1342 1342
def getunbundler(version, fh, alg, extras=None):
    # Instantiate the unpacker class registered for this changegroup version.
    return _packermap[version][1](fh, alg, extras=extras)
1345 1345
def _changegroupinfo(repo, nodes, source):
    """Report how many (and, when debugging, which) changesets were found."""
    ui = repo.ui
    if ui.verbose or source == 'bundle':
        ui.status(_("%d changesets found\n") % len(nodes))
    if ui.debugflag:
        ui.debug("list of changesets:\n")
        for node in nodes:
            ui.debug("%s\n" % hex(node))
1353 1353
def makechangegroup(repo, outgoing, version, source, fastpath=False,
                    bundlecaps=None):
    """Build the changegroup stream for ``outgoing`` and wrap it in an
    unbundler, recording the changeset count as 'clcount'.
    """
    cgstream = makestream(repo, outgoing, version, source,
                          fastpath=fastpath, bundlecaps=bundlecaps)
    return getunbundler(version, util.chunkbuffer(cgstream), None,
                        {'clcount': len(outgoing.missing) })
1360 1360
def makestream(repo, outgoing, version, source, fastpath=False,
               bundlecaps=None, matcher=None):
    """Return a generator of changegroup chunks for ``outgoing``."""
    bundler = getbundler(version, repo, bundlecaps=bundlecaps,
                         matcher=matcher)

    repo = repo.unfiltered()
    commonrevs = outgoing.common
    csets = outgoing.missing
    heads = outgoing.missingheads
    # We go through the fast path if we get told to, or if all (unfiltered)
    # heads have been requested (since we then know that all linkrevs will
    # be pulled by the client).
    heads.sort()
    fastpathlinkrev = fastpath or (
        repo.filtername is None and heads == sorted(repo.heads()))

    repo.hook('preoutgoing', throw=True, source=source)
    _changegroupinfo(repo, csets, source)
    return bundler.generate(commonrevs, csets, fastpathlinkrev, source)
1380 1380
def _addchangegroupfiles(repo, source, revmap, trp, expectedfiles, needfiles):
    """Apply the file section of an incoming changegroup.

    Reads (filename header, delta group) pairs from ``source`` until an
    empty header, adds each group to the matching filelog, and checks off
    nodes from ``needfiles`` (modified in place). Returns a
    (revisions added, files touched) tuple; raises error.Abort on empty
    groups, censored delta bases, spurious entries, or missing file data.
    """
    revisions = 0
    files = 0
    progress = repo.ui.makeprogress(_('files'), unit=_('files'),
                                    total=expectedfiles)
    # filelogheader() returns {} at the end-of-section marker.
    for chunkdata in iter(source.filelogheader, {}):
        files += 1
        f = chunkdata["filename"]
        repo.ui.debug("adding %s revisions\n" % f)
        progress.increment()
        fl = repo.file(f)
        o = len(fl)
        try:
            deltas = source.deltaiter()
            if not fl.addgroup(deltas, revmap, trp):
                raise error.Abort(_("received file revlog group is empty"))
        except error.CensoredBaseError as e:
            raise error.Abort(_("received delta base is censored: %s") % e)
        revisions += len(fl) - o
        if f in needfiles:
            needs = needfiles[f]
            # Every newly added node must be one we were expecting.
            for new in pycompat.xrange(o, len(fl)):
                n = fl.node(new)
                if n in needs:
                    needs.remove(n)
                else:
                    raise error.Abort(
                        _("received spurious file revlog entry"))
            if not needs:
                del needfiles[f]
    progress.complete()

    # Anything still listed in needfiles must already exist locally.
    for f, needs in needfiles.iteritems():
        fl = repo.file(f)
        for n in needs:
            try:
                fl.rev(n)
            except error.LookupError:
                raise error.Abort(
                    _('missing file data for %s:%s - run hg verify') %
                    (f, hex(n)))

    return revisions, files
General Comments 0
You need to be logged in to leave comments. Login now