##// END OF EJS Templates
changegroup: inline _prune() into call sites...
Gregory Szorc -
r39043:39f5c7af default
parent child Browse files
Show More
@@ -1,1418 +1,1416 b''
1 1 # changegroup.py - Mercurial changegroup manipulation functions
2 2 #
3 3 # Copyright 2006 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import os
11 11 import struct
12 12 import weakref
13 13
14 14 from .i18n import _
15 15 from .node import (
16 16 hex,
17 17 nullid,
18 18 nullrev,
19 19 short,
20 20 )
21 21
22 22 from .thirdparty import (
23 23 attr,
24 24 )
25 25
26 26 from . import (
27 27 dagutil,
28 28 error,
29 manifest,
30 29 match as matchmod,
31 30 mdiff,
32 31 phases,
33 32 pycompat,
34 33 repository,
35 34 revlog,
36 35 util,
37 36 )
38 37
39 38 from .utils import (
40 39 stringutil,
41 40 )
42 41
43 42 _CHANGEGROUPV1_DELTA_HEADER = struct.Struct("20s20s20s20s")
44 43 _CHANGEGROUPV2_DELTA_HEADER = struct.Struct("20s20s20s20s20s")
45 44 _CHANGEGROUPV3_DELTA_HEADER = struct.Struct(">20s20s20s20s20sH")
46 45
47 46 LFS_REQUIREMENT = 'lfs'
48 47
49 48 readexactly = util.readexactly
50 49
def getchunk(stream):
    """Read and return the payload of the next chunk in ``stream``.

    A chunk is a 4-byte big-endian signed length (which counts the four
    header bytes themselves) followed by the payload. An empty string is
    returned for a terminating zero-length chunk.
    """
    header = readexactly(stream, 4)
    length = struct.unpack(">l", header)[0]
    if length > 4:
        return readexactly(stream, length - 4)
    if length:
        # Lengths 1-4 (and negative values) cannot frame a valid chunk.
        raise error.Abort(_("invalid chunk length %d") % length)
    return ""
60 59
def chunkheader(length):
    """Return the changegroup framing header for a payload of ``length``.

    The on-wire length field includes the 4 header bytes as well.
    """
    framedlength = length + 4
    return struct.pack(">l", framedlength)
64 63
def closechunk():
    """Return the zero-length chunk header that terminates a group."""
    # A zero length field marks the end of a sequence of chunks.
    return b'\x00\x00\x00\x00'
68 67
def _fileheader(path):
    """Obtain a changegroup chunk header for a named path."""
    header = chunkheader(len(path))
    return header + path
72 71
def writechunks(ui, chunks, filename, vfs=None):
    """Write ``chunks`` to a file and return its filename.

    The stream is assumed to be a bundle file.
    Existing files will not be overwritten.
    If no filename is specified, a temporary file is created.
    """
    fileobj = None
    # Path of a partially-written file to delete on failure; cleared once
    # every chunk has been flushed out successfully.
    pending = None
    try:
        if not filename:
            fd, filename = pycompat.mkstemp(prefix="hg-bundle-", suffix=".hg")
            fileobj = os.fdopen(fd, r"wb")
        elif vfs:
            fileobj = vfs.open(filename, "wb")
        else:
            # Increase default buffer size because default is usually
            # small (4k is common on Linux).
            fileobj = open(filename, "wb", 131072)

        pending = filename
        for chunk in chunks:
            fileobj.write(chunk)
        pending = None
        return filename
    finally:
        if fileobj is not None:
            fileobj.close()
        if pending is not None:
            if filename and vfs:
                vfs.unlink(pending)
            else:
                os.unlink(pending)
106 105
class cg1unpacker(object):
    """Unpacker for cg1 changegroup streams.

    A changegroup unpacker handles the framing of the revision data in
    the wire format. Most consumers will want to use the apply()
    method to add the changes from the changegroup to a repository.

    If you're forwarding a changegroup unmodified to another consumer,
    use getchunks(), which returns an iterator of changegroup
    chunks. This is mostly useful for cases where you need to know the
    data stream has ended by observing the end of the changegroup.

    deltachunk() is useful only if you're applying delta data. Most
    consumers should prefer apply() instead.

    A few other public methods exist. Those are used only for
    bundlerepo and some debug commands - their use is discouraged.
    """
    deltaheader = _CHANGEGROUPV1_DELTA_HEADER
    deltaheadersize = deltaheader.size
    version = '01'
    _grouplistcount = 1 # One list of files after the manifests

    def __init__(self, fh, alg, extras=None):
        """Wrap stream ``fh`` compressed with bundle compression ``alg``.

        ``alg`` of None is treated as uncompressed ('UN'). Unknown
        compression types abort.
        """
        if alg is None:
            alg = 'UN'
        if alg not in util.compengines.supportedbundletypes:
            raise error.Abort(_('unknown stream compression type: %s')
                              % alg)
        if alg == 'BZ':
            # NOTE(review): remapped to the engine tolerating a stream whose
            # leading bytes were already consumed — presumably the bundle
            # header parser ate the 'BZ' magic; confirm against callers.
            alg = '_truncatedBZ'

        compengine = util.compengines.forbundletype(alg)
        self._stream = compengine.decompressorreader(fh)
        self._type = alg
        self.extras = extras or {}
        # Invoked (when set) once per chunk read; used for progress updates.
        self.callback = None

    # These methods (compressed, read, seek, tell) all appear to only
    # be used by bundlerepo, but it's a little hard to tell.
    def compressed(self):
        """True if the underlying stream uses real compression."""
        return self._type is not None and self._type != 'UN'
    def read(self, l):
        """Read up to ``l`` bytes from the decompressed stream."""
        return self._stream.read(l)
    def seek(self, pos):
        """Seek the underlying stream to ``pos``."""
        return self._stream.seek(pos)
    def tell(self):
        """Return the current position of the underlying stream."""
        return self._stream.tell()
    def close(self):
        """Close the underlying stream."""
        return self._stream.close()

    def _chunklength(self):
        """Read a chunk length header and return the payload size.

        Returns 0 for a terminating (zero-length) chunk and aborts on an
        impossible length. Fires ``self.callback`` for non-empty chunks.
        """
        d = readexactly(self._stream, 4)
        l = struct.unpack(">l", d)[0]
        if l <= 4:
            if l:
                raise error.Abort(_("invalid chunk length %d") % l)
            return 0
        if self.callback:
            self.callback()
        return l - 4

    def changelogheader(self):
        """v10 does not have a changelog header chunk"""
        return {}

    def manifestheader(self):
        """v10 does not have a manifest header chunk"""
        return {}

    def filelogheader(self):
        """return the header of the filelogs chunk, v10 only has the filename"""
        l = self._chunklength()
        if not l:
            return {}
        fname = readexactly(self._stream, l)
        return {'filename': fname}

    def _deltaheader(self, headertuple, prevnode):
        """Resolve a parsed delta header into its six components.

        cg1 does not transmit the delta base: a delta chains against the
        previously transmitted node, or p1 for the first delta of a group.
        """
        node, p1, p2, cs = headertuple
        if prevnode is None:
            deltabase = p1
        else:
            deltabase = prevnode
        flags = 0
        return node, p1, p2, deltabase, cs, flags

    def deltachunk(self, prevnode):
        """Read one delta entry from the stream.

        Returns {} at the end of a group, otherwise a tuple
        (node, p1, p2, cs, deltabase, delta, flags) — note ``cs`` and
        ``deltabase`` are swapped relative to the header order.
        """
        l = self._chunklength()
        if not l:
            return {}
        headerdata = readexactly(self._stream, self.deltaheadersize)
        header = self.deltaheader.unpack(headerdata)
        delta = readexactly(self._stream, l - self.deltaheadersize)
        node, p1, p2, deltabase, cs, flags = self._deltaheader(header, prevnode)
        return (node, p1, p2, cs, deltabase, delta, flags)

    def getchunks(self):
        """returns all the chunks contains in the bundle

        Used when you need to forward the binary stream to a file or another
        network API. To do so, it parse the changegroup data, otherwise it will
        block in case of sshrepo because it don't know the end of the stream.
        """
        # For changegroup 1 and 2, we expect 3 parts: changelog, manifestlog,
        # and a list of filelogs. For changegroup 3, we expect 4 parts:
        # changelog, manifestlog, a list of tree manifestlogs, and a list of
        # filelogs.
        #
        # Changelog and manifestlog parts are terminated with empty chunks. The
        # tree and file parts are a list of entry sections. Each entry section
        # is a series of chunks terminating in an empty chunk. The list of these
        # entry sections is terminated in yet another empty chunk, so we know
        # we've reached the end of the tree/file list when we reach an empty
        # chunk that was proceeded by no non-empty chunks.

        parts = 0
        while parts < 2 + self._grouplistcount:
            noentries = True
            while True:
                chunk = getchunk(self)
                if not chunk:
                    # The first two empty chunks represent the end of the
                    # changelog and the manifestlog portions. The remaining
                    # empty chunks represent either A) the end of individual
                    # tree or file entries in the file list, or B) the end of
                    # the entire list. It's the end of the entire list if there
                    # were no entries (i.e. noentries is True).
                    if parts < 2:
                        parts += 1
                    elif noentries:
                        parts += 1
                    break
                noentries = False
                yield chunkheader(len(chunk))
                pos = 0
                # Re-emit the payload in at most 1MB slices.
                while pos < len(chunk):
                    next = pos + 2**20
                    yield chunk[pos:next]
                    pos = next
            yield closechunk()

    def _unpackmanifests(self, repo, revmap, trp, prog):
        """Apply the manifest portion of the stream to ``repo.manifestlog``."""
        self.callback = prog.increment
        # no need to check for empty manifest group here:
        # if the result of the merge of 1 and 2 is the same in 3 and 4,
        # no new manifest will be created and the manifest group will
        # be empty during the pull
        self.manifestheader()
        deltas = self.deltaiter()
        repo.manifestlog.addgroup(deltas, revmap, trp)
        prog.complete()
        self.callback = None

    def apply(self, repo, tr, srctype, url, targetphase=phases.draft,
              expectedtotal=None):
        """Add the changegroup returned by source.read() to this repo.
        srctype is a string like 'push', 'pull', or 'unbundle'. url is
        the URL of the repo where this changegroup is coming from.

        Return an integer summarizing the change to this repo:
        - nothing changed or no source: 0
        - more heads than before: 1+added heads (2..n)
        - fewer heads than before: -1-removed heads (-2..-n)
        - number of heads stays the same: 1
        """
        repo = repo.unfiltered()
        def csmap(x):
            repo.ui.debug("add changeset %s\n" % short(x))
            return len(cl)

        def revmap(x):
            return cl.rev(x)

        changesets = files = revisions = 0

        try:
            # The transaction may already carry source information. In this
            # case we use the top level data. We overwrite the argument
            # because we need to use the top level value (if they exist)
            # in this function.
            srctype = tr.hookargs.setdefault('source', srctype)
            url = tr.hookargs.setdefault('url', url)
            repo.hook('prechangegroup',
                      throw=True, **pycompat.strkwargs(tr.hookargs))

            # write changelog data to temp files so concurrent readers
            # will not see an inconsistent view
            cl = repo.changelog
            cl.delayupdate(tr)
            oldheads = set(cl.heads())

            trp = weakref.proxy(tr)
            # pull off the changeset group
            repo.ui.status(_("adding changesets\n"))
            clstart = len(cl)
            progress = repo.ui.makeprogress(_('changesets'), unit=_('chunks'),
                                            total=expectedtotal)
            self.callback = progress.increment

            efiles = set()
            def onchangelog(cl, node):
                efiles.update(cl.readfiles(node))

            self.changelogheader()
            deltas = self.deltaiter()
            cgnodes = cl.addgroup(deltas, csmap, trp, addrevisioncb=onchangelog)
            # Only the count of affected files is needed past this point.
            efiles = len(efiles)

            if not cgnodes:
                repo.ui.develwarn('applied empty changegroup',
                                  config='warn-empty-changegroup')
            clend = len(cl)
            changesets = clend - clstart
            progress.complete()
            self.callback = None

            # pull off the manifest group
            repo.ui.status(_("adding manifests\n"))
            # We know that we'll never have more manifests than we had
            # changesets.
            progress = repo.ui.makeprogress(_('manifests'), unit=_('chunks'),
                                            total=changesets)
            self._unpackmanifests(repo, revmap, trp, progress)

            needfiles = {}
            if repo.ui.configbool('server', 'validate'):
                cl = repo.changelog
                ml = repo.manifestlog
                # validate incoming csets have their manifests
                for cset in pycompat.xrange(clstart, clend):
                    mfnode = cl.changelogrevision(cset).manifest
                    mfest = ml[mfnode].readdelta()
                    # store file cgnodes we must see
                    for f, n in mfest.iteritems():
                        needfiles.setdefault(f, set()).add(n)

            # process the files
            repo.ui.status(_("adding file changes\n"))
            newrevs, newfiles = _addchangegroupfiles(
                repo, self, revmap, trp, efiles, needfiles)
            revisions += newrevs
            files += newfiles

            deltaheads = 0
            if oldheads:
                heads = cl.heads()
                deltaheads = len(heads) - len(oldheads)
                for h in heads:
                    # New heads that close a branch don't count as added heads.
                    if h not in oldheads and repo[h].closesbranch():
                        deltaheads -= 1
            htext = ""
            if deltaheads:
                htext = _(" (%+d heads)") % deltaheads

            repo.ui.status(_("added %d changesets"
                             " with %d changes to %d files%s\n")
                           % (changesets, revisions, files, htext))
            repo.invalidatevolatilesets()

            if changesets > 0:
                if 'node' not in tr.hookargs:
                    tr.hookargs['node'] = hex(cl.node(clstart))
                    tr.hookargs['node_last'] = hex(cl.node(clend - 1))
                    hookargs = dict(tr.hookargs)
                else:
                    hookargs = dict(tr.hookargs)
                    hookargs['node'] = hex(cl.node(clstart))
                    hookargs['node_last'] = hex(cl.node(clend - 1))
                repo.hook('pretxnchangegroup',
                          throw=True, **pycompat.strkwargs(hookargs))

            added = [cl.node(r) for r in pycompat.xrange(clstart, clend)]
            phaseall = None
            if srctype in ('push', 'serve'):
                # Old servers can not push the boundary themselves.
                # New servers won't push the boundary if changeset already
                # exists locally as secret
                #
                # We should not use added here but the list of all change in
                # the bundle
                if repo.publishing():
                    targetphase = phaseall = phases.public
                else:
                    # closer target phase computation

                    # Those changesets have been pushed from the
                    # outside, their phases are going to be pushed
                    # alongside. Therefor `targetphase` is
                    # ignored.
                    targetphase = phaseall = phases.draft
            if added:
                phases.registernew(repo, tr, targetphase, added)
            if phaseall is not None:
                phases.advanceboundary(repo, tr, phaseall, cgnodes)

            if changesets > 0:

                def runhooks():
                    # These hooks run when the lock releases, not when the
                    # transaction closes. So it's possible for the changelog
                    # to have changed since we last saw it.
                    if clstart >= len(repo):
                        return

                    repo.hook("changegroup", **pycompat.strkwargs(hookargs))

                    for n in added:
                        args = hookargs.copy()
                        args['node'] = hex(n)
                        del args['node_last']
                        repo.hook("incoming", **pycompat.strkwargs(args))

                    newheads = [h for h in repo.heads()
                                if h not in oldheads]
                    repo.ui.log("incoming",
                                "%d incoming changes - new heads: %s\n",
                                len(added),
                                ', '.join([hex(c[:6]) for c in newheads]))

                tr.addpostclose('changegroup-runhooks-%020i' % clstart,
                                lambda tr: repo._afterlock(runhooks))
        finally:
            repo.ui.flush()
        # never return 0 here:
        if deltaheads < 0:
            ret = deltaheads - 1
        else:
            ret = deltaheads + 1
        return ret

    def deltaiter(self):
        """
        returns an iterator of the deltas in this changegroup

        Useful for passing to the underlying storage system to be stored.
        """
        chain = None
        for chunkdata in iter(lambda: self.deltachunk(chain), {}):
            # Chunkdata: (node, p1, p2, cs, deltabase, delta, flags)
            yield chunkdata
            chain = chunkdata[0]
449 448
class cg2unpacker(cg1unpacker):
    """Unpacker for cg2 streams.

    cg2 streams add support for generaldelta, so the delta header
    format is slightly different. All other features about the data
    remain the same.
    """
    deltaheader = _CHANGEGROUPV2_DELTA_HEADER
    deltaheadersize = deltaheader.size
    version = '02'

    def _deltaheader(self, headertuple, prevnode):
        # cg2 transmits the delta base explicitly, so ``prevnode`` is not
        # needed to infer it; flags are always zero in this version.
        node, p1, p2, deltabase, cs = headertuple
        return node, p1, p2, deltabase, cs, 0
465 464
class cg3unpacker(cg2unpacker):
    """Unpacker for cg3 streams.

    cg3 streams add support for exchanging treemanifests and revlog
    flags. It adds the revlog flags to the delta header and an empty chunk
    separating manifests and files.
    """
    deltaheader = _CHANGEGROUPV3_DELTA_HEADER
    deltaheadersize = deltaheader.size
    version = '03'
    _grouplistcount = 2 # One list of manifests and one list of files

    def _deltaheader(self, headertuple, prevnode):
        # cg3 headers already carry all six fields, flags included.
        return headertuple

    def _unpackmanifests(self, repo, revmap, trp, prog):
        # The root manifest group is handled like cg1/cg2...
        super(cg3unpacker, self)._unpackmanifests(repo, revmap, trp, prog)
        # ...followed by one group per tree manifest directory.
        for chunkdata in iter(self.filelogheader, {}):
            # If we get here, there are directory manifests in the changegroup
            dirname = chunkdata["filename"]
            repo.ui.debug("adding %s revisions\n" % dirname)
            dirlog = repo.manifestlog._revlog.dirlog(dirname)
            if not dirlog.addgroup(self.deltaiter(), revmap, trp):
                raise error.Abort(_("received dir revlog group is empty"))
492 491
class headerlessfixup(object):
    """Wrap a stream whose leading bytes were already consumed.

    ``read()`` first replays the saved prefix ``h`` and then falls
    through to the underlying file object ``fh``.
    """
    def __init__(self, fh, h):
        self._h = h
        self._fh = fh
    def read(self, n):
        if not self._h:
            return readexactly(self._fh, n)
        d, self._h = self._h[:n], self._h[n:]
        if len(d) < n:
            # Saved prefix exhausted mid-read; top up from the stream.
            d += readexactly(self._fh, n - len(d))
        return d
504 503
@attr.s(slots=True, frozen=True)
class revisiondelta(object):
    """Describes a delta entry in a changegroup.

    Captured data is sufficient to serialize the delta into multiple
    formats.
    """
    # 20 byte node of this revision.
    node = attr.ib()
    # 20 byte nodes of the parent revisions.
    p1node = attr.ib()
    p2node = attr.ib()
    # 20 byte node of the revision this delta is computed against.
    basenode = attr.ib()
    # 20 byte node of the changeset revision this delta is associated with.
    linknode = attr.ib()
    # 2 bytes of flags to apply to revision data.
    flags = attr.ib()
    # Iterable of chunks holding raw delta data (e.g. a diff header
    # followed by the delta body).
    deltachunks = attr.ib()
525 524
526 525 def _sortnodesnormal(store, nodes, reorder):
527 526 """Sort nodes for changegroup generation and turn into revnums."""
528 527 # for generaldelta revlogs, we linearize the revs; this will both be
529 528 # much quicker and generate a much smaller bundle
530 529 if (store._generaldelta and reorder is None) or reorder:
531 530 dag = dagutil.revlogdag(store)
532 531 return dag.linearize(set(store.rev(n) for n in nodes))
533 532 else:
534 533 return sorted([store.rev(n) for n in nodes])
535 534
536 535 def _sortnodesellipsis(store, nodes, cl, lookup):
537 536 """Sort nodes for changegroup generation and turn into revnums."""
538 537 # Ellipses serving mode.
539 538 #
540 539 # In a perfect world, we'd generate better ellipsis-ified graphs
541 540 # for non-changelog revlogs. In practice, we haven't started doing
542 541 # that yet, so the resulting DAGs for the manifestlog and filelogs
543 542 # are actually full of bogus parentage on all the ellipsis
544 543 # nodes. This has the side effect that, while the contents are
545 544 # correct, the individual DAGs might be completely out of whack in
546 545 # a case like 882681bc3166 and its ancestors (back about 10
547 546 # revisions or so) in the main hg repo.
548 547 #
549 548 # The one invariant we *know* holds is that the new (potentially
550 549 # bogus) DAG shape will be valid if we order the nodes in the
551 550 # order that they're introduced in dramatis personae by the
552 551 # changelog, so what we do is we sort the non-changelog histories
553 552 # by the order in which they are used by the changelog.
554 553 key = lambda n: cl.rev(lookup(n))
555 554 return [store.rev(n) for n in sorted(nodes, key=key)]
556 555
def _revisiondeltanormal(store, rev, prev, linknode, deltaparentfn):
    """Construct a revision delta for non-ellipses changegroup generation.

    ``prev`` is the revision emitted just before this one and
    ``deltaparentfn`` chooses which revision the delta is computed
    against. Returns a ``revisiondelta``.
    """
    node = store.node(rev)
    p1, p2 = store.parentrevs(rev)
    base = deltaparentfn(store, rev, p1, p2, prev)

    prefix = ''
    if store.iscensored(base) or store.iscensored(rev):
        # Censored revisions cannot be expressed as a normal delta: send
        # the full raw text (or the censorship tombstone) wrapped in a
        # diff header that replaces the entire base text.
        try:
            delta = store.revision(node, raw=True)
        except error.CensoredNodeError as e:
            delta = e.tombstone
        if base == nullrev:
            prefix = mdiff.trivialdiffheader(len(delta))
        else:
            baselen = store.rawsize(base)
            prefix = mdiff.replacediffheader(baselen, len(delta))
    elif base == nullrev:
        # No delta base available: emit the full revision text.
        delta = store.revision(node, raw=True)
        prefix = mdiff.trivialdiffheader(len(delta))
    else:
        # Common case: a real delta against the chosen base.
        delta = store.revdiff(base, rev)
    p1n, p2n = store.parents(node)

    return revisiondelta(
        node=node,
        p1node=p1n,
        p2node=p2n,
        basenode=store.node(base),
        linknode=linknode,
        flags=store.flags(rev),
        deltachunks=(prefix, delta),
    )
590 589
def _revisiondeltanarrow(cl, store, ischangelog, rev, linkrev,
                         linknode, clrevtolocalrev, fullclnodes,
                         precomputedellipsis):
    """Construct a revision delta for an ellipsis-serving changegroup entry.

    The revision's parents are remapped onto the ellipsis graph and the
    revision itself is emitted as a full snapshot (delta against nullid)
    flagged with REVIDX_ELLIPSIS.
    """
    linkparents = precomputedellipsis[linkrev]
    def local(clrev):
        """Turn a changelog revnum into a local revnum.

        The ellipsis dag is stored as revnums on the changelog,
        but when we're producing ellipsis entries for
        non-changelog revlogs, we need to turn those numbers into
        something local. This does that for us, and during the
        changelog sending phase will also expand the stored
        mappings as needed.
        """
        if clrev == nullrev:
            return nullrev

        if ischangelog:
            return clrev

        # Walk the ellipsis-ized changelog breadth-first looking for a
        # change that has been linked from the current revlog.
        #
        # For a flat manifest revlog only a single step should be necessary
        # as all relevant changelog entries are relevant to the flat
        # manifest.
        #
        # For a filelog or tree manifest dirlog however not every changelog
        # entry will have been relevant, so we need to skip some changelog
        # nodes even after ellipsis-izing.
        walk = [clrev]
        while walk:
            p = walk[0]
            walk = walk[1:]
            if p in clrevtolocalrev:
                return clrevtolocalrev[p]
            elif p in fullclnodes:
                walk.extend([pp for pp in cl.parentrevs(p)
                             if pp != nullrev])
            elif p in precomputedellipsis:
                walk.extend([pp for pp in precomputedellipsis[p]
                             if pp != nullrev])
            else:
                # In this case, we've got an ellipsis with parents
                # outside the current bundle (likely an
                # incremental pull). We "know" that we can use the
                # value of this same revlog at whatever revision
                # is pointed to by linknode. "Know" is in scare
                # quotes because I haven't done enough examination
                # of edge cases to convince myself this is really
                # a fact - it works for all the (admittedly
                # thorough) cases in our testsuite, but I would be
                # somewhat unsurprised to find a case in the wild
                # where this breaks down a bit. That said, I don't
                # know if it would hurt anything.
                for i in pycompat.xrange(rev, 0, -1):
                    if store.linkrev(i) == clrev:
                        return i
                # We failed to resolve a parent for this node, so
                # we crash the changegroup construction.
                raise error.Abort(
                    'unable to resolve parent while packing %r %r'
                    ' for changeset %r' % (store.indexfile, rev, clrev))

        return nullrev

    if not linkparents or (
        store.parentrevs(rev) == (nullrev, nullrev)):
        p1, p2 = nullrev, nullrev
    elif len(linkparents) == 1:
        p1, = sorted(local(p) for p in linkparents)
        p2 = nullrev
    else:
        p1, p2 = sorted(local(p) for p in linkparents)

    n = store.node(rev)
    p1n, p2n = store.node(p1), store.node(p2)
    flags = store.flags(rev)
    flags |= revlog.REVIDX_ELLIPSIS

    # TODO: try and actually send deltas for ellipsis data blocks
    data = store.revision(n)
    diffheader = mdiff.trivialdiffheader(len(data))

    return revisiondelta(
        node=n,
        p1node=p1n,
        p2node=p2n,
        basenode=nullid,
        linknode=linknode,
        flags=flags,
        deltachunks=(diffheader, data),
    )
684 683
685 684 class cgpacker(object):
    def __init__(self, repo, filematcher, version, allowreorder,
                 deltaparentfn, builddeltaheader, manifestsend,
                 bundlecaps=None, ellipses=False,
                 shallow=False, ellipsisroots=None, fullnodes=None):
        """Given a source repo, construct a bundler.

        filematcher is a matcher that matches on files to include in the
        changegroup. Used to facilitate sparse changegroups.

        allowreorder controls whether reordering of revisions is allowed.
        This value is used when ``bundle.reorder`` is ``auto`` or isn't
        set.

        deltaparentfn is a callable that resolves the delta parent for
        a specific revision.

        builddeltaheader is a callable that constructs the header for a group
        delta.

        manifestsend is a chunk to send after manifests have been fully emitted.

        ellipses indicates whether ellipsis serving mode is enabled.

        bundlecaps is optional and can be used to specify the set of
        capabilities which can be used to build the bundle. While bundlecaps is
        unused in core Mercurial, extensions rely on this feature to communicate
        capabilities to customize the changegroup packer.

        shallow indicates whether shallow data might be sent. The packer may
        need to pack file contents not introduced by the changes being packed.

        ellipsisroots maps ellipsis revisions to their roots at the
        changelog level.

        fullnodes is the set of changelog nodes which should not be ellipsis
        nodes. We store this rather than the set of nodes that should be
        ellipsis because for very large histories we expect this to be
        significantly smaller.
        """
        assert filematcher
        self._filematcher = filematcher

        self.version = version
        self._deltaparentfn = deltaparentfn
        self._builddeltaheader = builddeltaheader
        self._manifestsend = manifestsend
        self._ellipses = ellipses

        # Set of capabilities we can use to build the bundle.
        if bundlecaps is None:
            bundlecaps = set()
        self._bundlecaps = bundlecaps
        self._isshallow = shallow
        self._fullclnodes = fullnodes

        # Maps ellipsis revs to their roots at the changelog level.
        self._precomputedellipsis = ellipsisroots

        # experimental config: bundle.reorder
        reorder = repo.ui.config('bundle', 'reorder')
        if reorder == 'auto':
            self._reorder = allowreorder
        else:
            # Any value other than 'auto' is interpreted as a boolean.
            self._reorder = stringutil.parsebool(reorder)

        self._repo = repo

        # In verbose (but not debug) mode, emit per-part size notes.
        if self._repo.ui.verbose and not self._repo.ui.debugflag:
            self._verbosenote = self._repo.ui.note
        else:
            self._verbosenote = lambda s: None
754 753
    def group(self, revs, store, ischangelog, lookup, units=None,
              clrevtolocalrev=None):
        """Calculate a delta group, yielding a sequence of changegroup chunks
        (strings).

        Given a list of changeset revs, return a set of deltas and
        metadata corresponding to nodes. The first delta is
        first parent(nodelist[0]) -> nodelist[0], the receiver is
        guaranteed to have this parent as it has all history before
        these changesets. In the case firstparent is nullrev the
        changegroup starts with a full revision.

        If units is not None, progress detail will be generated, units specifies
        the type of revlog that is touched (changelog, manifest, etc.).
        """
        # if we don't have any revisions touched by these changesets, bail
        if len(revs) == 0:
            yield closechunk()
            return

        cl = self._repo.changelog

        # add the parent of the first rev
        p = store.parentrevs(revs[0])[0]
        revs.insert(0, p)

        # build deltas
        progress = None
        if units is not None:
            progress = self._repo.ui.makeprogress(_('bundling'), unit=units,
                                                  total=(len(revs) - 1))
        for r in pycompat.xrange(len(revs) - 1):
            if progress:
                progress.update(r + 1)
            prev, curr = revs[r], revs[r + 1]
            linknode = lookup(store.node(curr))

            if self._ellipses:
                linkrev = cl.rev(linknode)
                clrevtolocalrev[linkrev] = curr

                # This is a node to send in full, because the changeset it
                # corresponds to was a full changeset.
                if linknode in self._fullclnodes:
                    delta = _revisiondeltanormal(store, curr, prev, linknode,
                                                 self._deltaparentfn)
                elif linkrev not in self._precomputedellipsis:
                    # The linked changeset is not part of the ellipsis
                    # graph, so nothing is emitted for this revision.
                    delta = None
                else:
                    delta = _revisiondeltanarrow(
                        cl, store, ischangelog, curr, linkrev, linknode,
                        clrevtolocalrev, self._fullclnodes,
                        self._precomputedellipsis)
            else:
                delta = _revisiondeltanormal(store, curr, prev, linknode,
                                             self._deltaparentfn)

            if not delta:
                continue

            # Frame the delta: length header, delta header, then payload.
            meta = self._builddeltaheader(delta)
            l = len(meta) + sum(len(x) for x in delta.deltachunks)
            yield chunkheader(l)
            yield meta
            for x in delta.deltachunks:
                yield x

        if progress:
            progress.complete()

        yield closechunk()
826 825
827 # filter any nodes that claim to be part of the known set
828 def _prune(self, store, missing, commonrevs):
829 # TODO this violates storage abstraction for manifests.
830 if isinstance(store, manifest.manifestrevlog):
831 if not self._filematcher.visitdir(store._dir[:-1] or '.'):
832 return []
833
834 rr, rl = store.rev, store.linkrev
835 return [n for n in missing if rl(rr(n)) not in commonrevs]
836
    def generate(self, commonrevs, clnodes, fastpathlinkrev, source):
        """Yield a sequence of changegroup byte chunks.

        Emits the changelog, then manifests, then files, each terminated
        per the changegroup framing, and fires the 'outgoing' hook when
        any changesets were sent.
        """

        repo = self._repo
        cl = repo.changelog

        self._verbosenote(_('uncompressed size of bundle content:\n'))
        size = 0

        clstate, chunks = self._generatechangelog(cl, clnodes)
        for chunk in chunks:
            size += len(chunk)
            yield chunk

        self._verbosenote(_('%8.i (changelog)\n') % size)

        clrevorder = clstate['clrevorder']
        mfs = clstate['mfs']
        changedfiles = clstate['changedfiles']

        # We need to make sure that the linkrev in the changegroup refers to
        # the first changeset that introduced the manifest or file revision.
        # The fastpath is usually safer than the slowpath, because the filelogs
        # are walked in revlog order.
        #
        # When taking the slowpath with reorder=None and the manifest revlog
        # uses generaldelta, the manifest may be walked in the "wrong" order.
        # Without 'clrevorder', we would get an incorrect linkrev (see fix in
        # cc0ff93d0c0c).
        #
        # When taking the fastpath, we are only vulnerable to reordering
        # of the changelog itself. The changelog never uses generaldelta, so
        # it is only reordered when reorder=True. To handle this case, we
        # simply take the slowpath, which already has the 'clrevorder' logic.
        # This was also fixed in cc0ff93d0c0c.
        fastpathlinkrev = fastpathlinkrev and not self._reorder
        # Treemanifests don't work correctly with fastpathlinkrev
        # either, because we don't discover which directory nodes to
        # send along with files. This could probably be fixed.
        fastpathlinkrev = fastpathlinkrev and (
            'treemanifest' not in repo.requirements)

        fnodes = {} # needed file nodes

        for chunk in self.generatemanifests(commonrevs, clrevorder,
                                            fastpathlinkrev, mfs, fnodes, source,
                                            clstate['clrevtomanifestrev']):
            yield chunk

        mfdicts = None
        if self._ellipses and self._isshallow:
            # In shallow ellipsis mode, file generation gets the full
            # manifest contents paired with their linkrevs.
            mfdicts = [(self._repo.manifestlog[n].read(), lr)
                       for (n, lr) in mfs.iteritems()]

        # The manifest map is no longer needed past this point.
        mfs.clear()
        clrevs = set(cl.rev(x) for x in clnodes)

        for chunk in self.generatefiles(changedfiles, commonrevs,
                                        source, mfdicts, fastpathlinkrev,
                                        fnodes, clrevs):
            yield chunk

        yield closechunk()

        if clnodes:
            repo.hook('outgoing', node=hex(clnodes[0]), source=source)
903 892
    def _generatechangelog(self, cl, nodes):
        """Generate data for changelog chunks.

        Returns a 2-tuple of a dict containing state and an iterable of
        byte chunks. The state will not be fully populated until the
        chunk stream has been fully consumed.
        """
        # changelog node -> position in which it is emitted; used later to
        # pick the earliest introducing changeset for manifests/files.
        clrevorder = {}
        mfs = {}  # needed manifests
        mfl = self._repo.manifestlog
        # TODO violates storage abstraction.
        mfrevlog = mfl._revlog
        changedfiles = set()
        clrevtomanifestrev = {}

        # Callback for the changelog, used to collect changed files and
        # manifest nodes.
        # Returns the linkrev node (identity in the changelog case).
        def lookupcl(x):
            c = cl.read(x)
            clrevorder[x] = len(clrevorder)

            if self._ellipses:
                # Only update mfs if x is going to be sent. Otherwise we
                # end up with bogus linkrevs specified for manifests and
                # we skip some manifest nodes that we should otherwise
                # have sent.
                if (x in self._fullclnodes
                    or cl.rev(x) in self._precomputedellipsis):
                    n = c[0]
                    # Record the first changeset introducing this manifest
                    # version.
                    mfs.setdefault(n, x)
                    # Set this narrow-specific dict so we have the lowest
                    # manifest revnum to look up for this cl revnum. (Part of
                    # mapping changelog ellipsis parents to manifest ellipsis
                    # parents)
                    clrevtomanifestrev.setdefault(cl.rev(x), mfrevlog.rev(n))
                # We can't trust the changed files list in the changeset if the
                # client requested a shallow clone.
                if self._isshallow:
                    changedfiles.update(mfl[c[0]].read().keys())
                else:
                    changedfiles.update(c[3])
            else:

                n = c[0]
                # record the first changeset introducing this manifest version
                mfs.setdefault(n, x)
                # Record a complete list of potentially-changed files in
                # this manifest.
                changedfiles.update(c[3])

            return x

        # Changelog doesn't benefit from reordering revisions. So send out
        # revisions in store order.
        revs = sorted(cl.rev(n) for n in nodes)

        state = {
            'clrevorder': clrevorder,
            'mfs': mfs,
            'changedfiles': changedfiles,
            'clrevtomanifestrev': clrevtomanifestrev,
        }

        gen = self.group(revs, cl, True, lookupcl, units=_('changesets'),
                         clrevtolocalrev={})

        return state, gen
974 963
    def generatemanifests(self, commonrevs, clrevorder, fastpathlinkrev, mfs,
                          fnodes, source, clrevtolocalrev):
        """Returns an iterator of changegroup chunks containing manifests.

        `source` is unused here, but is used by extensions like remotefilelog to
        change what is sent based in pulls vs pushes, etc.
        """
        repo = self._repo
        cl = repo.changelog
        mfl = repo.manifestlog
        dirlog = mfl._revlog.dirlog
        # directory path -> {manifest node -> linkrev node}; '' is the root
        # manifest.  Subdirectory entries are added while walking parents
        # below (treemanifest only).
        tmfnodes = {'': mfs}

        # Callback for the manifest, used to collect linkrevs for filelog
        # revisions.
        # Returns the linkrev node (collected in lookupcl).
        def makelookupmflinknode(dir, nodes):
            if fastpathlinkrev:
                assert not dir
                return mfs.__getitem__

            def lookupmflinknode(x):
                """Callback for looking up the linknode for manifests.

                Returns the linkrev node for the specified manifest.

                SIDE EFFECT:

                1) fclnodes gets populated with the list of relevant
                file nodes if we're not using fastpathlinkrev
                2) When treemanifests are in use, collects treemanifest nodes
                to send

                Note that this means manifests must be completely sent to
                the client before you can trust the list of files and
                treemanifests to send.
                """
                clnode = nodes[x]
                mdata = mfl.get(dir, x).readfast(shallow=True)
                for p, n, fl in mdata.iterentries():
                    if fl == 't':  # subdirectory manifest
                        subdir = dir + p + '/'
                        tmfclnodes = tmfnodes.setdefault(subdir, {})
                        tmfclnode = tmfclnodes.setdefault(n, clnode)
                        # Keep the earliest-emitted introducing changeset.
                        if clrevorder[clnode] < clrevorder[tmfclnode]:
                            tmfclnodes[n] = clnode
                    else:
                        f = dir + p
                        fclnodes = fnodes.setdefault(f, {})
                        fclnode = fclnodes.setdefault(n, clnode)
                        if clrevorder[clnode] < clrevorder[fclnode]:
                            fclnodes[n] = clnode
                return clnode
            return lookupmflinknode

        size = 0
        while tmfnodes:
            dir, nodes = tmfnodes.popitem()
            store = dirlog(dir)

            # Skip directories excluded by the matcher; otherwise drop
            # nodes whose linkrev the receiver already has.
            if not self._filematcher.visitdir(store._dir[:-1] or '.'):
                prunednodes = []
            else:
                frev, flr = store.rev, store.linkrev
                prunednodes = [n for n in nodes
                               if flr(frev(n)) not in commonrevs]

            if dir and not prunednodes:
                continue

            lookupfn = makelookupmflinknode(dir, nodes)

            if self._ellipses:
                revs = _sortnodesellipsis(store, prunednodes, cl,
                                          lookupfn)
            else:
                revs = _sortnodesnormal(store, prunednodes,
                                        self._reorder)

            if dir:
                # Named directory sections only exist in cg3.
                assert self.version == b'03'
                chunk = _fileheader(dir)
                size += len(chunk)
                yield chunk

            for chunk in self.group(revs, store, False, lookupfn,
                                    units=_('manifests'),
                                    clrevtolocalrev=clrevtolocalrev):
                size += len(chunk)
                yield chunk

        self._verbosenote(_('%8.i (manifests)\n') % size)
        yield self._manifestsend
    # The 'source' parameter is useful for extensions
    def generatefiles(self, changedfiles, commonrevs, source,
                      mfdicts, fastpathlinkrev, fnodes, clrevs):
        """Returns an iterator of changegroup chunks for changed files.

        For every file accepted by the file matcher that still has nodes
        to send, yields a file header chunk followed by the delta group.
        """
        changedfiles = list(filter(self._filematcher, changedfiles))

        if not fastpathlinkrev:
            # Slow path: linknodes were collected by the manifest walk.
            def normallinknodes(unused, fname):
                return fnodes.get(fname, {})
        else:
            cln = self._repo.changelog.node

            # Fast path: derive linknodes straight from the filelog's
            # linkrevs, restricted to the changesets being sent.
            def normallinknodes(store, fname):
                flinkrev = store.linkrev
                fnode = store.node
                revs = ((r, flinkrev(r)) for r in store)
                return dict((fnode(r), cln(lr))
                            for r, lr in revs if lr in clrevs)

        clrevtolocalrev = {}

        if self._isshallow:
            # In a shallow clone, the linknodes callback needs to also include
            # those file nodes that are in the manifests we sent but weren't
            # introduced by those manifests.
            commonctxs = [self._repo[c] for c in commonrevs]
            clrev = self._repo.changelog.rev

            # Defining this function has a side-effect of overriding the
            # function of the same name that was passed in as an argument.
            # TODO have caller pass in appropriate function.
            def linknodes(flog, fname):
                for c in commonctxs:
                    try:
                        fnode = c.filenode(fname)
                        clrevtolocalrev[c.rev()] = flog.rev(fnode)
                    except error.ManifestLookupError:
                        # file absent from that common changeset; ignore
                        pass
                links = normallinknodes(flog, fname)
                if len(links) != len(mfdicts):
                    for mf, lr in mfdicts:
                        fnode = mf.get(fname, None)
                        if fnode in links:
                            links[fnode] = min(links[fnode], lr, key=clrev)
                        elif fnode:
                            links[fnode] = lr
                return links
        else:
            linknodes = normallinknodes

        repo = self._repo
        cl = repo.changelog
        progress = repo.ui.makeprogress(_('bundling'), unit=_('files'),
                                        total=len(changedfiles))
        for i, fname in enumerate(sorted(changedfiles)):
            filerevlog = repo.file(fname)
            if not filerevlog:
                raise error.Abort(_("empty or missing file data for %s") %
                                  fname)

            clrevtolocalrev.clear()

            linkrevnodes = linknodes(filerevlog, fname)
            # Lookup for filenodes, we collected the linkrev nodes above in the
            # fastpath case and with lookupmf in the slowpath case.
            def lookupfilelog(x):
                return linkrevnodes[x]

            # Drop file nodes whose linkrev the receiver already has.
            frev, flr = filerevlog.rev, filerevlog.linkrev
            filenodes = [n for n in linkrevnodes
                         if flr(frev(n)) not in commonrevs]

            if filenodes:
                if self._ellipses:
                    revs = _sortnodesellipsis(filerevlog, filenodes,
                                              cl, lookupfilelog)
                else:
                    revs = _sortnodesnormal(filerevlog, filenodes,
                                            self._reorder)

                progress.update(i + 1, item=fname)
                h = _fileheader(fname)
                size = len(h)
                yield h
                for chunk in self.group(revs, filerevlog, False, lookupfilelog,
                                        clrevtolocalrev=clrevtolocalrev):
                    size += len(chunk)
                    yield chunk
                self._verbosenote(_('%8.i %s\n') % (size, fname))
        progress.complete()
1149 1147
1150 1148 def _deltaparentprev(store, rev, p1, p2, prev):
1151 1149 """Resolve a delta parent to the previous revision.
1152 1150
1153 1151 Used for version 1 changegroups, which don't support generaldelta.
1154 1152 """
1155 1153 return prev
1156 1154
def _deltaparentgeneraldelta(store, rev, p1, p2, prev):
    """Resolve a delta parent when general deltas are supported."""
    dp = store.deltaparent(rev)

    if dp == nullrev:
        if store.storedeltachains:
            # Avoid sending full revisions when delta parent is null. Pick
            # prev in that case. It's tempting to pick p1 in this case, as p1
            # will be smaller in the common case. However, computing a delta
            # against p1 may require resolving the raw text of p1, which
            # could be expensive. The revlog caches should have prev cached,
            # meaning less CPU for changegroup generation. There is likely
            # room to add a flag and/or config option to control this
            # behavior.
            base = prev
        else:
            # revlog is configured to use full snapshot for a reason,
            # stick to full snapshot.
            base = nullrev
    elif dp in (p1, p2, prev):
        base = dp
    else:
        # Pick prev when we can't be sure remote has the base revision.
        return prev

    if base != nullrev and not store.candelta(base, rev):
        base = nullrev

    return base
1183 1181
1184 1182 def _deltaparentellipses(store, rev, p1, p2, prev):
1185 1183 """Resolve a delta parent when in ellipses mode."""
1186 1184 # TODO: send better deltas when in narrow mode.
1187 1185 #
1188 1186 # changegroup.group() loops over revisions to send,
1189 1187 # including revisions we'll skip. What this means is that
1190 1188 # `prev` will be a potentially useless delta base for all
1191 1189 # ellipsis nodes, as the client likely won't have it. In
1192 1190 # the future we should do bookkeeping about which nodes
1193 1191 # have been sent to the client, and try to be
1194 1192 # significantly smarter about delta bases. This is
1195 1193 # slightly tricky because this same code has to work for
1196 1194 # all revlogs, and we don't have the linkrev/linknode here.
1197 1195 return p1
1198 1196
def _makecg1packer(repo, filematcher, bundlecaps, ellipses=False,
                   shallow=False, ellipsisroots=None, fullnodes=None):
    """Construct a changegroup packer for the version 1 wire format."""
    def builddeltaheader(d):
        # cg1 headers carry no explicit delta base; the base is implicitly
        # the previous revision.
        return _CHANGEGROUPV1_DELTA_HEADER.pack(
            d.node, d.p1node, d.p2node, d.linknode)

    return cgpacker(repo, filematcher, b'01',
                    deltaparentfn=_deltaparentprev,
                    allowreorder=None,
                    builddeltaheader=builddeltaheader,
                    manifestsend=b'',
                    bundlecaps=bundlecaps,
                    ellipses=ellipses,
                    shallow=shallow,
                    ellipsisroots=ellipsisroots,
                    fullnodes=fullnodes)
1214 1212
def _makecg2packer(repo, filematcher, bundlecaps, ellipses=False,
                   shallow=False, ellipsisroots=None, fullnodes=None):
    """Construct a changegroup packer for the version 2 wire format."""
    def builddeltaheader(d):
        # cg2 adds an explicit delta base node to the header.
        return _CHANGEGROUPV2_DELTA_HEADER.pack(
            d.node, d.p1node, d.p2node, d.basenode, d.linknode)

    # Since generaldelta is directly supported by cg2, reordering
    # generally doesn't help, so we disable it by default (treating
    # bundle.reorder=auto just like bundle.reorder=False).
    return cgpacker(repo, filematcher, b'02',
                    deltaparentfn=_deltaparentgeneraldelta,
                    allowreorder=False,
                    builddeltaheader=builddeltaheader,
                    manifestsend=b'',
                    bundlecaps=bundlecaps,
                    ellipses=ellipses,
                    shallow=shallow,
                    ellipsisroots=ellipsisroots,
                    fullnodes=fullnodes)
1233 1231
def _makecg3packer(repo, filematcher, bundlecaps, ellipses=False,
                   shallow=False, ellipsisroots=None, fullnodes=None):
    """Construct a changegroup packer for the version 3 wire format."""
    def builddeltaheader(d):
        # cg3 adds revlog flags to the cg2 header layout.
        return _CHANGEGROUPV3_DELTA_HEADER.pack(
            d.node, d.p1node, d.p2node, d.basenode, d.linknode, d.flags)

    # In ellipses mode prev is a poor base choice; delta against p1 instead.
    if ellipses:
        deltaparentfn = _deltaparentellipses
    else:
        deltaparentfn = _deltaparentgeneraldelta

    return cgpacker(repo, filematcher, b'03',
                    deltaparentfn=deltaparentfn,
                    allowreorder=False,
                    builddeltaheader=builddeltaheader,
                    manifestsend=closechunk(),
                    bundlecaps=bundlecaps,
                    ellipses=ellipses,
                    shallow=shallow,
                    ellipsisroots=ellipsisroots,
                    fullnodes=fullnodes)
1252 1250
# Maps changegroup wire protocol version -> (packer factory, unpacker class).
_packermap = {'01': (_makecg1packer, cg1unpacker),
              # cg2 adds support for exchanging generaldelta
              '02': (_makecg2packer, cg2unpacker),
              # cg3 adds support for exchanging revlog flags and treemanifests
              '03': (_makecg3packer, cg3unpacker),
}
1259 1257
def allsupportedversions(repo):
    """Return the set of changegroup versions this code supports.

    Version 03 is only offered when explicitly enabled or when the repo
    already requires treemanifests.
    """
    versions = set(_packermap.keys())
    needv03 = (repo.ui.configbool('experimental', 'changegroup3') or
               repo.ui.configbool('experimental', 'treemanifest') or
               'treemanifest' in repo.requirements)
    if not needv03:
        versions.discard('03')
    return versions
1267 1265
# Changegroup versions that can be applied to the repo
def supportedincomingversions(repo):
    # Currently identical to the full supported set; unlike the outgoing
    # side there is no need to restrict what we accept.
    return allsupportedversions(repo)
1271 1269
# Changegroup versions that can be created from the repo
def supportedoutgoingversions(repo):
    versions = allsupportedversions(repo)
    requirements = repo.requirements

    dropv1v2 = False
    if 'treemanifest' in requirements:
        # Versions 01 and 02 support only flat manifests and it's just too
        # expensive to convert between the flat manifest and tree manifest on
        # the fly. Since tree manifests are hashed differently, all of history
        # would have to be converted. Instead, we simply don't even pretend to
        # support versions 01 and 02.
        dropv1v2 = True
    if repository.NARROW_REQUIREMENT in requirements:
        # Versions 01 and 02 don't support revlog flags, and we need to
        # support that for stripping and unbundling to work.
        dropv1v2 = True
    if LFS_REQUIREMENT in requirements:
        # Versions 01 and 02 don't support revlog flags, and we need to
        # mark LFS entries with REVIDX_EXTSTORED.
        dropv1v2 = True

    if dropv1v2:
        versions.discard('01')
        versions.discard('02')

    return versions
1295 1293
def localversion(repo):
    """Return the best version for bundles that stay on this host.

    Used for bundles from strip and shelve, and temporary bundles, where
    no remote compatibility matters, so the newest supported outgoing
    version is always the right choice.
    """
    return max(supportedoutgoingversions(repo))
1300 1298
def safeversion(repo):
    """Return the smallest version all clients of this repo can be assumed
    to support.

    For example, all hg versions that support generaldelta also support
    changegroup 02.
    """
    versions = supportedoutgoingversions(repo)
    if 'generaldelta' in repo.requirements:
        versions.discard('01')
    assert versions
    return min(versions)
1310 1308
def getbundler(version, repo, bundlecaps=None, filematcher=None,
               ellipses=False, shallow=False, ellipsisroots=None,
               fullnodes=None):
    """Obtain a changegroup packer instance for ``version``.

    Raises if the requested version cannot express the requested
    features (sparse matchers or ellipsis nodes on cg1/cg2).
    """
    assert version in supportedoutgoingversions(repo)

    if filematcher is None:
        filematcher = matchmod.alwaysmatcher(repo.root, '')

    if version == '01' and not filematcher.always():
        raise error.ProgrammingError('version 01 changegroups do not support '
                                     'sparse file matchers')

    if ellipses and version in (b'01', b'02'):
        raise error.Abort(
            _('ellipsis nodes require at least cg3 on client and server, '
              'but negotiated version %s') % version)

    # Requested files could include files not in the local store. So
    # filter those out.
    filematcher = matchmod.intersectmatchers(repo.narrowmatch(),
                                             filematcher)

    makepacker = _packermap[version][0]
    return makepacker(repo, filematcher, bundlecaps, ellipses=ellipses,
                      shallow=shallow, ellipsisroots=ellipsisroots,
                      fullnodes=fullnodes)
1337 1335
def getunbundler(version, fh, alg, extras=None):
    """Obtain an unbundler for a ``version`` changegroup read from ``fh``."""
    unpackercls = _packermap[version][1]
    return unpackercls(fh, alg, extras=extras)
1340 1338
def _changegroupinfo(repo, nodes, source):
    """Report how many (and, when debugging, which) changesets are sent."""
    if repo.ui.verbose or source == 'bundle':
        repo.ui.status(_("%d changesets found\n") % len(nodes))
    if not repo.ui.debugflag:
        return
    repo.ui.debug("list of changesets:\n")
    for node in nodes:
        repo.ui.debug("%s\n" % hex(node))
1348 1346
def makechangegroup(repo, outgoing, version, source, fastpath=False,
                    bundlecaps=None):
    """Return an unbundler wrapping a freshly generated changegroup stream."""
    stream = makestream(repo, outgoing, version, source,
                        fastpath=fastpath, bundlecaps=bundlecaps)
    return getunbundler(version, util.chunkbuffer(stream), None,
                        {'clcount': len(outgoing.missing) })
1355 1353
def makestream(repo, outgoing, version, source, fastpath=False,
               bundlecaps=None, filematcher=None):
    """Return an iterator of changegroup chunks for ``outgoing``."""
    bundler = getbundler(version, repo, bundlecaps=bundlecaps,
                         filematcher=filematcher)

    repo = repo.unfiltered()
    commonrevs = outgoing.common
    missing = outgoing.missing
    heads = outgoing.missingheads
    # We go through the fast path if we get told to, or if all (unfiltered
    # heads have been requested (since we then know there all linkrevs will
    # be pulled by the client).
    heads.sort()
    allheads = repo.filtername is None and heads == sorted(repo.heads())
    fastpathlinkrev = fastpath or allheads

    repo.hook('preoutgoing', throw=True, source=source)
    _changegroupinfo(repo, missing, source)
    return bundler.generate(commonrevs, missing, fastpathlinkrev, source)
1375 1373
def _addchangegroupfiles(repo, source, revmap, trp, expectedfiles, needfiles):
    """Apply the filelog section of a changegroup to the repo.

    ``source`` is the unbundler positioned at the file section; ``revmap``
    maps linkrev nodes to local revisions; ``trp`` is the transaction
    proxy.  ``needfiles`` maps filename -> set of nodes referenced by the
    manifests in this changegroup; every one must be received or found
    locally, otherwise the operation aborts.

    Returns a 2-tuple of (revisions added, files touched).
    """
    revisions = 0
    files = 0
    progress = repo.ui.makeprogress(_('files'), unit=_('files'),
                                    total=expectedfiles)
    # An empty filelogheader ({}) marks the end of the file section.
    for chunkdata in iter(source.filelogheader, {}):
        files += 1
        f = chunkdata["filename"]
        repo.ui.debug("adding %s revisions\n" % f)
        progress.increment()
        fl = repo.file(f)
        o = len(fl)
        try:
            deltas = source.deltaiter()
            if not fl.addgroup(deltas, revmap, trp):
                raise error.Abort(_("received file revlog group is empty"))
        except error.CensoredBaseError as e:
            raise error.Abort(_("received delta base is censored: %s") % e)
        revisions += len(fl) - o
        if f in needfiles:
            needs = needfiles[f]
            # Tick off every newly added node against what the manifests
            # said we should receive; anything extra is corruption.
            for new in pycompat.xrange(o, len(fl)):
                n = fl.node(new)
                if n in needs:
                    needs.remove(n)
                else:
                    raise error.Abort(
                        _("received spurious file revlog entry"))
            if not needs:
                del needfiles[f]
    progress.complete()

    # Whatever remains in needfiles was referenced by a manifest but not
    # transmitted; it is only acceptable if the node already exists locally.
    for f, needs in needfiles.iteritems():
        fl = repo.file(f)
        for n in needs:
            try:
                fl.rev(n)
            except error.LookupError:
                raise error.Abort(
                    _('missing file data for %s:%s - run hg verify') %
                    (f, hex(n)))

    return revisions, files
General Comments 0
You need to be logged in to leave comments. Login now