##// END OF EJS Templates
changegroup: extract cgpacker.group() to standalone function...
Gregory Szorc -
r39045:9e8eb2b4 default
parent child Browse files
Show More
@@ -1,1438 +1,1439 b''
1 1 # changegroup.py - Mercurial changegroup manipulation functions
2 2 #
3 3 # Copyright 2006 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import os
11 11 import struct
12 12 import weakref
13 13
14 14 from .i18n import _
15 15 from .node import (
16 16 hex,
17 17 nullid,
18 18 nullrev,
19 19 short,
20 20 )
21 21
22 22 from .thirdparty import (
23 23 attr,
24 24 )
25 25
26 26 from . import (
27 27 dagutil,
28 28 error,
29 29 match as matchmod,
30 30 mdiff,
31 31 phases,
32 32 pycompat,
33 33 repository,
34 34 revlog,
35 35 util,
36 36 )
37 37
38 38 from .utils import (
39 39 stringutil,
40 40 )
41 41
# Pre-compiled delta header formats, one per changegroup version.
# cg1: node, p1, p2, linknode (delta base is implicit; see
# cg1unpacker._deltaheader). cg2 adds an explicit delta base node.
# cg3 additionally appends a 16-bit flags field.
_CHANGEGROUPV1_DELTA_HEADER = struct.Struct("20s20s20s20s")
_CHANGEGROUPV2_DELTA_HEADER = struct.Struct("20s20s20s20s20s")
_CHANGEGROUPV3_DELTA_HEADER = struct.Struct(">20s20s20s20s20sH")

# Repository requirement string advertised by the lfs extension.
LFS_REQUIREMENT = 'lfs'

# Convenience alias: read exactly N bytes or abort.
readexactly = util.readexactly
49 49
def getchunk(stream):
    """return the next chunk from stream as a string"""
    lengthdata = readexactly(stream, 4)
    length = struct.unpack(">l", lengthdata)[0]
    # The 4-byte length prefix counts itself, so anything above 4 has a
    # payload; exactly 0 is a terminator and 1..4 (or negative) is bogus.
    if length > 4:
        return readexactly(stream, length - 4)
    if length:
        raise error.Abort(_("invalid chunk length %d") % length)
    return ""
59 59
def chunkheader(length):
    """return a changegroup chunk header (string)"""
    # On-wire chunk lengths include the 4-byte length field itself.
    return struct.pack(">l", 4 + length)
63 63
def closechunk():
    """return a changegroup chunk header (string) for a zero-length chunk"""
    # A zero-length chunk terminates the current chunk sequence
    # (see cg1unpacker.getchunks for how receivers interpret it).
    return struct.pack(">l", 0)
67 67
def _fileheader(path):
    """Obtain a changegroup chunk header for a named path."""
    # A file/dir section opens with a chunk whose payload is the path.
    return chunkheader(len(path)) + path
71 71
def writechunks(ui, chunks, filename, vfs=None):
    """Write chunks to a file and return its filename.

    The stream is assumed to be a bundle file.
    Existing files will not be overwritten.
    If no filename is specified, a temporary file is created.
    """
    outfh = None
    # Path to delete on failure; cleared once everything is written.
    cleanup = None
    try:
        if not filename:
            fd, filename = pycompat.mkstemp(prefix="hg-bundle-", suffix=".hg")
            outfh = os.fdopen(fd, r"wb")
        elif vfs:
            outfh = vfs.open(filename, "wb")
        else:
            # Increase default buffer size because default is usually
            # small (4k is common on Linux).
            outfh = open(filename, "wb", 131072)

        cleanup = filename
        for chunk in chunks:
            outfh.write(chunk)
        cleanup = None
        return filename
    finally:
        if outfh is not None:
            outfh.close()
        if cleanup is not None:
            if filename and vfs:
                vfs.unlink(cleanup)
            else:
                os.unlink(cleanup)
105 105
class cg1unpacker(object):
    """Unpacker for cg1 changegroup streams.

    A changegroup unpacker handles the framing of the revision data in
    the wire format. Most consumers will want to use the apply()
    method to add the changes from the changegroup to a repository.

    If you're forwarding a changegroup unmodified to another consumer,
    use getchunks(), which returns an iterator of changegroup
    chunks. This is mostly useful for cases where you need to know the
    data stream has ended by observing the end of the changegroup.

    deltachunk() is useful only if you're applying delta data. Most
    consumers should prefer apply() instead.

    A few other public methods exist. Those are used only for
    bundlerepo and some debug commands - their use is discouraged.
    """
    deltaheader = _CHANGEGROUPV1_DELTA_HEADER
    deltaheadersize = deltaheader.size
    version = '01'
    _grouplistcount = 1 # One list of files after the manifests

    def __init__(self, fh, alg, extras=None):
        """Create an unpacker over file object ``fh``.

        ``alg`` is the bundle compression type name; None means
        uncompressed ('UN').
        """
        if alg is None:
            alg = 'UN'
        if alg not in util.compengines.supportedbundletypes:
            raise error.Abort(_('unknown stream compression type: %s')
                             % alg)
        if alg == 'BZ':
            # NOTE(review): presumably the 'BZ' magic was already consumed
            # when the bundle header was parsed, hence the truncated
            # decompressor variant — confirm against the bundle reader.
            alg = '_truncatedBZ'

        compengine = util.compengines.forbundletype(alg)
        self._stream = compengine.decompressorreader(fh)
        self._type = alg
        self.extras = extras or {}
        # When set, invoked on every non-terminating chunk length read
        # (see _chunklength); apply() uses it for progress reporting.
        self.callback = None

    # These methods (compressed, read, seek, tell) all appear to only
    # be used by bundlerepo, but it's a little hard to tell.
    def compressed(self):
        return self._type is not None and self._type != 'UN'
    def read(self, l):
        return self._stream.read(l)
    def seek(self, pos):
        return self._stream.seek(pos)
    def tell(self):
        return self._stream.tell()
    def close(self):
        return self._stream.close()

    def _chunklength(self):
        """Read a chunk length prefix; return the payload length.

        Returns 0 for a terminating (empty) chunk. The 4-byte prefix
        counts itself, hence the ``- 4``.
        """
        d = readexactly(self._stream, 4)
        l = struct.unpack(">l", d)[0]
        if l <= 4:
            if l:
                raise error.Abort(_("invalid chunk length %d") % l)
            return 0
        if self.callback:
            self.callback()
        return l - 4

    def changelogheader(self):
        """v10 does not have a changelog header chunk"""
        return {}

    def manifestheader(self):
        """v10 does not have a manifest header chunk"""
        return {}

    def filelogheader(self):
        """return the header of the filelogs chunk, v10 only has the filename"""
        l = self._chunklength()
        if not l:
            return {}
        fname = readexactly(self._stream, l)
        return {'filename': fname}

    def _deltaheader(self, headertuple, prevnode):
        """Expand a parsed delta header into
        (node, p1, p2, deltabase, cs, flags).

        cg1 has no explicit delta base: deltas chain against the
        previous chunk's node, or p1 for the first chunk.
        """
        node, p1, p2, cs = headertuple
        if prevnode is None:
            deltabase = p1
        else:
            deltabase = prevnode
        flags = 0
        return node, p1, p2, deltabase, cs, flags

    def deltachunk(self, prevnode):
        """Read one delta chunk; returns {} at a terminating chunk."""
        l = self._chunklength()
        if not l:
            return {}
        headerdata = readexactly(self._stream, self.deltaheadersize)
        header = self.deltaheader.unpack(headerdata)
        delta = readexactly(self._stream, l - self.deltaheadersize)
        node, p1, p2, deltabase, cs, flags = self._deltaheader(header, prevnode)
        return (node, p1, p2, cs, deltabase, delta, flags)

    def getchunks(self):
        """returns all the chunks contains in the bundle

        Used when you need to forward the binary stream to a file or another
        network API. To do so, it parse the changegroup data, otherwise it will
        block in case of sshrepo because it don't know the end of the stream.
        """
        # For changegroup 1 and 2, we expect 3 parts: changelog, manifestlog,
        # and a list of filelogs. For changegroup 3, we expect 4 parts:
        # changelog, manifestlog, a list of tree manifestlogs, and a list of
        # filelogs.
        #
        # Changelog and manifestlog parts are terminated with empty chunks. The
        # tree and file parts are a list of entry sections. Each entry section
        # is a series of chunks terminating in an empty chunk. The list of these
        # entry sections is terminated in yet another empty chunk, so we know
        # we've reached the end of the tree/file list when we reach an empty
        # chunk that was proceeded by no non-empty chunks.

        parts = 0
        while parts < 2 + self._grouplistcount:
            noentries = True
            while True:
                chunk = getchunk(self)
                if not chunk:
                    # The first two empty chunks represent the end of the
                    # changelog and the manifestlog portions. The remaining
                    # empty chunks represent either A) the end of individual
                    # tree or file entries in the file list, or B) the end of
                    # the entire list. It's the end of the entire list if there
                    # were no entries (i.e. noentries is True).
                    if parts < 2:
                        parts += 1
                    elif noentries:
                        parts += 1
                    break
                noentries = False
                yield chunkheader(len(chunk))
                pos = 0
                # Re-emit payloads in 1MB slices to bound memory per yield.
                while pos < len(chunk):
                    next = pos + 2**20
                    yield chunk[pos:next]
                    pos = next
            yield closechunk()

    def _unpackmanifests(self, repo, revmap, trp, prog):
        """Consume the manifest portion of the stream into the repo."""
        self.callback = prog.increment
        # no need to check for empty manifest group here:
        # if the result of the merge of 1 and 2 is the same in 3 and 4,
        # no new manifest will be created and the manifest group will
        # be empty during the pull
        self.manifestheader()
        deltas = self.deltaiter()
        repo.manifestlog.addgroup(deltas, revmap, trp)
        prog.complete()
        self.callback = None

    def apply(self, repo, tr, srctype, url, targetphase=phases.draft,
              expectedtotal=None):
        """Add the changegroup returned by source.read() to this repo.
        srctype is a string like 'push', 'pull', or 'unbundle'. url is
        the URL of the repo where this changegroup is coming from.

        Return an integer summarizing the change to this repo:
        - nothing changed or no source: 0
        - more heads than before: 1+added heads (2..n)
        - fewer heads than before: -1-removed heads (-2..-n)
        - number of heads stays the same: 1
        """
        repo = repo.unfiltered()
        def csmap(x):
            repo.ui.debug("add changeset %s\n" % short(x))
            return len(cl)

        def revmap(x):
            return cl.rev(x)

        changesets = files = revisions = 0

        try:
            # The transaction may already carry source information. In this
            # case we use the top level data. We overwrite the argument
            # because we need to use the top level value (if they exist)
            # in this function.
            srctype = tr.hookargs.setdefault('source', srctype)
            url = tr.hookargs.setdefault('url', url)
            repo.hook('prechangegroup',
                      throw=True, **pycompat.strkwargs(tr.hookargs))

            # write changelog data to temp files so concurrent readers
            # will not see an inconsistent view
            cl = repo.changelog
            cl.delayupdate(tr)
            oldheads = set(cl.heads())

            trp = weakref.proxy(tr)
            # pull off the changeset group
            repo.ui.status(_("adding changesets\n"))
            clstart = len(cl)
            progress = repo.ui.makeprogress(_('changesets'), unit=_('chunks'),
                                            total=expectedtotal)
            self.callback = progress.increment

            efiles = set()
            def onchangelog(cl, node):
                efiles.update(cl.readfiles(node))

            self.changelogheader()
            deltas = self.deltaiter()
            cgnodes = cl.addgroup(deltas, csmap, trp, addrevisioncb=onchangelog)
            # efiles becomes a count of distinct files touched.
            efiles = len(efiles)

            if not cgnodes:
                repo.ui.develwarn('applied empty changegroup',
                                  config='warn-empty-changegroup')
            clend = len(cl)
            changesets = clend - clstart
            progress.complete()
            self.callback = None

            # pull off the manifest group
            repo.ui.status(_("adding manifests\n"))
            # We know that we'll never have more manifests than we had
            # changesets.
            progress = repo.ui.makeprogress(_('manifests'), unit=_('chunks'),
                                            total=changesets)
            self._unpackmanifests(repo, revmap, trp, progress)

            needfiles = {}
            if repo.ui.configbool('server', 'validate'):
                cl = repo.changelog
                ml = repo.manifestlog
                # validate incoming csets have their manifests
                for cset in pycompat.xrange(clstart, clend):
                    mfnode = cl.changelogrevision(cset).manifest
                    mfest = ml[mfnode].readdelta()
                    # store file cgnodes we must see
                    for f, n in mfest.iteritems():
                        needfiles.setdefault(f, set()).add(n)

            # process the files
            repo.ui.status(_("adding file changes\n"))
            newrevs, newfiles = _addchangegroupfiles(
                repo, self, revmap, trp, efiles, needfiles)
            revisions += newrevs
            files += newfiles

            deltaheads = 0
            if oldheads:
                heads = cl.heads()
                deltaheads = len(heads) - len(oldheads)
                for h in heads:
                    # Heads that close a branch don't count toward the delta.
                    if h not in oldheads and repo[h].closesbranch():
                        deltaheads -= 1
            htext = ""
            if deltaheads:
                htext = _(" (%+d heads)") % deltaheads

            repo.ui.status(_("added %d changesets"
                             " with %d changes to %d files%s\n")
                           % (changesets, revisions, files, htext))
            repo.invalidatevolatilesets()

            if changesets > 0:
                if 'node' not in tr.hookargs:
                    tr.hookargs['node'] = hex(cl.node(clstart))
                    tr.hookargs['node_last'] = hex(cl.node(clend - 1))
                    hookargs = dict(tr.hookargs)
                else:
                    hookargs = dict(tr.hookargs)
                    hookargs['node'] = hex(cl.node(clstart))
                    hookargs['node_last'] = hex(cl.node(clend - 1))
                repo.hook('pretxnchangegroup',
                          throw=True, **pycompat.strkwargs(hookargs))

            added = [cl.node(r) for r in pycompat.xrange(clstart, clend)]
            phaseall = None
            if srctype in ('push', 'serve'):
                # Old servers can not push the boundary themselves.
                # New servers won't push the boundary if changeset already
                # exists locally as secret
                #
                # We should not use added here but the list of all change in
                # the bundle
                if repo.publishing():
                    targetphase = phaseall = phases.public
                else:
                    # closer target phase computation

                    # Those changesets have been pushed from the
                    # outside, their phases are going to be pushed
                    # alongside. Therefor `targetphase` is
                    # ignored.
                    targetphase = phaseall = phases.draft
            if added:
                phases.registernew(repo, tr, targetphase, added)
            if phaseall is not None:
                phases.advanceboundary(repo, tr, phaseall, cgnodes)

            if changesets > 0:

                def runhooks():
                    # These hooks run when the lock releases, not when the
                    # transaction closes. So it's possible for the changelog
                    # to have changed since we last saw it.
                    if clstart >= len(repo):
                        return

                    repo.hook("changegroup", **pycompat.strkwargs(hookargs))

                    for n in added:
                        args = hookargs.copy()
                        args['node'] = hex(n)
                        del args['node_last']
                        repo.hook("incoming", **pycompat.strkwargs(args))

                    newheads = [h for h in repo.heads()
                                if h not in oldheads]
                    repo.ui.log("incoming",
                                "%d incoming changes - new heads: %s\n",
                                len(added),
                                ', '.join([hex(c[:6]) for c in newheads]))

                tr.addpostclose('changegroup-runhooks-%020i' % clstart,
                                lambda tr: repo._afterlock(runhooks))
        finally:
            repo.ui.flush()
        # never return 0 here:
        if deltaheads < 0:
            ret = deltaheads - 1
        else:
            ret = deltaheads + 1
        return ret

    def deltaiter(self):
        """
        returns an iterator of the deltas in this changegroup

        Useful for passing to the underlying storage system to be stored.
        """
        chain = None
        for chunkdata in iter(lambda: self.deltachunk(chain), {}):
            # Chunkdata: (node, p1, p2, cs, deltabase, delta, flags)
            yield chunkdata
            chain = chunkdata[0]
448 448
class cg2unpacker(cg1unpacker):
    """Unpacker for cg2 streams.

    cg2 streams add support for generaldelta, so the delta header
    format is slightly different. All other features about the data
    remain the same.
    """
    deltaheader = _CHANGEGROUPV2_DELTA_HEADER
    deltaheadersize = deltaheader.size
    version = '02'

    def _deltaheader(self, headertuple, prevnode):
        # Unlike cg1, the delta base is named explicitly in the header,
        # so prevnode is ignored. There is still no flags field.
        node, p1, p2, deltabase, cs = headertuple
        return node, p1, p2, deltabase, cs, 0
464 464
class cg3unpacker(cg2unpacker):
    """Unpacker for cg3 streams.

    cg3 streams add support for exchanging treemanifests and revlog
    flags. It adds the revlog flags to the delta header and an empty chunk
    separating manifests and files.
    """
    deltaheader = _CHANGEGROUPV3_DELTA_HEADER
    deltaheadersize = deltaheader.size
    version = '03'
    _grouplistcount = 2 # One list of manifests and one list of files

    def _deltaheader(self, headertuple, prevnode):
        # The cg3 header already carries all six fields (including flags)
        # in the order deltachunk() expects, so pass it through.
        node, p1, p2, deltabase, cs, flags = headertuple
        return node, p1, p2, deltabase, cs, flags

    def _unpackmanifests(self, repo, revmap, trp, prog):
        # The flat manifest group is handled by the base class.
        super(cg3unpacker, self)._unpackmanifests(repo, revmap, trp, prog)
        while True:
            chunkdata = self.filelogheader()
            if not chunkdata:
                break
            # A non-empty header here names a directory manifest group.
            d = chunkdata["filename"]
            repo.ui.debug("adding %s revisions\n" % d)
            dirlog = repo.manifestlog._revlog.dirlog(d)
            deltas = self.deltaiter()
            if not dirlog.addgroup(deltas, revmap, trp):
                raise error.Abort(_("received dir revlog group is empty"))
491 491
class headerlessfixup(object):
    """File-like wrapper that re-serves already-consumed leading bytes.

    ``h`` is the previously-read header data; it is replayed before any
    further reads are delegated to the underlying file object ``fh``.
    """
    def __init__(self, fh, h):
        self._fh = fh
        self._h = h
    def read(self, n):
        if not self._h:
            return readexactly(self._fh, n)
        d, self._h = self._h[:n], self._h[n:]
        if len(d) < n:
            # Header exhausted mid-read: top up from the real stream.
            d += readexactly(self._fh, n - len(d))
        return d
503 503
@attr.s(slots=True, frozen=True)
class revisiondelta(object):
    """Describes a delta entry in a changegroup.

    Captured data is sufficient to serialize the delta into multiple
    formats.
    """
    # 20 byte node of this revision.
    node = attr.ib()
    # 20 byte nodes of parent revisions.
    p1node = attr.ib()
    p2node = attr.ib()
    # 20 byte node of node this delta is against (nullid when the delta
    # is actually a full snapshot, as in ellipsis mode).
    basenode = attr.ib()
    # 20 byte node of changeset revision this delta is associated with.
    linknode = attr.ib()
    # 2 bytes of flags to apply to revision data.
    flags = attr.ib()
    # Iterable of chunks holding raw delta data (typically a
    # (diffheader, delta) pair).
    deltachunks = attr.ib()
524 524
525 525 def _sortnodesnormal(store, nodes, reorder):
526 526 """Sort nodes for changegroup generation and turn into revnums."""
527 527 # for generaldelta revlogs, we linearize the revs; this will both be
528 528 # much quicker and generate a much smaller bundle
529 529 if (store._generaldelta and reorder is None) or reorder:
530 530 dag = dagutil.revlogdag(store)
531 531 return dag.linearize(set(store.rev(n) for n in nodes))
532 532 else:
533 533 return sorted([store.rev(n) for n in nodes])
534 534
535 535 def _sortnodesellipsis(store, nodes, cl, lookup):
536 536 """Sort nodes for changegroup generation and turn into revnums."""
537 537 # Ellipses serving mode.
538 538 #
539 539 # In a perfect world, we'd generate better ellipsis-ified graphs
540 540 # for non-changelog revlogs. In practice, we haven't started doing
541 541 # that yet, so the resulting DAGs for the manifestlog and filelogs
542 542 # are actually full of bogus parentage on all the ellipsis
543 543 # nodes. This has the side effect that, while the contents are
544 544 # correct, the individual DAGs might be completely out of whack in
545 545 # a case like 882681bc3166 and its ancestors (back about 10
546 546 # revisions or so) in the main hg repo.
547 547 #
548 548 # The one invariant we *know* holds is that the new (potentially
549 549 # bogus) DAG shape will be valid if we order the nodes in the
550 550 # order that they're introduced in dramatis personae by the
551 551 # changelog, so what we do is we sort the non-changelog histories
552 552 # by the order in which they are used by the changelog.
553 553 key = lambda n: cl.rev(lookup(n))
554 554 return [store.rev(n) for n in sorted(nodes, key=key)]
555 555
def _revisiondeltanormal(store, rev, prev, linknode, deltaparentfn):
    """Construct a revision delta for non-ellipses changegroup generation."""
    node = store.node(rev)
    p1rev, p2rev = store.parentrevs(rev)
    baserev = deltaparentfn(store, rev, p1rev, p2rev, prev)

    prefix = ''
    if store.iscensored(baserev) or store.iscensored(rev):
        # Send the tombstone content for censored revisions instead of
        # aborting the entire exchange.
        try:
            delta = store.revision(node, raw=True)
        except error.CensoredNodeError as e:
            delta = e.tombstone
        if baserev == nullrev:
            prefix = mdiff.trivialdiffheader(len(delta))
        else:
            prefix = mdiff.replacediffheader(store.rawsize(baserev),
                                             len(delta))
    elif baserev == nullrev:
        # No usable delta base: ship the full text behind a trivial
        # diff header.
        delta = store.revision(node, raw=True)
        prefix = mdiff.trivialdiffheader(len(delta))
    else:
        delta = store.revdiff(baserev, rev)

    p1node, p2node = store.parents(node)

    return revisiondelta(
        node=node,
        p1node=p1node,
        p2node=p2node,
        basenode=store.node(baserev),
        linknode=linknode,
        flags=store.flags(rev),
        deltachunks=(prefix, delta),
    )
589 589
def _revisiondeltanarrow(cl, store, ischangelog, rev, linkrev,
                         linknode, clrevtolocalrev, fullclnodes,
                         precomputedellipsis):
    """Construct a revision delta for an ellipsis ("narrow") node.

    The revision is emitted as a full text against nullid, with its
    parents remapped to the nearest ancestors present in the ellipsis
    graph and REVIDX_ELLIPSIS added to its flags.
    """
    linkparents = precomputedellipsis[linkrev]
    def local(clrev):
        """Turn a changelog revnum into a local revnum.

        The ellipsis dag is stored as revnums on the changelog,
        but when we're producing ellipsis entries for
        non-changelog revlogs, we need to turn those numbers into
        something local. This does that for us, and during the
        changelog sending phase will also expand the stored
        mappings as needed.
        """
        if clrev == nullrev:
            return nullrev

        if ischangelog:
            return clrev

        # Walk the ellipsis-ized changelog breadth-first looking for a
        # change that has been linked from the current revlog.
        #
        # For a flat manifest revlog only a single step should be necessary
        # as all relevant changelog entries are relevant to the flat
        # manifest.
        #
        # For a filelog or tree manifest dirlog however not every changelog
        # entry will have been relevant, so we need to skip some changelog
        # nodes even after ellipsis-izing.
        walk = [clrev]
        while walk:
            p = walk[0]
            walk = walk[1:]
            if p in clrevtolocalrev:
                return clrevtolocalrev[p]
            elif p in fullclnodes:
                walk.extend([pp for pp in cl.parentrevs(p)
                             if pp != nullrev])
            elif p in precomputedellipsis:
                walk.extend([pp for pp in precomputedellipsis[p]
                             if pp != nullrev])
            else:
                # In this case, we've got an ellipsis with parents
                # outside the current bundle (likely an
                # incremental pull). We "know" that we can use the
                # value of this same revlog at whatever revision
                # is pointed to by linknode. "Know" is in scare
                # quotes because I haven't done enough examination
                # of edge cases to convince myself this is really
                # a fact - it works for all the (admittedly
                # thorough) cases in our testsuite, but I would be
                # somewhat unsurprised to find a case in the wild
                # where this breaks down a bit. That said, I don't
                # know if it would hurt anything.
                for i in pycompat.xrange(rev, 0, -1):
                    if store.linkrev(i) == clrev:
                        return i
                # We failed to resolve a parent for this node, so
                # we crash the changegroup construction.
                raise error.Abort(
                    'unable to resolve parent while packing %r %r'
                    ' for changeset %r' % (store.indexfile, rev, clrev))

        return nullrev

    if not linkparents or (
        store.parentrevs(rev) == (nullrev, nullrev)):
        p1, p2 = nullrev, nullrev
    elif len(linkparents) == 1:
        p1, = sorted(local(p) for p in linkparents)
        p2 = nullrev
    else:
        p1, p2 = sorted(local(p) for p in linkparents)

    n = store.node(rev)
    p1n, p2n = store.node(p1), store.node(p2)
    flags = store.flags(rev)
    flags |= revlog.REVIDX_ELLIPSIS

    # TODO: try and actually send deltas for ellipsis data blocks
    data = store.revision(n)
    diffheader = mdiff.trivialdiffheader(len(data))

    return revisiondelta(
        node=n,
        p1node=p1n,
        p2node=p2n,
        basenode=nullid,
        linknode=linknode,
        flags=flags,
        deltachunks=(diffheader, data),
    )
683 683
def deltagroup(repo, revs, store, ischangelog, lookup, deltaparentfn,
               deltaheaderfn, units=None,
               ellipses=False, clrevtolocalrev=None, fullclnodes=None,
               precomputedellipsis=None):
    """Calculate a delta group, yielding a sequence of changegroup chunks
    (strings).

    Given a list of changeset revs, return a set of deltas and
    metadata corresponding to nodes. The first delta is
    first parent(nodelist[0]) -> nodelist[0]; the receiver is
    guaranteed to have this parent as it has all history before
    these changesets. In the case firstparent is nullrev the
    changegroup starts with a full revision.

    If units is not None, progress detail will be generated and units
    names the kind of revlog being bundled (changelog, manifest, etc.).
    """
    if not revs:
        # Nothing touched by these changesets: emit an empty group.
        yield closechunk()
        return

    cl = repo.changelog

    # Prepend the first rev's first parent; the receiver already has it.
    revs.insert(0, store.parentrevs(revs[0])[0])

    progress = None
    if units is not None:
        progress = repo.ui.makeprogress(_('bundling'), unit=units,
                                        total=(len(revs) - 1))

    for idx in pycompat.xrange(len(revs) - 1):
        if progress:
            progress.update(idx + 1)

        prevrev = revs[idx]
        currev = revs[idx + 1]
        linknode = lookup(store.node(currev))

        if not ellipses:
            delta = _revisiondeltanormal(store, currev, prevrev, linknode,
                                         deltaparentfn)
        else:
            linkrev = cl.rev(linknode)
            clrevtolocalrev[linkrev] = currev

            if linknode in fullclnodes:
                # The changeset this node corresponds to was sent in
                # full, so this node goes out as a normal delta.
                delta = _revisiondeltanormal(store, currev, prevrev,
                                             linknode, deltaparentfn)
            elif linkrev in precomputedellipsis:
                delta = _revisiondeltanarrow(
                    cl, store, ischangelog, currev, linkrev, linknode,
                    clrevtolocalrev, fullclnodes,
                    precomputedellipsis)
            else:
                delta = None

        if delta:
            meta = deltaheaderfn(delta)
            size = len(meta) + sum(len(c) for c in delta.deltachunks)
            yield chunkheader(size)
            yield meta
            for chunk in delta.deltachunks:
                yield chunk

    if progress:
        progress.complete()

    yield closechunk()
757
684 758 class cgpacker(object):
    def __init__(self, repo, filematcher, version, allowreorder,
                 deltaparentfn, builddeltaheader, manifestsend,
                 bundlecaps=None, ellipses=False,
                 shallow=False, ellipsisroots=None, fullnodes=None):
        """Given a source repo, construct a bundler.

        filematcher is a matcher that matches on files to include in the
        changegroup. Used to facilitate sparse changegroups.

        allowreorder controls whether reordering of revisions is allowed.
        This value is used when ``bundle.reorder`` is ``auto`` or isn't
        set.

        deltaparentfn is a callable that resolves the delta parent for
        a specific revision.

        builddeltaheader is a callable that constructs the header for a group
        delta.

        manifestsend is a chunk to send after manifests have been fully emitted.

        ellipses indicates whether ellipsis serving mode is enabled.

        bundlecaps is optional and can be used to specify the set of
        capabilities which can be used to build the bundle. While bundlecaps is
        unused in core Mercurial, extensions rely on this feature to communicate
        capabilities to customize the changegroup packer.

        shallow indicates whether shallow data might be sent. The packer may
        need to pack file contents not introduced by the changes being packed.

        fullnodes is the set of changelog nodes which should not be ellipsis
        nodes. We store this rather than the set of nodes that should be
        ellipsis because for very large histories we expect this to be
        significantly smaller.
        """
        assert filematcher
        self._filematcher = filematcher

        self.version = version
        self._deltaparentfn = deltaparentfn
        self._builddeltaheader = builddeltaheader
        self._manifestsend = manifestsend
        self._ellipses = ellipses

        # Set of capabilities we can use to build the bundle.
        if bundlecaps is None:
            bundlecaps = set()
        self._bundlecaps = bundlecaps
        self._isshallow = shallow
        self._fullclnodes = fullnodes

        # Maps ellipsis revs to their roots at the changelog level.
        self._precomputedellipsis = ellipsisroots

        # experimental config: bundle.reorder
        reorder = repo.ui.config('bundle', 'reorder')
        if reorder == 'auto':
            self._reorder = allowreorder
        else:
            # Tri-state True/False/None; None defers to per-revlog
            # heuristics (see _sortnodesnormal).
            self._reorder = stringutil.parsebool(reorder)

        self._repo = repo

        # Only emit content-size notes in verbose (not debug) mode.
        if self._repo.ui.verbose and not self._repo.ui.debugflag:
            self._verbosenote = self._repo.ui.note
        else:
            self._verbosenote = lambda s: None
753 827
    def group(self, repo, revs, store, ischangelog, lookup, deltaparentfn,
              deltaheaderfn, units=None,
              ellipses=False, clrevtolocalrev=None, fullclnodes=None,
              precomputedellipsis=None):
        """Calculate a delta group, yielding a sequence of changegroup chunks
        (strings).

        Given a list of changeset revs, return a set of deltas and
        metadata corresponding to nodes. The first delta is
        first parent(nodelist[0]) -> nodelist[0], the receiver is
        guaranteed to have this parent as it has all history before
        these changesets. In the case firstparent is nullrev the
        changegroup starts with a full revision.

        If units is not None, progress detail will be generated, units specifies
        the type of revlog that is touched (changelog, manifest, etc.).

        ``lookup`` maps a node in ``store`` to its linkrev node.
        ``deltaparentfn`` chooses the delta base for a revision and
        ``deltaheaderfn`` serializes a revision delta into its wire header.
        When ``ellipses`` is true, ``clrevtolocalrev``, ``fullclnodes`` and
        ``precomputedellipsis`` must be provided by the caller.
        """
        # if we don't have any revisions touched by these changesets, bail
        if len(revs) == 0:
            yield closechunk()
            return

        cl = repo.changelog

        # add the parent of the first rev; it is the delta base of the
        # first emitted delta and is skipped by the emission loop below.
        p = store.parentrevs(revs[0])[0]
        revs.insert(0, p)

        # build deltas
        progress = None
        if units is not None:
            progress = repo.ui.makeprogress(_('bundling'), unit=units,
                                            total=(len(revs) - 1))
        for r in pycompat.xrange(len(revs) - 1):
            if progress:
                progress.update(r + 1)
            prev, curr = revs[r], revs[r + 1]
            linknode = lookup(store.node(curr))

            if ellipses:
                linkrev = cl.rev(linknode)
                clrevtolocalrev[linkrev] = curr

                # This is a node to send in full, because the changeset it
                # corresponds to was a full changeset.
                if linknode in fullclnodes:
                    delta = _revisiondeltanormal(store, curr, prev, linknode,
                                                 deltaparentfn)
                elif linkrev not in precomputedellipsis:
                    # Not part of the precomputed ellipsis set: nothing to
                    # send for this revision.
                    delta = None
                else:
                    delta = _revisiondeltanarrow(
                        cl, store, ischangelog, curr, linkrev, linknode,
                        clrevtolocalrev, fullclnodes,
                        precomputedellipsis)
            else:
                delta = _revisiondeltanormal(store, curr, prev, linknode,
                                             deltaparentfn)

            if not delta:
                continue

            # Emit: length-prefixed chunk = header + delta payload.
            meta = deltaheaderfn(delta)
            l = len(meta) + sum(len(x) for x in delta.deltachunks)
            yield chunkheader(l)
            yield meta
            for x in delta.deltachunks:
                yield x

        if progress:
            progress.complete()

        yield closechunk()
827
    def generate(self, commonrevs, clnodes, fastpathlinkrev, source):
        """Yield a sequence of changegroup byte chunks.

        Emits, in order: changelog chunks, manifest chunks, file chunks,
        and a final close chunk. ``commonrevs`` are revisions the receiver
        already has; ``clnodes`` are the changelog nodes to send.
        """
        repo = self._repo
        cl = repo.changelog

        self._verbosenote(_('uncompressed size of bundle content:\n'))
        size = 0

        clstate, chunks = self._generatechangelog(cl, clnodes)
        for chunk in chunks:
            size += len(chunk)
            yield chunk

        self._verbosenote(_('%8.i (changelog)\n') % size)

        # clstate is only fully populated once the changelog chunk stream
        # above has been consumed.
        clrevorder = clstate['clrevorder']
        mfs = clstate['mfs']
        changedfiles = clstate['changedfiles']

        # We need to make sure that the linkrev in the changegroup refers to
        # the first changeset that introduced the manifest or file revision.
        # The fastpath is usually safer than the slowpath, because the filelogs
        # are walked in revlog order.
        #
        # When taking the slowpath with reorder=None and the manifest revlog
        # uses generaldelta, the manifest may be walked in the "wrong" order.
        # Without 'clrevorder', we would get an incorrect linkrev (see fix in
        # cc0ff93d0c0c).
        #
        # When taking the fastpath, we are only vulnerable to reordering
        # of the changelog itself. The changelog never uses generaldelta, so
        # it is only reordered when reorder=True. To handle this case, we
        # simply take the slowpath, which already has the 'clrevorder' logic.
        # This was also fixed in cc0ff93d0c0c.
        fastpathlinkrev = fastpathlinkrev and not self._reorder
        # Treemanifests don't work correctly with fastpathlinkrev
        # either, because we don't discover which directory nodes to
        # send along with files. This could probably be fixed.
        fastpathlinkrev = fastpathlinkrev and (
            'treemanifest' not in repo.requirements)

        fnodes = {}  # needed file nodes

        for chunk in self.generatemanifests(commonrevs, clrevorder,
                                            fastpathlinkrev, mfs, fnodes, source,
                                            clstate['clrevtomanifestrev']):
            yield chunk

        mfdicts = None
        if self._ellipses and self._isshallow:
            mfdicts = [(self._repo.manifestlog[n].read(), lr)
                       for (n, lr) in mfs.iteritems()]

        mfs.clear()
        clrevs = set(cl.rev(x) for x in clnodes)

        for chunk in self.generatefiles(changedfiles, commonrevs,
                                        source, mfdicts, fastpathlinkrev,
                                        fnodes, clrevs):
            yield chunk

        yield closechunk()

        if clnodes:
            repo.hook('outgoing', node=hex(clnodes[0]), source=source)
894 894
895 895 def _generatechangelog(self, cl, nodes):
896 896 """Generate data for changelog chunks.
897 897
898 898 Returns a 2-tuple of a dict containing state and an iterable of
899 899 byte chunks. The state will not be fully populated until the
900 900 chunk stream has been fully consumed.
901 901 """
902 902 clrevorder = {}
903 903 mfs = {} # needed manifests
904 904 mfl = self._repo.manifestlog
905 905 # TODO violates storage abstraction.
906 906 mfrevlog = mfl._revlog
907 907 changedfiles = set()
908 908 clrevtomanifestrev = {}
909 909
910 910 # Callback for the changelog, used to collect changed files and
911 911 # manifest nodes.
912 912 # Returns the linkrev node (identity in the changelog case).
913 913 def lookupcl(x):
914 914 c = cl.read(x)
915 915 clrevorder[x] = len(clrevorder)
916 916
917 917 if self._ellipses:
918 918 # Only update mfs if x is going to be sent. Otherwise we
919 919 # end up with bogus linkrevs specified for manifests and
920 920 # we skip some manifest nodes that we should otherwise
921 921 # have sent.
922 922 if (x in self._fullclnodes
923 923 or cl.rev(x) in self._precomputedellipsis):
924 924 n = c[0]
925 925 # Record the first changeset introducing this manifest
926 926 # version.
927 927 mfs.setdefault(n, x)
928 928 # Set this narrow-specific dict so we have the lowest
929 929 # manifest revnum to look up for this cl revnum. (Part of
930 930 # mapping changelog ellipsis parents to manifest ellipsis
931 931 # parents)
932 932 clrevtomanifestrev.setdefault(cl.rev(x), mfrevlog.rev(n))
933 933 # We can't trust the changed files list in the changeset if the
934 934 # client requested a shallow clone.
935 935 if self._isshallow:
936 936 changedfiles.update(mfl[c[0]].read().keys())
937 937 else:
938 938 changedfiles.update(c[3])
939 939 else:
940 940
941 941 n = c[0]
942 942 # record the first changeset introducing this manifest version
943 943 mfs.setdefault(n, x)
944 944 # Record a complete list of potentially-changed files in
945 945 # this manifest.
946 946 changedfiles.update(c[3])
947 947
948 948 return x
949 949
950 950 # Changelog doesn't benefit from reordering revisions. So send out
951 951 # revisions in store order.
952 952 revs = sorted(cl.rev(n) for n in nodes)
953 953
954 954 state = {
955 955 'clrevorder': clrevorder,
956 956 'mfs': mfs,
957 957 'changedfiles': changedfiles,
958 958 'clrevtomanifestrev': clrevtomanifestrev,
959 959 }
960 960
961 gen = self.group(self._repo, revs, cl, True, lookupcl,
962 self._deltaparentfn, self._builddeltaheader,
963 ellipses=self._ellipses,
964 units=_('changesets'),
965 clrevtolocalrev={},
966 fullclnodes=self._fullclnodes,
967 precomputedellipsis=self._precomputedellipsis)
961 gen = deltagroup(
962 self._repo, revs, cl, True, lookupcl,
963 self._deltaparentfn, self._builddeltaheader,
964 ellipses=self._ellipses,
965 units=_('changesets'),
966 clrevtolocalrev={},
967 fullclnodes=self._fullclnodes,
968 precomputedellipsis=self._precomputedellipsis)
968 969
969 970 return state, gen
970 971
971 972 def generatemanifests(self, commonrevs, clrevorder, fastpathlinkrev, mfs,
972 973 fnodes, source, clrevtolocalrev):
973 974 """Returns an iterator of changegroup chunks containing manifests.
974 975
975 976 `source` is unused here, but is used by extensions like remotefilelog to
976 977 change what is sent based in pulls vs pushes, etc.
977 978 """
978 979 repo = self._repo
979 980 cl = repo.changelog
980 981 mfl = repo.manifestlog
981 982 dirlog = mfl._revlog.dirlog
982 983 tmfnodes = {'': mfs}
983 984
984 985 # Callback for the manifest, used to collect linkrevs for filelog
985 986 # revisions.
986 987 # Returns the linkrev node (collected in lookupcl).
987 988 def makelookupmflinknode(dir, nodes):
988 989 if fastpathlinkrev:
989 990 assert not dir
990 991 return mfs.__getitem__
991 992
992 993 def lookupmflinknode(x):
993 994 """Callback for looking up the linknode for manifests.
994 995
995 996 Returns the linkrev node for the specified manifest.
996 997
997 998 SIDE EFFECT:
998 999
999 1000 1) fclnodes gets populated with the list of relevant
1000 1001 file nodes if we're not using fastpathlinkrev
1001 1002 2) When treemanifests are in use, collects treemanifest nodes
1002 1003 to send
1003 1004
1004 1005 Note that this means manifests must be completely sent to
1005 1006 the client before you can trust the list of files and
1006 1007 treemanifests to send.
1007 1008 """
1008 1009 clnode = nodes[x]
1009 1010 mdata = mfl.get(dir, x).readfast(shallow=True)
1010 1011 for p, n, fl in mdata.iterentries():
1011 1012 if fl == 't': # subdirectory manifest
1012 1013 subdir = dir + p + '/'
1013 1014 tmfclnodes = tmfnodes.setdefault(subdir, {})
1014 1015 tmfclnode = tmfclnodes.setdefault(n, clnode)
1015 1016 if clrevorder[clnode] < clrevorder[tmfclnode]:
1016 1017 tmfclnodes[n] = clnode
1017 1018 else:
1018 1019 f = dir + p
1019 1020 fclnodes = fnodes.setdefault(f, {})
1020 1021 fclnode = fclnodes.setdefault(n, clnode)
1021 1022 if clrevorder[clnode] < clrevorder[fclnode]:
1022 1023 fclnodes[n] = clnode
1023 1024 return clnode
1024 1025 return lookupmflinknode
1025 1026
1026 1027 size = 0
1027 1028 while tmfnodes:
1028 1029 dir, nodes = tmfnodes.popitem()
1029 1030 store = dirlog(dir)
1030 1031
1031 1032 if not self._filematcher.visitdir(store._dir[:-1] or '.'):
1032 1033 prunednodes = []
1033 1034 else:
1034 1035 frev, flr = store.rev, store.linkrev
1035 1036 prunednodes = [n for n in nodes
1036 1037 if flr(frev(n)) not in commonrevs]
1037 1038
1038 1039 if dir and not prunednodes:
1039 1040 continue
1040 1041
1041 1042 lookupfn = makelookupmflinknode(dir, nodes)
1042 1043
1043 1044 if self._ellipses:
1044 1045 revs = _sortnodesellipsis(store, prunednodes, cl,
1045 1046 lookupfn)
1046 1047 else:
1047 1048 revs = _sortnodesnormal(store, prunednodes,
1048 1049 self._reorder)
1049 1050
1050 1051 if dir:
1051 1052 assert self.version == b'03'
1052 1053 chunk = _fileheader(dir)
1053 1054 size += len(chunk)
1054 1055 yield chunk
1055 1056
1056 it = self.group(
1057 it = deltagroup(
1057 1058 self._repo, revs, store, False, lookupfn,
1058 1059 self._deltaparentfn, self._builddeltaheader,
1059 1060 ellipses=self._ellipses,
1060 1061 units=_('manifests'),
1061 1062 clrevtolocalrev=clrevtolocalrev,
1062 1063 fullclnodes=self._fullclnodes,
1063 1064 precomputedellipsis=self._precomputedellipsis)
1064 1065
1065 1066 for chunk in it:
1066 1067 size += len(chunk)
1067 1068 yield chunk
1068 1069
1069 1070 self._verbosenote(_('%8.i (manifests)\n') % size)
1070 1071 yield self._manifestsend
1071 1072
1072 1073 # The 'source' parameter is useful for extensions
1073 1074 def generatefiles(self, changedfiles, commonrevs, source,
1074 1075 mfdicts, fastpathlinkrev, fnodes, clrevs):
1075 1076 changedfiles = list(filter(self._filematcher, changedfiles))
1076 1077
1077 1078 if not fastpathlinkrev:
1078 1079 def normallinknodes(unused, fname):
1079 1080 return fnodes.get(fname, {})
1080 1081 else:
1081 1082 cln = self._repo.changelog.node
1082 1083
1083 1084 def normallinknodes(store, fname):
1084 1085 flinkrev = store.linkrev
1085 1086 fnode = store.node
1086 1087 revs = ((r, flinkrev(r)) for r in store)
1087 1088 return dict((fnode(r), cln(lr))
1088 1089 for r, lr in revs if lr in clrevs)
1089 1090
1090 1091 clrevtolocalrev = {}
1091 1092
1092 1093 if self._isshallow:
1093 1094 # In a shallow clone, the linknodes callback needs to also include
1094 1095 # those file nodes that are in the manifests we sent but weren't
1095 1096 # introduced by those manifests.
1096 1097 commonctxs = [self._repo[c] for c in commonrevs]
1097 1098 clrev = self._repo.changelog.rev
1098 1099
1099 1100 # Defining this function has a side-effect of overriding the
1100 1101 # function of the same name that was passed in as an argument.
1101 1102 # TODO have caller pass in appropriate function.
1102 1103 def linknodes(flog, fname):
1103 1104 for c in commonctxs:
1104 1105 try:
1105 1106 fnode = c.filenode(fname)
1106 1107 clrevtolocalrev[c.rev()] = flog.rev(fnode)
1107 1108 except error.ManifestLookupError:
1108 1109 pass
1109 1110 links = normallinknodes(flog, fname)
1110 1111 if len(links) != len(mfdicts):
1111 1112 for mf, lr in mfdicts:
1112 1113 fnode = mf.get(fname, None)
1113 1114 if fnode in links:
1114 1115 links[fnode] = min(links[fnode], lr, key=clrev)
1115 1116 elif fnode:
1116 1117 links[fnode] = lr
1117 1118 return links
1118 1119 else:
1119 1120 linknodes = normallinknodes
1120 1121
1121 1122 repo = self._repo
1122 1123 cl = repo.changelog
1123 1124 progress = repo.ui.makeprogress(_('bundling'), unit=_('files'),
1124 1125 total=len(changedfiles))
1125 1126 for i, fname in enumerate(sorted(changedfiles)):
1126 1127 filerevlog = repo.file(fname)
1127 1128 if not filerevlog:
1128 1129 raise error.Abort(_("empty or missing file data for %s") %
1129 1130 fname)
1130 1131
1131 1132 clrevtolocalrev.clear()
1132 1133
1133 1134 linkrevnodes = linknodes(filerevlog, fname)
1134 1135 # Lookup for filenodes, we collected the linkrev nodes above in the
1135 1136 # fastpath case and with lookupmf in the slowpath case.
1136 1137 def lookupfilelog(x):
1137 1138 return linkrevnodes[x]
1138 1139
1139 1140 frev, flr = filerevlog.rev, filerevlog.linkrev
1140 1141 filenodes = [n for n in linkrevnodes
1141 1142 if flr(frev(n)) not in commonrevs]
1142 1143
1143 1144 if filenodes:
1144 1145 if self._ellipses:
1145 1146 revs = _sortnodesellipsis(filerevlog, filenodes,
1146 1147 cl, lookupfilelog)
1147 1148 else:
1148 1149 revs = _sortnodesnormal(filerevlog, filenodes,
1149 1150 self._reorder)
1150 1151
1151 1152 progress.update(i + 1, item=fname)
1152 1153 h = _fileheader(fname)
1153 1154 size = len(h)
1154 1155 yield h
1155 1156
1156 it = self.group(
1157 it = deltagroup(
1157 1158 self._repo, revs, filerevlog, False, lookupfilelog,
1158 1159 self._deltaparentfn, self._builddeltaheader,
1159 1160 ellipses=self._ellipses,
1160 1161 clrevtolocalrev=clrevtolocalrev,
1161 1162 fullclnodes=self._fullclnodes,
1162 1163 precomputedellipsis=self._precomputedellipsis)
1163 1164
1164 1165 for chunk in it:
1165 1166 size += len(chunk)
1166 1167 yield chunk
1167 1168 self._verbosenote(_('%8.i %s\n') % (size, fname))
1168 1169 progress.complete()
1169 1170
1170 1171 def _deltaparentprev(store, rev, p1, p2, prev):
1171 1172 """Resolve a delta parent to the previous revision.
1172 1173
1173 1174 Used for version 1 changegroups, which don't support generaldelta.
1174 1175 """
1175 1176 return prev
1176 1177
def _deltaparentgeneraldelta(store, rev, p1, p2, prev):
    """Resolve a delta parent when general deltas are supported.

    Prefers the storage-level delta parent when the receiver is
    guaranteed to have it, falling back to ``prev`` or a full
    snapshot (``nullrev``) otherwise.
    """
    storagebase = store.deltaparent(rev)

    if storagebase == nullrev:
        if store.storedeltachains:
            # Avoid sending full revisions when delta parent is null. Pick
            # prev in that case. It's tempting to pick p1 in this case, as
            # p1 will be smaller in the common case. However, computing a
            # delta against p1 may require resolving the raw text of p1,
            # which could be expensive. The revlog caches should have prev
            # cached, meaning less CPU for changegroup generation. There is
            # likely room to add a flag and/or config option to control
            # this behavior.
            base = prev
        else:
            # revlog is configured to use full snapshot for a reason,
            # stick to full snapshot.
            base = nullrev
    elif storagebase not in (p1, p2, prev):
        # Pick prev when we can't be sure remote has the base revision.
        return prev
    else:
        base = storagebase

    if base != nullrev and not store.candelta(base, rev):
        base = nullrev

    return base
1203 1204
1204 1205 def _deltaparentellipses(store, rev, p1, p2, prev):
1205 1206 """Resolve a delta parent when in ellipses mode."""
1206 1207 # TODO: send better deltas when in narrow mode.
1207 1208 #
1208 1209 # changegroup.group() loops over revisions to send,
1209 1210 # including revisions we'll skip. What this means is that
1210 1211 # `prev` will be a potentially useless delta base for all
1211 1212 # ellipsis nodes, as the client likely won't have it. In
1212 1213 # the future we should do bookkeeping about which nodes
1213 1214 # have been sent to the client, and try to be
1214 1215 # significantly smarter about delta bases. This is
1215 1216 # slightly tricky because this same code has to work for
1216 1217 # all revlogs, and we don't have the linkrev/linknode here.
1217 1218 return p1
1218 1219
def _makecg1packer(repo, filematcher, bundlecaps, ellipses=False,
                   shallow=False, ellipsisroots=None, fullnodes=None):
    """Construct a cgpacker for version 01 changegroups."""
    def builddeltaheader(d):
        # cg1 headers carry node, both parents and the linknode only
        # (no explicit delta base and no flags).
        return _CHANGEGROUPV1_DELTA_HEADER.pack(
            d.node, d.p1node, d.p2node, d.linknode)

    return cgpacker(repo, filematcher, b'01',
                    deltaparentfn=_deltaparentprev,
                    allowreorder=None,
                    builddeltaheader=builddeltaheader,
                    manifestsend=b'',
                    bundlecaps=bundlecaps,
                    ellipses=ellipses,
                    shallow=shallow,
                    ellipsisroots=ellipsisroots,
                    fullnodes=fullnodes)
1234 1235
def _makecg2packer(repo, filematcher, bundlecaps, ellipses=False,
                   shallow=False, ellipsisroots=None, fullnodes=None):
    """Construct a cgpacker for version 02 changegroups."""
    def builddeltaheader(d):
        # cg2 headers add an explicit delta base node to the cg1 fields.
        return _CHANGEGROUPV2_DELTA_HEADER.pack(
            d.node, d.p1node, d.p2node, d.basenode, d.linknode)

    # Since generaldelta is directly supported by cg2, reordering
    # generally doesn't help, so we disable it by default (treating
    # bundle.reorder=auto just like bundle.reorder=False).
    return cgpacker(repo, filematcher, b'02',
                    deltaparentfn=_deltaparentgeneraldelta,
                    allowreorder=False,
                    builddeltaheader=builddeltaheader,
                    manifestsend=b'',
                    bundlecaps=bundlecaps,
                    ellipses=ellipses,
                    shallow=shallow,
                    ellipsisroots=ellipsisroots,
                    fullnodes=fullnodes)
1253 1254
def _makecg3packer(repo, filematcher, bundlecaps, ellipses=False,
                   shallow=False, ellipsisroots=None, fullnodes=None):
    """Construct a cgpacker for version 03 changegroups."""
    def builddeltaheader(d):
        # cg3 headers append storage flags after the cg2 fields.
        return _CHANGEGROUPV3_DELTA_HEADER.pack(
            d.node, d.p1node, d.p2node, d.basenode, d.linknode, d.flags)

    # Ellipsis serving needs its own delta-parent policy; see
    # _deltaparentellipses for the rationale.
    if ellipses:
        deltaparentfn = _deltaparentellipses
    else:
        deltaparentfn = _deltaparentgeneraldelta

    return cgpacker(repo, filematcher, b'03',
                    deltaparentfn=deltaparentfn,
                    allowreorder=False,
                    builddeltaheader=builddeltaheader,
                    manifestsend=closechunk(),
                    bundlecaps=bundlecaps,
                    ellipses=ellipses,
                    shallow=shallow,
                    ellipsisroots=ellipsisroots,
                    fullnodes=fullnodes)
1272 1273
# Maps changegroup version string to a (packer factory, unpacker class) pair.
_packermap = {'01': (_makecg1packer, cg1unpacker),
              # cg2 adds support for exchanging generaldelta
              '02': (_makecg2packer, cg2unpacker),
              # cg3 adds support for exchanging revlog flags and treemanifests
              '03': (_makecg3packer, cg3unpacker),
}
1279 1280
def allsupportedversions(repo):
    """Return the set of changegroup versions known to this code base.

    Version '03' is advertised only when the repo opts into it via
    experimental config or the treemanifest requirement.
    """
    versions = set(_packermap.keys())
    wantscg3 = (repo.ui.configbool('experimental', 'changegroup3')
                or repo.ui.configbool('experimental', 'treemanifest')
                or 'treemanifest' in repo.requirements)
    if not wantscg3:
        versions.discard('03')
    return versions
1287 1288
# Changegroup versions that can be applied to the repo
def supportedincomingversions(repo):
    # Every version we know about can currently be unbundled.
    return allsupportedversions(repo)
1291 1292
1292 1293 # Changegroup versions that can be created from the repo
1293 1294 def supportedoutgoingversions(repo):
1294 1295 versions = allsupportedversions(repo)
1295 1296 if 'treemanifest' in repo.requirements:
1296 1297 # Versions 01 and 02 support only flat manifests and it's just too
1297 1298 # expensive to convert between the flat manifest and tree manifest on
1298 1299 # the fly. Since tree manifests are hashed differently, all of history
1299 1300 # would have to be converted. Instead, we simply don't even pretend to
1300 1301 # support versions 01 and 02.
1301 1302 versions.discard('01')
1302 1303 versions.discard('02')
1303 1304 if repository.NARROW_REQUIREMENT in repo.requirements:
1304 1305 # Versions 01 and 02 don't support revlog flags, and we need to
1305 1306 # support that for stripping and unbundling to work.
1306 1307 versions.discard('01')
1307 1308 versions.discard('02')
1308 1309 if LFS_REQUIREMENT in repo.requirements:
1309 1310 # Versions 01 and 02 don't support revlog flags, and we need to
1310 1311 # mark LFS entries with REVIDX_EXTSTORED.
1311 1312 versions.discard('01')
1312 1313 versions.discard('02')
1313 1314
1314 1315 return versions
1315 1316
def localversion(repo):
    """Return the changegroup version preferred for local-only bundles."""
    # Finds the best version to use for bundles that are meant to be used
    # locally, such as those from strip and shelve, and temporary bundles.
    return max(supportedoutgoingversions(repo))
1320 1321
def safeversion(repo):
    """Return the smallest version assumed safe for all clients of repo."""
    # Finds the smallest version that it's safe to assume clients of the repo
    # will support. For example, all hg versions that support generaldelta also
    # support changegroup 02.
    versions = supportedoutgoingversions(repo)
    if 'generaldelta' in repo.requirements:
        versions.discard('01')
    assert versions
    return min(versions)
1330 1331
def getbundler(version, repo, bundlecaps=None, filematcher=None,
               ellipses=False, shallow=False, ellipsisroots=None,
               fullnodes=None):
    """Obtain a changegroup packer for the requested ``version``.

    Validates that the version can be produced and that the matcher and
    ellipsis options are compatible with it, then dispatches to the
    per-version factory registered in ``_packermap``.
    """
    assert version in supportedoutgoingversions(repo)

    if filematcher is None:
        filematcher = matchmod.alwaysmatcher(repo.root, '')

    # Only cg3 can describe a subset of files; cg1 must send everything.
    if version == '01' and not filematcher.always():
        raise error.ProgrammingError('version 01 changegroups do not support '
                                     'sparse file matchers')

    if ellipses and version in (b'01', b'02'):
        raise error.Abort(
            _('ellipsis nodes require at least cg3 on client and server, '
              'but negotiated version %s') % version)

    # Requested files could include files not in the local store. So
    # filter those out.
    filematcher = matchmod.intersectmatchers(repo.narrowmatch(),
                                             filematcher)

    fn = _packermap[version][0]
    return fn(repo, filematcher, bundlecaps, ellipses=ellipses,
              shallow=shallow, ellipsisroots=ellipsisroots,
              fullnodes=fullnodes)
1357 1358
def getunbundler(version, fh, alg, extras=None):
    """Instantiate the unpacker registered for ``version`` over stream ``fh``."""
    return _packermap[version][1](fh, alg, extras=extras)
1360 1361
def _changegroupinfo(repo, nodes, source):
    """Emit status and debug output describing an outgoing changegroup."""
    ui = repo.ui
    if ui.verbose or source == 'bundle':
        ui.status(_("%d changesets found\n") % len(nodes))
    if ui.debugflag:
        ui.debug("list of changesets:\n")
        for node in nodes:
            ui.debug("%s\n" % hex(node))
1368 1369
def makechangegroup(repo, outgoing, version, source, fastpath=False,
                    bundlecaps=None):
    """Build a changegroup and wrap it in an unbundler for local consumption."""
    cgstream = makestream(repo, outgoing, version, source,
                          fastpath=fastpath, bundlecaps=bundlecaps)
    return getunbundler(version, util.chunkbuffer(cgstream), None,
                        {'clcount': len(outgoing.missing) })
1375 1376
def makestream(repo, outgoing, version, source, fastpath=False,
               bundlecaps=None, filematcher=None):
    """Return a stream of changegroup chunks for the ``outgoing`` set."""
    bundler = getbundler(version, repo, bundlecaps=bundlecaps,
                         filematcher=filematcher)

    repo = repo.unfiltered()
    commonrevs = outgoing.common
    csets = outgoing.missing
    heads = outgoing.missingheads
    # We go through the fast path if we get told to, or if all (unfiltered)
    # heads have been requested (since we then know all linkrevs will
    # be pulled by the client).
    heads.sort()
    fastpathlinkrev = fastpath or (
        repo.filtername is None and heads == sorted(repo.heads()))

    repo.hook('preoutgoing', throw=True, source=source)
    _changegroupinfo(repo, csets, source)
    return bundler.generate(commonrevs, csets, fastpathlinkrev, source)
1395 1396
def _addchangegroupfiles(repo, source, revmap, trp, expectedfiles, needfiles):
    """Apply incoming filelog groups from ``source`` to the repo.

    ``needfiles`` maps filename -> set of nodes that must arrive; entries
    are checked off as revisions are added. Returns a ``(revisions, files)``
    tuple of how much was applied. Raises ``error.Abort`` on empty groups,
    censored delta bases, spurious entries, or missing expected nodes.
    """
    revisions = 0
    files = 0
    progress = repo.ui.makeprogress(_('files'), unit=_('files'),
                                    total=expectedfiles)
    # source.filelogheader returns {} at end of the file section.
    for chunkdata in iter(source.filelogheader, {}):
        files += 1
        f = chunkdata["filename"]
        repo.ui.debug("adding %s revisions\n" % f)
        progress.increment()
        fl = repo.file(f)
        o = len(fl)
        try:
            deltas = source.deltaiter()
            if not fl.addgroup(deltas, revmap, trp):
                raise error.Abort(_("received file revlog group is empty"))
        except error.CensoredBaseError as e:
            raise error.Abort(_("received delta base is censored: %s") % e)
        revisions += len(fl) - o
        if f in needfiles:
            needs = needfiles[f]
            # Every newly added node must be one we were expecting.
            for new in pycompat.xrange(o, len(fl)):
                n = fl.node(new)
                if n in needs:
                    needs.remove(n)
                else:
                    raise error.Abort(
                        _("received spurious file revlog entry"))
            if not needs:
                del needfiles[f]
    progress.complete()

    # Anything still listed in needfiles never arrived: the repo would be
    # inconsistent, so abort and tell the user to verify.
    for f, needs in needfiles.iteritems():
        fl = repo.file(f)
        for n in needs:
            try:
                fl.rev(n)
            except error.LookupError:
                raise error.Abort(
                    _('missing file data for %s:%s - run hg verify') %
                    (f, hex(n)))

    return revisions, files
General Comments 0
You need to be logged in to leave comments. Login now