##// END OF EJS Templates
changegroup: move file chunk emission to generate()...
Gregory Szorc -
r39049:c4a2d19d default
parent child Browse files
Show More
@@ -1,1451 +1,1455 b''
1 1 # changegroup.py - Mercurial changegroup manipulation functions
2 2 #
3 3 # Copyright 2006 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import os
11 11 import struct
12 12 import weakref
13 13
14 14 from .i18n import _
15 15 from .node import (
16 16 hex,
17 17 nullid,
18 18 nullrev,
19 19 short,
20 20 )
21 21
22 22 from .thirdparty import (
23 23 attr,
24 24 )
25 25
26 26 from . import (
27 27 dagutil,
28 28 error,
29 29 match as matchmod,
30 30 mdiff,
31 31 phases,
32 32 pycompat,
33 33 repository,
34 34 revlog,
35 35 util,
36 36 )
37 37
38 38 from .utils import (
39 39 stringutil,
40 40 )
41 41
42 42 _CHANGEGROUPV1_DELTA_HEADER = struct.Struct("20s20s20s20s")
43 43 _CHANGEGROUPV2_DELTA_HEADER = struct.Struct("20s20s20s20s20s")
44 44 _CHANGEGROUPV3_DELTA_HEADER = struct.Struct(">20s20s20s20s20sH")
45 45
46 46 LFS_REQUIREMENT = 'lfs'
47 47
48 48 readexactly = util.readexactly
49 49
50 50 def getchunk(stream):
51 51 """return the next chunk from stream as a string"""
52 52 d = readexactly(stream, 4)
53 53 l = struct.unpack(">l", d)[0]
54 54 if l <= 4:
55 55 if l:
56 56 raise error.Abort(_("invalid chunk length %d") % l)
57 57 return ""
58 58 return readexactly(stream, l - 4)
59 59
60 60 def chunkheader(length):
61 61 """return a changegroup chunk header (string)"""
62 62 return struct.pack(">l", length + 4)
63 63
64 64 def closechunk():
65 65 """return a changegroup chunk header (string) for a zero-length chunk"""
66 66 return struct.pack(">l", 0)
67 67
68 68 def _fileheader(path):
69 69 """Obtain a changegroup chunk header for a named path."""
70 70 return chunkheader(len(path)) + path
71 71
72 72 def writechunks(ui, chunks, filename, vfs=None):
73 73 """Write chunks to a file and return its filename.
74 74
75 75 The stream is assumed to be a bundle file.
76 76 Existing files will not be overwritten.
77 77 If no filename is specified, a temporary file is created.
78 78 """
79 79 fh = None
80 80 cleanup = None
81 81 try:
82 82 if filename:
83 83 if vfs:
84 84 fh = vfs.open(filename, "wb")
85 85 else:
86 86 # Increase default buffer size because default is usually
87 87 # small (4k is common on Linux).
88 88 fh = open(filename, "wb", 131072)
89 89 else:
90 90 fd, filename = pycompat.mkstemp(prefix="hg-bundle-", suffix=".hg")
91 91 fh = os.fdopen(fd, r"wb")
92 92 cleanup = filename
93 93 for c in chunks:
94 94 fh.write(c)
95 95 cleanup = None
96 96 return filename
97 97 finally:
98 98 if fh is not None:
99 99 fh.close()
100 100 if cleanup is not None:
101 101 if filename and vfs:
102 102 vfs.unlink(cleanup)
103 103 else:
104 104 os.unlink(cleanup)
105 105
106 106 class cg1unpacker(object):
107 107 """Unpacker for cg1 changegroup streams.
108 108
109 109 A changegroup unpacker handles the framing of the revision data in
110 110 the wire format. Most consumers will want to use the apply()
111 111 method to add the changes from the changegroup to a repository.
112 112
113 113 If you're forwarding a changegroup unmodified to another consumer,
114 114 use getchunks(), which returns an iterator of changegroup
115 115 chunks. This is mostly useful for cases where you need to know the
116 116 data stream has ended by observing the end of the changegroup.
117 117
118 118 deltachunk() is useful only if you're applying delta data. Most
119 119 consumers should prefer apply() instead.
120 120
121 121 A few other public methods exist. Those are used only for
122 122 bundlerepo and some debug commands - their use is discouraged.
123 123 """
124 124 deltaheader = _CHANGEGROUPV1_DELTA_HEADER
125 125 deltaheadersize = deltaheader.size
126 126 version = '01'
127 127 _grouplistcount = 1 # One list of files after the manifests
128 128
129 129 def __init__(self, fh, alg, extras=None):
130 130 if alg is None:
131 131 alg = 'UN'
132 132 if alg not in util.compengines.supportedbundletypes:
133 133 raise error.Abort(_('unknown stream compression type: %s')
134 134 % alg)
135 135 if alg == 'BZ':
136 136 alg = '_truncatedBZ'
137 137
138 138 compengine = util.compengines.forbundletype(alg)
139 139 self._stream = compengine.decompressorreader(fh)
140 140 self._type = alg
141 141 self.extras = extras or {}
142 142 self.callback = None
143 143
144 144 # These methods (compressed, read, seek, tell) all appear to only
145 145 # be used by bundlerepo, but it's a little hard to tell.
146 146 def compressed(self):
147 147 return self._type is not None and self._type != 'UN'
148 148 def read(self, l):
149 149 return self._stream.read(l)
150 150 def seek(self, pos):
151 151 return self._stream.seek(pos)
152 152 def tell(self):
153 153 return self._stream.tell()
154 154 def close(self):
155 155 return self._stream.close()
156 156
157 157 def _chunklength(self):
158 158 d = readexactly(self._stream, 4)
159 159 l = struct.unpack(">l", d)[0]
160 160 if l <= 4:
161 161 if l:
162 162 raise error.Abort(_("invalid chunk length %d") % l)
163 163 return 0
164 164 if self.callback:
165 165 self.callback()
166 166 return l - 4
167 167
168 168 def changelogheader(self):
169 169 """v10 does not have a changelog header chunk"""
170 170 return {}
171 171
172 172 def manifestheader(self):
173 173 """v10 does not have a manifest header chunk"""
174 174 return {}
175 175
176 176 def filelogheader(self):
177 177 """return the header of the filelogs chunk, v10 only has the filename"""
178 178 l = self._chunklength()
179 179 if not l:
180 180 return {}
181 181 fname = readexactly(self._stream, l)
182 182 return {'filename': fname}
183 183
184 184 def _deltaheader(self, headertuple, prevnode):
185 185 node, p1, p2, cs = headertuple
186 186 if prevnode is None:
187 187 deltabase = p1
188 188 else:
189 189 deltabase = prevnode
190 190 flags = 0
191 191 return node, p1, p2, deltabase, cs, flags
192 192
193 193 def deltachunk(self, prevnode):
194 194 l = self._chunklength()
195 195 if not l:
196 196 return {}
197 197 headerdata = readexactly(self._stream, self.deltaheadersize)
198 198 header = self.deltaheader.unpack(headerdata)
199 199 delta = readexactly(self._stream, l - self.deltaheadersize)
200 200 node, p1, p2, deltabase, cs, flags = self._deltaheader(header, prevnode)
201 201 return (node, p1, p2, cs, deltabase, delta, flags)
202 202
203 203 def getchunks(self):
204 204 """returns all the chunks contains in the bundle
205 205
206 206 Used when you need to forward the binary stream to a file or another
207 207 network API. To do so, it parse the changegroup data, otherwise it will
208 208 block in case of sshrepo because it don't know the end of the stream.
209 209 """
210 210 # For changegroup 1 and 2, we expect 3 parts: changelog, manifestlog,
211 211 # and a list of filelogs. For changegroup 3, we expect 4 parts:
212 212 # changelog, manifestlog, a list of tree manifestlogs, and a list of
213 213 # filelogs.
214 214 #
215 215 # Changelog and manifestlog parts are terminated with empty chunks. The
216 216 # tree and file parts are a list of entry sections. Each entry section
217 217 # is a series of chunks terminating in an empty chunk. The list of these
218 218 # entry sections is terminated in yet another empty chunk, so we know
219 219 # we've reached the end of the tree/file list when we reach an empty
220 220 # chunk that was proceeded by no non-empty chunks.
221 221
222 222 parts = 0
223 223 while parts < 2 + self._grouplistcount:
224 224 noentries = True
225 225 while True:
226 226 chunk = getchunk(self)
227 227 if not chunk:
228 228 # The first two empty chunks represent the end of the
229 229 # changelog and the manifestlog portions. The remaining
230 230 # empty chunks represent either A) the end of individual
231 231 # tree or file entries in the file list, or B) the end of
232 232 # the entire list. It's the end of the entire list if there
233 233 # were no entries (i.e. noentries is True).
234 234 if parts < 2:
235 235 parts += 1
236 236 elif noentries:
237 237 parts += 1
238 238 break
239 239 noentries = False
240 240 yield chunkheader(len(chunk))
241 241 pos = 0
242 242 while pos < len(chunk):
243 243 next = pos + 2**20
244 244 yield chunk[pos:next]
245 245 pos = next
246 246 yield closechunk()
247 247
248 248 def _unpackmanifests(self, repo, revmap, trp, prog):
249 249 self.callback = prog.increment
250 250 # no need to check for empty manifest group here:
251 251 # if the result of the merge of 1 and 2 is the same in 3 and 4,
252 252 # no new manifest will be created and the manifest group will
253 253 # be empty during the pull
254 254 self.manifestheader()
255 255 deltas = self.deltaiter()
256 256 repo.manifestlog.addgroup(deltas, revmap, trp)
257 257 prog.complete()
258 258 self.callback = None
259 259
260 260 def apply(self, repo, tr, srctype, url, targetphase=phases.draft,
261 261 expectedtotal=None):
262 262 """Add the changegroup returned by source.read() to this repo.
263 263 srctype is a string like 'push', 'pull', or 'unbundle'. url is
264 264 the URL of the repo where this changegroup is coming from.
265 265
266 266 Return an integer summarizing the change to this repo:
267 267 - nothing changed or no source: 0
268 268 - more heads than before: 1+added heads (2..n)
269 269 - fewer heads than before: -1-removed heads (-2..-n)
270 270 - number of heads stays the same: 1
271 271 """
272 272 repo = repo.unfiltered()
273 273 def csmap(x):
274 274 repo.ui.debug("add changeset %s\n" % short(x))
275 275 return len(cl)
276 276
277 277 def revmap(x):
278 278 return cl.rev(x)
279 279
280 280 changesets = files = revisions = 0
281 281
282 282 try:
283 283 # The transaction may already carry source information. In this
284 284 # case we use the top level data. We overwrite the argument
285 285 # because we need to use the top level value (if they exist)
286 286 # in this function.
287 287 srctype = tr.hookargs.setdefault('source', srctype)
288 288 url = tr.hookargs.setdefault('url', url)
289 289 repo.hook('prechangegroup',
290 290 throw=True, **pycompat.strkwargs(tr.hookargs))
291 291
292 292 # write changelog data to temp files so concurrent readers
293 293 # will not see an inconsistent view
294 294 cl = repo.changelog
295 295 cl.delayupdate(tr)
296 296 oldheads = set(cl.heads())
297 297
298 298 trp = weakref.proxy(tr)
299 299 # pull off the changeset group
300 300 repo.ui.status(_("adding changesets\n"))
301 301 clstart = len(cl)
302 302 progress = repo.ui.makeprogress(_('changesets'), unit=_('chunks'),
303 303 total=expectedtotal)
304 304 self.callback = progress.increment
305 305
306 306 efiles = set()
307 307 def onchangelog(cl, node):
308 308 efiles.update(cl.readfiles(node))
309 309
310 310 self.changelogheader()
311 311 deltas = self.deltaiter()
312 312 cgnodes = cl.addgroup(deltas, csmap, trp, addrevisioncb=onchangelog)
313 313 efiles = len(efiles)
314 314
315 315 if not cgnodes:
316 316 repo.ui.develwarn('applied empty changegroup',
317 317 config='warn-empty-changegroup')
318 318 clend = len(cl)
319 319 changesets = clend - clstart
320 320 progress.complete()
321 321 self.callback = None
322 322
323 323 # pull off the manifest group
324 324 repo.ui.status(_("adding manifests\n"))
325 325 # We know that we'll never have more manifests than we had
326 326 # changesets.
327 327 progress = repo.ui.makeprogress(_('manifests'), unit=_('chunks'),
328 328 total=changesets)
329 329 self._unpackmanifests(repo, revmap, trp, progress)
330 330
331 331 needfiles = {}
332 332 if repo.ui.configbool('server', 'validate'):
333 333 cl = repo.changelog
334 334 ml = repo.manifestlog
335 335 # validate incoming csets have their manifests
336 336 for cset in pycompat.xrange(clstart, clend):
337 337 mfnode = cl.changelogrevision(cset).manifest
338 338 mfest = ml[mfnode].readdelta()
339 339 # store file cgnodes we must see
340 340 for f, n in mfest.iteritems():
341 341 needfiles.setdefault(f, set()).add(n)
342 342
343 343 # process the files
344 344 repo.ui.status(_("adding file changes\n"))
345 345 newrevs, newfiles = _addchangegroupfiles(
346 346 repo, self, revmap, trp, efiles, needfiles)
347 347 revisions += newrevs
348 348 files += newfiles
349 349
350 350 deltaheads = 0
351 351 if oldheads:
352 352 heads = cl.heads()
353 353 deltaheads = len(heads) - len(oldheads)
354 354 for h in heads:
355 355 if h not in oldheads and repo[h].closesbranch():
356 356 deltaheads -= 1
357 357 htext = ""
358 358 if deltaheads:
359 359 htext = _(" (%+d heads)") % deltaheads
360 360
361 361 repo.ui.status(_("added %d changesets"
362 362 " with %d changes to %d files%s\n")
363 363 % (changesets, revisions, files, htext))
364 364 repo.invalidatevolatilesets()
365 365
366 366 if changesets > 0:
367 367 if 'node' not in tr.hookargs:
368 368 tr.hookargs['node'] = hex(cl.node(clstart))
369 369 tr.hookargs['node_last'] = hex(cl.node(clend - 1))
370 370 hookargs = dict(tr.hookargs)
371 371 else:
372 372 hookargs = dict(tr.hookargs)
373 373 hookargs['node'] = hex(cl.node(clstart))
374 374 hookargs['node_last'] = hex(cl.node(clend - 1))
375 375 repo.hook('pretxnchangegroup',
376 376 throw=True, **pycompat.strkwargs(hookargs))
377 377
378 378 added = [cl.node(r) for r in pycompat.xrange(clstart, clend)]
379 379 phaseall = None
380 380 if srctype in ('push', 'serve'):
381 381 # Old servers can not push the boundary themselves.
382 382 # New servers won't push the boundary if changeset already
383 383 # exists locally as secret
384 384 #
385 385 # We should not use added here but the list of all change in
386 386 # the bundle
387 387 if repo.publishing():
388 388 targetphase = phaseall = phases.public
389 389 else:
390 390 # closer target phase computation
391 391
392 392 # Those changesets have been pushed from the
393 393 # outside, their phases are going to be pushed
394 394 # alongside. Therefor `targetphase` is
395 395 # ignored.
396 396 targetphase = phaseall = phases.draft
397 397 if added:
398 398 phases.registernew(repo, tr, targetphase, added)
399 399 if phaseall is not None:
400 400 phases.advanceboundary(repo, tr, phaseall, cgnodes)
401 401
402 402 if changesets > 0:
403 403
404 404 def runhooks():
405 405 # These hooks run when the lock releases, not when the
406 406 # transaction closes. So it's possible for the changelog
407 407 # to have changed since we last saw it.
408 408 if clstart >= len(repo):
409 409 return
410 410
411 411 repo.hook("changegroup", **pycompat.strkwargs(hookargs))
412 412
413 413 for n in added:
414 414 args = hookargs.copy()
415 415 args['node'] = hex(n)
416 416 del args['node_last']
417 417 repo.hook("incoming", **pycompat.strkwargs(args))
418 418
419 419 newheads = [h for h in repo.heads()
420 420 if h not in oldheads]
421 421 repo.ui.log("incoming",
422 422 "%d incoming changes - new heads: %s\n",
423 423 len(added),
424 424 ', '.join([hex(c[:6]) for c in newheads]))
425 425
426 426 tr.addpostclose('changegroup-runhooks-%020i' % clstart,
427 427 lambda tr: repo._afterlock(runhooks))
428 428 finally:
429 429 repo.ui.flush()
430 430 # never return 0 here:
431 431 if deltaheads < 0:
432 432 ret = deltaheads - 1
433 433 else:
434 434 ret = deltaheads + 1
435 435 return ret
436 436
437 437 def deltaiter(self):
438 438 """
439 439 returns an iterator of the deltas in this changegroup
440 440
441 441 Useful for passing to the underlying storage system to be stored.
442 442 """
443 443 chain = None
444 444 for chunkdata in iter(lambda: self.deltachunk(chain), {}):
445 445 # Chunkdata: (node, p1, p2, cs, deltabase, delta, flags)
446 446 yield chunkdata
447 447 chain = chunkdata[0]
448 448
449 449 class cg2unpacker(cg1unpacker):
450 450 """Unpacker for cg2 streams.
451 451
452 452 cg2 streams add support for generaldelta, so the delta header
453 453 format is slightly different. All other features about the data
454 454 remain the same.
455 455 """
456 456 deltaheader = _CHANGEGROUPV2_DELTA_HEADER
457 457 deltaheadersize = deltaheader.size
458 458 version = '02'
459 459
460 460 def _deltaheader(self, headertuple, prevnode):
461 461 node, p1, p2, deltabase, cs = headertuple
462 462 flags = 0
463 463 return node, p1, p2, deltabase, cs, flags
464 464
465 465 class cg3unpacker(cg2unpacker):
466 466 """Unpacker for cg3 streams.
467 467
468 468 cg3 streams add support for exchanging treemanifests and revlog
469 469 flags. It adds the revlog flags to the delta header and an empty chunk
470 470 separating manifests and files.
471 471 """
472 472 deltaheader = _CHANGEGROUPV3_DELTA_HEADER
473 473 deltaheadersize = deltaheader.size
474 474 version = '03'
475 475 _grouplistcount = 2 # One list of manifests and one list of files
476 476
477 477 def _deltaheader(self, headertuple, prevnode):
478 478 node, p1, p2, deltabase, cs, flags = headertuple
479 479 return node, p1, p2, deltabase, cs, flags
480 480
481 481 def _unpackmanifests(self, repo, revmap, trp, prog):
482 482 super(cg3unpacker, self)._unpackmanifests(repo, revmap, trp, prog)
483 483 for chunkdata in iter(self.filelogheader, {}):
484 484 # If we get here, there are directory manifests in the changegroup
485 485 d = chunkdata["filename"]
486 486 repo.ui.debug("adding %s revisions\n" % d)
487 487 dirlog = repo.manifestlog._revlog.dirlog(d)
488 488 deltas = self.deltaiter()
489 489 if not dirlog.addgroup(deltas, revmap, trp):
490 490 raise error.Abort(_("received dir revlog group is empty"))
491 491
492 492 class headerlessfixup(object):
493 493 def __init__(self, fh, h):
494 494 self._h = h
495 495 self._fh = fh
496 496 def read(self, n):
497 497 if self._h:
498 498 d, self._h = self._h[:n], self._h[n:]
499 499 if len(d) < n:
500 500 d += readexactly(self._fh, n - len(d))
501 501 return d
502 502 return readexactly(self._fh, n)
503 503
504 504 @attr.s(slots=True, frozen=True)
505 505 class revisiondelta(object):
506 506 """Describes a delta entry in a changegroup.
507 507
508 508 Captured data is sufficient to serialize the delta into multiple
509 509 formats.
510 510 """
511 511 # 20 byte node of this revision.
512 512 node = attr.ib()
513 513 # 20 byte nodes of parent revisions.
514 514 p1node = attr.ib()
515 515 p2node = attr.ib()
516 516 # 20 byte node of node this delta is against.
517 517 basenode = attr.ib()
518 518 # 20 byte node of changeset revision this delta is associated with.
519 519 linknode = attr.ib()
520 520 # 2 bytes of flags to apply to revision data.
521 521 flags = attr.ib()
522 522 # Iterable of chunks holding raw delta data.
523 523 deltachunks = attr.ib()
524 524
525 525 def _sortnodesnormal(store, nodes, reorder):
526 526 """Sort nodes for changegroup generation and turn into revnums."""
527 527 # for generaldelta revlogs, we linearize the revs; this will both be
528 528 # much quicker and generate a much smaller bundle
529 529 if (store._generaldelta and reorder is None) or reorder:
530 530 dag = dagutil.revlogdag(store)
531 531 return dag.linearize(set(store.rev(n) for n in nodes))
532 532 else:
533 533 return sorted([store.rev(n) for n in nodes])
534 534
535 535 def _sortnodesellipsis(store, nodes, cl, lookup):
536 536 """Sort nodes for changegroup generation and turn into revnums."""
537 537 # Ellipses serving mode.
538 538 #
539 539 # In a perfect world, we'd generate better ellipsis-ified graphs
540 540 # for non-changelog revlogs. In practice, we haven't started doing
541 541 # that yet, so the resulting DAGs for the manifestlog and filelogs
542 542 # are actually full of bogus parentage on all the ellipsis
543 543 # nodes. This has the side effect that, while the contents are
544 544 # correct, the individual DAGs might be completely out of whack in
545 545 # a case like 882681bc3166 and its ancestors (back about 10
546 546 # revisions or so) in the main hg repo.
547 547 #
548 548 # The one invariant we *know* holds is that the new (potentially
549 549 # bogus) DAG shape will be valid if we order the nodes in the
550 550 # order that they're introduced in dramatis personae by the
551 551 # changelog, so what we do is we sort the non-changelog histories
552 552 # by the order in which they are used by the changelog.
553 553 key = lambda n: cl.rev(lookup(n))
554 554 return [store.rev(n) for n in sorted(nodes, key=key)]
555 555
556 556 def _revisiondeltanormal(store, rev, prev, linknode, deltaparentfn):
557 557 """Construct a revision delta for non-ellipses changegroup generation."""
558 558 node = store.node(rev)
559 559 p1, p2 = store.parentrevs(rev)
560 560 base = deltaparentfn(store, rev, p1, p2, prev)
561 561
562 562 prefix = ''
563 563 if store.iscensored(base) or store.iscensored(rev):
564 564 try:
565 565 delta = store.revision(node, raw=True)
566 566 except error.CensoredNodeError as e:
567 567 delta = e.tombstone
568 568 if base == nullrev:
569 569 prefix = mdiff.trivialdiffheader(len(delta))
570 570 else:
571 571 baselen = store.rawsize(base)
572 572 prefix = mdiff.replacediffheader(baselen, len(delta))
573 573 elif base == nullrev:
574 574 delta = store.revision(node, raw=True)
575 575 prefix = mdiff.trivialdiffheader(len(delta))
576 576 else:
577 577 delta = store.revdiff(base, rev)
578 578 p1n, p2n = store.parents(node)
579 579
580 580 return revisiondelta(
581 581 node=node,
582 582 p1node=p1n,
583 583 p2node=p2n,
584 584 basenode=store.node(base),
585 585 linknode=linknode,
586 586 flags=store.flags(rev),
587 587 deltachunks=(prefix, delta),
588 588 )
589 589
590 590 def _revisiondeltanarrow(cl, store, ischangelog, rev, linkrev,
591 591 linknode, clrevtolocalrev, fullclnodes,
592 592 precomputedellipsis):
593 593 linkparents = precomputedellipsis[linkrev]
594 594 def local(clrev):
595 595 """Turn a changelog revnum into a local revnum.
596 596
597 597 The ellipsis dag is stored as revnums on the changelog,
598 598 but when we're producing ellipsis entries for
599 599 non-changelog revlogs, we need to turn those numbers into
600 600 something local. This does that for us, and during the
601 601 changelog sending phase will also expand the stored
602 602 mappings as needed.
603 603 """
604 604 if clrev == nullrev:
605 605 return nullrev
606 606
607 607 if ischangelog:
608 608 return clrev
609 609
610 610 # Walk the ellipsis-ized changelog breadth-first looking for a
611 611 # change that has been linked from the current revlog.
612 612 #
613 613 # For a flat manifest revlog only a single step should be necessary
614 614 # as all relevant changelog entries are relevant to the flat
615 615 # manifest.
616 616 #
617 617 # For a filelog or tree manifest dirlog however not every changelog
618 618 # entry will have been relevant, so we need to skip some changelog
619 619 # nodes even after ellipsis-izing.
620 620 walk = [clrev]
621 621 while walk:
622 622 p = walk[0]
623 623 walk = walk[1:]
624 624 if p in clrevtolocalrev:
625 625 return clrevtolocalrev[p]
626 626 elif p in fullclnodes:
627 627 walk.extend([pp for pp in cl.parentrevs(p)
628 628 if pp != nullrev])
629 629 elif p in precomputedellipsis:
630 630 walk.extend([pp for pp in precomputedellipsis[p]
631 631 if pp != nullrev])
632 632 else:
633 633 # In this case, we've got an ellipsis with parents
634 634 # outside the current bundle (likely an
635 635 # incremental pull). We "know" that we can use the
636 636 # value of this same revlog at whatever revision
637 637 # is pointed to by linknode. "Know" is in scare
638 638 # quotes because I haven't done enough examination
639 639 # of edge cases to convince myself this is really
640 640 # a fact - it works for all the (admittedly
641 641 # thorough) cases in our testsuite, but I would be
642 642 # somewhat unsurprised to find a case in the wild
643 643 # where this breaks down a bit. That said, I don't
644 644 # know if it would hurt anything.
645 645 for i in pycompat.xrange(rev, 0, -1):
646 646 if store.linkrev(i) == clrev:
647 647 return i
648 648 # We failed to resolve a parent for this node, so
649 649 # we crash the changegroup construction.
650 650 raise error.Abort(
651 651 'unable to resolve parent while packing %r %r'
652 652 ' for changeset %r' % (store.indexfile, rev, clrev))
653 653
654 654 return nullrev
655 655
656 656 if not linkparents or (
657 657 store.parentrevs(rev) == (nullrev, nullrev)):
658 658 p1, p2 = nullrev, nullrev
659 659 elif len(linkparents) == 1:
660 660 p1, = sorted(local(p) for p in linkparents)
661 661 p2 = nullrev
662 662 else:
663 663 p1, p2 = sorted(local(p) for p in linkparents)
664 664
665 665 n = store.node(rev)
666 666 p1n, p2n = store.node(p1), store.node(p2)
667 667 flags = store.flags(rev)
668 668 flags |= revlog.REVIDX_ELLIPSIS
669 669
670 670 # TODO: try and actually send deltas for ellipsis data blocks
671 671 data = store.revision(n)
672 672 diffheader = mdiff.trivialdiffheader(len(data))
673 673
674 674 return revisiondelta(
675 675 node=n,
676 676 p1node=p1n,
677 677 p2node=p2n,
678 678 basenode=nullid,
679 679 linknode=linknode,
680 680 flags=flags,
681 681 deltachunks=(diffheader, data),
682 682 )
683 683
684 684 def deltagroup(repo, revs, store, ischangelog, lookup, deltaparentfn,
685 685 deltaheaderfn, units=None,
686 686 ellipses=False, clrevtolocalrev=None, fullclnodes=None,
687 687 precomputedellipsis=None):
688 688 """Calculate a delta group, yielding a sequence of changegroup chunks
689 689 (strings).
690 690
691 691 Given a list of changeset revs, return a set of deltas and
692 692 metadata corresponding to nodes. The first delta is
693 693 first parent(nodelist[0]) -> nodelist[0], the receiver is
694 694 guaranteed to have this parent as it has all history before
695 695 these changesets. In the case firstparent is nullrev the
696 696 changegroup starts with a full revision.
697 697
698 698 If units is not None, progress detail will be generated, units specifies
699 699 the type of revlog that is touched (changelog, manifest, etc.).
700 700 """
701 701 # if we don't have any revisions touched by these changesets, bail
702 702 if len(revs) == 0:
703 703 return
704 704
705 705 cl = repo.changelog
706 706
707 707 # add the parent of the first rev
708 708 p = store.parentrevs(revs[0])[0]
709 709 revs.insert(0, p)
710 710
711 711 # build deltas
712 712 progress = None
713 713 if units is not None:
714 714 progress = repo.ui.makeprogress(_('bundling'), unit=units,
715 715 total=(len(revs) - 1))
716 716 for r in pycompat.xrange(len(revs) - 1):
717 717 if progress:
718 718 progress.update(r + 1)
719 719 prev, curr = revs[r], revs[r + 1]
720 720 linknode = lookup(store.node(curr))
721 721
722 722 if ellipses:
723 723 linkrev = cl.rev(linknode)
724 724 clrevtolocalrev[linkrev] = curr
725 725
726 726 # This is a node to send in full, because the changeset it
727 727 # corresponds to was a full changeset.
728 728 if linknode in fullclnodes:
729 729 delta = _revisiondeltanormal(store, curr, prev, linknode,
730 730 deltaparentfn)
731 731 elif linkrev not in precomputedellipsis:
732 732 delta = None
733 733 else:
734 734 delta = _revisiondeltanarrow(
735 735 cl, store, ischangelog, curr, linkrev, linknode,
736 736 clrevtolocalrev, fullclnodes,
737 737 precomputedellipsis)
738 738 else:
739 739 delta = _revisiondeltanormal(store, curr, prev, linknode,
740 740 deltaparentfn)
741 741
742 742 if not delta:
743 743 continue
744 744
745 745 meta = deltaheaderfn(delta)
746 746 l = len(meta) + sum(len(x) for x in delta.deltachunks)
747 747 yield chunkheader(l)
748 748 yield meta
749 749 for x in delta.deltachunks:
750 750 yield x
751 751
752 752 if progress:
753 753 progress.complete()
754 754
755 755 class cgpacker(object):
756 756 def __init__(self, repo, filematcher, version, allowreorder,
757 757 deltaparentfn, builddeltaheader, manifestsend,
758 758 bundlecaps=None, ellipses=False,
759 759 shallow=False, ellipsisroots=None, fullnodes=None):
760 760 """Given a source repo, construct a bundler.
761 761
762 762 filematcher is a matcher that matches on files to include in the
763 763 changegroup. Used to facilitate sparse changegroups.
764 764
765 765 allowreorder controls whether reordering of revisions is allowed.
766 766 This value is used when ``bundle.reorder`` is ``auto`` or isn't
767 767 set.
768 768
769 769 deltaparentfn is a callable that resolves the delta parent for
770 770 a specific revision.
771 771
772 772 builddeltaheader is a callable that constructs the header for a group
773 773 delta.
774 774
775 775 manifestsend is a chunk to send after manifests have been fully emitted.
776 776
777 777 ellipses indicates whether ellipsis serving mode is enabled.
778 778
779 779 bundlecaps is optional and can be used to specify the set of
780 780 capabilities which can be used to build the bundle. While bundlecaps is
781 781 unused in core Mercurial, extensions rely on this feature to communicate
782 782 capabilities to customize the changegroup packer.
783 783
784 784 shallow indicates whether shallow data might be sent. The packer may
785 785 need to pack file contents not introduced by the changes being packed.
786 786
787 787 fullnodes is the set of changelog nodes which should not be ellipsis
788 788 nodes. We store this rather than the set of nodes that should be
789 789 ellipsis because for very large histories we expect this to be
790 790 significantly smaller.
791 791 """
792 792 assert filematcher
793 793 self._filematcher = filematcher
794 794
795 795 self.version = version
796 796 self._deltaparentfn = deltaparentfn
797 797 self._builddeltaheader = builddeltaheader
798 798 self._manifestsend = manifestsend
799 799 self._ellipses = ellipses
800 800
801 801 # Set of capabilities we can use to build the bundle.
802 802 if bundlecaps is None:
803 803 bundlecaps = set()
804 804 self._bundlecaps = bundlecaps
805 805 self._isshallow = shallow
806 806 self._fullclnodes = fullnodes
807 807
808 808 # Maps ellipsis revs to their roots at the changelog level.
809 809 self._precomputedellipsis = ellipsisroots
810 810
811 811 # experimental config: bundle.reorder
812 812 reorder = repo.ui.config('bundle', 'reorder')
813 813 if reorder == 'auto':
814 814 self._reorder = allowreorder
815 815 else:
816 816 self._reorder = stringutil.parsebool(reorder)
817 817
818 818 self._repo = repo
819 819
820 820 if self._repo.ui.verbose and not self._repo.ui.debugflag:
821 821 self._verbosenote = self._repo.ui.note
822 822 else:
823 823 self._verbosenote = lambda s: None
824 824
    def generate(self, commonrevs, clnodes, fastpathlinkrev, source):
        """Yield a sequence of changegroup byte chunks.

        ``commonrevs`` are revisions the receiver is assumed to already
        have; ``clnodes`` are the changelog nodes to send.  ``source`` is
        an opaque string describing the operation and is forwarded to
        hooks and extension callbacks.  Chunks are emitted in wire order:
        changelog, manifests (with per-directory groups for tree
        manifests), then one group per file.
        """

        repo = self._repo
        cl = repo.changelog

        self._verbosenote(_('uncompressed size of bundle content:\n'))
        size = 0

        clstate, chunks = self._generatechangelog(cl, clnodes)
        for chunk in chunks:
            size += len(chunk)
            yield chunk

        close = closechunk()
        size += len(close)
        # NOTE(review): emits a fresh closechunk() instead of reusing
        # ``close``; the bytes are identical so behavior is unaffected.
        yield closechunk()

        self._verbosenote(_('%8.i (changelog)\n') % size)

        clrevorder = clstate['clrevorder']
        mfs = clstate['mfs']
        changedfiles = clstate['changedfiles']

        # We need to make sure that the linkrev in the changegroup refers to
        # the first changeset that introduced the manifest or file revision.
        # The fastpath is usually safer than the slowpath, because the filelogs
        # are walked in revlog order.
        #
        # When taking the slowpath with reorder=None and the manifest revlog
        # uses generaldelta, the manifest may be walked in the "wrong" order.
        # Without 'clrevorder', we would get an incorrect linkrev (see fix in
        # cc0ff93d0c0c).
        #
        # When taking the fastpath, we are only vulnerable to reordering
        # of the changelog itself. The changelog never uses generaldelta, so
        # it is only reordered when reorder=True. To handle this case, we
        # simply take the slowpath, which already has the 'clrevorder' logic.
        # This was also fixed in cc0ff93d0c0c.
        fastpathlinkrev = fastpathlinkrev and not self._reorder
        # Treemanifests don't work correctly with fastpathlinkrev
        # either, because we don't discover which directory nodes to
        # send along with files. This could probably be fixed.
        fastpathlinkrev = fastpathlinkrev and (
            'treemanifest' not in repo.requirements)

        fnodes = {} # needed file nodes

        size = 0
        it = self.generatemanifests(
            commonrevs, clrevorder, fastpathlinkrev, mfs, fnodes, source,
            clstate['clrevtomanifestrev'])

        for dir, chunks in it:
            if dir:
                # Only the treemanifest-capable cg3 format may emit
                # per-directory manifest groups.
                assert self.version == b'03'
                chunk = _fileheader(dir)
                size += len(chunk)
                yield chunk

            for chunk in chunks:
                size += len(chunk)
                yield chunk

            close = closechunk()
            size += len(close)
            yield close

        self._verbosenote(_('%8.i (manifests)\n') % size)
        yield self._manifestsend

        mfdicts = None
        if self._ellipses and self._isshallow:
            mfdicts = [(self._repo.manifestlog[n].read(), lr)
                       for (n, lr) in mfs.iteritems()]

        mfs.clear()
        clrevs = set(cl.rev(x) for x in clnodes)

        it = self.generatefiles(changedfiles, commonrevs,
                                source, mfdicts, fastpathlinkrev,
                                fnodes, clrevs)

        # Emit each file's header, delta chunks, and terminating empty
        # chunk, accounting for the uncompressed size per file.
        for path, chunks in it:
            h = _fileheader(path)
            size = len(h)
            yield h

            for chunk in chunks:
                size += len(chunk)
                yield chunk

            close = closechunk()
            size += len(close)
            yield close

            self._verbosenote(_('%8.i %s\n') % (size, path))

        yield closechunk()

        if clnodes:
            repo.hook('outgoing', node=hex(clnodes[0]), source=source)
913 927
    def _generatechangelog(self, cl, nodes):
        """Generate data for changelog chunks.

        ``cl`` is the changelog, ``nodes`` the changelog nodes to send.

        Returns a 2-tuple of a dict containing state and an iterable of
        byte chunks. The state will not be fully populated until the
        chunk stream has been fully consumed, because the per-revision
        lookup callback below is what fills it in.
        """
        clrevorder = {}
        mfs = {} # needed manifests
        mfl = self._repo.manifestlog
        # TODO violates storage abstraction.
        mfrevlog = mfl._revlog
        changedfiles = set()
        clrevtomanifestrev = {}

        # Callback for the changelog, used to collect changed files and
        # manifest nodes.
        # Returns the linkrev node (identity in the changelog case).
        def lookupcl(x):
            c = cl.read(x)
            clrevorder[x] = len(clrevorder)

            if self._ellipses:
                # Only update mfs if x is going to be sent. Otherwise we
                # end up with bogus linkrevs specified for manifests and
                # we skip some manifest nodes that we should otherwise
                # have sent.
                if (x in self._fullclnodes
                    or cl.rev(x) in self._precomputedellipsis):
                    n = c[0]
                    # Record the first changeset introducing this manifest
                    # version.
                    mfs.setdefault(n, x)
                    # Set this narrow-specific dict so we have the lowest
                    # manifest revnum to look up for this cl revnum. (Part of
                    # mapping changelog ellipsis parents to manifest ellipsis
                    # parents)
                    clrevtomanifestrev.setdefault(cl.rev(x), mfrevlog.rev(n))
                # We can't trust the changed files list in the changeset if the
                # client requested a shallow clone.
                if self._isshallow:
                    changedfiles.update(mfl[c[0]].read().keys())
                else:
                    changedfiles.update(c[3])
            else:

                n = c[0]
                # record the first changeset introducing this manifest version
                mfs.setdefault(n, x)
                # Record a complete list of potentially-changed files in
                # this manifest.
                changedfiles.update(c[3])

            return x

        # Changelog doesn't benefit from reordering revisions. So send out
        # revisions in store order.
        revs = sorted(cl.rev(n) for n in nodes)

        state = {
            'clrevorder': clrevorder,
            'mfs': mfs,
            'changedfiles': changedfiles,
            'clrevtomanifestrev': clrevtomanifestrev,
        }

        gen = deltagroup(
            self._repo, revs, cl, True, lookupcl,
            self._deltaparentfn, self._builddeltaheader,
            ellipses=self._ellipses,
            units=_('changesets'),
            clrevtolocalrev={},
            fullclnodes=self._fullclnodes,
            precomputedellipsis=self._precomputedellipsis)

        return state, gen
990 1004
    def generatemanifests(self, commonrevs, clrevorder, fastpathlinkrev, mfs,
                          fnodes, source, clrevtolocalrev):
        """Returns an iterator of (dir, chunk-iterator) pairs for manifests.

        The root manifest is yielded with dir == '' first; tree manifest
        directories discovered while reading it are queued in ``tmfnodes``
        and yielded afterwards.  ``fnodes`` is populated as a side effect
        with the file nodes (and their linkrev nodes) to send later.

        `source` is unused here, but is used by extensions like remotefilelog to
        change what is sent based in pulls vs pushes, etc.
        """
        repo = self._repo
        cl = repo.changelog
        mfl = repo.manifestlog
        dirlog = mfl._revlog.dirlog
        tmfnodes = {'': mfs}

        # Callback for the manifest, used to collect linkrevs for filelog
        # revisions.
        # Returns the linkrev node (collected in lookupcl).
        def makelookupmflinknode(dir, nodes):
            if fastpathlinkrev:
                assert not dir
                return mfs.__getitem__

            def lookupmflinknode(x):
                """Callback for looking up the linknode for manifests.

                Returns the linkrev node for the specified manifest.

                SIDE EFFECT:

                1) fclnodes gets populated with the list of relevant
                file nodes if we're not using fastpathlinkrev
                2) When treemanifests are in use, collects treemanifest nodes
                to send

                Note that this means manifests must be completely sent to
                the client before you can trust the list of files and
                treemanifests to send.
                """
                clnode = nodes[x]
                mdata = mfl.get(dir, x).readfast(shallow=True)
                for p, n, fl in mdata.iterentries():
                    if fl == 't': # subdirectory manifest
                        subdir = dir + p + '/'
                        tmfclnodes = tmfnodes.setdefault(subdir, {})
                        tmfclnode = tmfclnodes.setdefault(n, clnode)
                        if clrevorder[clnode] < clrevorder[tmfclnode]:
                            tmfclnodes[n] = clnode
                    else:
                        f = dir + p
                        fclnodes = fnodes.setdefault(f, {})
                        fclnode = fclnodes.setdefault(n, clnode)
                        if clrevorder[clnode] < clrevorder[fclnode]:
                            fclnodes[n] = clnode
                return clnode
            return lookupmflinknode

        while tmfnodes:
            dir, nodes = tmfnodes.popitem()
            store = dirlog(dir)

            if not self._filematcher.visitdir(store._dir[:-1] or '.'):
                prunednodes = []
            else:
                # Drop nodes whose linkrev the receiver already has.
                frev, flr = store.rev, store.linkrev
                prunednodes = [n for n in nodes
                               if flr(frev(n)) not in commonrevs]

            if dir and not prunednodes:
                continue

            lookupfn = makelookupmflinknode(dir, nodes)

            if self._ellipses:
                revs = _sortnodesellipsis(store, prunednodes, cl,
                                          lookupfn)
            else:
                revs = _sortnodesnormal(store, prunednodes,
                                        self._reorder)

            it = deltagroup(
                self._repo, revs, store, False, lookupfn,
                self._deltaparentfn, self._builddeltaheader,
                ellipses=self._ellipses,
                units=_('manifests'),
                clrevtolocalrev=clrevtolocalrev,
                fullclnodes=self._fullclnodes,
                precomputedellipsis=self._precomputedellipsis)

            yield dir, it
1079 1093
    # The 'source' parameter is useful for extensions
    def generatefiles(self, changedfiles, commonrevs, source,
                      mfdicts, fastpathlinkrev, fnodes, clrevs):
        """Yield (path, delta-chunk-iterator) pairs for filelog revisions.

        Files with no revisions to send are skipped entirely; the caller
        is responsible for emitting file headers and close chunks.
        """
        changedfiles = list(filter(self._filematcher, changedfiles))

        if not fastpathlinkrev:
            def normallinknodes(unused, fname):
                return fnodes.get(fname, {})
        else:
            cln = self._repo.changelog.node

            def normallinknodes(store, fname):
                flinkrev = store.linkrev
                fnode = store.node
                revs = ((r, flinkrev(r)) for r in store)
                return dict((fnode(r), cln(lr))
                            for r, lr in revs if lr in clrevs)

        clrevtolocalrev = {}

        if self._isshallow:
            # In a shallow clone, the linknodes callback needs to also include
            # those file nodes that are in the manifests we sent but weren't
            # introduced by those manifests.
            commonctxs = [self._repo[c] for c in commonrevs]
            clrev = self._repo.changelog.rev

            # Defining this function has a side-effect of overriding the
            # function of the same name that was passed in as an argument.
            # TODO have caller pass in appropriate function.
            def linknodes(flog, fname):
                for c in commonctxs:
                    try:
                        fnode = c.filenode(fname)
                        clrevtolocalrev[c.rev()] = flog.rev(fnode)
                    except error.ManifestLookupError:
                        pass
                links = normallinknodes(flog, fname)
                if len(links) != len(mfdicts):
                    for mf, lr in mfdicts:
                        fnode = mf.get(fname, None)
                        if fnode in links:
                            links[fnode] = min(links[fnode], lr, key=clrev)
                        elif fnode:
                            links[fnode] = lr
                return links
        else:
            linknodes = normallinknodes

        repo = self._repo
        cl = repo.changelog
        progress = repo.ui.makeprogress(_('bundling'), unit=_('files'),
                                        total=len(changedfiles))
        for i, fname in enumerate(sorted(changedfiles)):
            filerevlog = repo.file(fname)
            if not filerevlog:
                raise error.Abort(_("empty or missing file data for %s") %
                                  fname)

            clrevtolocalrev.clear()

            linkrevnodes = linknodes(filerevlog, fname)
            # Lookup for filenodes, we collected the linkrev nodes above in the
            # fastpath case and with lookupmf in the slowpath case.
            def lookupfilelog(x):
                return linkrevnodes[x]

            # Drop nodes the receiver already has.
            frev, flr = filerevlog.rev, filerevlog.linkrev
            filenodes = [n for n in linkrevnodes
                         if flr(frev(n)) not in commonrevs]

            if filenodes:
                if self._ellipses:
                    revs = _sortnodesellipsis(filerevlog, filenodes,
                                              cl, lookupfilelog)
                else:
                    revs = _sortnodesnormal(filerevlog, filenodes,
                                            self._reorder)

                progress.update(i + 1, item=fname)

                it = deltagroup(
                    self._repo, revs, filerevlog, False, lookupfilelog,
                    self._deltaparentfn, self._builddeltaheader,
                    ellipses=self._ellipses,
                    clrevtolocalrev=clrevtolocalrev,
                    fullclnodes=self._fullclnodes,
                    precomputedellipsis=self._precomputedellipsis)

                yield fname, it

        progress.complete()
1182 1186
1183 1187 def _deltaparentprev(store, rev, p1, p2, prev):
1184 1188 """Resolve a delta parent to the previous revision.
1185 1189
1186 1190 Used for version 1 changegroups, which don't support generaldelta.
1187 1191 """
1188 1192 return prev
1189 1193
def _deltaparentgeneraldelta(store, rev, p1, p2, prev):
    """Resolve a delta parent when general deltas are supported.

    Returns the revision number to delta against, or ``nullrev`` to
    send a full snapshot.
    """
    dp = store.deltaparent(rev)
    if dp == nullrev and store.storedeltachains:
        # Avoid sending full revisions when delta parent is null. Pick prev
        # in that case. It's tempting to pick p1 in this case, as p1 will
        # be smaller in the common case. However, computing a delta against
        # p1 may require resolving the raw text of p1, which could be
        # expensive. The revlog caches should have prev cached, meaning
        # less CPU for changegroup generation. There is likely room to add
        # a flag and/or config option to control this behavior.
        base = prev
    elif dp == nullrev:
        # revlog is configured to use full snapshot for a reason,
        # stick to full snapshot.
        base = nullrev
    elif dp not in (p1, p2, prev):
        # Pick prev when we can't be sure remote has the base revision.
        # NOTE(review): this early return skips the candelta() check
        # below — presumably safe because prev was just sent; confirm.
        return prev
    else:
        base = dp

    if base != nullrev and not store.candelta(base, rev):
        base = nullrev

    return base
1216 1220
1217 1221 def _deltaparentellipses(store, rev, p1, p2, prev):
1218 1222 """Resolve a delta parent when in ellipses mode."""
1219 1223 # TODO: send better deltas when in narrow mode.
1220 1224 #
1221 1225 # changegroup.group() loops over revisions to send,
1222 1226 # including revisions we'll skip. What this means is that
1223 1227 # `prev` will be a potentially useless delta base for all
1224 1228 # ellipsis nodes, as the client likely won't have it. In
1225 1229 # the future we should do bookkeeping about which nodes
1226 1230 # have been sent to the client, and try to be
1227 1231 # significantly smarter about delta bases. This is
1228 1232 # slightly tricky because this same code has to work for
1229 1233 # all revlogs, and we don't have the linkrev/linknode here.
1230 1234 return p1
1231 1235
def _makecg1packer(repo, filematcher, bundlecaps, ellipses=False,
                   shallow=False, ellipsisroots=None, fullnodes=None):
    """Construct a packer emitting version '01' changegroups."""
    def builddeltaheader(d):
        # The cg1 header has no explicit delta base field; deltas are
        # always against the previous revision in the stream.
        return _CHANGEGROUPV1_DELTA_HEADER.pack(
            d.node, d.p1node, d.p2node, d.linknode)

    return cgpacker(repo, filematcher, b'01',
                    deltaparentfn=_deltaparentprev,
                    allowreorder=None,
                    builddeltaheader=builddeltaheader,
                    manifestsend=b'',
                    bundlecaps=bundlecaps,
                    ellipses=ellipses,
                    shallow=shallow,
                    ellipsisroots=ellipsisroots,
                    fullnodes=fullnodes)
1247 1251
def _makecg2packer(repo, filematcher, bundlecaps, ellipses=False,
                   shallow=False, ellipsisroots=None, fullnodes=None):
    """Construct a packer emitting version '02' changegroups."""
    def builddeltaheader(d):
        # Unlike cg1, the cg2 header carries an explicit delta base node.
        return _CHANGEGROUPV2_DELTA_HEADER.pack(
            d.node, d.p1node, d.p2node, d.basenode, d.linknode)

    # Since generaldelta is directly supported by cg2, reordering
    # generally doesn't help, so we disable it by default (treating
    # bundle.reorder=auto just like bundle.reorder=False).
    return cgpacker(repo, filematcher, b'02',
                    deltaparentfn=_deltaparentgeneraldelta,
                    allowreorder=False,
                    builddeltaheader=builddeltaheader,
                    manifestsend=b'',
                    bundlecaps=bundlecaps,
                    ellipses=ellipses,
                    shallow=shallow,
                    ellipsisroots=ellipsisroots,
                    fullnodes=fullnodes)
1266 1270
def _makecg3packer(repo, filematcher, bundlecaps, ellipses=False,
                   shallow=False, ellipsisroots=None, fullnodes=None):
    """Construct a packer emitting version '03' changegroups."""
    def builddeltaheader(d):
        # cg3 extends the cg2 header with a 16-bit flags field.
        return _CHANGEGROUPV3_DELTA_HEADER.pack(
            d.node, d.p1node, d.p2node, d.basenode, d.linknode, d.flags)

    if ellipses:
        deltaparentfn = _deltaparentellipses
    else:
        deltaparentfn = _deltaparentgeneraldelta

    return cgpacker(repo, filematcher, b'03',
                    deltaparentfn=deltaparentfn,
                    allowreorder=False,
                    builddeltaheader=builddeltaheader,
                    manifestsend=closechunk(),
                    bundlecaps=bundlecaps,
                    ellipses=ellipses,
                    shallow=shallow,
                    ellipsisroots=ellipsisroots,
                    fullnodes=fullnodes)
1285 1289
# Maps changegroup version identifier to a (packer factory, unpacker
# class) pair.
_packermap = {'01': (_makecg1packer, cg1unpacker),
             # cg2 adds support for exchanging generaldelta
             '02': (_makecg2packer, cg2unpacker),
             # cg3 adds support for exchanging revlog flags and treemanifests
             '03': (_makecg3packer, cg3unpacker),
}
1292 1296
def allsupportedversions(repo):
    """Return the set of changegroup versions this repo understands."""
    versions = set(_packermap)
    wants03 = (repo.ui.configbool('experimental', 'changegroup3')
               or repo.ui.configbool('experimental', 'treemanifest')
               or 'treemanifest' in repo.requirements)
    if not wants03:
        versions.discard('03')
    return versions
1300 1304
# Changegroup versions that can be applied to the repo
def supportedincomingversions(repo):
    """Return the changegroup versions this repo can receive."""
    return allsupportedversions(repo)
1304 1308
# Changegroup versions that can be created from the repo
def supportedoutgoingversions(repo):
    """Return the changegroup versions this repo can produce."""
    versions = allsupportedversions(repo)
    if 'treemanifest' in repo.requirements:
        # Versions 01 and 02 support only flat manifests and it's just too
        # expensive to convert between the flat manifest and tree manifest on
        # the fly. Since tree manifests are hashed differently, all of history
        # would have to be converted. Instead, we simply don't even pretend to
        # support versions 01 and 02.
        versions.discard('01')
        versions.discard('02')
    if repository.NARROW_REQUIREMENT in repo.requirements:
        # Versions 01 and 02 don't support revlog flags, and we need to
        # support that for stripping and unbundling to work.
        versions.discard('01')
        versions.discard('02')
    if LFS_REQUIREMENT in repo.requirements:
        # Versions 01 and 02 don't support revlog flags, and we need to
        # mark LFS entries with REVIDX_EXTSTORED.
        versions.discard('01')
        versions.discard('02')

    return versions
1328 1332
def localversion(repo):
    """Find the best version for bundles meant to be used locally.

    Such bundles come from strip and shelve, and other temporary bundles.
    """
    return max(supportedoutgoingversions(repo))
1333 1337
def safeversion(repo):
    """Return the smallest version clients of this repo can be assumed
    to support.

    For example, all hg versions that support generaldelta also support
    changegroup 02, so such a repo never needs to fall back to 01.
    """
    candidates = supportedoutgoingversions(repo)
    if 'generaldelta' in repo.requirements:
        candidates.discard('01')
    assert candidates
    return min(candidates)
1343 1347
def getbundler(version, repo, bundlecaps=None, filematcher=None,
               ellipses=False, shallow=False, ellipsisroots=None,
               fullnodes=None):
    """Obtain a changegroup packer for ``version``.

    Validates that the requested version/feature combination is
    supported, normalizes the file matcher, then delegates to the
    per-version factory registered in ``_packermap``.
    """
    assert version in supportedoutgoingversions(repo)

    if filematcher is None:
        filematcher = matchmod.alwaysmatcher(repo.root, '')

    if version == '01' and not filematcher.always():
        raise error.ProgrammingError('version 01 changegroups do not support '
                                     'sparse file matchers')

    if ellipses and version in (b'01', b'02'):
        raise error.Abort(
            _('ellipsis nodes require at least cg3 on client and server, '
              'but negotiated version %s') % version)

    # Requested files could include files not in the local store. So
    # filter those out.
    filematcher = matchmod.intersectmatchers(repo.narrowmatch(),
                                             filematcher)

    fn = _packermap[version][0]
    return fn(repo, filematcher, bundlecaps, ellipses=ellipses,
              shallow=shallow, ellipsisroots=ellipsisroots,
              fullnodes=fullnodes)
1370 1374
def getunbundler(version, fh, alg, extras=None):
    """Instantiate the unpacker for ``version`` over the stream ``fh``."""
    unpackerclass = _packermap[version][1]
    return unpackerclass(fh, alg, extras=extras)
1373 1377
def _changegroupinfo(repo, nodes, source):
    """Report how many (and, when debugging, which) changesets we bundle."""
    ui = repo.ui
    if source == 'bundle' or ui.verbose:
        ui.status(_("%d changesets found\n") % len(nodes))
    if ui.debugflag:
        ui.debug("list of changesets:\n")
        for node in nodes:
            ui.debug("%s\n" % hex(node))
1381 1385
def makechangegroup(repo, outgoing, version, source, fastpath=False,
                    bundlecaps=None):
    """Build an unbundler wrapped around a freshly generated changegroup.

    Convenience wrapper around makestream() for callers that want to
    consume the changegroup rather than stream raw bytes.
    """
    cgstream = makestream(repo, outgoing, version, source,
                          fastpath=fastpath, bundlecaps=bundlecaps)
    return getunbundler(version, util.chunkbuffer(cgstream), None,
                        {'clcount': len(outgoing.missing) })
1388 1392
def makestream(repo, outgoing, version, source, fastpath=False,
               bundlecaps=None, filematcher=None):
    """Return an iterator of changegroup chunks for ``outgoing`` revisions."""
    bundler = getbundler(version, repo, bundlecaps=bundlecaps,
                         filematcher=filematcher)

    repo = repo.unfiltered()
    commonrevs = outgoing.common
    csets = outgoing.missing
    heads = outgoing.missingheads
    # We go through the fast path if we get told to, or if all (unfiltered)
    # heads have been requested (since we then know all linkrevs will
    # be pulled by the client).
    heads.sort()
    fastpathlinkrev = fastpath or (
        repo.filtername is None and heads == sorted(repo.heads()))

    repo.hook('preoutgoing', throw=True, source=source)
    _changegroupinfo(repo, csets, source)
    return bundler.generate(commonrevs, csets, fastpathlinkrev, source)
1408 1412
def _addchangegroupfiles(repo, source, revmap, trp, expectedfiles, needfiles):
    """Apply the filelog portion of an incoming changegroup.

    ``source`` is the unbundler positioned at the file chunks, ``revmap``
    maps linkrev nodes to local revisions and ``trp`` is the transaction
    proxy.  ``needfiles`` maps filenames to the set of file nodes the
    changelog said must arrive; anything missing afterwards aborts.

    Returns a (revisions, files) tuple of counts of what was added.
    """
    revisions = 0
    files = 0
    progress = repo.ui.makeprogress(_('files'), unit=_('files'),
                                    total=expectedfiles)
    for chunkdata in iter(source.filelogheader, {}):
        files += 1
        f = chunkdata["filename"]
        repo.ui.debug("adding %s revisions\n" % f)
        progress.increment()
        fl = repo.file(f)
        o = len(fl)
        try:
            deltas = source.deltaiter()
            if not fl.addgroup(deltas, revmap, trp):
                raise error.Abort(_("received file revlog group is empty"))
        except error.CensoredBaseError as e:
            raise error.Abort(_("received delta base is censored: %s") % e)
        revisions += len(fl) - o
        if f in needfiles:
            needs = needfiles[f]
            # Tick off every node we actually received; anything beyond
            # what the changelog announced is an error.
            for new in pycompat.xrange(o, len(fl)):
                n = fl.node(new)
                if n in needs:
                    needs.remove(n)
                else:
                    raise error.Abort(
                        _("received spurious file revlog entry"))
            if not needs:
                del needfiles[f]
    progress.complete()

    # Verify that every file node the changelog promised actually exists.
    for f, needs in needfiles.iteritems():
        fl = repo.file(f)
        for n in needs:
            try:
                fl.rev(n)
            except error.LookupError:
                raise error.Abort(
                    _('missing file data for %s:%s - run hg verify') %
                    (f, hex(n)))

    return revisions, files
General Comments 0
You need to be logged in to leave comments. Login now