##// END OF EJS Templates
changegroup: extract _revisiondeltanormal() to standalone function...
Gregory Szorc -
r39021:d85b0d81 default
parent child Browse files
Show More
@@ -1,1435 +1,1436 b''
1 1 # changegroup.py - Mercurial changegroup manipulation functions
2 2 #
3 3 # Copyright 2006 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import os
11 11 import struct
12 12 import weakref
13 13
14 14 from .i18n import _
15 15 from .node import (
16 16 hex,
17 17 nullid,
18 18 nullrev,
19 19 short,
20 20 )
21 21
22 22 from .thirdparty import (
23 23 attr,
24 24 )
25 25
26 26 from . import (
27 27 dagutil,
28 28 error,
29 29 manifest,
30 30 match as matchmod,
31 31 mdiff,
32 32 phases,
33 33 pycompat,
34 34 repository,
35 35 revlog,
36 36 util,
37 37 )
38 38
39 39 from .utils import (
40 40 stringutil,
41 41 )
42 42
# Delta header structs for changegroup wire versions 1-3.  Every delta in a
# group is prefixed by one of these: four or five 20-byte nodes, and (for
# cg3 only) a big-endian 16-bit flags field.
_CHANGEGROUPV1_DELTA_HEADER = struct.Struct("20s20s20s20s")
_CHANGEGROUPV2_DELTA_HEADER = struct.Struct("20s20s20s20s20s")
_CHANGEGROUPV3_DELTA_HEADER = struct.Struct(">20s20s20s20s20sH")

LFS_REQUIREMENT = 'lfs'

# Module-level alias: readexactly(stream, n) reads exactly n bytes or aborts.
readexactly = util.readexactly
50 50
def getchunk(stream):
    """return the next chunk from stream as a string"""
    lengthdata = readexactly(stream, 4)
    length = struct.unpack(">l", lengthdata)[0]
    if length > 4:
        # The length prefix counts its own four bytes.
        return readexactly(stream, length - 4)
    if length:
        raise error.Abort(_("invalid chunk length %d") % length)
    return ""
60 60
def chunkheader(length):
    """return a changegroup chunk header (string)"""
    # The on-the-wire length includes the 4-byte header itself.
    return struct.pack(">l", 4 + length)
64 64
def closechunk():
    """return a changegroup chunk header (string) for a zero-length chunk"""
    # An explicit zero length marks the end of a chunk sequence.
    return struct.pack(">l", 0)
68 68
def _fileheader(path):
    """Obtain a changegroup chunk header for a named path."""
    header = chunkheader(len(path))
    return header + path
72 72
def writechunks(ui, chunks, filename, vfs=None):
    """Write chunks to a file and return its filename.

    The stream is assumed to be a bundle file.
    Existing files will not be overwritten.
    If no filename is specified, a temporary file is created.
    """
    fh = None
    cleanup = None
    try:
        if not filename:
            # No target given: create a temporary bundle file and
            # remember to remove it if writing fails.
            fd, filename = pycompat.mkstemp(prefix="hg-bundle-", suffix=".hg")
            fh = os.fdopen(fd, r"wb")
            cleanup = filename
        elif vfs:
            fh = vfs.open(filename, "wb")
        else:
            # Increase default buffer size because default is usually
            # small (4k is common on Linux).
            fh = open(filename, "wb", 131072)
        for chunk in chunks:
            fh.write(chunk)
        # Success: disarm the cleanup path so the file survives.
        cleanup = None
        return filename
    finally:
        if fh is not None:
            fh.close()
        if cleanup is not None:
            if filename and vfs:
                vfs.unlink(cleanup)
            else:
                os.unlink(cleanup)
106 106
class cg1unpacker(object):
    """Unpacker for cg1 changegroup streams.

    A changegroup unpacker handles the framing of the revision data in
    the wire format. Most consumers will want to use the apply()
    method to add the changes from the changegroup to a repository.

    If you're forwarding a changegroup unmodified to another consumer,
    use getchunks(), which returns an iterator of changegroup
    chunks. This is mostly useful for cases where you need to know the
    data stream has ended by observing the end of the changegroup.

    deltachunk() is useful only if you're applying delta data. Most
    consumers should prefer apply() instead.

    A few other public methods exist. Those are used only for
    bundlerepo and some debug commands - their use is discouraged.
    """
    # Struct describing the per-delta header for this wire version.
    deltaheader = _CHANGEGROUPV1_DELTA_HEADER
    deltaheadersize = deltaheader.size
    version = '01'
    _grouplistcount = 1 # One list of files after the manifests

    def __init__(self, fh, alg, extras=None):
        # ``fh`` is the raw input stream; ``alg`` names the bundle
        # compression engine ('UN' means uncompressed, None defaults to it).
        if alg is None:
            alg = 'UN'
        if alg not in util.compengines.supportedbundletypes:
            raise error.Abort(_('unknown stream compression type: %s')
                              % alg)
        if alg == 'BZ':
            alg = '_truncatedBZ'

        compengine = util.compengines.forbundletype(alg)
        self._stream = compengine.decompressorreader(fh)
        self._type = alg
        self.extras = extras or {}
        # Optional no-argument callable invoked once per parsed chunk
        # (used for progress reporting).
        self.callback = None

    # These methods (compressed, read, seek, tell) all appear to only
    # be used by bundlerepo, but it's a little hard to tell.
    def compressed(self):
        return self._type is not None and self._type != 'UN'
    def read(self, l):
        return self._stream.read(l)
    def seek(self, pos):
        return self._stream.seek(pos)
    def tell(self):
        return self._stream.tell()
    def close(self):
        return self._stream.close()

    def _chunklength(self):
        # Read the 4-byte big-endian length prefix and return the payload
        # length; the prefix counts itself, so 0 means "end of group".
        d = readexactly(self._stream, 4)
        l = struct.unpack(">l", d)[0]
        if l <= 4:
            if l:
                raise error.Abort(_("invalid chunk length %d") % l)
            return 0
        if self.callback:
            self.callback()
        return l - 4

    def changelogheader(self):
        """v10 does not have a changelog header chunk"""
        return {}

    def manifestheader(self):
        """v10 does not have a manifest header chunk"""
        return {}

    def filelogheader(self):
        """return the header of the filelogs chunk, v10 only has the filename"""
        l = self._chunklength()
        if not l:
            return {}
        fname = readexactly(self._stream, l)
        return {'filename': fname}

    def _deltaheader(self, headertuple, prevnode):
        # cg1 does not transmit a delta base: it is implicitly the previous
        # node in the stream, or p1 for the first delta of a group.
        node, p1, p2, cs = headertuple
        if prevnode is None:
            deltabase = p1
        else:
            deltabase = prevnode
        flags = 0
        return node, p1, p2, deltabase, cs, flags

    def deltachunk(self, prevnode):
        # Parse and return the next delta as a 7-tuple
        # (node, p1, p2, cs, deltabase, delta, flags), or {} at end of group.
        l = self._chunklength()
        if not l:
            return {}
        headerdata = readexactly(self._stream, self.deltaheadersize)
        header = self.deltaheader.unpack(headerdata)
        delta = readexactly(self._stream, l - self.deltaheadersize)
        node, p1, p2, deltabase, cs, flags = self._deltaheader(header, prevnode)
        return (node, p1, p2, cs, deltabase, delta, flags)

    def getchunks(self):
        """returns all the chunks contains in the bundle

        Used when you need to forward the binary stream to a file or another
        network API. To do so, it parse the changegroup data, otherwise it will
        block in case of sshrepo because it don't know the end of the stream.
        """
        # For changegroup 1 and 2, we expect 3 parts: changelog, manifestlog,
        # and a list of filelogs. For changegroup 3, we expect 4 parts:
        # changelog, manifestlog, a list of tree manifestlogs, and a list of
        # filelogs.
        #
        # Changelog and manifestlog parts are terminated with empty chunks. The
        # tree and file parts are a list of entry sections. Each entry section
        # is a series of chunks terminating in an empty chunk. The list of these
        # entry sections is terminated in yet another empty chunk, so we know
        # we've reached the end of the tree/file list when we reach an empty
        # chunk that was proceeded by no non-empty chunks.

        parts = 0
        while parts < 2 + self._grouplistcount:
            noentries = True
            while True:
                chunk = getchunk(self)
                if not chunk:
                    # The first two empty chunks represent the end of the
                    # changelog and the manifestlog portions. The remaining
                    # empty chunks represent either A) the end of individual
                    # tree or file entries in the file list, or B) the end of
                    # the entire list. It's the end of the entire list if there
                    # were no entries (i.e. noentries is True).
                    if parts < 2:
                        parts += 1
                    elif noentries:
                        parts += 1
                    break
                noentries = False
                yield chunkheader(len(chunk))
                pos = 0
                # Re-emit the payload in at most 1 MiB slices.
                while pos < len(chunk):
                    next = pos + 2**20
                    yield chunk[pos:next]
                    pos = next
            yield closechunk()

    def _unpackmanifests(self, repo, revmap, trp, prog):
        # Apply the manifest portion of the stream to the repo, reporting
        # progress via ``prog``.
        self.callback = prog.increment
        # no need to check for empty manifest group here:
        # if the result of the merge of 1 and 2 is the same in 3 and 4,
        # no new manifest will be created and the manifest group will
        # be empty during the pull
        self.manifestheader()
        deltas = self.deltaiter()
        repo.manifestlog.addgroup(deltas, revmap, trp)
        prog.complete()
        self.callback = None

    def apply(self, repo, tr, srctype, url, targetphase=phases.draft,
              expectedtotal=None):
        """Add the changegroup returned by source.read() to this repo.
        srctype is a string like 'push', 'pull', or 'unbundle'. url is
        the URL of the repo where this changegroup is coming from.

        Return an integer summarizing the change to this repo:
        - nothing changed or no source: 0
        - more heads than before: 1+added heads (2..n)
        - fewer heads than before: -1-removed heads (-2..-n)
        - number of heads stays the same: 1
        """
        repo = repo.unfiltered()
        def csmap(x):
            repo.ui.debug("add changeset %s\n" % short(x))
            return len(cl)

        def revmap(x):
            return cl.rev(x)

        changesets = files = revisions = 0

        try:
            # The transaction may already carry source information. In this
            # case we use the top level data. We overwrite the argument
            # because we need to use the top level value (if they exist)
            # in this function.
            srctype = tr.hookargs.setdefault('source', srctype)
            url = tr.hookargs.setdefault('url', url)
            repo.hook('prechangegroup',
                      throw=True, **pycompat.strkwargs(tr.hookargs))

            # write changelog data to temp files so concurrent readers
            # will not see an inconsistent view
            cl = repo.changelog
            cl.delayupdate(tr)
            oldheads = set(cl.heads())

            trp = weakref.proxy(tr)
            # pull off the changeset group
            repo.ui.status(_("adding changesets\n"))
            clstart = len(cl)
            progress = repo.ui.makeprogress(_('changesets'), unit=_('chunks'),
                                            total=expectedtotal)
            self.callback = progress.increment

            efiles = set()
            def onchangelog(cl, node):
                efiles.update(cl.readfiles(node))

            self.changelogheader()
            deltas = self.deltaiter()
            cgnodes = cl.addgroup(deltas, csmap, trp, addrevisioncb=onchangelog)
            efiles = len(efiles)

            if not cgnodes:
                repo.ui.develwarn('applied empty changegroup',
                                  config='warn-empty-changegroup')
            clend = len(cl)
            changesets = clend - clstart
            progress.complete()
            self.callback = None

            # pull off the manifest group
            repo.ui.status(_("adding manifests\n"))
            # We know that we'll never have more manifests than we had
            # changesets.
            progress = repo.ui.makeprogress(_('manifests'), unit=_('chunks'),
                                            total=changesets)
            self._unpackmanifests(repo, revmap, trp, progress)

            needfiles = {}
            if repo.ui.configbool('server', 'validate'):
                cl = repo.changelog
                ml = repo.manifestlog
                # validate incoming csets have their manifests
                for cset in pycompat.xrange(clstart, clend):
                    mfnode = cl.changelogrevision(cset).manifest
                    mfest = ml[mfnode].readdelta()
                    # store file cgnodes we must see
                    for f, n in mfest.iteritems():
                        needfiles.setdefault(f, set()).add(n)

            # process the files
            repo.ui.status(_("adding file changes\n"))
            newrevs, newfiles = _addchangegroupfiles(
                repo, self, revmap, trp, efiles, needfiles)
            revisions += newrevs
            files += newfiles

            deltaheads = 0
            if oldheads:
                heads = cl.heads()
                deltaheads = len(heads) - len(oldheads)
                for h in heads:
                    if h not in oldheads and repo[h].closesbranch():
                        deltaheads -= 1
            htext = ""
            if deltaheads:
                htext = _(" (%+d heads)") % deltaheads

            repo.ui.status(_("added %d changesets"
                             " with %d changes to %d files%s\n")
                           % (changesets, revisions, files, htext))
            repo.invalidatevolatilesets()

            if changesets > 0:
                if 'node' not in tr.hookargs:
                    tr.hookargs['node'] = hex(cl.node(clstart))
                    tr.hookargs['node_last'] = hex(cl.node(clend - 1))
                    hookargs = dict(tr.hookargs)
                else:
                    hookargs = dict(tr.hookargs)
                    hookargs['node'] = hex(cl.node(clstart))
                    hookargs['node_last'] = hex(cl.node(clend - 1))
                repo.hook('pretxnchangegroup',
                          throw=True, **pycompat.strkwargs(hookargs))

            added = [cl.node(r) for r in pycompat.xrange(clstart, clend)]
            phaseall = None
            if srctype in ('push', 'serve'):
                # Old servers can not push the boundary themselves.
                # New servers won't push the boundary if changeset already
                # exists locally as secret
                #
                # We should not use added here but the list of all change in
                # the bundle
                if repo.publishing():
                    targetphase = phaseall = phases.public
                else:
                    # closer target phase computation

                    # Those changesets have been pushed from the
                    # outside, their phases are going to be pushed
                    # alongside. Therefor `targetphase` is
                    # ignored.
                    targetphase = phaseall = phases.draft
            if added:
                phases.registernew(repo, tr, targetphase, added)
            if phaseall is not None:
                phases.advanceboundary(repo, tr, phaseall, cgnodes)

            if changesets > 0:

                def runhooks():
                    # These hooks run when the lock releases, not when the
                    # transaction closes. So it's possible for the changelog
                    # to have changed since we last saw it.
                    if clstart >= len(repo):
                        return

                    repo.hook("changegroup", **pycompat.strkwargs(hookargs))

                    for n in added:
                        args = hookargs.copy()
                        args['node'] = hex(n)
                        del args['node_last']
                        repo.hook("incoming", **pycompat.strkwargs(args))

                    newheads = [h for h in repo.heads()
                                if h not in oldheads]
                    repo.ui.log("incoming",
                                "%d incoming changes - new heads: %s\n",
                                len(added),
                                ', '.join([hex(c[:6]) for c in newheads]))

                tr.addpostclose('changegroup-runhooks-%020i' % clstart,
                                lambda tr: repo._afterlock(runhooks))
        finally:
            repo.ui.flush()
        # never return 0 here:
        if deltaheads < 0:
            ret = deltaheads - 1
        else:
            ret = deltaheads + 1
        return ret

    def deltaiter(self):
        """
        returns an iterator of the deltas in this changegroup

        Useful for passing to the underlying storage system to be stored.
        """
        chain = None
        for chunkdata in iter(lambda: self.deltachunk(chain), {}):
            # Chunkdata: (node, p1, p2, cs, deltabase, delta, flags)
            yield chunkdata
            chain = chunkdata[0]
449 449
class cg2unpacker(cg1unpacker):
    """Unpacker for cg2 streams.

    cg2 streams add support for generaldelta, so the delta header
    format is slightly different. All other features about the data
    remain the same.
    """
    deltaheader = _CHANGEGROUPV2_DELTA_HEADER
    deltaheadersize = deltaheader.size
    version = '02'

    def _deltaheader(self, headertuple, prevnode):
        # cg2 transmits the delta base explicitly, so prevnode is unused
        # and there are never any revlog flags.
        node, p1, p2, deltabase, cs = headertuple
        return node, p1, p2, deltabase, cs, 0
465 465
class cg3unpacker(cg2unpacker):
    """Unpacker for cg3 streams.

    cg3 streams add support for exchanging treemanifests and revlog
    flags. It adds the revlog flags to the delta header and an empty chunk
    separating manifests and files.
    """
    deltaheader = _CHANGEGROUPV3_DELTA_HEADER
    deltaheadersize = deltaheader.size
    version = '03'
    _grouplistcount = 2 # One list of manifests and one list of files

    def _deltaheader(self, headertuple, prevnode):
        # cg3 carries everything in the header (including flags), so the
        # unpacked tuple already has the final shape.
        node, p1, p2, deltabase, cs, flags = headertuple
        return node, p1, p2, deltabase, cs, flags

    def _unpackmanifests(self, repo, revmap, trp, prog):
        super(cg3unpacker, self)._unpackmanifests(repo, revmap, trp, prog)
        for header in iter(self.filelogheader, {}):
            # If we get here, there are directory manifests in the changegroup
            dirname = header["filename"]
            repo.ui.debug("adding %s revisions\n" % dirname)
            dirlog = repo.manifestlog._revlog.dirlog(dirname)
            if not dirlog.addgroup(self.deltaiter(), revmap, trp):
                raise error.Abort(_("received dir revlog group is empty"))
492 492
class headerlessfixup(object):
    """File-like wrapper that replays already-consumed header bytes.

    Reads drain the buffered header first, then fall through to the
    underlying file handle.
    """
    def __init__(self, fh, h):
        self._h = h
        self._fh = fh

    def read(self, n):
        buffered = self._h
        if not buffered:
            return readexactly(self._fh, n)
        d = buffered[:n]
        self._h = buffered[n:]
        if len(d) < n:
            # Header exhausted mid-read: top up from the real stream.
            d += readexactly(self._fh, n - len(d))
        return d
504 504
@attr.s(slots=True, frozen=True)
class revisiondelta(object):
    """Describes a delta entry in a changegroup.

    Captured data is sufficient to serialize the delta into multiple
    formats.
    """
    # 20 byte node of this revision.
    node = attr.ib()
    # 20 byte nodes of parent revisions.
    p1node = attr.ib()
    p2node = attr.ib()
    # 20 byte node of node this delta is against.
    basenode = attr.ib()
    # 20 byte node of changeset revision this delta is associated with.
    linknode = attr.ib()
    # 2 bytes of flags to apply to revision data.
    flags = attr.ib()
    # Iterable of chunks holding raw delta data.
    deltachunks = attr.ib()
525 525
526 526 def _sortnodesnormal(store, nodes, reorder):
527 527 """Sort nodes for changegroup generation and turn into revnums."""
528 528 # for generaldelta revlogs, we linearize the revs; this will both be
529 529 # much quicker and generate a much smaller bundle
530 530 if (store._generaldelta and reorder is None) or reorder:
531 531 dag = dagutil.revlogdag(store)
532 532 return dag.linearize(set(store.rev(n) for n in nodes))
533 533 else:
534 534 return sorted([store.rev(n) for n in nodes])
535 535
536 536 def _sortnodesellipsis(store, nodes, clnodetorev, lookup):
537 537 """Sort nodes for changegroup generation and turn into revnums."""
538 538 # Ellipses serving mode.
539 539 #
540 540 # In a perfect world, we'd generate better ellipsis-ified graphs
541 541 # for non-changelog revlogs. In practice, we haven't started doing
542 542 # that yet, so the resulting DAGs for the manifestlog and filelogs
543 543 # are actually full of bogus parentage on all the ellipsis
544 544 # nodes. This has the side effect that, while the contents are
545 545 # correct, the individual DAGs might be completely out of whack in
546 546 # a case like 882681bc3166 and its ancestors (back about 10
547 547 # revisions or so) in the main hg repo.
548 548 #
549 549 # The one invariant we *know* holds is that the new (potentially
550 550 # bogus) DAG shape will be valid if we order the nodes in the
551 551 # order that they're introduced in dramatis personae by the
552 552 # changelog, so what we do is we sort the non-changelog histories
553 553 # by the order in which they are used by the changelog.
554 554 key = lambda n: clnodetorev[lookup(n)]
555 555 return [store.rev(n) for n in sorted(nodes, key=key)]
556 556
def _revisiondeltanormal(store, rev, prev, linknode, deltaparentfn):
    """Construct a revision delta for non-ellipses changegroup generation."""
    node = store.node(rev)
    p1, p2 = store.parentrevs(rev)
    base = deltaparentfn(store, rev, p1, p2, prev)

    header = ''
    if store.iscensored(base) or store.iscensored(rev):
        # A censored revision travels as its tombstone, framed as a full
        # replacement of whatever the delta base holds.
        try:
            payload = store.revision(node, raw=True)
        except error.CensoredNodeError as e:
            payload = e.tombstone
        if base == nullrev:
            header = mdiff.trivialdiffheader(len(payload))
        else:
            header = mdiff.replacediffheader(store.rawsize(base),
                                             len(payload))
    elif base == nullrev:
        # No usable delta base: ship the full text behind a trivial header.
        payload = store.revision(node, raw=True)
        header = mdiff.trivialdiffheader(len(payload))
    else:
        payload = store.revdiff(base, rev)

    p1n, p2n = store.parents(node)

    return revisiondelta(
        node=node,
        p1node=p1n,
        p2node=p2n,
        basenode=store.node(base),
        linknode=linknode,
        flags=store.flags(rev),
        deltachunks=(header, payload),
    )
590
557 591 class cgpacker(object):
    def __init__(self, repo, filematcher, version, allowreorder,
                 deltaparentfn, builddeltaheader, manifestsend,
                 bundlecaps=None, ellipses=False,
                 shallow=False, ellipsisroots=None, fullnodes=None):
        """Given a source repo, construct a bundler.

        filematcher is a matcher that matches on files to include in the
        changegroup. Used to facilitate sparse changegroups.

        allowreorder controls whether reordering of revisions is allowed.
        This value is used when ``bundle.reorder`` is ``auto`` or isn't
        set.

        deltaparentfn is a callable that resolves the delta parent for
        a specific revision.

        builddeltaheader is a callable that constructs the header for a group
        delta.

        manifestsend is a chunk to send after manifests have been fully emitted.

        ellipses indicates whether ellipsis serving mode is enabled.

        bundlecaps is optional and can be used to specify the set of
        capabilities which can be used to build the bundle. While bundlecaps is
        unused in core Mercurial, extensions rely on this feature to communicate
        capabilities to customize the changegroup packer.

        shallow indicates whether shallow data might be sent. The packer may
        need to pack file contents not introduced by the changes being packed.

        fullnodes is the list of nodes which should not be ellipsis nodes. We
        store this rather than the set of nodes that should be ellipsis because
        for very large histories we expect this to be significantly smaller.
        """
        assert filematcher
        self._filematcher = filematcher

        self.version = version
        self._deltaparentfn = deltaparentfn
        self._builddeltaheader = builddeltaheader
        self._manifestsend = manifestsend
        self._ellipses = ellipses

        # Set of capabilities we can use to build the bundle.
        if bundlecaps is None:
            bundlecaps = set()
        self._bundlecaps = bundlecaps
        self._isshallow = shallow
        self._fullnodes = fullnodes

        # Maps ellipsis revs to their roots at the changelog level.
        self._precomputedellipsis = ellipsisroots

        # experimental config: bundle.reorder
        reorder = repo.ui.config('bundle', 'reorder')
        if reorder == 'auto':
            self._reorder = allowreorder
        else:
            self._reorder = stringutil.parsebool(reorder)

        self._repo = repo

        # In verbose (but not debug) mode, emit per-section size notes.
        if self._repo.ui.verbose and not self._repo.ui.debugflag:
            self._verbosenote = self._repo.ui.note
        else:
            self._verbosenote = lambda s: None

        # Maps CL revs to per-revlog revisions. Cleared in close() at
        # the end of each group.
        self._clrevtolocalrev = {}
        self._nextclrevtolocalrev = {}

        # Maps changelog nodes to changelog revs. Filled in once
        # during changelog stage and then left unmodified.
        self._clnodetorev = {}
634 668
635 669 def _close(self):
636 670 # Ellipses serving mode.
637 671 self._clrevtolocalrev.clear()
638 672 if self._nextclrevtolocalrev is not None:
639 673 self._clrevtolocalrev = self._nextclrevtolocalrev
640 674 self._nextclrevtolocalrev = None
641 675
642 676 return closechunk()
643 677
    def group(self, revs, store, ischangelog, lookup, units=None):
        """Calculate a delta group, yielding a sequence of changegroup chunks
        (strings).

        Given a list of changeset revs, return a set of deltas and
        metadata corresponding to nodes. The first delta is
        first parent(nodelist[0]) -> nodelist[0], the receiver is
        guaranteed to have this parent as it has all history before
        these changesets. In the case firstparent is nullrev the
        changegroup starts with a full revision.

        If units is not None, progress detail will be generated, units specifies
        the type of revlog that is touched (changelog, manifest, etc.).
        """
        # if we don't have any revisions touched by these changesets, bail
        if len(revs) == 0:
            yield self._close()
            return

        # add the parent of the first rev
        p = store.parentrevs(revs[0])[0]
        revs.insert(0, p)

        # build deltas
        progress = None
        if units is not None:
            progress = self._repo.ui.makeprogress(_('bundling'), unit=units,
                                                  total=(len(revs) - 1))
        for r in pycompat.xrange(len(revs) - 1):
            if progress:
                progress.update(r + 1)
            prev, curr = revs[r], revs[r + 1]
            linknode = lookup(store.node(curr))

            # Ellipsis mode needs its own delta computation; the normal
            # path is handled by the standalone helper.
            if self._ellipses:
                delta = self._revisiondeltanarrow(store, ischangelog,
                                                  curr, prev, linknode)
            else:
                delta = _revisiondeltanormal(store, curr, prev, linknode,
                                             self._deltaparentfn)

            if not delta:
                continue

            # Frame: length header, delta header, then the raw delta chunks.
            meta = self._builddeltaheader(delta)
            l = len(meta) + sum(len(x) for x in delta.deltachunks)
            yield chunkheader(l)
            yield meta
            for x in delta.deltachunks:
                yield x

        if progress:
            progress.complete()
        yield self._close()
698 732
699 733 # filter any nodes that claim to be part of the known set
700 734 def _prune(self, store, missing, commonrevs):
701 735 # TODO this violates storage abstraction for manifests.
702 736 if isinstance(store, manifest.manifestrevlog):
703 737 if not self._filematcher.visitdir(store._dir[:-1] or '.'):
704 738 return []
705 739
706 740 rr, rl = store.rev, store.linkrev
707 741 return [n for n in missing if rl(rr(n)) not in commonrevs]
708 742
709 743 def _packmanifests(self, dir, dirlog, revs, lookuplinknode):
710 744 """Pack manifests into a changegroup stream.
711 745
712 746 Encodes the directory name in the output so multiple manifests
713 747 can be sent. Multiple manifests is not supported by cg1 and cg2.
714 748 """
715 749 if dir:
716 750 assert self.version == b'03'
717 751 yield _fileheader(dir)
718 752
719 753 for chunk in self.group(revs, dirlog, False, lookuplinknode,
720 754 units=_('manifests')):
721 755 yield chunk
722 756
    def generate(self, commonrevs, clnodes, fastpathlinkrev, source):
        """Yield a sequence of changegroup byte chunks."""

        repo = self._repo
        cl = repo.changelog

        self._verbosenote(_('uncompressed size of bundle content:\n'))
        size = 0

        # Changelog first; its traversal also records which manifests and
        # files the later sections must send.
        clstate, chunks = self._generatechangelog(cl, clnodes)
        for chunk in chunks:
            size += len(chunk)
            yield chunk

        self._verbosenote(_('%8.i (changelog)\n') % size)

        clrevorder = clstate['clrevorder']
        mfs = clstate['mfs']
        changedfiles = clstate['changedfiles']

        # We need to make sure that the linkrev in the changegroup refers to
        # the first changeset that introduced the manifest or file revision.
        # The fastpath is usually safer than the slowpath, because the filelogs
        # are walked in revlog order.
        #
        # When taking the slowpath with reorder=None and the manifest revlog
        # uses generaldelta, the manifest may be walked in the "wrong" order.
        # Without 'clrevorder', we would get an incorrect linkrev (see fix in
        # cc0ff93d0c0c).
        #
        # When taking the fastpath, we are only vulnerable to reordering
        # of the changelog itself. The changelog never uses generaldelta, so
        # it is only reordered when reorder=True. To handle this case, we
        # simply take the slowpath, which already has the 'clrevorder' logic.
        # This was also fixed in cc0ff93d0c0c.
        fastpathlinkrev = fastpathlinkrev and not self._reorder
        # Treemanifests don't work correctly with fastpathlinkrev
        # either, because we don't discover which directory nodes to
        # send along with files. This could probably be fixed.
        fastpathlinkrev = fastpathlinkrev and (
            'treemanifest' not in repo.requirements)

        fnodes = {} # needed file nodes

        for chunk in self.generatemanifests(commonrevs, clrevorder,
                fastpathlinkrev, mfs, fnodes, source):
            yield chunk

        mfdicts = None
        if self._ellipses and self._isshallow:
            mfdicts = [(self._repo.manifestlog[n].read(), lr)
                       for (n, lr) in mfs.iteritems()]

        mfs.clear()
        clrevs = set(cl.rev(x) for x in clnodes)

        if not fastpathlinkrev:
            def linknodes(unused, fname):
                return fnodes.get(fname, {})
        else:
            cln = cl.node
            def linknodes(filerevlog, fname):
                llr = filerevlog.linkrev
                fln = filerevlog.node
                revs = ((r, llr(r)) for r in filerevlog)
                return dict((fln(r), cln(lr)) for r, lr in revs if lr in clrevs)

        for chunk in self.generatefiles(changedfiles, linknodes, commonrevs,
                                        source, mfdicts):
            yield chunk

        yield self._close()

        if clnodes:
            repo.hook('outgoing', node=hex(clnodes[0]), source=source)
798 832
    def _generatechangelog(self, cl, nodes):
        """Generate data for changelog chunks.

        Returns a 2-tuple of a dict containing state and an iterable of
        byte chunks. The state will not be fully populated until the
        chunk stream has been fully consumed.
        """
        clrevorder = {}
        mfs = {} # needed manifests
        mfl = self._repo.manifestlog
        # TODO violates storage abstraction.
        mfrevlog = mfl._revlog
        changedfiles = set()

        # Callback for the changelog, used to collect changed files and
        # manifest nodes.
        # Returns the linkrev node (identity in the changelog case).
        def lookupcl(x):
            c = cl.read(x)
            clrevorder[x] = len(clrevorder)

            if self._ellipses:
                # Only update mfs if x is going to be sent. Otherwise we
                # end up with bogus linkrevs specified for manifests and
                # we skip some manifest nodes that we should otherwise
                # have sent.
                if (x in self._fullnodes
                    or cl.rev(x) in self._precomputedellipsis):
                    n = c[0]
                    # Record the first changeset introducing this manifest
                    # version.
                    mfs.setdefault(n, x)
                    # Set this narrow-specific dict so we have the lowest
                    # manifest revnum to look up for this cl revnum. (Part of
                    # mapping changelog ellipsis parents to manifest ellipsis
                    # parents)
                    self._nextclrevtolocalrev.setdefault(cl.rev(x),
                                                         mfrevlog.rev(n))
                # We can't trust the changed files list in the changeset if the
                # client requested a shallow clone.
                if self._isshallow:
                    changedfiles.update(mfl[c[0]].read().keys())
                else:
                    changedfiles.update(c[3])
            else:

                n = c[0]
                # record the first changeset introducing this manifest version
                mfs.setdefault(n, x)
                # Record a complete list of potentially-changed files in
                # this manifest.
                changedfiles.update(c[3])

            return x

        # Changelog doesn't benefit from reordering revisions. So send out
        # revisions in store order.
        revs = sorted(cl.rev(n) for n in nodes)

        state = {
            'clrevorder': clrevorder,
            'mfs': mfs,
            'changedfiles': changedfiles,
        }

        gen = self.group(revs, cl, True, lookupcl, units=_('changesets'))

        return state, gen
867 901
    def generatemanifests(self, commonrevs, clrevorder, fastpathlinkrev, mfs,
                          fnodes, source):
        """Returns an iterator of changegroup chunks containing manifests.

        `source` is unused here, but is used by extensions like remotefilelog to
        change what is sent based in pulls vs pushes, etc.

        ``mfs`` maps manifest node -> changelog node that introduced it (as
        collected by ``_generatechangelog``). ``fnodes`` is populated as a
        side effect with the file nodes referenced by the emitted manifests;
        with tree manifests, subdirectory manifests are queued and sent too.
        """
        repo = self._repo
        mfl = repo.manifestlog
        dirlog = mfl._revlog.dirlog
        tmfnodes = {'': mfs}

        # Callback for the manifest, used to collect linkrevs for filelog
        # revisions.
        # Returns the linkrev node (collected in lookupcl).
        def makelookupmflinknode(dir, nodes):
            if fastpathlinkrev:
                assert not dir
                return mfs.__getitem__

            def lookupmflinknode(x):
                """Callback for looking up the linknode for manifests.

                Returns the linkrev node for the specified manifest.

                SIDE EFFECT:

                  1) fclnodes gets populated with the list of relevant
                     file nodes if we're not using fastpathlinkrev
                  2) When treemanifests are in use, collects treemanifest nodes
                     to send

                Note that this means manifests must be completely sent to
                the client before you can trust the list of files and
                treemanifests to send.
                """
                clnode = nodes[x]
                mdata = mfl.get(dir, x).readfast(shallow=True)
                for p, n, fl in mdata.iterentries():
                    if fl == 't': # subdirectory manifest
                        subdir = dir + p + '/'
                        tmfclnodes = tmfnodes.setdefault(subdir, {})
                        tmfclnode = tmfclnodes.setdefault(n, clnode)
                        if clrevorder[clnode] < clrevorder[tmfclnode]:
                            tmfclnodes[n] = clnode
                    else:
                        f = dir + p
                        fclnodes = fnodes.setdefault(f, {})
                        fclnode = fclnodes.setdefault(n, clnode)
                        if clrevorder[clnode] < clrevorder[fclnode]:
                            fclnodes[n] = clnode
                return clnode
            return lookupmflinknode

        size = 0
        while tmfnodes:
            dir, nodes = tmfnodes.popitem()
            store = dirlog(dir)
            prunednodes = self._prune(store, nodes, commonrevs)
            # The root manifest ('') is always emitted, even when empty
            # after pruning; subdirectory manifests only when non-empty.
            if not dir or prunednodes:
                lookupfn = makelookupmflinknode(dir, nodes)

                if self._ellipses:
                    revs = _sortnodesellipsis(store, prunednodes,
                                              self._clnodetorev, lookupfn)
                else:
                    revs = _sortnodesnormal(store, prunednodes,
                                            self._reorder)

                for x in self._packmanifests(dir, store, revs, lookupfn):
                    size += len(x)
                    yield x
        self._verbosenote(_('%8.i (manifests)\n') % size)
        yield self._manifestsend
942 976
    # The 'source' parameter is useful for extensions
    def generatefiles(self, changedfiles, linknodes, commonrevs, source,
                      mfdicts):
        """Yield changegroup chunks for the changed files.

        ``linknodes`` is a callable returning the linkrev node map for a
        filelog. In shallow mode it is wrapped so that file nodes pulled in
        by the manifests we sent (but not introduced by them) are covered
        as well; ``mfdicts`` is only consulted in that shallow case.
        """
        changedfiles = list(filter(self._filematcher, changedfiles))

        if self._isshallow:
            # In a shallow clone, the linknodes callback needs to also include
            # those file nodes that are in the manifests we sent but weren't
            # introduced by those manifests.
            commonctxs = [self._repo[c] for c in commonrevs]
            oldlinknodes = linknodes
            clrev = self._repo.changelog.rev

            # Defining this function has a side-effect of overriding the
            # function of the same name that was passed in as an argument.
            # TODO have caller pass in appropriate function.
            def linknodes(flog, fname):
                for c in commonctxs:
                    try:
                        fnode = c.filenode(fname)
                        self._clrevtolocalrev[c.rev()] = flog.rev(fnode)
                    except error.ManifestLookupError:
                        pass
                links = oldlinknodes(flog, fname)
                if len(links) != len(mfdicts):
                    for mf, lr in mfdicts:
                        fnode = mf.get(fname, None)
                        if fnode in links:
                            # Prefer the earliest linkrev (in changelog
                            # order) for a node seen more than once.
                            links[fnode] = min(links[fnode], lr, key=clrev)
                        elif fnode:
                            links[fnode] = lr
                return links

        return self._generatefiles(changedfiles, linknodes, commonrevs, source)
977 1011
    def _generatefiles(self, changedfiles, linknodes, commonrevs, source):
        """Emit a file header plus a delta group for each changed file.

        Files are processed in sorted name order. Nodes the receiver
        already has (reachable from ``commonrevs``) are pruned first, and
        files with nothing left to send are skipped entirely.
        """
        repo = self._repo
        progress = repo.ui.makeprogress(_('bundling'), unit=_('files'),
                                        total=len(changedfiles))
        for i, fname in enumerate(sorted(changedfiles)):
            filerevlog = repo.file(fname)
            if not filerevlog:
                raise error.Abort(_("empty or missing file data for %s") %
                                  fname)

            linkrevnodes = linknodes(filerevlog, fname)
            # Lookup for filenodes, we collected the linkrev nodes above in the
            # fastpath case and with lookupmf in the slowpath case.
            def lookupfilelog(x):
                return linkrevnodes[x]

            filenodes = self._prune(filerevlog, linkrevnodes, commonrevs)
            if filenodes:
                if self._ellipses:
                    revs = _sortnodesellipsis(filerevlog, filenodes,
                                              self._clnodetorev, lookupfilelog)
                else:
                    revs = _sortnodesnormal(filerevlog, filenodes,
                                            self._reorder)

                progress.update(i + 1, item=fname)
                h = _fileheader(fname)
                size = len(h)
                yield h
                for chunk in self.group(revs, filerevlog, False, lookupfilelog):
                    size += len(chunk)
                    yield chunk
                self._verbosenote(_('%8.i %s\n') % (size, fname))
        progress.complete()
1012 1046
    def _revisiondeltanormal(self, store, ischangelog, rev, prev, linknode):
        """Produce a ``revisiondelta`` describing ``rev`` in ``store``.

        The delta base is chosen by ``self._deltaparentfn``. Censored
        revisions, and revisions whose base is null, fall back to emitting
        the full raw text with an explicit diff header instead of a real
        delta.

        ``ischangelog`` is not used here; it exists for signature symmetry
        with ``_revisiondeltanarrow``.
        """
        node = store.node(rev)
        p1, p2 = store.parentrevs(rev)
        base = self._deltaparentfn(store, rev, p1, p2, prev)

        prefix = ''
        if store.iscensored(base) or store.iscensored(rev):
            # A censored endpoint cannot be diffed against; send the raw
            # text (or the censorship tombstone) with a header that
            # replaces the base content outright.
            try:
                delta = store.revision(node, raw=True)
            except error.CensoredNodeError as e:
                delta = e.tombstone
            if base == nullrev:
                prefix = mdiff.trivialdiffheader(len(delta))
            else:
                baselen = store.rawsize(base)
                prefix = mdiff.replacediffheader(baselen, len(delta))
        elif base == nullrev:
            delta = store.revision(node, raw=True)
            prefix = mdiff.trivialdiffheader(len(delta))
        else:
            delta = store.revdiff(base, rev)
        p1n, p2n = store.parents(node)

        return revisiondelta(
            node=node,
            p1node=p1n,
            p2node=p2n,
            basenode=store.node(base),
            linknode=linknode,
            flags=store.flags(rev),
            deltachunks=(prefix, delta),
        )
1045
    def _revisiondeltanarrow(self, store, ischangelog, rev, prev, linknode):
        """Produce a ``revisiondelta`` for ``rev`` in ellipsis (narrow) mode.

        Returns None when the revision should be skipped entirely. Full
        nodes are delegated to ``_revisiondeltanormal``; ellipsis nodes are
        emitted as full texts with synthesized parents and the
        REVIDX_ELLIPSIS flag set.
        """
        # build up some mapping information that's useful later. See
        # the local() nested function below.
        if ischangelog:
            self._clnodetorev[linknode] = rev
            linkrev = rev
            self._clrevtolocalrev[linkrev] = rev
        else:
            linkrev = self._clnodetorev[linknode]
            self._clrevtolocalrev[linkrev] = rev

        # This is a node to send in full, because the changeset it
        # corresponds to was a full changeset.
        if linknode in self._fullnodes:
            return _revisiondeltanormal(store, rev, prev, linknode,
                                        self._deltaparentfn)

        # At this point, a node can either be one we should skip or an
        # ellipsis. If it's not an ellipsis, bail immediately.
        if linkrev not in self._precomputedellipsis:
            return

        linkparents = self._precomputedellipsis[linkrev]
        def local(clrev):
            """Turn a changelog revnum into a local revnum.

            The ellipsis dag is stored as revnums on the changelog,
            but when we're producing ellipsis entries for
            non-changelog revlogs, we need to turn those numbers into
            something local. This does that for us, and during the
            changelog sending phase will also expand the stored
            mappings as needed.
            """
            if clrev == nullrev:
                return nullrev

            if ischangelog:
                # If we're doing the changelog, it's possible that we
                # have a parent that is already on the client, and we
                # need to store some extra mapping information so that
                # our contained ellipsis nodes will be able to resolve
                # their parents.
                if clrev not in self._clrevtolocalrev:
                    clnode = store.node(clrev)
                    self._clnodetorev[clnode] = clrev
                return clrev

            # Walk the ellipsis-ized changelog breadth-first looking for a
            # change that has been linked from the current revlog.
            #
            # For a flat manifest revlog only a single step should be necessary
            # as all relevant changelog entries are relevant to the flat
            # manifest.
            #
            # For a filelog or tree manifest dirlog however not every changelog
            # entry will have been relevant, so we need to skip some changelog
            # nodes even after ellipsis-izing.
            walk = [clrev]
            while walk:
                p = walk[0]
                walk = walk[1:]
                if p in self._clrevtolocalrev:
                    return self._clrevtolocalrev[p]
                elif p in self._fullnodes:
                    walk.extend([pp for pp in self._repo.changelog.parentrevs(p)
                                 if pp != nullrev])
                elif p in self._precomputedellipsis:
                    walk.extend([pp for pp in self._precomputedellipsis[p]
                                 if pp != nullrev])
                else:
                    # In this case, we've got an ellipsis with parents
                    # outside the current bundle (likely an
                    # incremental pull). We "know" that we can use the
                    # value of this same revlog at whatever revision
                    # is pointed to by linknode. "Know" is in scare
                    # quotes because I haven't done enough examination
                    # of edge cases to convince myself this is really
                    # a fact - it works for all the (admittedly
                    # thorough) cases in our testsuite, but I would be
                    # somewhat unsurprised to find a case in the wild
                    # where this breaks down a bit. That said, I don't
                    # know if it would hurt anything.
                    for i in pycompat.xrange(rev, 0, -1):
                        if store.linkrev(i) == clrev:
                            return i
                    # We failed to resolve a parent for this node, so
                    # we crash the changegroup construction.
                    raise error.Abort(
                        'unable to resolve parent while packing %r %r'
                        ' for changeset %r' % (store.indexfile, rev, clrev))

            return nullrev

        if not linkparents or (
            store.parentrevs(rev) == (nullrev, nullrev)):
            p1, p2 = nullrev, nullrev
        elif len(linkparents) == 1:
            p1, = sorted(local(p) for p in linkparents)
            p2 = nullrev
        else:
            p1, p2 = sorted(local(p) for p in linkparents)

        n = store.node(rev)
        p1n, p2n = store.node(p1), store.node(p2)
        flags = store.flags(rev)
        flags |= revlog.REVIDX_ELLIPSIS

        # TODO: try and actually send deltas for ellipsis data blocks
        data = store.revision(n)
        diffheader = mdiff.trivialdiffheader(len(data))

        return revisiondelta(
            node=n,
            p1node=p1n,
            p2node=p2n,
            basenode=nullid,
            linknode=linknode,
            flags=flags,
            deltachunks=(diffheader, data),
        )
1166 1167
1167 1168 def _deltaparentprev(store, rev, p1, p2, prev):
1168 1169 """Resolve a delta parent to the previous revision.
1169 1170
1170 1171 Used for version 1 changegroups, which don't support generaldelta.
1171 1172 """
1172 1173 return prev
1173 1174
def _deltaparentgeneraldelta(store, rev, p1, p2, prev):
    """Resolve a delta parent when general deltas are supported."""
    dp = store.deltaparent(rev)

    if dp == nullrev:
        if store.storedeltachains:
            # Avoid sending full revisions when delta parent is null. Pick
            # prev in that case. It's tempting to pick p1 in this case, as
            # p1 will be smaller in the common case. However, computing a
            # delta against p1 may require resolving the raw text of p1,
            # which could be expensive. The revlog caches should have prev
            # cached, meaning less CPU for changegroup generation. There is
            # likely room to add a flag and/or config option to control
            # this behavior.
            base = prev
        else:
            # revlog is configured to use full snapshot for a reason,
            # stick to full snapshot.
            base = nullrev
    elif dp in (p1, p2, prev):
        base = dp
    else:
        # Pick prev when we can't be sure remote has the base revision.
        return prev

    if base != nullrev and not store.candelta(base, rev):
        base = nullrev

    return base
1200 1201
1201 1202 def _deltaparentellipses(store, rev, p1, p2, prev):
1202 1203 """Resolve a delta parent when in ellipses mode."""
1203 1204 # TODO: send better deltas when in narrow mode.
1204 1205 #
1205 1206 # changegroup.group() loops over revisions to send,
1206 1207 # including revisions we'll skip. What this means is that
1207 1208 # `prev` will be a potentially useless delta base for all
1208 1209 # ellipsis nodes, as the client likely won't have it. In
1209 1210 # the future we should do bookkeeping about which nodes
1210 1211 # have been sent to the client, and try to be
1211 1212 # significantly smarter about delta bases. This is
1212 1213 # slightly tricky because this same code has to work for
1213 1214 # all revlogs, and we don't have the linkrev/linknode here.
1214 1215 return p1
1215 1216
def _makecg1packer(repo, filematcher, bundlecaps, ellipses=False,
                   shallow=False, ellipsisroots=None, fullnodes=None):
    """Construct a packer that emits version '01' changegroups."""
    def builddeltaheader(d):
        # cg1 headers carry no explicit delta base node.
        return _CHANGEGROUPV1_DELTA_HEADER.pack(
            d.node, d.p1node, d.p2node, d.linknode)

    return cgpacker(repo, filematcher, b'01',
                    deltaparentfn=_deltaparentprev,
                    allowreorder=None,
                    builddeltaheader=builddeltaheader,
                    manifestsend=b'',
                    bundlecaps=bundlecaps,
                    ellipses=ellipses,
                    shallow=shallow,
                    ellipsisroots=ellipsisroots,
                    fullnodes=fullnodes)
1231 1232
def _makecg2packer(repo, filematcher, bundlecaps, ellipses=False,
                   shallow=False, ellipsisroots=None, fullnodes=None):
    """Construct a packer that emits version '02' changegroups."""
    def builddeltaheader(d):
        return _CHANGEGROUPV2_DELTA_HEADER.pack(
            d.node, d.p1node, d.p2node, d.basenode, d.linknode)

    # Since generaldelta is directly supported by cg2, reordering
    # generally doesn't help, so we disable it by default (treating
    # bundle.reorder=auto just like bundle.reorder=False).
    return cgpacker(repo, filematcher, b'02',
                    deltaparentfn=_deltaparentgeneraldelta,
                    allowreorder=False,
                    builddeltaheader=builddeltaheader,
                    manifestsend=b'',
                    bundlecaps=bundlecaps,
                    ellipses=ellipses,
                    shallow=shallow,
                    ellipsisroots=ellipsisroots,
                    fullnodes=fullnodes)
1250 1251
def _makecg3packer(repo, filematcher, bundlecaps, ellipses=False,
                   shallow=False, ellipsisroots=None, fullnodes=None):
    """Construct a packer that emits version '03' changegroups."""
    def builddeltaheader(d):
        # cg3 extends the cg2 header with the revlog flags field.
        return _CHANGEGROUPV3_DELTA_HEADER.pack(
            d.node, d.p1node, d.p2node, d.basenode, d.linknode, d.flags)

    # Ellipsis serving needs its own delta-parent strategy; otherwise cg3
    # resolves delta parents the same way cg2 does.
    if ellipses:
        deltaparentfn = _deltaparentellipses
    else:
        deltaparentfn = _deltaparentgeneraldelta

    return cgpacker(repo, filematcher, b'03',
                    deltaparentfn=deltaparentfn,
                    allowreorder=False,
                    builddeltaheader=builddeltaheader,
                    manifestsend=closechunk(),
                    bundlecaps=bundlecaps,
                    ellipses=ellipses,
                    shallow=shallow,
                    ellipsisroots=ellipsisroots,
                    fullnodes=fullnodes)
1269 1270
# Maps changegroup version identifier to a 2-tuple of
# (packer factory function, unpacker class).
_packermap = {'01': (_makecg1packer, cg1unpacker),
             # cg2 adds support for exchanging generaldelta
             '02': (_makecg2packer, cg2unpacker),
             # cg3 adds support for exchanging revlog flags and treemanifests
             '03': (_makecg3packer, cg3unpacker),
}
1276 1277
def allsupportedversions(repo):
    """Return the set of all changegroup versions this code knows about.

    Version '03' is only included when the repository opts into features
    that require it (tree manifests / experimental changegroup3).
    """
    versions = set(_packermap)
    wantcg3 = (repo.ui.configbool('experimental', 'changegroup3')
               or repo.ui.configbool('experimental', 'treemanifest')
               or 'treemanifest' in repo.requirements)
    if not wantcg3:
        versions.discard('03')
    return versions
1284 1285
def supportedincomingversions(repo):
    """Return the changegroup versions that can be applied to the repo."""
    return allsupportedversions(repo)
1288 1289
def supportedoutgoingversions(repo):
    """Return the changegroup versions that can be created from the repo."""
    versions = allsupportedversions(repo)
    reqs = repo.requirements

    # Each of these repository requirements implies data that only cg3
    # can express, so the older formats are withdrawn:
    #
    # - treemanifest: versions 01 and 02 support only flat manifests and
    #   it's just too expensive to convert between the flat manifest and
    #   tree manifest on the fly. Since tree manifests are hashed
    #   differently, all of history would have to be converted. Instead,
    #   we simply don't even pretend to support versions 01 and 02.
    # - narrow: versions 01 and 02 don't support revlog flags, and we
    #   need those for stripping and unbundling to work.
    # - lfs: versions 01 and 02 don't support revlog flags, and we need
    #   to mark LFS entries with REVIDX_EXTSTORED.
    if ('treemanifest' in reqs
        or repository.NARROW_REQUIREMENT in reqs
        or LFS_REQUIREMENT in reqs):
        versions.discard('01')
        versions.discard('02')

    return versions
1312 1313
def localversion(repo):
    """Find the best version for bundles that are meant to be used locally.

    That covers bundles from strip and shelve, and temporary bundles;
    since they stay local, the newest supported format is always safe.
    """
    return max(supportedoutgoingversions(repo))
1317 1318
def safeversion(repo):
    """Find the smallest version it's safe to assume repo clients support.

    For example, all hg versions that support generaldelta also support
    changegroup 02, so a generaldelta repo can rule out '01'.
    """
    versions = supportedoutgoingversions(repo)
    if 'generaldelta' in repo.requirements:
        versions.discard('01')
    assert versions
    return min(versions)
1327 1328
def getbundler(version, repo, bundlecaps=None, filematcher=None,
               ellipses=False, shallow=False, ellipsisroots=None,
               fullnodes=None):
    """Obtain a changegroup packer for ``version``.

    Validates that the requested feature set (sparse file matcher,
    ellipsis nodes) is supported by the negotiated changegroup version,
    then delegates construction to the per-version factory registered in
    ``_packermap``.

    Raises ProgrammingError for a sparse matcher with version '01', and
    Abort when ellipsis nodes are requested below cg3.
    """
    assert version in supportedoutgoingversions(repo)

    if filematcher is None:
        filematcher = matchmod.alwaysmatcher(repo.root, '')

    if version == '01' and not filematcher.always():
        raise error.ProgrammingError('version 01 changegroups do not support '
                                     'sparse file matchers')

    if ellipses and version in (b'01', b'02'):
        raise error.Abort(
            _('ellipsis nodes require at least cg3 on client and server, '
              'but negotiated version %s') % version)

    # Requested files could include files not in the local store. So
    # filter those out.
    filematcher = matchmod.intersectmatchers(repo.narrowmatch(),
                                             filematcher)

    fn = _packermap[version][0]
    return fn(repo, filematcher, bundlecaps, ellipses=ellipses,
              shallow=shallow, ellipsisroots=ellipsisroots,
              fullnodes=fullnodes)
1354 1355
def getunbundler(version, fh, alg, extras=None):
    """Instantiate the unpacker class registered for ``version``."""
    unbundlercls = _packermap[version][1]
    return unbundlercls(fh, alg, extras=extras)
1357 1358
def _changegroupinfo(repo, nodes, source):
    """Report the changesets contained in a changegroup to the user."""
    ui = repo.ui
    if not (ui.verbose or source == 'bundle'):
        return
    ui.status(_("%d changesets found\n") % len(nodes))
    if ui.debugflag:
        ui.debug("list of changesets:\n")
        for node in nodes:
            ui.debug("%s\n" % hex(node))
1365 1366
def makechangegroup(repo, outgoing, version, source, fastpath=False,
                    bundlecaps=None):
    """Create a changegroup and return it wrapped in the matching unbundler."""
    stream = makestream(repo, outgoing, version, source,
                        fastpath=fastpath, bundlecaps=bundlecaps)
    extras = {'clcount': len(outgoing.missing)}
    return getunbundler(version, util.chunkbuffer(stream), None, extras)
1372 1373
def makestream(repo, outgoing, version, source, fastpath=False,
               bundlecaps=None, filematcher=None):
    """Return an iterable of changegroup chunks for the outgoing set."""
    bundler = getbundler(version, repo, bundlecaps=bundlecaps,
                         filematcher=filematcher)

    repo = repo.unfiltered()
    heads = outgoing.missingheads
    heads.sort()
    # Take the fast path when explicitly told to, or when all unfiltered
    # heads have been requested (then we know all linkrevs will be pulled
    # by the client).
    fastpathlinkrev = fastpath or (
            repo.filtername is None and heads == sorted(repo.heads()))

    repo.hook('preoutgoing', throw=True, source=source)
    _changegroupinfo(repo, outgoing.missing, source)
    return bundler.generate(outgoing.common, outgoing.missing,
                            fastpathlinkrev, source)
1392 1393
def _addchangegroupfiles(repo, source, revmap, trp, expectedfiles, needfiles):
    """Apply the file portion of a changegroup to the repository.

    ``source`` yields filelog headers and delta chunks. ``revmap`` and
    ``trp`` are passed straight through to ``filelog.addgroup()``.
    ``needfiles`` maps filename to the set of file nodes we expect to
    receive; received nodes are checked off, and any node still listed
    afterwards must already exist locally or we abort.

    Returns a 2-tuple of (revisions added, files touched).
    """
    revisions = 0
    files = 0
    progress = repo.ui.makeprogress(_('files'), unit=_('files'),
                                    total=expectedfiles)
    for chunkdata in iter(source.filelogheader, {}):
        files += 1
        f = chunkdata["filename"]
        repo.ui.debug("adding %s revisions\n" % f)
        progress.increment()
        fl = repo.file(f)
        o = len(fl)
        try:
            deltas = source.deltaiter()
            if not fl.addgroup(deltas, revmap, trp):
                raise error.Abort(_("received file revlog group is empty"))
        except error.CensoredBaseError as e:
            raise error.Abort(_("received delta base is censored: %s") % e)
        revisions += len(fl) - o
        if f in needfiles:
            needs = needfiles[f]
            # Check off every newly-added node; anything we received that
            # wasn't asked for is an error.
            for new in pycompat.xrange(o, len(fl)):
                n = fl.node(new)
                if n in needs:
                    needs.remove(n)
                else:
                    raise error.Abort(
                        _("received spurious file revlog entry"))
            if not needs:
                del needfiles[f]
    progress.complete()

    # Nodes still in needfiles were never received; make sure they at
    # least already exist locally before declaring success.
    for f, needs in needfiles.iteritems():
        fl = repo.file(f)
        for n in needs:
            try:
                fl.rev(n)
            except error.LookupError:
                raise error.Abort(
                    _('missing file data for %s:%s - run hg verify') %
                    (f, hex(n)))

    return revisions, files
General Comments 0
You need to be logged in to leave comments. Login now