##// END OF EJS Templates
changegroup: differentiate between fulltext and diff based deltas...
Gregory Szorc -
r39052:39b8277e default
parent child Browse files
Show More
@@ -1,1458 +1,1487 b''
1 1 # changegroup.py - Mercurial changegroup manipulation functions
2 2 #
3 3 # Copyright 2006 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import os
11 11 import struct
12 12 import weakref
13 13
14 14 from .i18n import _
15 15 from .node import (
16 16 hex,
17 17 nullid,
18 18 nullrev,
19 19 short,
20 20 )
21 21
22 22 from .thirdparty import (
23 23 attr,
24 24 )
25 25
26 26 from . import (
27 27 dagutil,
28 28 error,
29 29 match as matchmod,
30 30 mdiff,
31 31 phases,
32 32 pycompat,
33 33 repository,
34 34 revlog,
35 35 util,
36 36 )
37 37
38 38 from .utils import (
39 39 stringutil,
40 40 )
41 41
42 42 _CHANGEGROUPV1_DELTA_HEADER = struct.Struct("20s20s20s20s")
43 43 _CHANGEGROUPV2_DELTA_HEADER = struct.Struct("20s20s20s20s20s")
44 44 _CHANGEGROUPV3_DELTA_HEADER = struct.Struct(">20s20s20s20s20sH")
45 45
46 46 LFS_REQUIREMENT = 'lfs'
47 47
48 48 readexactly = util.readexactly
49 49
50 50 def getchunk(stream):
51 51 """return the next chunk from stream as a string"""
52 52 d = readexactly(stream, 4)
53 53 l = struct.unpack(">l", d)[0]
54 54 if l <= 4:
55 55 if l:
56 56 raise error.Abort(_("invalid chunk length %d") % l)
57 57 return ""
58 58 return readexactly(stream, l - 4)
59 59
60 60 def chunkheader(length):
61 61 """return a changegroup chunk header (string)"""
62 62 return struct.pack(">l", length + 4)
63 63
64 64 def closechunk():
65 65 """return a changegroup chunk header (string) for a zero-length chunk"""
66 66 return struct.pack(">l", 0)
67 67
68 68 def _fileheader(path):
69 69 """Obtain a changegroup chunk header for a named path."""
70 70 return chunkheader(len(path)) + path
71 71
72 72 def writechunks(ui, chunks, filename, vfs=None):
73 73 """Write chunks to a file and return its filename.
74 74
75 75 The stream is assumed to be a bundle file.
76 76 Existing files will not be overwritten.
77 77 If no filename is specified, a temporary file is created.
78 78 """
79 79 fh = None
80 80 cleanup = None
81 81 try:
82 82 if filename:
83 83 if vfs:
84 84 fh = vfs.open(filename, "wb")
85 85 else:
86 86 # Increase default buffer size because default is usually
87 87 # small (4k is common on Linux).
88 88 fh = open(filename, "wb", 131072)
89 89 else:
90 90 fd, filename = pycompat.mkstemp(prefix="hg-bundle-", suffix=".hg")
91 91 fh = os.fdopen(fd, r"wb")
92 92 cleanup = filename
93 93 for c in chunks:
94 94 fh.write(c)
95 95 cleanup = None
96 96 return filename
97 97 finally:
98 98 if fh is not None:
99 99 fh.close()
100 100 if cleanup is not None:
101 101 if filename and vfs:
102 102 vfs.unlink(cleanup)
103 103 else:
104 104 os.unlink(cleanup)
105 105
106 106 class cg1unpacker(object):
107 107 """Unpacker for cg1 changegroup streams.
108 108
109 109 A changegroup unpacker handles the framing of the revision data in
110 110 the wire format. Most consumers will want to use the apply()
111 111 method to add the changes from the changegroup to a repository.
112 112
113 113 If you're forwarding a changegroup unmodified to another consumer,
114 114 use getchunks(), which returns an iterator of changegroup
115 115 chunks. This is mostly useful for cases where you need to know the
116 116 data stream has ended by observing the end of the changegroup.
117 117
118 118 deltachunk() is useful only if you're applying delta data. Most
119 119 consumers should prefer apply() instead.
120 120
121 121 A few other public methods exist. Those are used only for
122 122 bundlerepo and some debug commands - their use is discouraged.
123 123 """
124 124 deltaheader = _CHANGEGROUPV1_DELTA_HEADER
125 125 deltaheadersize = deltaheader.size
126 126 version = '01'
127 127 _grouplistcount = 1 # One list of files after the manifests
128 128
129 129 def __init__(self, fh, alg, extras=None):
130 130 if alg is None:
131 131 alg = 'UN'
132 132 if alg not in util.compengines.supportedbundletypes:
133 133 raise error.Abort(_('unknown stream compression type: %s')
134 134 % alg)
135 135 if alg == 'BZ':
136 136 alg = '_truncatedBZ'
137 137
138 138 compengine = util.compengines.forbundletype(alg)
139 139 self._stream = compengine.decompressorreader(fh)
140 140 self._type = alg
141 141 self.extras = extras or {}
142 142 self.callback = None
143 143
144 144 # These methods (compressed, read, seek, tell) all appear to only
145 145 # be used by bundlerepo, but it's a little hard to tell.
146 146 def compressed(self):
147 147 return self._type is not None and self._type != 'UN'
148 148 def read(self, l):
149 149 return self._stream.read(l)
150 150 def seek(self, pos):
151 151 return self._stream.seek(pos)
152 152 def tell(self):
153 153 return self._stream.tell()
154 154 def close(self):
155 155 return self._stream.close()
156 156
157 157 def _chunklength(self):
158 158 d = readexactly(self._stream, 4)
159 159 l = struct.unpack(">l", d)[0]
160 160 if l <= 4:
161 161 if l:
162 162 raise error.Abort(_("invalid chunk length %d") % l)
163 163 return 0
164 164 if self.callback:
165 165 self.callback()
166 166 return l - 4
167 167
168 168 def changelogheader(self):
169 169 """v10 does not have a changelog header chunk"""
170 170 return {}
171 171
172 172 def manifestheader(self):
173 173 """v10 does not have a manifest header chunk"""
174 174 return {}
175 175
176 176 def filelogheader(self):
177 177 """return the header of the filelogs chunk, v10 only has the filename"""
178 178 l = self._chunklength()
179 179 if not l:
180 180 return {}
181 181 fname = readexactly(self._stream, l)
182 182 return {'filename': fname}
183 183
184 184 def _deltaheader(self, headertuple, prevnode):
185 185 node, p1, p2, cs = headertuple
186 186 if prevnode is None:
187 187 deltabase = p1
188 188 else:
189 189 deltabase = prevnode
190 190 flags = 0
191 191 return node, p1, p2, deltabase, cs, flags
192 192
193 193 def deltachunk(self, prevnode):
194 194 l = self._chunklength()
195 195 if not l:
196 196 return {}
197 197 headerdata = readexactly(self._stream, self.deltaheadersize)
198 198 header = self.deltaheader.unpack(headerdata)
199 199 delta = readexactly(self._stream, l - self.deltaheadersize)
200 200 node, p1, p2, deltabase, cs, flags = self._deltaheader(header, prevnode)
201 201 return (node, p1, p2, cs, deltabase, delta, flags)
202 202
203 203 def getchunks(self):
204 204 """returns all the chunks contained in the bundle
205 205
206 206 Used when you need to forward the binary stream to a file or another
207 207 network API. To do so, it parses the changegroup data; otherwise it would
208 208 block in case of sshrepo because it doesn't know the end of the stream.
209 209 """
210 210 # For changegroup 1 and 2, we expect 3 parts: changelog, manifestlog,
211 211 # and a list of filelogs. For changegroup 3, we expect 4 parts:
212 212 # changelog, manifestlog, a list of tree manifestlogs, and a list of
213 213 # filelogs.
214 214 #
215 215 # Changelog and manifestlog parts are terminated with empty chunks. The
216 216 # tree and file parts are a list of entry sections. Each entry section
217 217 # is a series of chunks terminating in an empty chunk. The list of these
218 218 # entry sections is terminated in yet another empty chunk, so we know
219 219 # we've reached the end of the tree/file list when we reach an empty
220 220 # chunk that was preceded by no non-empty chunks.
221 221
222 222 parts = 0
223 223 while parts < 2 + self._grouplistcount:
224 224 noentries = True
225 225 while True:
226 226 chunk = getchunk(self)
227 227 if not chunk:
228 228 # The first two empty chunks represent the end of the
229 229 # changelog and the manifestlog portions. The remaining
230 230 # empty chunks represent either A) the end of individual
231 231 # tree or file entries in the file list, or B) the end of
232 232 # the entire list. It's the end of the entire list if there
233 233 # were no entries (i.e. noentries is True).
234 234 if parts < 2:
235 235 parts += 1
236 236 elif noentries:
237 237 parts += 1
238 238 break
239 239 noentries = False
240 240 yield chunkheader(len(chunk))
241 241 pos = 0
242 242 while pos < len(chunk):
243 243 next = pos + 2**20
244 244 yield chunk[pos:next]
245 245 pos = next
246 246 yield closechunk()
247 247
248 248 def _unpackmanifests(self, repo, revmap, trp, prog):
249 249 self.callback = prog.increment
250 250 # no need to check for empty manifest group here:
251 251 # if the result of the merge of 1 and 2 is the same in 3 and 4,
252 252 # no new manifest will be created and the manifest group will
253 253 # be empty during the pull
254 254 self.manifestheader()
255 255 deltas = self.deltaiter()
256 256 repo.manifestlog.addgroup(deltas, revmap, trp)
257 257 prog.complete()
258 258 self.callback = None
259 259
260 260 def apply(self, repo, tr, srctype, url, targetphase=phases.draft,
261 261 expectedtotal=None):
262 262 """Add the changegroup returned by source.read() to this repo.
263 263 srctype is a string like 'push', 'pull', or 'unbundle'. url is
264 264 the URL of the repo where this changegroup is coming from.
265 265
266 266 Return an integer summarizing the change to this repo:
267 267 - nothing changed or no source: 0
268 268 - more heads than before: 1+added heads (2..n)
269 269 - fewer heads than before: -1-removed heads (-2..-n)
270 270 - number of heads stays the same: 1
271 271 """
272 272 repo = repo.unfiltered()
273 273 def csmap(x):
274 274 repo.ui.debug("add changeset %s\n" % short(x))
275 275 return len(cl)
276 276
277 277 def revmap(x):
278 278 return cl.rev(x)
279 279
280 280 changesets = files = revisions = 0
281 281
282 282 try:
283 283 # The transaction may already carry source information. In this
284 284 # case we use the top level data. We overwrite the argument
285 285 # because we need to use the top level value (if they exist)
286 286 # in this function.
287 287 srctype = tr.hookargs.setdefault('source', srctype)
288 288 url = tr.hookargs.setdefault('url', url)
289 289 repo.hook('prechangegroup',
290 290 throw=True, **pycompat.strkwargs(tr.hookargs))
291 291
292 292 # write changelog data to temp files so concurrent readers
293 293 # will not see an inconsistent view
294 294 cl = repo.changelog
295 295 cl.delayupdate(tr)
296 296 oldheads = set(cl.heads())
297 297
298 298 trp = weakref.proxy(tr)
299 299 # pull off the changeset group
300 300 repo.ui.status(_("adding changesets\n"))
301 301 clstart = len(cl)
302 302 progress = repo.ui.makeprogress(_('changesets'), unit=_('chunks'),
303 303 total=expectedtotal)
304 304 self.callback = progress.increment
305 305
306 306 efiles = set()
307 307 def onchangelog(cl, node):
308 308 efiles.update(cl.readfiles(node))
309 309
310 310 self.changelogheader()
311 311 deltas = self.deltaiter()
312 312 cgnodes = cl.addgroup(deltas, csmap, trp, addrevisioncb=onchangelog)
313 313 efiles = len(efiles)
314 314
315 315 if not cgnodes:
316 316 repo.ui.develwarn('applied empty changegroup',
317 317 config='warn-empty-changegroup')
318 318 clend = len(cl)
319 319 changesets = clend - clstart
320 320 progress.complete()
321 321 self.callback = None
322 322
323 323 # pull off the manifest group
324 324 repo.ui.status(_("adding manifests\n"))
325 325 # We know that we'll never have more manifests than we had
326 326 # changesets.
327 327 progress = repo.ui.makeprogress(_('manifests'), unit=_('chunks'),
328 328 total=changesets)
329 329 self._unpackmanifests(repo, revmap, trp, progress)
330 330
331 331 needfiles = {}
332 332 if repo.ui.configbool('server', 'validate'):
333 333 cl = repo.changelog
334 334 ml = repo.manifestlog
335 335 # validate incoming csets have their manifests
336 336 for cset in pycompat.xrange(clstart, clend):
337 337 mfnode = cl.changelogrevision(cset).manifest
338 338 mfest = ml[mfnode].readdelta()
339 339 # store file cgnodes we must see
340 340 for f, n in mfest.iteritems():
341 341 needfiles.setdefault(f, set()).add(n)
342 342
343 343 # process the files
344 344 repo.ui.status(_("adding file changes\n"))
345 345 newrevs, newfiles = _addchangegroupfiles(
346 346 repo, self, revmap, trp, efiles, needfiles)
347 347 revisions += newrevs
348 348 files += newfiles
349 349
350 350 deltaheads = 0
351 351 if oldheads:
352 352 heads = cl.heads()
353 353 deltaheads = len(heads) - len(oldheads)
354 354 for h in heads:
355 355 if h not in oldheads and repo[h].closesbranch():
356 356 deltaheads -= 1
357 357 htext = ""
358 358 if deltaheads:
359 359 htext = _(" (%+d heads)") % deltaheads
360 360
361 361 repo.ui.status(_("added %d changesets"
362 362 " with %d changes to %d files%s\n")
363 363 % (changesets, revisions, files, htext))
364 364 repo.invalidatevolatilesets()
365 365
366 366 if changesets > 0:
367 367 if 'node' not in tr.hookargs:
368 368 tr.hookargs['node'] = hex(cl.node(clstart))
369 369 tr.hookargs['node_last'] = hex(cl.node(clend - 1))
370 370 hookargs = dict(tr.hookargs)
371 371 else:
372 372 hookargs = dict(tr.hookargs)
373 373 hookargs['node'] = hex(cl.node(clstart))
374 374 hookargs['node_last'] = hex(cl.node(clend - 1))
375 375 repo.hook('pretxnchangegroup',
376 376 throw=True, **pycompat.strkwargs(hookargs))
377 377
378 378 added = [cl.node(r) for r in pycompat.xrange(clstart, clend)]
379 379 phaseall = None
380 380 if srctype in ('push', 'serve'):
381 381 # Old servers can not push the boundary themselves.
382 382 # New servers won't push the boundary if changeset already
383 383 # exists locally as secret
384 384 #
385 385 # We should not use added here but the list of all change in
386 386 # the bundle
387 387 if repo.publishing():
388 388 targetphase = phaseall = phases.public
389 389 else:
390 390 # closer target phase computation
391 391
392 392 # Those changesets have been pushed from the
393 393 # outside, their phases are going to be pushed
394 394 # alongside. Therefore `targetphase` is
395 395 # ignored.
396 396 targetphase = phaseall = phases.draft
397 397 if added:
398 398 phases.registernew(repo, tr, targetphase, added)
399 399 if phaseall is not None:
400 400 phases.advanceboundary(repo, tr, phaseall, cgnodes)
401 401
402 402 if changesets > 0:
403 403
404 404 def runhooks():
405 405 # These hooks run when the lock releases, not when the
406 406 # transaction closes. So it's possible for the changelog
407 407 # to have changed since we last saw it.
408 408 if clstart >= len(repo):
409 409 return
410 410
411 411 repo.hook("changegroup", **pycompat.strkwargs(hookargs))
412 412
413 413 for n in added:
414 414 args = hookargs.copy()
415 415 args['node'] = hex(n)
416 416 del args['node_last']
417 417 repo.hook("incoming", **pycompat.strkwargs(args))
418 418
419 419 newheads = [h for h in repo.heads()
420 420 if h not in oldheads]
421 421 repo.ui.log("incoming",
422 422 "%d incoming changes - new heads: %s\n",
423 423 len(added),
424 424 ', '.join([hex(c[:6]) for c in newheads]))
425 425
426 426 tr.addpostclose('changegroup-runhooks-%020i' % clstart,
427 427 lambda tr: repo._afterlock(runhooks))
428 428 finally:
429 429 repo.ui.flush()
430 430 # never return 0 here:
431 431 if deltaheads < 0:
432 432 ret = deltaheads - 1
433 433 else:
434 434 ret = deltaheads + 1
435 435 return ret
436 436
437 437 def deltaiter(self):
438 438 """
439 439 returns an iterator of the deltas in this changegroup
440 440
441 441 Useful for passing to the underlying storage system to be stored.
442 442 """
443 443 chain = None
444 444 for chunkdata in iter(lambda: self.deltachunk(chain), {}):
445 445 # Chunkdata: (node, p1, p2, cs, deltabase, delta, flags)
446 446 yield chunkdata
447 447 chain = chunkdata[0]
448 448
449 449 class cg2unpacker(cg1unpacker):
450 450 """Unpacker for cg2 streams.
451 451
452 452 cg2 streams add support for generaldelta, so the delta header
453 453 format is slightly different. All other features about the data
454 454 remain the same.
455 455 """
456 456 deltaheader = _CHANGEGROUPV2_DELTA_HEADER
457 457 deltaheadersize = deltaheader.size
458 458 version = '02'
459 459
460 460 def _deltaheader(self, headertuple, prevnode):
461 461 node, p1, p2, deltabase, cs = headertuple
462 462 flags = 0
463 463 return node, p1, p2, deltabase, cs, flags
464 464
465 465 class cg3unpacker(cg2unpacker):
466 466 """Unpacker for cg3 streams.
467 467
468 468 cg3 streams add support for exchanging treemanifests and revlog
469 469 flags. It adds the revlog flags to the delta header and an empty chunk
470 470 separating manifests and files.
471 471 """
472 472 deltaheader = _CHANGEGROUPV3_DELTA_HEADER
473 473 deltaheadersize = deltaheader.size
474 474 version = '03'
475 475 _grouplistcount = 2 # One list of manifests and one list of files
476 476
477 477 def _deltaheader(self, headertuple, prevnode):
478 478 node, p1, p2, deltabase, cs, flags = headertuple
479 479 return node, p1, p2, deltabase, cs, flags
480 480
481 481 def _unpackmanifests(self, repo, revmap, trp, prog):
482 482 super(cg3unpacker, self)._unpackmanifests(repo, revmap, trp, prog)
483 483 for chunkdata in iter(self.filelogheader, {}):
484 484 # If we get here, there are directory manifests in the changegroup
485 485 d = chunkdata["filename"]
486 486 repo.ui.debug("adding %s revisions\n" % d)
487 487 dirlog = repo.manifestlog._revlog.dirlog(d)
488 488 deltas = self.deltaiter()
489 489 if not dirlog.addgroup(deltas, revmap, trp):
490 490 raise error.Abort(_("received dir revlog group is empty"))
491 491
492 492 class headerlessfixup(object):
493 493 def __init__(self, fh, h):
494 494 self._h = h
495 495 self._fh = fh
496 496 def read(self, n):
497 497 if self._h:
498 498 d, self._h = self._h[:n], self._h[n:]
499 499 if len(d) < n:
500 500 d += readexactly(self._fh, n - len(d))
501 501 return d
502 502 return readexactly(self._fh, n)
503 503
504 504 @attr.s(slots=True, frozen=True)
505 505 class revisiondelta(object):
506 506 """Describes a delta entry in a changegroup.
507 507
508 508 Captured data is sufficient to serialize the delta into multiple
509 509 formats.
510
511 ``revision`` and ``delta`` are mutually exclusive.
510 512 """
511 513 # 20 byte node of this revision.
512 514 node = attr.ib()
513 515 # 20 byte nodes of parent revisions.
514 516 p1node = attr.ib()
515 517 p2node = attr.ib()
516 518 # 20 byte node of node this delta is against.
517 519 basenode = attr.ib()
518 520 # 20 byte node of changeset revision this delta is associated with.
519 521 linknode = attr.ib()
520 522 # 2 bytes of flags to apply to revision data.
521 523 flags = attr.ib()
522 # Iterable of chunks holding raw delta data.
523 deltachunks = attr.ib()
524 # Size of base revision this delta is against. May be None if
525 # basenode is nullid.
526 baserevisionsize = attr.ib()
527 # Raw fulltext revision data.
528 revision = attr.ib()
529 # Delta between the basenode and node.
530 delta = attr.ib()
524 531
525 532 def _revisiondeltatochunks(delta, headerfn):
526 533 """Serialize a revisiondelta to changegroup chunks."""
534
535 # The captured revision delta may be encoded as a delta against
536 # a base revision or as a full revision. The changegroup format
537 # requires that everything on the wire be deltas. So for full
538 # revisions, we need to invent a header that says to rewrite
539 # data.
540
541 if delta.delta is not None:
542 prefix, data = b'', delta.delta
543 elif delta.basenode == nullid:
544 data = delta.revision
545 prefix = mdiff.trivialdiffheader(len(data))
546 else:
547 data = delta.revision
548 prefix = mdiff.replacediffheader(delta.baserevisionsize,
549 len(data))
550
527 551 meta = headerfn(delta)
528 l = len(meta) + sum(len(x) for x in delta.deltachunks)
529 yield chunkheader(l)
552
553 yield chunkheader(len(meta) + len(prefix) + len(data))
530 554 yield meta
531 for x in delta.deltachunks:
532 yield x
555 if prefix:
556 yield prefix
557 yield data
533 558
534 559 def _sortnodesnormal(store, nodes, reorder):
535 560 """Sort nodes for changegroup generation and turn into revnums."""
536 561 # for generaldelta revlogs, we linearize the revs; this will both be
537 562 # much quicker and generate a much smaller bundle
538 563 if (store._generaldelta and reorder is None) or reorder:
539 564 dag = dagutil.revlogdag(store)
540 565 return dag.linearize(set(store.rev(n) for n in nodes))
541 566 else:
542 567 return sorted([store.rev(n) for n in nodes])
543 568
544 569 def _sortnodesellipsis(store, nodes, cl, lookup):
545 570 """Sort nodes for changegroup generation and turn into revnums."""
546 571 # Ellipses serving mode.
547 572 #
548 573 # In a perfect world, we'd generate better ellipsis-ified graphs
549 574 # for non-changelog revlogs. In practice, we haven't started doing
550 575 # that yet, so the resulting DAGs for the manifestlog and filelogs
551 576 # are actually full of bogus parentage on all the ellipsis
552 577 # nodes. This has the side effect that, while the contents are
553 578 # correct, the individual DAGs might be completely out of whack in
554 579 # a case like 882681bc3166 and its ancestors (back about 10
555 580 # revisions or so) in the main hg repo.
556 581 #
557 582 # The one invariant we *know* holds is that the new (potentially
558 583 # bogus) DAG shape will be valid if we order the nodes in the
559 584 # order that they're introduced in dramatis personae by the
560 585 # changelog, so what we do is we sort the non-changelog histories
561 586 # by the order in which they are used by the changelog.
562 587 key = lambda n: cl.rev(lookup(n))
563 588 return [store.rev(n) for n in sorted(nodes, key=key)]
564 589
565 590 def _revisiondeltanormal(store, rev, prev, linknode, deltaparentfn):
566 591 """Construct a revision delta for non-ellipses changegroup generation."""
567 592 node = store.node(rev)
568 593 p1, p2 = store.parentrevs(rev)
569 594 base = deltaparentfn(store, rev, p1, p2, prev)
570 595
571 prefix = ''
596 revision = None
597 delta = None
598 baserevisionsize = None
599
572 600 if store.iscensored(base) or store.iscensored(rev):
573 601 try:
574 delta = store.revision(node, raw=True)
602 revision = store.revision(node, raw=True)
575 603 except error.CensoredNodeError as e:
576 delta = e.tombstone
577 if base == nullrev:
578 prefix = mdiff.trivialdiffheader(len(delta))
579 else:
580 baselen = store.rawsize(base)
581 prefix = mdiff.replacediffheader(baselen, len(delta))
604 revision = e.tombstone
605
606 if base != nullrev:
607 baserevisionsize = store.rawsize(base)
608
582 609 elif base == nullrev:
583 delta = store.revision(node, raw=True)
584 prefix = mdiff.trivialdiffheader(len(delta))
610 revision = store.revision(node, raw=True)
585 611 else:
586 612 delta = store.revdiff(base, rev)
613
587 614 p1n, p2n = store.parents(node)
588 615
589 616 return revisiondelta(
590 617 node=node,
591 618 p1node=p1n,
592 619 p2node=p2n,
593 620 basenode=store.node(base),
594 621 linknode=linknode,
595 622 flags=store.flags(rev),
596 deltachunks=(prefix, delta),
623 baserevisionsize=baserevisionsize,
624 revision=revision,
625 delta=delta,
597 626 )
598 627
599 628 def _revisiondeltanarrow(cl, store, ischangelog, rev, linkrev,
600 629 linknode, clrevtolocalrev, fullclnodes,
601 630 precomputedellipsis):
602 631 linkparents = precomputedellipsis[linkrev]
603 632 def local(clrev):
604 633 """Turn a changelog revnum into a local revnum.
605 634
606 635 The ellipsis dag is stored as revnums on the changelog,
607 636 but when we're producing ellipsis entries for
608 637 non-changelog revlogs, we need to turn those numbers into
609 638 something local. This does that for us, and during the
610 639 changelog sending phase will also expand the stored
611 640 mappings as needed.
612 641 """
613 642 if clrev == nullrev:
614 643 return nullrev
615 644
616 645 if ischangelog:
617 646 return clrev
618 647
619 648 # Walk the ellipsis-ized changelog breadth-first looking for a
620 649 # change that has been linked from the current revlog.
621 650 #
622 651 # For a flat manifest revlog only a single step should be necessary
623 652 # as all relevant changelog entries are relevant to the flat
624 653 # manifest.
625 654 #
626 655 # For a filelog or tree manifest dirlog however not every changelog
627 656 # entry will have been relevant, so we need to skip some changelog
628 657 # nodes even after ellipsis-izing.
629 658 walk = [clrev]
630 659 while walk:
631 660 p = walk[0]
632 661 walk = walk[1:]
633 662 if p in clrevtolocalrev:
634 663 return clrevtolocalrev[p]
635 664 elif p in fullclnodes:
636 665 walk.extend([pp for pp in cl.parentrevs(p)
637 666 if pp != nullrev])
638 667 elif p in precomputedellipsis:
639 668 walk.extend([pp for pp in precomputedellipsis[p]
640 669 if pp != nullrev])
641 670 else:
642 671 # In this case, we've got an ellipsis with parents
643 672 # outside the current bundle (likely an
644 673 # incremental pull). We "know" that we can use the
645 674 # value of this same revlog at whatever revision
646 675 # is pointed to by linknode. "Know" is in scare
647 676 # quotes because I haven't done enough examination
648 677 # of edge cases to convince myself this is really
649 678 # a fact - it works for all the (admittedly
650 679 # thorough) cases in our testsuite, but I would be
651 680 # somewhat unsurprised to find a case in the wild
652 681 # where this breaks down a bit. That said, I don't
653 682 # know if it would hurt anything.
654 683 for i in pycompat.xrange(rev, 0, -1):
655 684 if store.linkrev(i) == clrev:
656 685 return i
657 686 # We failed to resolve a parent for this node, so
658 687 # we crash the changegroup construction.
659 688 raise error.Abort(
660 689 'unable to resolve parent while packing %r %r'
661 690 ' for changeset %r' % (store.indexfile, rev, clrev))
662 691
663 692 return nullrev
664 693
665 694 if not linkparents or (
666 695 store.parentrevs(rev) == (nullrev, nullrev)):
667 696 p1, p2 = nullrev, nullrev
668 697 elif len(linkparents) == 1:
669 698 p1, = sorted(local(p) for p in linkparents)
670 699 p2 = nullrev
671 700 else:
672 701 p1, p2 = sorted(local(p) for p in linkparents)
673 702
674 703 n = store.node(rev)
675 704 p1n, p2n = store.node(p1), store.node(p2)
676 705 flags = store.flags(rev)
677 706 flags |= revlog.REVIDX_ELLIPSIS
678 707
679 708 # TODO: try and actually send deltas for ellipsis data blocks
680 data = store.revision(n)
681 diffheader = mdiff.trivialdiffheader(len(data))
682 709
683 710 return revisiondelta(
684 711 node=n,
685 712 p1node=p1n,
686 713 p2node=p2n,
687 714 basenode=nullid,
688 715 linknode=linknode,
689 716 flags=flags,
690 deltachunks=(diffheader, data),
717 baserevisionsize=None,
718 revision=store.revision(n),
719 delta=None,
691 720 )
692 721
693 722 def deltagroup(repo, revs, store, ischangelog, lookup, deltaparentfn,
694 723 units=None,
695 724 ellipses=False, clrevtolocalrev=None, fullclnodes=None,
696 725 precomputedellipsis=None):
697 726 """Calculate deltas for a set of revisions.
698 727
699 728 Is a generator of ``revisiondelta`` instances.
700 729
701 730 If units is not None, progress detail will be generated, units specifies
702 731 the type of revlog that is touched (changelog, manifest, etc.).
703 732 """
704 733 if not revs:
705 734 return
706 735
707 736 cl = repo.changelog
708 737
709 738 # Add the parent of the first rev.
710 739 revs.insert(0, store.parentrevs(revs[0])[0])
711 740
712 741 # build deltas
713 742 progress = None
714 743 if units is not None:
715 744 progress = repo.ui.makeprogress(_('bundling'), unit=units,
716 745 total=(len(revs) - 1))
717 746
718 747 for i in pycompat.xrange(len(revs) - 1):
719 748 if progress:
720 749 progress.update(i + 1)
721 750
722 751 prev = revs[i]
723 752 curr = revs[i + 1]
724 753
725 754 linknode = lookup(store.node(curr))
726 755
727 756 if ellipses:
728 757 linkrev = cl.rev(linknode)
729 758 clrevtolocalrev[linkrev] = curr
730 759
731 760 # This is a node to send in full, because the changeset it
732 761 # corresponds to was a full changeset.
733 762 if linknode in fullclnodes:
734 763 delta = _revisiondeltanormal(store, curr, prev, linknode,
735 764 deltaparentfn)
736 765 elif linkrev not in precomputedellipsis:
737 766 delta = None
738 767 else:
739 768 delta = _revisiondeltanarrow(
740 769 cl, store, ischangelog, curr, linkrev, linknode,
741 770 clrevtolocalrev, fullclnodes,
742 771 precomputedellipsis)
743 772 else:
744 773 delta = _revisiondeltanormal(store, curr, prev, linknode,
745 774 deltaparentfn)
746 775
747 776 if delta:
748 777 yield delta
749 778
750 779 if progress:
751 780 progress.complete()
752 781
753 782 class cgpacker(object):
754 783 def __init__(self, repo, filematcher, version, allowreorder,
755 784 deltaparentfn, builddeltaheader, manifestsend,
756 785 bundlecaps=None, ellipses=False,
757 786 shallow=False, ellipsisroots=None, fullnodes=None):
758 787 """Given a source repo, construct a bundler.
759 788
760 789 filematcher is a matcher that matches on files to include in the
761 790 changegroup. Used to facilitate sparse changegroups.
762 791
763 792 allowreorder controls whether reordering of revisions is allowed.
764 793 This value is used when ``bundle.reorder`` is ``auto`` or isn't
765 794 set.
766 795
767 796 deltaparentfn is a callable that resolves the delta parent for
768 797 a specific revision.
769 798
770 799 builddeltaheader is a callable that constructs the header for a group
771 800 delta.
772 801
773 802 manifestsend is a chunk to send after manifests have been fully emitted.
774 803
775 804 ellipses indicates whether ellipsis serving mode is enabled.
776 805
777 806 bundlecaps is optional and can be used to specify the set of
778 807 capabilities which can be used to build the bundle. While bundlecaps is
779 808 unused in core Mercurial, extensions rely on this feature to communicate
780 809 capabilities to customize the changegroup packer.
781 810
782 811 shallow indicates whether shallow data might be sent. The packer may
783 812 need to pack file contents not introduced by the changes being packed.
784 813
785 814 fullnodes is the set of changelog nodes which should not be ellipsis
786 815 nodes. We store this rather than the set of nodes that should be
787 816 ellipsis because for very large histories we expect this to be
788 817 significantly smaller.
789 818 """
790 819 assert filematcher
791 820 self._filematcher = filematcher
792 821
793 822 self.version = version
794 823 self._deltaparentfn = deltaparentfn
795 824 self._builddeltaheader = builddeltaheader
796 825 self._manifestsend = manifestsend
797 826 self._ellipses = ellipses
798 827
799 828 # Set of capabilities we can use to build the bundle.
800 829 if bundlecaps is None:
801 830 bundlecaps = set()
802 831 self._bundlecaps = bundlecaps
803 832 self._isshallow = shallow
804 833 self._fullclnodes = fullnodes
805 834
806 835 # Maps ellipsis revs to their roots at the changelog level.
807 836 self._precomputedellipsis = ellipsisroots
808 837
809 838 # experimental config: bundle.reorder
810 839 reorder = repo.ui.config('bundle', 'reorder')
811 840 if reorder == 'auto':
812 841 self._reorder = allowreorder
813 842 else:
814 843 self._reorder = stringutil.parsebool(reorder)
815 844
816 845 self._repo = repo
817 846
818 847 if self._repo.ui.verbose and not self._repo.ui.debugflag:
819 848 self._verbosenote = self._repo.ui.note
820 849 else:
821 850 self._verbosenote = lambda s: None
822 851
823 852 def generate(self, commonrevs, clnodes, fastpathlinkrev, source):
824 853 """Yield a sequence of changegroup byte chunks."""
825 854
826 855 repo = self._repo
827 856 cl = repo.changelog
828 857
829 858 self._verbosenote(_('uncompressed size of bundle content:\n'))
830 859 size = 0
831 860
832 861 clstate, deltas = self._generatechangelog(cl, clnodes)
833 862 for delta in deltas:
834 863 for chunk in _revisiondeltatochunks(delta, self._builddeltaheader):
835 864 size += len(chunk)
836 865 yield chunk
837 866
838 867 close = closechunk()
839 868 size += len(close)
840 869 yield closechunk()
841 870
842 871 self._verbosenote(_('%8.i (changelog)\n') % size)
843 872
844 873 clrevorder = clstate['clrevorder']
845 874 mfs = clstate['mfs']
846 875 changedfiles = clstate['changedfiles']
847 876
848 877 # We need to make sure that the linkrev in the changegroup refers to
849 878 # the first changeset that introduced the manifest or file revision.
850 879 # The fastpath is usually safer than the slowpath, because the filelogs
851 880 # are walked in revlog order.
852 881 #
853 882 # When taking the slowpath with reorder=None and the manifest revlog
854 883 # uses generaldelta, the manifest may be walked in the "wrong" order.
855 884 # Without 'clrevorder', we would get an incorrect linkrev (see fix in
856 885 # cc0ff93d0c0c).
857 886 #
858 887 # When taking the fastpath, we are only vulnerable to reordering
859 888 # of the changelog itself. The changelog never uses generaldelta, so
860 889 # it is only reordered when reorder=True. To handle this case, we
861 890 # simply take the slowpath, which already has the 'clrevorder' logic.
862 891 # This was also fixed in cc0ff93d0c0c.
863 892 fastpathlinkrev = fastpathlinkrev and not self._reorder
864 893 # Treemanifests don't work correctly with fastpathlinkrev
865 894 # either, because we don't discover which directory nodes to
866 895 # send along with files. This could probably be fixed.
867 896 fastpathlinkrev = fastpathlinkrev and (
868 897 'treemanifest' not in repo.requirements)
869 898
870 899 fnodes = {} # needed file nodes
871 900
872 901 size = 0
873 902 it = self.generatemanifests(
874 903 commonrevs, clrevorder, fastpathlinkrev, mfs, fnodes, source,
875 904 clstate['clrevtomanifestrev'])
876 905
877 906 for dir, deltas in it:
878 907 if dir:
879 908 assert self.version == b'03'
880 909 chunk = _fileheader(dir)
881 910 size += len(chunk)
882 911 yield chunk
883 912
884 913 for delta in deltas:
885 914 chunks = _revisiondeltatochunks(delta, self._builddeltaheader)
886 915 for chunk in chunks:
887 916 size += len(chunk)
888 917 yield chunk
889 918
890 919 close = closechunk()
891 920 size += len(close)
892 921 yield close
893 922
894 923 self._verbosenote(_('%8.i (manifests)\n') % size)
895 924 yield self._manifestsend
896 925
897 926 mfdicts = None
898 927 if self._ellipses and self._isshallow:
899 928 mfdicts = [(self._repo.manifestlog[n].read(), lr)
900 929 for (n, lr) in mfs.iteritems()]
901 930
902 931 mfs.clear()
903 932 clrevs = set(cl.rev(x) for x in clnodes)
904 933
905 934 it = self.generatefiles(changedfiles, commonrevs,
906 935 source, mfdicts, fastpathlinkrev,
907 936 fnodes, clrevs)
908 937
909 938 for path, deltas in it:
910 939 h = _fileheader(path)
911 940 size = len(h)
912 941 yield h
913 942
914 943 for delta in deltas:
915 944 chunks = _revisiondeltatochunks(delta, self._builddeltaheader)
916 945 for chunk in chunks:
917 946 size += len(chunk)
918 947 yield chunk
919 948
920 949 close = closechunk()
921 950 size += len(close)
922 951 yield close
923 952
924 953 self._verbosenote(_('%8.i %s\n') % (size, path))
925 954
926 955 yield closechunk()
927 956
928 957 if clnodes:
929 958 repo.hook('outgoing', node=hex(clnodes[0]), source=source)
930 959
    def _generatechangelog(self, cl, nodes):
        """Generate data for changelog chunks.

        ``cl`` is the changelog storage object; ``nodes`` are the changelog
        nodes to be emitted.

        Returns a 2-tuple of a dict containing state and an iterable of
        byte chunks. The state will not be fully populated until the
        chunk stream has been fully consumed, because it is filled in by
        the per-revision ``lookupcl`` callback as deltas are generated.
        State keys: 'clrevorder', 'mfs', 'changedfiles',
        'clrevtomanifestrev'.
        """
        clrevorder = {}
        mfs = {} # needed manifests
        mfl = self._repo.manifestlog
        # TODO violates storage abstraction.
        mfrevlog = mfl._revlog
        changedfiles = set()
        clrevtomanifestrev = {}

        # Callback for the changelog, used to collect changed files and
        # manifest nodes.
        # Returns the linkrev node (identity in the changelog case).
        def lookupcl(x):
            c = cl.read(x)
            # Record send order so later phases can pick the earliest
            # changeset introducing a manifest/file node.
            clrevorder[x] = len(clrevorder)

            if self._ellipses:
                # Only update mfs if x is going to be sent. Otherwise we
                # end up with bogus linkrevs specified for manifests and
                # we skip some manifest nodes that we should otherwise
                # have sent.
                if (x in self._fullclnodes
                    or cl.rev(x) in self._precomputedellipsis):
                    n = c[0]
                    # Record the first changeset introducing this manifest
                    # version.
                    mfs.setdefault(n, x)
                    # Set this narrow-specific dict so we have the lowest
                    # manifest revnum to look up for this cl revnum. (Part of
                    # mapping changelog ellipsis parents to manifest ellipsis
                    # parents)
                    clrevtomanifestrev.setdefault(cl.rev(x), mfrevlog.rev(n))
                # We can't trust the changed files list in the changeset if the
                # client requested a shallow clone.
                if self._isshallow:
                    changedfiles.update(mfl[c[0]].read().keys())
                else:
                    changedfiles.update(c[3])
            else:

                n = c[0]
                # record the first changeset introducing this manifest version
                mfs.setdefault(n, x)
                # Record a complete list of potentially-changed files in
                # this manifest.
                changedfiles.update(c[3])

            return x

        # Changelog doesn't benefit from reordering revisions. So send out
        # revisions in store order.
        revs = sorted(cl.rev(n) for n in nodes)

        state = {
            'clrevorder': clrevorder,
            'mfs': mfs,
            'changedfiles': changedfiles,
            'clrevtomanifestrev': clrevtomanifestrev,
        }

        gen = deltagroup(
            self._repo, revs, cl, True, lookupcl,
            self._deltaparentfn,
            ellipses=self._ellipses,
            units=_('changesets'),
            clrevtolocalrev={},
            fullclnodes=self._fullclnodes,
            precomputedellipsis=self._precomputedellipsis)

        return state, gen
1007 1036
    def generatemanifests(self, commonrevs, clrevorder, fastpathlinkrev, mfs,
                          fnodes, source, clrevtolocalrev):
        """Returns an iterator of changegroup chunks containing manifests.

        Yields ``(dir, deltas)`` pairs; ``dir`` is '' for the root manifest
        and a trailing-slash path for tree submanifests. ``fnodes`` is
        populated as a side effect (filenode -> linkrev node) for the
        subsequent file phase.

        `source` is unused here, but is used by extensions like remotefilelog to
        change what is sent based in pulls vs pushes, etc.
        """
        repo = self._repo
        cl = repo.changelog
        mfl = repo.manifestlog
        dirlog = mfl._revlog.dirlog
        # Worklist of directory -> {node: linkrev node}; tree manifests
        # append subdirectories to it while the root is being processed.
        tmfnodes = {'': mfs}

        # Callback for the manifest, used to collect linkrevs for filelog
        # revisions.
        # Returns the linkrev node (collected in lookupcl).
        def makelookupmflinknode(dir, nodes):
            if fastpathlinkrev:
                assert not dir
                return mfs.__getitem__

            def lookupmflinknode(x):
                """Callback for looking up the linknode for manifests.

                Returns the linkrev node for the specified manifest.

                SIDE EFFECT:

                1) fclnodes gets populated with the list of relevant
                file nodes if we're not using fastpathlinkrev
                2) When treemanifests are in use, collects treemanifest nodes
                to send

                Note that this means manifests must be completely sent to
                the client before you can trust the list of files and
                treemanifests to send.
                """
                clnode = nodes[x]
                mdata = mfl.get(dir, x).readfast(shallow=True)
                for p, n, fl in mdata.iterentries():
                    if fl == 't': # subdirectory manifest
                        subdir = dir + p + '/'
                        tmfclnodes = tmfnodes.setdefault(subdir, {})
                        tmfclnode = tmfclnodes.setdefault(n, clnode)
                        # Keep the earliest changeset that introduced n.
                        if clrevorder[clnode] < clrevorder[tmfclnode]:
                            tmfclnodes[n] = clnode
                    else:
                        f = dir + p
                        fclnodes = fnodes.setdefault(f, {})
                        fclnode = fclnodes.setdefault(n, clnode)
                        if clrevorder[clnode] < clrevorder[fclnode]:
                            fclnodes[n] = clnode
                return clnode
            return lookupmflinknode

        while tmfnodes:
            dir, nodes = tmfnodes.popitem()
            store = dirlog(dir)

            if not self._filematcher.visitdir(store._dir[:-1] or '.'):
                prunednodes = []
            else:
                # Drop nodes the receiver already has (linkrev in common).
                frev, flr = store.rev, store.linkrev
                prunednodes = [n for n in nodes
                               if flr(frev(n)) not in commonrevs]

            if dir and not prunednodes:
                continue

            lookupfn = makelookupmflinknode(dir, nodes)

            if self._ellipses:
                revs = _sortnodesellipsis(store, prunednodes, cl,
                                          lookupfn)
            else:
                revs = _sortnodesnormal(store, prunednodes,
                                        self._reorder)

            deltas = deltagroup(
                self._repo, revs, store, False, lookupfn,
                self._deltaparentfn,
                ellipses=self._ellipses,
                units=_('manifests'),
                clrevtolocalrev=clrevtolocalrev,
                fullclnodes=self._fullclnodes,
                precomputedellipsis=self._precomputedellipsis)

            yield dir, deltas
1096 1125
    # The 'source' parameter is useful for extensions
    def generatefiles(self, changedfiles, commonrevs, source,
                      mfdicts, fastpathlinkrev, fnodes, clrevs):
        """Yield ``(path, deltas)`` pairs for the file revisions to send.

        ``changedfiles`` is filtered through the narrow matcher first.
        ``fnodes`` maps filename -> {filenode: linkrev node} as collected
        during the manifest phase; ``mfdicts`` is only set for shallow
        ellipsis clones.
        """
        changedfiles = list(filter(self._filematcher, changedfiles))

        if not fastpathlinkrev:
            def normallinknodes(unused, fname):
                return fnodes.get(fname, {})
        else:
            cln = self._repo.changelog.node

            def normallinknodes(store, fname):
                flinkrev = store.linkrev
                fnode = store.node
                revs = ((r, flinkrev(r)) for r in store)
                return dict((fnode(r), cln(lr))
                            for r, lr in revs if lr in clrevs)

        clrevtolocalrev = {}

        if self._isshallow:
            # In a shallow clone, the linknodes callback needs to also include
            # those file nodes that are in the manifests we sent but weren't
            # introduced by those manifests.
            commonctxs = [self._repo[c] for c in commonrevs]
            clrev = self._repo.changelog.rev

            # Defining this function has a side-effect of overriding the
            # function of the same name that was passed in as an argument.
            # TODO have caller pass in appropriate function.
            def linknodes(flog, fname):
                for c in commonctxs:
                    try:
                        fnode = c.filenode(fname)
                        clrevtolocalrev[c.rev()] = flog.rev(fnode)
                    except error.ManifestLookupError:
                        pass
                links = normallinknodes(flog, fname)
                if len(links) != len(mfdicts):
                    for mf, lr in mfdicts:
                        fnode = mf.get(fname, None)
                        if fnode in links:
                            # Prefer the earliest linkrev for this node.
                            links[fnode] = min(links[fnode], lr, key=clrev)
                        elif fnode:
                            links[fnode] = lr
                return links
        else:
            linknodes = normallinknodes

        repo = self._repo
        cl = repo.changelog
        progress = repo.ui.makeprogress(_('bundling'), unit=_('files'),
                                        total=len(changedfiles))
        for i, fname in enumerate(sorted(changedfiles)):
            filerevlog = repo.file(fname)
            if not filerevlog:
                raise error.Abort(_("empty or missing file data for %s") %
                                  fname)

            clrevtolocalrev.clear()

            linkrevnodes = linknodes(filerevlog, fname)
            # Lookup for filenodes, we collected the linkrev nodes above in the
            # fastpath case and with lookupmf in the slowpath case.
            def lookupfilelog(x):
                return linkrevnodes[x]

            # Skip file nodes whose linkrev the receiver already has.
            frev, flr = filerevlog.rev, filerevlog.linkrev
            filenodes = [n for n in linkrevnodes
                         if flr(frev(n)) not in commonrevs]

            if filenodes:
                if self._ellipses:
                    revs = _sortnodesellipsis(filerevlog, filenodes,
                                              cl, lookupfilelog)
                else:
                    revs = _sortnodesnormal(filerevlog, filenodes,
                                            self._reorder)

                progress.update(i + 1, item=fname)

                deltas = deltagroup(
                    self._repo, revs, filerevlog, False, lookupfilelog,
                    self._deltaparentfn,
                    ellipses=self._ellipses,
                    clrevtolocalrev=clrevtolocalrev,
                    fullclnodes=self._fullclnodes,
                    precomputedellipsis=self._precomputedellipsis)

                yield fname, deltas

        progress.complete()
1189 1218
1190 1219 def _deltaparentprev(store, rev, p1, p2, prev):
1191 1220 """Resolve a delta parent to the previous revision.
1192 1221
1193 1222 Used for version 1 changegroups, which don't support generaldelta.
1194 1223 """
1195 1224 return prev
1196 1225
def _deltaparentgeneraldelta(store, rev, p1, p2, prev):
    """Resolve a delta parent when general deltas are supported."""
    dp = store.deltaparent(rev)

    if dp == nullrev:
        # Avoid sending full revisions when delta parent is null. Pick prev
        # in that case. It's tempting to pick p1 in this case, as p1 will
        # be smaller in the common case. However, computing a delta against
        # p1 may require resolving the raw text of p1, which could be
        # expensive. The revlog caches should have prev cached, meaning
        # less CPU for changegroup generation. There is likely room to add
        # a flag and/or config option to control this behavior.
        #
        # When the revlog does not store delta chains at all, it was
        # configured to use full snapshots for a reason: keep them.
        base = prev if store.storedeltachains else nullrev
    elif dp in (p1, p2, prev):
        base = dp
    else:
        # Pick prev when we can't be sure remote has the base revision.
        return prev

    # The base must actually be usable as a delta source.
    if base != nullrev and not store.candelta(base, rev):
        base = nullrev

    return base
1223 1252
1224 1253 def _deltaparentellipses(store, rev, p1, p2, prev):
1225 1254 """Resolve a delta parent when in ellipses mode."""
1226 1255 # TODO: send better deltas when in narrow mode.
1227 1256 #
1228 1257 # changegroup.group() loops over revisions to send,
1229 1258 # including revisions we'll skip. What this means is that
1230 1259 # `prev` will be a potentially useless delta base for all
1231 1260 # ellipsis nodes, as the client likely won't have it. In
1232 1261 # the future we should do bookkeeping about which nodes
1233 1262 # have been sent to the client, and try to be
1234 1263 # significantly smarter about delta bases. This is
1235 1264 # slightly tricky because this same code has to work for
1236 1265 # all revlogs, and we don't have the linkrev/linknode here.
1237 1266 return p1
1238 1267
def _makecg1packer(repo, filematcher, bundlecaps, ellipses=False,
                   shallow=False, ellipsisroots=None, fullnodes=None):
    """Construct a cgpacker emitting version '01' changegroups."""
    def builddeltaheader(d):
        # cg1 headers carry no explicit delta base; the base is implied
        # to be the previously emitted revision.
        return _CHANGEGROUPV1_DELTA_HEADER.pack(
            d.node, d.p1node, d.p2node, d.linknode)

    return cgpacker(repo, filematcher, b'01',
                    deltaparentfn=_deltaparentprev,
                    allowreorder=None,
                    builddeltaheader=builddeltaheader,
                    manifestsend=b'',
                    bundlecaps=bundlecaps,
                    ellipses=ellipses,
                    shallow=shallow,
                    ellipsisroots=ellipsisroots,
                    fullnodes=fullnodes)
1254 1283
def _makecg2packer(repo, filematcher, bundlecaps, ellipses=False,
                   shallow=False, ellipsisroots=None, fullnodes=None):
    """Construct a cgpacker emitting version '02' changegroups."""
    def builddeltaheader(d):
        # cg2 adds an explicit delta base node to the header.
        return _CHANGEGROUPV2_DELTA_HEADER.pack(
            d.node, d.p1node, d.p2node, d.basenode, d.linknode)

    # Since generaldelta is directly supported by cg2, reordering
    # generally doesn't help, so we disable it by default (treating
    # bundle.reorder=auto just like bundle.reorder=False).
    return cgpacker(repo, filematcher, b'02',
                    deltaparentfn=_deltaparentgeneraldelta,
                    allowreorder=False,
                    builddeltaheader=builddeltaheader,
                    manifestsend=b'',
                    bundlecaps=bundlecaps,
                    ellipses=ellipses,
                    shallow=shallow,
                    ellipsisroots=ellipsisroots,
                    fullnodes=fullnodes)
1273 1302
def _makecg3packer(repo, filematcher, bundlecaps, ellipses=False,
                   shallow=False, ellipsisroots=None, fullnodes=None):
    """Construct a cgpacker emitting version '03' changegroups."""
    def builddeltaheader(d):
        # cg3 extends cg2's header with revlog flags.
        return _CHANGEGROUPV3_DELTA_HEADER.pack(
            d.node, d.p1node, d.p2node, d.basenode, d.linknode, d.flags)

    # Ellipsis serving needs its own delta-base policy because the
    # receiver may lack the revisions a generaldelta base points at.
    if ellipses:
        deltaparentfn = _deltaparentellipses
    else:
        deltaparentfn = _deltaparentgeneraldelta

    return cgpacker(repo, filematcher, b'03',
                    deltaparentfn=deltaparentfn,
                    allowreorder=False,
                    builddeltaheader=builddeltaheader,
                    manifestsend=closechunk(),
                    bundlecaps=bundlecaps,
                    ellipses=ellipses,
                    shallow=shallow,
                    ellipsisroots=ellipsisroots,
                    fullnodes=fullnodes)
1292 1321
# Maps changegroup version identifier to (packer factory, unpacker class).
_packermap = {'01': (_makecg1packer, cg1unpacker),
             # cg2 adds support for exchanging generaldelta
             '02': (_makecg2packer, cg2unpacker),
             # cg3 adds support for exchanging revlog flags and treemanifests
             '03': (_makecg3packer, cg3unpacker),
}
1299 1328
def allsupportedversions(repo):
    """Return the set of changegroup versions this code understands.

    Version '03' is only advertised when the repo opts in via the
    experimental config knobs or already requires treemanifests.
    """
    versions = set(_packermap.keys())
    needv03 = (repo.ui.configbool('experimental', 'changegroup3')
               or repo.ui.configbool('experimental', 'treemanifest')
               or 'treemanifest' in repo.requirements)
    if not needv03:
        versions.discard('03')
    return versions
1307 1336
def supportedincomingversions(repo):
    """Changegroup versions that can be applied to the repo."""
    return allsupportedversions(repo)
1311 1340
def supportedoutgoingversions(repo):
    """Changegroup versions that can be created from the repo."""
    versions = allsupportedversions(repo)

    if 'treemanifest' in repo.requirements:
        # Versions 01 and 02 support only flat manifests and it's just too
        # expensive to convert between the flat manifest and tree manifest on
        # the fly. Since tree manifests are hashed differently, all of history
        # would have to be converted. Instead, we simply don't even pretend to
        # support versions 01 and 02.
        versions.difference_update(('01', '02'))

    if repository.NARROW_REQUIREMENT in repo.requirements:
        # Versions 01 and 02 don't support revlog flags, and we need to
        # support that for stripping and unbundling to work.
        versions.difference_update(('01', '02'))

    if LFS_REQUIREMENT in repo.requirements:
        # Versions 01 and 02 don't support revlog flags, and we need to
        # mark LFS entries with REVIDX_EXTSTORED.
        versions.difference_update(('01', '02'))

    return versions
1335 1364
def localversion(repo):
    """Best changegroup version for bundles meant to be used locally.

    Strip, shelve and temporary bundles never cross the wire, so the
    richest version the repo can produce is always the right choice.
    """
    return max(supportedoutgoingversions(repo))
1340 1369
def safeversion(repo):
    """Smallest version it's safe to assume clients of the repo support.

    For example, every hg version that supports generaldelta also
    supports changegroup 02, so '01' can be dropped in that case.
    """
    candidates = supportedoutgoingversions(repo)
    if 'generaldelta' in repo.requirements:
        candidates.discard('01')
    assert candidates
    return min(candidates)
1350 1379
def getbundler(version, repo, bundlecaps=None, filematcher=None,
               ellipses=False, shallow=False, ellipsisroots=None,
               fullnodes=None):
    """Obtain a changegroup packer for ``version``.

    Validates that the requested version, file matcher and ellipsis
    settings are mutually compatible, then dispatches to the registered
    packer factory. Raises ProgrammingError for sparse matchers with
    version '01' and Abort when ellipses are requested below cg3.
    """
    assert version in supportedoutgoingversions(repo)

    if filematcher is None:
        filematcher = matchmod.alwaysmatcher(repo.root, '')

    if version == '01' and not filematcher.always():
        raise error.ProgrammingError('version 01 changegroups do not support '
                                     'sparse file matchers')

    if ellipses and version in (b'01', b'02'):
        raise error.Abort(
            _('ellipsis nodes require at least cg3 on client and server, '
              'but negotiated version %s') % version)

    # Requested files could include files not in the local store. So
    # filter those out.
    filematcher = matchmod.intersectmatchers(repo.narrowmatch(),
                                             filematcher)

    fn = _packermap[version][0]
    return fn(repo, filematcher, bundlecaps, ellipses=ellipses,
              shallow=shallow, ellipsisroots=ellipsisroots,
              fullnodes=fullnodes)
1377 1406
def getunbundler(version, fh, alg, extras=None):
    """Obtain an unpacker for changegroup ``version`` reading from ``fh``."""
    _packer, unpacker = _packermap[version]
    return unpacker(fh, alg, extras=extras)
1380 1409
def _changegroupinfo(repo, nodes, source):
    """Report the changesets being bundled to the user.

    A summary count is shown when verbose (or always for 'bundle'
    operations); the full node list is shown only in debug mode.
    """
    ui = repo.ui
    if ui.verbose or source == 'bundle':
        ui.status(_("%d changesets found\n") % len(nodes))
    if ui.debugflag:
        ui.debug("list of changesets:\n")
        for node in nodes:
            ui.debug("%s\n" % hex(node))
1388 1417
def makechangegroup(repo, outgoing, version, source, fastpath=False,
                    bundlecaps=None):
    """Build a changegroup and wrap it in an unbundler.

    Convenience for callers (e.g. strip/shelve) that want to re-apply
    the generated stream rather than write it to the wire.
    """
    stream = makestream(repo, outgoing, version, source,
                        fastpath=fastpath, bundlecaps=bundlecaps)
    extras = {'clcount': len(outgoing.missing)}
    return getunbundler(version, util.chunkbuffer(stream), None, extras)
1395 1424
def makestream(repo, outgoing, version, source, fastpath=False,
               bundlecaps=None, filematcher=None):
    """Return an iterator of changegroup chunks for ``outgoing`` revisions."""
    # The bundler must be constructed against the (possibly filtered)
    # repo before we unfilter below.
    bundler = getbundler(version, repo, bundlecaps=bundlecaps,
                         filematcher=filematcher)

    repo = repo.unfiltered()
    commonrevs = outgoing.common
    csets = outgoing.missing
    heads = outgoing.missingheads
    # We take the fast path if told to, or if all unfiltered heads have
    # been requested (since we then know all linkrevs will be pulled by
    # the client).
    heads.sort()
    fastpathlinkrev = fastpath or (
        repo.filtername is None and heads == sorted(repo.heads()))

    repo.hook('preoutgoing', throw=True, source=source)
    _changegroupinfo(repo, csets, source)
    return bundler.generate(commonrevs, csets, fastpathlinkrev, source)
1415 1444
def _addchangegroupfiles(repo, source, revmap, trp, expectedfiles, needfiles):
    """Apply the file portion of an incoming changegroup.

    ``source`` is the unbundler being consumed, ``revmap`` maps linkrev
    nodes to local revisions, ``trp`` is the transaction proxy,
    ``expectedfiles`` sizes the progress bar and ``needfiles`` maps
    filename -> set of nodes the manifests require.

    Returns a ``(revisions, files)`` tuple of added counts. Raises Abort
    on empty/spurious revlog groups, censored delta bases, or file data
    still missing after the stream is exhausted.
    """
    revisions = 0
    files = 0
    progress = repo.ui.makeprogress(_('files'), unit=_('files'),
                                    total=expectedfiles)
    # An empty dict from filelogheader() signals the end of the stream.
    for chunkdata in iter(source.filelogheader, {}):
        files += 1
        f = chunkdata["filename"]
        repo.ui.debug("adding %s revisions\n" % f)
        progress.increment()
        fl = repo.file(f)
        o = len(fl)
        try:
            deltas = source.deltaiter()
            if not fl.addgroup(deltas, revmap, trp):
                raise error.Abort(_("received file revlog group is empty"))
        except error.CensoredBaseError as e:
            raise error.Abort(_("received delta base is censored: %s") % e)
        revisions += len(fl) - o
        if f in needfiles:
            needs = needfiles[f]
            # Tick off the newly-added nodes against what the manifests
            # said we need; anything extra is an error.
            for new in pycompat.xrange(o, len(fl)):
                n = fl.node(new)
                if n in needs:
                    needs.remove(n)
                else:
                    raise error.Abort(
                        _("received spurious file revlog entry"))
            if not needs:
                del needfiles[f]
    progress.complete()

    # Anything left in needfiles was promised by a manifest but never
    # delivered: the repository would be inconsistent.
    for f, needs in needfiles.iteritems():
        fl = repo.file(f)
        for n in needs:
            try:
                fl.rev(n)
            except error.LookupError:
                raise error.Abort(
                    _('missing file data for %s:%s - run hg verify') %
                    (f, hex(n)))

    return revisions, files
General Comments 0
You need to be logged in to leave comments. Login now