##// END OF EJS Templates
changegroup: minor cleanups to deltagroup()...
Gregory Szorc -
r39051:ad9ecced default
parent child Browse files
Show More
@@ -1,1456 +1,1458 b''
1 1 # changegroup.py - Mercurial changegroup manipulation functions
2 2 #
3 3 # Copyright 2006 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import os
11 11 import struct
12 12 import weakref
13 13
14 14 from .i18n import _
15 15 from .node import (
16 16 hex,
17 17 nullid,
18 18 nullrev,
19 19 short,
20 20 )
21 21
22 22 from .thirdparty import (
23 23 attr,
24 24 )
25 25
26 26 from . import (
27 27 dagutil,
28 28 error,
29 29 match as matchmod,
30 30 mdiff,
31 31 phases,
32 32 pycompat,
33 33 repository,
34 34 revlog,
35 35 util,
36 36 )
37 37
38 38 from .utils import (
39 39 stringutil,
40 40 )
41 41
42 42 _CHANGEGROUPV1_DELTA_HEADER = struct.Struct("20s20s20s20s")
43 43 _CHANGEGROUPV2_DELTA_HEADER = struct.Struct("20s20s20s20s20s")
44 44 _CHANGEGROUPV3_DELTA_HEADER = struct.Struct(">20s20s20s20s20sH")
45 45
46 46 LFS_REQUIREMENT = 'lfs'
47 47
48 48 readexactly = util.readexactly
49 49
50 50 def getchunk(stream):
51 51 """return the next chunk from stream as a string"""
52 52 d = readexactly(stream, 4)
53 53 l = struct.unpack(">l", d)[0]
54 54 if l <= 4:
55 55 if l:
56 56 raise error.Abort(_("invalid chunk length %d") % l)
57 57 return ""
58 58 return readexactly(stream, l - 4)
59 59
60 60 def chunkheader(length):
61 61 """return a changegroup chunk header (string)"""
62 62 return struct.pack(">l", length + 4)
63 63
64 64 def closechunk():
65 65 """return a changegroup chunk header (string) for a zero-length chunk"""
66 66 return struct.pack(">l", 0)
67 67
68 68 def _fileheader(path):
69 69 """Obtain a changegroup chunk header for a named path."""
70 70 return chunkheader(len(path)) + path
71 71
72 72 def writechunks(ui, chunks, filename, vfs=None):
73 73 """Write chunks to a file and return its filename.
74 74
75 75 The stream is assumed to be a bundle file.
76 76 Existing files will not be overwritten.
77 77 If no filename is specified, a temporary file is created.
78 78 """
79 79 fh = None
80 80 cleanup = None
81 81 try:
82 82 if filename:
83 83 if vfs:
84 84 fh = vfs.open(filename, "wb")
85 85 else:
86 86 # Increase default buffer size because default is usually
87 87 # small (4k is common on Linux).
88 88 fh = open(filename, "wb", 131072)
89 89 else:
90 90 fd, filename = pycompat.mkstemp(prefix="hg-bundle-", suffix=".hg")
91 91 fh = os.fdopen(fd, r"wb")
92 92 cleanup = filename
93 93 for c in chunks:
94 94 fh.write(c)
95 95 cleanup = None
96 96 return filename
97 97 finally:
98 98 if fh is not None:
99 99 fh.close()
100 100 if cleanup is not None:
101 101 if filename and vfs:
102 102 vfs.unlink(cleanup)
103 103 else:
104 104 os.unlink(cleanup)
105 105
106 106 class cg1unpacker(object):
107 107 """Unpacker for cg1 changegroup streams.
108 108
109 109 A changegroup unpacker handles the framing of the revision data in
110 110 the wire format. Most consumers will want to use the apply()
111 111 method to add the changes from the changegroup to a repository.
112 112
113 113 If you're forwarding a changegroup unmodified to another consumer,
114 114 use getchunks(), which returns an iterator of changegroup
115 115 chunks. This is mostly useful for cases where you need to know the
116 116 data stream has ended by observing the end of the changegroup.
117 117
118 118 deltachunk() is useful only if you're applying delta data. Most
119 119 consumers should prefer apply() instead.
120 120
121 121 A few other public methods exist. Those are used only for
122 122 bundlerepo and some debug commands - their use is discouraged.
123 123 """
124 124 deltaheader = _CHANGEGROUPV1_DELTA_HEADER
125 125 deltaheadersize = deltaheader.size
126 126 version = '01'
127 127 _grouplistcount = 1 # One list of files after the manifests
128 128
129 129 def __init__(self, fh, alg, extras=None):
130 130 if alg is None:
131 131 alg = 'UN'
132 132 if alg not in util.compengines.supportedbundletypes:
133 133 raise error.Abort(_('unknown stream compression type: %s')
134 134 % alg)
135 135 if alg == 'BZ':
136 136 alg = '_truncatedBZ'
137 137
138 138 compengine = util.compengines.forbundletype(alg)
139 139 self._stream = compengine.decompressorreader(fh)
140 140 self._type = alg
141 141 self.extras = extras or {}
142 142 self.callback = None
143 143
144 144 # These methods (compressed, read, seek, tell) all appear to only
145 145 # be used by bundlerepo, but it's a little hard to tell.
146 146 def compressed(self):
147 147 return self._type is not None and self._type != 'UN'
148 148 def read(self, l):
149 149 return self._stream.read(l)
150 150 def seek(self, pos):
151 151 return self._stream.seek(pos)
152 152 def tell(self):
153 153 return self._stream.tell()
154 154 def close(self):
155 155 return self._stream.close()
156 156
157 157 def _chunklength(self):
158 158 d = readexactly(self._stream, 4)
159 159 l = struct.unpack(">l", d)[0]
160 160 if l <= 4:
161 161 if l:
162 162 raise error.Abort(_("invalid chunk length %d") % l)
163 163 return 0
164 164 if self.callback:
165 165 self.callback()
166 166 return l - 4
167 167
168 168 def changelogheader(self):
169 169 """v10 does not have a changelog header chunk"""
170 170 return {}
171 171
172 172 def manifestheader(self):
173 173 """v10 does not have a manifest header chunk"""
174 174 return {}
175 175
176 176 def filelogheader(self):
177 177 """return the header of the filelogs chunk, v10 only has the filename"""
178 178 l = self._chunklength()
179 179 if not l:
180 180 return {}
181 181 fname = readexactly(self._stream, l)
182 182 return {'filename': fname}
183 183
184 184 def _deltaheader(self, headertuple, prevnode):
185 185 node, p1, p2, cs = headertuple
186 186 if prevnode is None:
187 187 deltabase = p1
188 188 else:
189 189 deltabase = prevnode
190 190 flags = 0
191 191 return node, p1, p2, deltabase, cs, flags
192 192
193 193 def deltachunk(self, prevnode):
194 194 l = self._chunklength()
195 195 if not l:
196 196 return {}
197 197 headerdata = readexactly(self._stream, self.deltaheadersize)
198 198 header = self.deltaheader.unpack(headerdata)
199 199 delta = readexactly(self._stream, l - self.deltaheadersize)
200 200 node, p1, p2, deltabase, cs, flags = self._deltaheader(header, prevnode)
201 201 return (node, p1, p2, cs, deltabase, delta, flags)
202 202
203 203 def getchunks(self):
204 204 """returns all the chunks contains in the bundle
205 205
206 206 Used when you need to forward the binary stream to a file or another
207 207 network API. To do so, it parse the changegroup data, otherwise it will
208 208 block in case of sshrepo because it don't know the end of the stream.
209 209 """
210 210 # For changegroup 1 and 2, we expect 3 parts: changelog, manifestlog,
211 211 # and a list of filelogs. For changegroup 3, we expect 4 parts:
212 212 # changelog, manifestlog, a list of tree manifestlogs, and a list of
213 213 # filelogs.
214 214 #
215 215 # Changelog and manifestlog parts are terminated with empty chunks. The
216 216 # tree and file parts are a list of entry sections. Each entry section
217 217 # is a series of chunks terminating in an empty chunk. The list of these
218 218 # entry sections is terminated in yet another empty chunk, so we know
219 219 # we've reached the end of the tree/file list when we reach an empty
220 220 # chunk that was proceeded by no non-empty chunks.
221 221
222 222 parts = 0
223 223 while parts < 2 + self._grouplistcount:
224 224 noentries = True
225 225 while True:
226 226 chunk = getchunk(self)
227 227 if not chunk:
228 228 # The first two empty chunks represent the end of the
229 229 # changelog and the manifestlog portions. The remaining
230 230 # empty chunks represent either A) the end of individual
231 231 # tree or file entries in the file list, or B) the end of
232 232 # the entire list. It's the end of the entire list if there
233 233 # were no entries (i.e. noentries is True).
234 234 if parts < 2:
235 235 parts += 1
236 236 elif noentries:
237 237 parts += 1
238 238 break
239 239 noentries = False
240 240 yield chunkheader(len(chunk))
241 241 pos = 0
242 242 while pos < len(chunk):
243 243 next = pos + 2**20
244 244 yield chunk[pos:next]
245 245 pos = next
246 246 yield closechunk()
247 247
248 248 def _unpackmanifests(self, repo, revmap, trp, prog):
249 249 self.callback = prog.increment
250 250 # no need to check for empty manifest group here:
251 251 # if the result of the merge of 1 and 2 is the same in 3 and 4,
252 252 # no new manifest will be created and the manifest group will
253 253 # be empty during the pull
254 254 self.manifestheader()
255 255 deltas = self.deltaiter()
256 256 repo.manifestlog.addgroup(deltas, revmap, trp)
257 257 prog.complete()
258 258 self.callback = None
259 259
260 260 def apply(self, repo, tr, srctype, url, targetphase=phases.draft,
261 261 expectedtotal=None):
262 262 """Add the changegroup returned by source.read() to this repo.
263 263 srctype is a string like 'push', 'pull', or 'unbundle'. url is
264 264 the URL of the repo where this changegroup is coming from.
265 265
266 266 Return an integer summarizing the change to this repo:
267 267 - nothing changed or no source: 0
268 268 - more heads than before: 1+added heads (2..n)
269 269 - fewer heads than before: -1-removed heads (-2..-n)
270 270 - number of heads stays the same: 1
271 271 """
272 272 repo = repo.unfiltered()
273 273 def csmap(x):
274 274 repo.ui.debug("add changeset %s\n" % short(x))
275 275 return len(cl)
276 276
277 277 def revmap(x):
278 278 return cl.rev(x)
279 279
280 280 changesets = files = revisions = 0
281 281
282 282 try:
283 283 # The transaction may already carry source information. In this
284 284 # case we use the top level data. We overwrite the argument
285 285 # because we need to use the top level value (if they exist)
286 286 # in this function.
287 287 srctype = tr.hookargs.setdefault('source', srctype)
288 288 url = tr.hookargs.setdefault('url', url)
289 289 repo.hook('prechangegroup',
290 290 throw=True, **pycompat.strkwargs(tr.hookargs))
291 291
292 292 # write changelog data to temp files so concurrent readers
293 293 # will not see an inconsistent view
294 294 cl = repo.changelog
295 295 cl.delayupdate(tr)
296 296 oldheads = set(cl.heads())
297 297
298 298 trp = weakref.proxy(tr)
299 299 # pull off the changeset group
300 300 repo.ui.status(_("adding changesets\n"))
301 301 clstart = len(cl)
302 302 progress = repo.ui.makeprogress(_('changesets'), unit=_('chunks'),
303 303 total=expectedtotal)
304 304 self.callback = progress.increment
305 305
306 306 efiles = set()
307 307 def onchangelog(cl, node):
308 308 efiles.update(cl.readfiles(node))
309 309
310 310 self.changelogheader()
311 311 deltas = self.deltaiter()
312 312 cgnodes = cl.addgroup(deltas, csmap, trp, addrevisioncb=onchangelog)
313 313 efiles = len(efiles)
314 314
315 315 if not cgnodes:
316 316 repo.ui.develwarn('applied empty changegroup',
317 317 config='warn-empty-changegroup')
318 318 clend = len(cl)
319 319 changesets = clend - clstart
320 320 progress.complete()
321 321 self.callback = None
322 322
323 323 # pull off the manifest group
324 324 repo.ui.status(_("adding manifests\n"))
325 325 # We know that we'll never have more manifests than we had
326 326 # changesets.
327 327 progress = repo.ui.makeprogress(_('manifests'), unit=_('chunks'),
328 328 total=changesets)
329 329 self._unpackmanifests(repo, revmap, trp, progress)
330 330
331 331 needfiles = {}
332 332 if repo.ui.configbool('server', 'validate'):
333 333 cl = repo.changelog
334 334 ml = repo.manifestlog
335 335 # validate incoming csets have their manifests
336 336 for cset in pycompat.xrange(clstart, clend):
337 337 mfnode = cl.changelogrevision(cset).manifest
338 338 mfest = ml[mfnode].readdelta()
339 339 # store file cgnodes we must see
340 340 for f, n in mfest.iteritems():
341 341 needfiles.setdefault(f, set()).add(n)
342 342
343 343 # process the files
344 344 repo.ui.status(_("adding file changes\n"))
345 345 newrevs, newfiles = _addchangegroupfiles(
346 346 repo, self, revmap, trp, efiles, needfiles)
347 347 revisions += newrevs
348 348 files += newfiles
349 349
350 350 deltaheads = 0
351 351 if oldheads:
352 352 heads = cl.heads()
353 353 deltaheads = len(heads) - len(oldheads)
354 354 for h in heads:
355 355 if h not in oldheads and repo[h].closesbranch():
356 356 deltaheads -= 1
357 357 htext = ""
358 358 if deltaheads:
359 359 htext = _(" (%+d heads)") % deltaheads
360 360
361 361 repo.ui.status(_("added %d changesets"
362 362 " with %d changes to %d files%s\n")
363 363 % (changesets, revisions, files, htext))
364 364 repo.invalidatevolatilesets()
365 365
366 366 if changesets > 0:
367 367 if 'node' not in tr.hookargs:
368 368 tr.hookargs['node'] = hex(cl.node(clstart))
369 369 tr.hookargs['node_last'] = hex(cl.node(clend - 1))
370 370 hookargs = dict(tr.hookargs)
371 371 else:
372 372 hookargs = dict(tr.hookargs)
373 373 hookargs['node'] = hex(cl.node(clstart))
374 374 hookargs['node_last'] = hex(cl.node(clend - 1))
375 375 repo.hook('pretxnchangegroup',
376 376 throw=True, **pycompat.strkwargs(hookargs))
377 377
378 378 added = [cl.node(r) for r in pycompat.xrange(clstart, clend)]
379 379 phaseall = None
380 380 if srctype in ('push', 'serve'):
381 381 # Old servers can not push the boundary themselves.
382 382 # New servers won't push the boundary if changeset already
383 383 # exists locally as secret
384 384 #
385 385 # We should not use added here but the list of all change in
386 386 # the bundle
387 387 if repo.publishing():
388 388 targetphase = phaseall = phases.public
389 389 else:
390 390 # closer target phase computation
391 391
392 392 # Those changesets have been pushed from the
393 393 # outside, their phases are going to be pushed
394 394 # alongside. Therefor `targetphase` is
395 395 # ignored.
396 396 targetphase = phaseall = phases.draft
397 397 if added:
398 398 phases.registernew(repo, tr, targetphase, added)
399 399 if phaseall is not None:
400 400 phases.advanceboundary(repo, tr, phaseall, cgnodes)
401 401
402 402 if changesets > 0:
403 403
404 404 def runhooks():
405 405 # These hooks run when the lock releases, not when the
406 406 # transaction closes. So it's possible for the changelog
407 407 # to have changed since we last saw it.
408 408 if clstart >= len(repo):
409 409 return
410 410
411 411 repo.hook("changegroup", **pycompat.strkwargs(hookargs))
412 412
413 413 for n in added:
414 414 args = hookargs.copy()
415 415 args['node'] = hex(n)
416 416 del args['node_last']
417 417 repo.hook("incoming", **pycompat.strkwargs(args))
418 418
419 419 newheads = [h for h in repo.heads()
420 420 if h not in oldheads]
421 421 repo.ui.log("incoming",
422 422 "%d incoming changes - new heads: %s\n",
423 423 len(added),
424 424 ', '.join([hex(c[:6]) for c in newheads]))
425 425
426 426 tr.addpostclose('changegroup-runhooks-%020i' % clstart,
427 427 lambda tr: repo._afterlock(runhooks))
428 428 finally:
429 429 repo.ui.flush()
430 430 # never return 0 here:
431 431 if deltaheads < 0:
432 432 ret = deltaheads - 1
433 433 else:
434 434 ret = deltaheads + 1
435 435 return ret
436 436
437 437 def deltaiter(self):
438 438 """
439 439 returns an iterator of the deltas in this changegroup
440 440
441 441 Useful for passing to the underlying storage system to be stored.
442 442 """
443 443 chain = None
444 444 for chunkdata in iter(lambda: self.deltachunk(chain), {}):
445 445 # Chunkdata: (node, p1, p2, cs, deltabase, delta, flags)
446 446 yield chunkdata
447 447 chain = chunkdata[0]
448 448
449 449 class cg2unpacker(cg1unpacker):
450 450 """Unpacker for cg2 streams.
451 451
452 452 cg2 streams add support for generaldelta, so the delta header
453 453 format is slightly different. All other features about the data
454 454 remain the same.
455 455 """
456 456 deltaheader = _CHANGEGROUPV2_DELTA_HEADER
457 457 deltaheadersize = deltaheader.size
458 458 version = '02'
459 459
460 460 def _deltaheader(self, headertuple, prevnode):
461 461 node, p1, p2, deltabase, cs = headertuple
462 462 flags = 0
463 463 return node, p1, p2, deltabase, cs, flags
464 464
465 465 class cg3unpacker(cg2unpacker):
466 466 """Unpacker for cg3 streams.
467 467
468 468 cg3 streams add support for exchanging treemanifests and revlog
469 469 flags. It adds the revlog flags to the delta header and an empty chunk
470 470 separating manifests and files.
471 471 """
472 472 deltaheader = _CHANGEGROUPV3_DELTA_HEADER
473 473 deltaheadersize = deltaheader.size
474 474 version = '03'
475 475 _grouplistcount = 2 # One list of manifests and one list of files
476 476
477 477 def _deltaheader(self, headertuple, prevnode):
478 478 node, p1, p2, deltabase, cs, flags = headertuple
479 479 return node, p1, p2, deltabase, cs, flags
480 480
481 481 def _unpackmanifests(self, repo, revmap, trp, prog):
482 482 super(cg3unpacker, self)._unpackmanifests(repo, revmap, trp, prog)
483 483 for chunkdata in iter(self.filelogheader, {}):
484 484 # If we get here, there are directory manifests in the changegroup
485 485 d = chunkdata["filename"]
486 486 repo.ui.debug("adding %s revisions\n" % d)
487 487 dirlog = repo.manifestlog._revlog.dirlog(d)
488 488 deltas = self.deltaiter()
489 489 if not dirlog.addgroup(deltas, revmap, trp):
490 490 raise error.Abort(_("received dir revlog group is empty"))
491 491
492 492 class headerlessfixup(object):
493 493 def __init__(self, fh, h):
494 494 self._h = h
495 495 self._fh = fh
496 496 def read(self, n):
497 497 if self._h:
498 498 d, self._h = self._h[:n], self._h[n:]
499 499 if len(d) < n:
500 500 d += readexactly(self._fh, n - len(d))
501 501 return d
502 502 return readexactly(self._fh, n)
503 503
504 504 @attr.s(slots=True, frozen=True)
505 505 class revisiondelta(object):
506 506 """Describes a delta entry in a changegroup.
507 507
508 508 Captured data is sufficient to serialize the delta into multiple
509 509 formats.
510 510 """
511 511 # 20 byte node of this revision.
512 512 node = attr.ib()
513 513 # 20 byte nodes of parent revisions.
514 514 p1node = attr.ib()
515 515 p2node = attr.ib()
516 516 # 20 byte node of node this delta is against.
517 517 basenode = attr.ib()
518 518 # 20 byte node of changeset revision this delta is associated with.
519 519 linknode = attr.ib()
520 520 # 2 bytes of flags to apply to revision data.
521 521 flags = attr.ib()
522 522 # Iterable of chunks holding raw delta data.
523 523 deltachunks = attr.ib()
524 524
525 525 def _revisiondeltatochunks(delta, headerfn):
526 526 """Serialize a revisiondelta to changegroup chunks."""
527 527 meta = headerfn(delta)
528 528 l = len(meta) + sum(len(x) for x in delta.deltachunks)
529 529 yield chunkheader(l)
530 530 yield meta
531 531 for x in delta.deltachunks:
532 532 yield x
533 533
534 534 def _sortnodesnormal(store, nodes, reorder):
535 535 """Sort nodes for changegroup generation and turn into revnums."""
536 536 # for generaldelta revlogs, we linearize the revs; this will both be
537 537 # much quicker and generate a much smaller bundle
538 538 if (store._generaldelta and reorder is None) or reorder:
539 539 dag = dagutil.revlogdag(store)
540 540 return dag.linearize(set(store.rev(n) for n in nodes))
541 541 else:
542 542 return sorted([store.rev(n) for n in nodes])
543 543
544 544 def _sortnodesellipsis(store, nodes, cl, lookup):
545 545 """Sort nodes for changegroup generation and turn into revnums."""
546 546 # Ellipses serving mode.
547 547 #
548 548 # In a perfect world, we'd generate better ellipsis-ified graphs
549 549 # for non-changelog revlogs. In practice, we haven't started doing
550 550 # that yet, so the resulting DAGs for the manifestlog and filelogs
551 551 # are actually full of bogus parentage on all the ellipsis
552 552 # nodes. This has the side effect that, while the contents are
553 553 # correct, the individual DAGs might be completely out of whack in
554 554 # a case like 882681bc3166 and its ancestors (back about 10
555 555 # revisions or so) in the main hg repo.
556 556 #
557 557 # The one invariant we *know* holds is that the new (potentially
558 558 # bogus) DAG shape will be valid if we order the nodes in the
559 559 # order that they're introduced in dramatis personae by the
560 560 # changelog, so what we do is we sort the non-changelog histories
561 561 # by the order in which they are used by the changelog.
562 562 key = lambda n: cl.rev(lookup(n))
563 563 return [store.rev(n) for n in sorted(nodes, key=key)]
564 564
565 565 def _revisiondeltanormal(store, rev, prev, linknode, deltaparentfn):
566 566 """Construct a revision delta for non-ellipses changegroup generation."""
567 567 node = store.node(rev)
568 568 p1, p2 = store.parentrevs(rev)
569 569 base = deltaparentfn(store, rev, p1, p2, prev)
570 570
571 571 prefix = ''
572 572 if store.iscensored(base) or store.iscensored(rev):
573 573 try:
574 574 delta = store.revision(node, raw=True)
575 575 except error.CensoredNodeError as e:
576 576 delta = e.tombstone
577 577 if base == nullrev:
578 578 prefix = mdiff.trivialdiffheader(len(delta))
579 579 else:
580 580 baselen = store.rawsize(base)
581 581 prefix = mdiff.replacediffheader(baselen, len(delta))
582 582 elif base == nullrev:
583 583 delta = store.revision(node, raw=True)
584 584 prefix = mdiff.trivialdiffheader(len(delta))
585 585 else:
586 586 delta = store.revdiff(base, rev)
587 587 p1n, p2n = store.parents(node)
588 588
589 589 return revisiondelta(
590 590 node=node,
591 591 p1node=p1n,
592 592 p2node=p2n,
593 593 basenode=store.node(base),
594 594 linknode=linknode,
595 595 flags=store.flags(rev),
596 596 deltachunks=(prefix, delta),
597 597 )
598 598
599 599 def _revisiondeltanarrow(cl, store, ischangelog, rev, linkrev,
600 600 linknode, clrevtolocalrev, fullclnodes,
601 601 precomputedellipsis):
602 602 linkparents = precomputedellipsis[linkrev]
603 603 def local(clrev):
604 604 """Turn a changelog revnum into a local revnum.
605 605
606 606 The ellipsis dag is stored as revnums on the changelog,
607 607 but when we're producing ellipsis entries for
608 608 non-changelog revlogs, we need to turn those numbers into
609 609 something local. This does that for us, and during the
610 610 changelog sending phase will also expand the stored
611 611 mappings as needed.
612 612 """
613 613 if clrev == nullrev:
614 614 return nullrev
615 615
616 616 if ischangelog:
617 617 return clrev
618 618
619 619 # Walk the ellipsis-ized changelog breadth-first looking for a
620 620 # change that has been linked from the current revlog.
621 621 #
622 622 # For a flat manifest revlog only a single step should be necessary
623 623 # as all relevant changelog entries are relevant to the flat
624 624 # manifest.
625 625 #
626 626 # For a filelog or tree manifest dirlog however not every changelog
627 627 # entry will have been relevant, so we need to skip some changelog
628 628 # nodes even after ellipsis-izing.
629 629 walk = [clrev]
630 630 while walk:
631 631 p = walk[0]
632 632 walk = walk[1:]
633 633 if p in clrevtolocalrev:
634 634 return clrevtolocalrev[p]
635 635 elif p in fullclnodes:
636 636 walk.extend([pp for pp in cl.parentrevs(p)
637 637 if pp != nullrev])
638 638 elif p in precomputedellipsis:
639 639 walk.extend([pp for pp in precomputedellipsis[p]
640 640 if pp != nullrev])
641 641 else:
642 642 # In this case, we've got an ellipsis with parents
643 643 # outside the current bundle (likely an
644 644 # incremental pull). We "know" that we can use the
645 645 # value of this same revlog at whatever revision
646 646 # is pointed to by linknode. "Know" is in scare
647 647 # quotes because I haven't done enough examination
648 648 # of edge cases to convince myself this is really
649 649 # a fact - it works for all the (admittedly
650 650 # thorough) cases in our testsuite, but I would be
651 651 # somewhat unsurprised to find a case in the wild
652 652 # where this breaks down a bit. That said, I don't
653 653 # know if it would hurt anything.
654 654 for i in pycompat.xrange(rev, 0, -1):
655 655 if store.linkrev(i) == clrev:
656 656 return i
657 657 # We failed to resolve a parent for this node, so
658 658 # we crash the changegroup construction.
659 659 raise error.Abort(
660 660 'unable to resolve parent while packing %r %r'
661 661 ' for changeset %r' % (store.indexfile, rev, clrev))
662 662
663 663 return nullrev
664 664
665 665 if not linkparents or (
666 666 store.parentrevs(rev) == (nullrev, nullrev)):
667 667 p1, p2 = nullrev, nullrev
668 668 elif len(linkparents) == 1:
669 669 p1, = sorted(local(p) for p in linkparents)
670 670 p2 = nullrev
671 671 else:
672 672 p1, p2 = sorted(local(p) for p in linkparents)
673 673
674 674 n = store.node(rev)
675 675 p1n, p2n = store.node(p1), store.node(p2)
676 676 flags = store.flags(rev)
677 677 flags |= revlog.REVIDX_ELLIPSIS
678 678
679 679 # TODO: try and actually send deltas for ellipsis data blocks
680 680 data = store.revision(n)
681 681 diffheader = mdiff.trivialdiffheader(len(data))
682 682
683 683 return revisiondelta(
684 684 node=n,
685 685 p1node=p1n,
686 686 p2node=p2n,
687 687 basenode=nullid,
688 688 linknode=linknode,
689 689 flags=flags,
690 690 deltachunks=(diffheader, data),
691 691 )
692 692
693 693 def deltagroup(repo, revs, store, ischangelog, lookup, deltaparentfn,
694 694 units=None,
695 695 ellipses=False, clrevtolocalrev=None, fullclnodes=None,
696 696 precomputedellipsis=None):
697 697 """Calculate deltas for a set of revisions.
698 698
699 699 Is a generator of ``revisiondelta`` instances.
700 700
701 701 If units is not None, progress detail will be generated, units specifies
702 702 the type of revlog that is touched (changelog, manifest, etc.).
703 703 """
704 # if we don't have any revisions touched by these changesets, bail
705 if len(revs) == 0:
704 if not revs:
706 705 return
707 706
708 707 cl = repo.changelog
709 708
710 # add the parent of the first rev
711 p = store.parentrevs(revs[0])[0]
712 revs.insert(0, p)
709 # Add the parent of the first rev.
710 revs.insert(0, store.parentrevs(revs[0])[0])
713 711
714 712 # build deltas
715 713 progress = None
716 714 if units is not None:
717 715 progress = repo.ui.makeprogress(_('bundling'), unit=units,
718 716 total=(len(revs) - 1))
719 for r in pycompat.xrange(len(revs) - 1):
717
718 for i in pycompat.xrange(len(revs) - 1):
720 719 if progress:
721 progress.update(r + 1)
722 prev, curr = revs[r], revs[r + 1]
720 progress.update(i + 1)
721
722 prev = revs[i]
723 curr = revs[i + 1]
724
723 725 linknode = lookup(store.node(curr))
724 726
725 727 if ellipses:
726 728 linkrev = cl.rev(linknode)
727 729 clrevtolocalrev[linkrev] = curr
728 730
729 731 # This is a node to send in full, because the changeset it
730 732 # corresponds to was a full changeset.
731 733 if linknode in fullclnodes:
732 734 delta = _revisiondeltanormal(store, curr, prev, linknode,
733 735 deltaparentfn)
734 736 elif linkrev not in precomputedellipsis:
735 737 delta = None
736 738 else:
737 739 delta = _revisiondeltanarrow(
738 740 cl, store, ischangelog, curr, linkrev, linknode,
739 741 clrevtolocalrev, fullclnodes,
740 742 precomputedellipsis)
741 743 else:
742 744 delta = _revisiondeltanormal(store, curr, prev, linknode,
743 745 deltaparentfn)
744 746
745 747 if delta:
746 748 yield delta
747 749
748 750 if progress:
749 751 progress.complete()
750 752
751 753 class cgpacker(object):
752 754 def __init__(self, repo, filematcher, version, allowreorder,
753 755 deltaparentfn, builddeltaheader, manifestsend,
754 756 bundlecaps=None, ellipses=False,
755 757 shallow=False, ellipsisroots=None, fullnodes=None):
756 758 """Given a source repo, construct a bundler.
757 759
758 760 filematcher is a matcher that matches on files to include in the
759 761 changegroup. Used to facilitate sparse changegroups.
760 762
761 763 allowreorder controls whether reordering of revisions is allowed.
762 764 This value is used when ``bundle.reorder`` is ``auto`` or isn't
763 765 set.
764 766
765 767 deltaparentfn is a callable that resolves the delta parent for
766 768 a specific revision.
767 769
768 770 builddeltaheader is a callable that constructs the header for a group
769 771 delta.
770 772
771 773 manifestsend is a chunk to send after manifests have been fully emitted.
772 774
773 775 ellipses indicates whether ellipsis serving mode is enabled.
774 776
775 777 bundlecaps is optional and can be used to specify the set of
776 778 capabilities which can be used to build the bundle. While bundlecaps is
777 779 unused in core Mercurial, extensions rely on this feature to communicate
778 780 capabilities to customize the changegroup packer.
779 781
780 782 shallow indicates whether shallow data might be sent. The packer may
781 783 need to pack file contents not introduced by the changes being packed.
782 784
783 785 fullnodes is the set of changelog nodes which should not be ellipsis
784 786 nodes. We store this rather than the set of nodes that should be
785 787 ellipsis because for very large histories we expect this to be
786 788 significantly smaller.
787 789 """
788 790 assert filematcher
789 791 self._filematcher = filematcher
790 792
791 793 self.version = version
792 794 self._deltaparentfn = deltaparentfn
793 795 self._builddeltaheader = builddeltaheader
794 796 self._manifestsend = manifestsend
795 797 self._ellipses = ellipses
796 798
797 799 # Set of capabilities we can use to build the bundle.
798 800 if bundlecaps is None:
799 801 bundlecaps = set()
800 802 self._bundlecaps = bundlecaps
801 803 self._isshallow = shallow
802 804 self._fullclnodes = fullnodes
803 805
804 806 # Maps ellipsis revs to their roots at the changelog level.
805 807 self._precomputedellipsis = ellipsisroots
806 808
807 809 # experimental config: bundle.reorder
808 810 reorder = repo.ui.config('bundle', 'reorder')
809 811 if reorder == 'auto':
810 812 self._reorder = allowreorder
811 813 else:
812 814 self._reorder = stringutil.parsebool(reorder)
813 815
814 816 self._repo = repo
815 817
816 818 if self._repo.ui.verbose and not self._repo.ui.debugflag:
817 819 self._verbosenote = self._repo.ui.note
818 820 else:
819 821 self._verbosenote = lambda s: None
820 822
    def generate(self, commonrevs, clnodes, fastpathlinkrev, source):
        """Yield a sequence of changegroup byte chunks.

        Emits the changelog group, then the manifest group(s), then one
        group per changed file. Each group is terminated by an empty
        chunk (``closechunk()``), and running uncompressed sizes are
        reported via ``self._verbosenote``.

        ``commonrevs`` are revisions assumed to exist on the receiver,
        ``clnodes`` the changelog nodes to send, ``fastpathlinkrev``
        whether linkrevs may be taken directly from the store, and
        ``source`` an operation label passed to hooks/extensions.
        """

        repo = self._repo
        cl = repo.changelog

        self._verbosenote(_('uncompressed size of bundle content:\n'))
        size = 0

        # State dict is populated lazily as the delta stream is consumed,
        # so it must only be read after the loop below completes.
        clstate, deltas = self._generatechangelog(cl, clnodes)
        for delta in deltas:
            for chunk in _revisiondeltatochunks(delta, self._builddeltaheader):
                size += len(chunk)
                yield chunk

        close = closechunk()
        size += len(close)
        # NOTE(review): yields a fresh closechunk() rather than ``close``;
        # the bytes are identical, but the manifest/file sections below
        # yield the precomputed value.
        yield closechunk()

        self._verbosenote(_('%8.i (changelog)\n') % size)

        clrevorder = clstate['clrevorder']
        mfs = clstate['mfs']
        changedfiles = clstate['changedfiles']

        # We need to make sure that the linkrev in the changegroup refers to
        # the first changeset that introduced the manifest or file revision.
        # The fastpath is usually safer than the slowpath, because the filelogs
        # are walked in revlog order.
        #
        # When taking the slowpath with reorder=None and the manifest revlog
        # uses generaldelta, the manifest may be walked in the "wrong" order.
        # Without 'clrevorder', we would get an incorrect linkrev (see fix in
        # cc0ff93d0c0c).
        #
        # When taking the fastpath, we are only vulnerable to reordering
        # of the changelog itself. The changelog never uses generaldelta, so
        # it is only reordered when reorder=True. To handle this case, we
        # simply take the slowpath, which already has the 'clrevorder' logic.
        # This was also fixed in cc0ff93d0c0c.
        fastpathlinkrev = fastpathlinkrev and not self._reorder
        # Treemanifests don't work correctly with fastpathlinkrev
        # either, because we don't discover which directory nodes to
        # send along with files. This could probably be fixed.
        fastpathlinkrev = fastpathlinkrev and (
            'treemanifest' not in repo.requirements)

        fnodes = {}  # needed file nodes

        size = 0
        it = self.generatemanifests(
            commonrevs, clrevorder, fastpathlinkrev, mfs, fnodes, source,
            clstate['clrevtomanifestrev'])

        for dir, deltas in it:
            if dir:
                # Sub-directory manifests only exist in cg3 (treemanifest).
                assert self.version == b'03'
                chunk = _fileheader(dir)
                size += len(chunk)
                yield chunk

            for delta in deltas:
                chunks = _revisiondeltatochunks(delta, self._builddeltaheader)
                for chunk in chunks:
                    size += len(chunk)
                    yield chunk

            close = closechunk()
            size += len(close)
            yield close

        self._verbosenote(_('%8.i (manifests)\n') % size)
        yield self._manifestsend

        # mfdicts is only used by shallow ellipsis clones to recover file
        # nodes not introduced by the manifests being sent.
        mfdicts = None
        if self._ellipses and self._isshallow:
            mfdicts = [(self._repo.manifestlog[n].read(), lr)
                       for (n, lr) in mfs.iteritems()]

        mfs.clear()
        clrevs = set(cl.rev(x) for x in clnodes)

        it = self.generatefiles(changedfiles, commonrevs,
                                source, mfdicts, fastpathlinkrev,
                                fnodes, clrevs)

        for path, deltas in it:
            h = _fileheader(path)
            # Size counter restarts per file so the note below reports a
            # per-file total, unlike the changelog/manifest sections.
            size = len(h)
            yield h

            for delta in deltas:
                chunks = _revisiondeltatochunks(delta, self._builddeltaheader)
                for chunk in chunks:
                    size += len(chunk)
                    yield chunk

            close = closechunk()
            size += len(close)
            yield close

            self._verbosenote(_('%8.i %s\n') % (size, path))

        # Final empty chunk marks the end of the whole changegroup.
        yield closechunk()

        if clnodes:
            repo.hook('outgoing', node=hex(clnodes[0]), source=source)
928 930
    def _generatechangelog(self, cl, nodes):
        """Generate data for changelog chunks.

        Returns a 2-tuple of a dict containing state and an iterable of
        byte chunks. The state will not be fully populated until the
        chunk stream has been fully consumed.

        The state dict carries: 'clrevorder' (node -> send order),
        'mfs' (manifest node -> first introducing changelog node),
        'changedfiles' (set of touched paths), and
        'clrevtomanifestrev' (cl rev -> manifest rev, ellipsis mode only).
        """
        clrevorder = {}
        mfs = {}  # needed manifests
        mfl = self._repo.manifestlog
        # TODO violates storage abstraction.
        mfrevlog = mfl._revlog
        changedfiles = set()
        clrevtomanifestrev = {}

        # Callback for the changelog, used to collect changed files and
        # manifest nodes.
        # Returns the linkrev node (identity in the changelog case).
        def lookupcl(x):
            # c is the parsed changelog entry: c[0] is the manifest node,
            # c[3] the list of changed files (per the uses below).
            c = cl.read(x)
            clrevorder[x] = len(clrevorder)

            if self._ellipses:
                # Only update mfs if x is going to be sent. Otherwise we
                # end up with bogus linkrevs specified for manifests and
                # we skip some manifest nodes that we should otherwise
                # have sent.
                if (x in self._fullclnodes
                    or cl.rev(x) in self._precomputedellipsis):
                    n = c[0]
                    # Record the first changeset introducing this manifest
                    # version.
                    mfs.setdefault(n, x)
                    # Set this narrow-specific dict so we have the lowest
                    # manifest revnum to look up for this cl revnum. (Part of
                    # mapping changelog ellipsis parents to manifest ellipsis
                    # parents)
                    clrevtomanifestrev.setdefault(cl.rev(x), mfrevlog.rev(n))
                # We can't trust the changed files list in the changeset if the
                # client requested a shallow clone.
                if self._isshallow:
                    changedfiles.update(mfl[c[0]].read().keys())
                else:
                    changedfiles.update(c[3])
            else:

                n = c[0]
                # record the first changeset introducing this manifest version
                mfs.setdefault(n, x)
                # Record a complete list of potentially-changed files in
                # this manifest.
                changedfiles.update(c[3])

            return x

        # Changelog doesn't benefit from reordering revisions. So send out
        # revisions in store order.
        revs = sorted(cl.rev(n) for n in nodes)

        state = {
            'clrevorder': clrevorder,
            'mfs': mfs,
            'changedfiles': changedfiles,
            'clrevtomanifestrev': clrevtomanifestrev,
        }

        gen = deltagroup(
            self._repo, revs, cl, True, lookupcl,
            self._deltaparentfn,
            ellipses=self._ellipses,
            units=_('changesets'),
            clrevtolocalrev={},
            fullclnodes=self._fullclnodes,
            precomputedellipsis=self._precomputedellipsis)

        return state, gen
1005 1007
    def generatemanifests(self, commonrevs, clrevorder, fastpathlinkrev, mfs,
                          fnodes, source, clrevtolocalrev):
        """Returns an iterator of changegroup chunks containing manifests.

        Yields ``(dir, deltas)`` pairs; ``dir`` is '' for the root
        manifest and a '/'-terminated path for treemanifest
        subdirectories. As a side effect, ``fnodes`` is populated with
        the file nodes (and their linkrev nodes) to send afterwards.

        `source` is unused here, but is used by extensions like remotefilelog to
        change what is sent based in pulls vs pushes, etc.
        """
        repo = self._repo
        cl = repo.changelog
        mfl = repo.manifestlog
        dirlog = mfl._revlog.dirlog
        # Worklist: directory -> {manifest node: linkrev node}. Seeded with
        # the root manifests; subdirectory manifests are discovered while
        # reading entries below and appended here.
        tmfnodes = {'': mfs}

        # Callback for the manifest, used to collect linkrevs for filelog
        # revisions.
        # Returns the linkrev node (collected in lookupcl).
        def makelookupmflinknode(dir, nodes):
            if fastpathlinkrev:
                assert not dir
                return mfs.__getitem__

            def lookupmflinknode(x):
                """Callback for looking up the linknode for manifests.

                Returns the linkrev node for the specified manifest.

                SIDE EFFECT:

                1) fclnodes gets populated with the list of relevant
                file nodes if we're not using fastpathlinkrev
                2) When treemanifests are in use, collects treemanifest nodes
                to send

                Note that this means manifests must be completely sent to
                the client before you can trust the list of files and
                treemanifests to send.
                """
                clnode = nodes[x]
                mdata = mfl.get(dir, x).readfast(shallow=True)
                for p, n, fl in mdata.iterentries():
                    if fl == 't':  # subdirectory manifest
                        subdir = dir + p + '/'
                        tmfclnodes = tmfnodes.setdefault(subdir, {})
                        tmfclnode = tmfclnodes.setdefault(n, clnode)
                        # Keep the earliest introducing changeset as linknode.
                        if clrevorder[clnode] < clrevorder[tmfclnode]:
                            tmfclnodes[n] = clnode
                    else:
                        f = dir + p
                        fclnodes = fnodes.setdefault(f, {})
                        fclnode = fclnodes.setdefault(n, clnode)
                        if clrevorder[clnode] < clrevorder[fclnode]:
                            fclnodes[n] = clnode
                return clnode
            return lookupmflinknode

        while tmfnodes:
            dir, nodes = tmfnodes.popitem()
            store = dirlog(dir)

            if not self._filematcher.visitdir(store._dir[:-1] or '.'):
                # Directory is excluded by the narrow matcher; send nothing.
                prunednodes = []
            else:
                # Drop nodes whose linkrev the receiver already has.
                frev, flr = store.rev, store.linkrev
                prunednodes = [n for n in nodes
                               if flr(frev(n)) not in commonrevs]

            if dir and not prunednodes:
                continue

            lookupfn = makelookupmflinknode(dir, nodes)

            if self._ellipses:
                revs = _sortnodesellipsis(store, prunednodes, cl,
                                          lookupfn)
            else:
                revs = _sortnodesnormal(store, prunednodes,
                                        self._reorder)

            deltas = deltagroup(
                self._repo, revs, store, False, lookupfn,
                self._deltaparentfn,
                ellipses=self._ellipses,
                units=_('manifests'),
                clrevtolocalrev=clrevtolocalrev,
                fullclnodes=self._fullclnodes,
                precomputedellipsis=self._precomputedellipsis)

            yield dir, deltas
1094 1096
    # The 'source' parameter is useful for extensions
    def generatefiles(self, changedfiles, commonrevs, source,
                      mfdicts, fastpathlinkrev, fnodes, clrevs):
        """Yield ``(path, deltas)`` pairs for each changed file to send.

        ``fnodes`` maps path -> {file node: linkrev node} as collected by
        generatemanifests (slow path); on the fast path, linkrevs are read
        straight from each filelog. ``mfdicts`` is only set for shallow
        ellipsis clones (see generate()).
        """
        changedfiles = list(filter(self._filematcher, changedfiles))

        if not fastpathlinkrev:
            def normallinknodes(unused, fname):
                return fnodes.get(fname, {})
        else:
            cln = self._repo.changelog.node

            def normallinknodes(store, fname):
                flinkrev = store.linkrev
                fnode = store.node
                revs = ((r, flinkrev(r)) for r in store)
                # Only keep revisions whose linkrev is among the changesets
                # being sent.
                return dict((fnode(r), cln(lr))
                            for r, lr in revs if lr in clrevs)

        clrevtolocalrev = {}

        if self._isshallow:
            # In a shallow clone, the linknodes callback needs to also include
            # those file nodes that are in the manifests we sent but weren't
            # introduced by those manifests.
            commonctxs = [self._repo[c] for c in commonrevs]
            clrev = self._repo.changelog.rev

            # Defining this function has a side-effect of overriding the
            # function of the same name that was passed in as an argument.
            # TODO have caller pass in appropriate function.
            def linknodes(flog, fname):
                for c in commonctxs:
                    try:
                        fnode = c.filenode(fname)
                        clrevtolocalrev[c.rev()] = flog.rev(fnode)
                    except error.ManifestLookupError:
                        # File absent from this common changeset; skip it.
                        pass
                links = normallinknodes(flog, fname)
                if len(links) != len(mfdicts):
                    for mf, lr in mfdicts:
                        fnode = mf.get(fname, None)
                        if fnode in links:
                            # Prefer the earliest introducing changeset.
                            links[fnode] = min(links[fnode], lr, key=clrev)
                        elif fnode:
                            links[fnode] = lr
                return links
        else:
            linknodes = normallinknodes

        repo = self._repo
        cl = repo.changelog
        progress = repo.ui.makeprogress(_('bundling'), unit=_('files'),
                                        total=len(changedfiles))
        for i, fname in enumerate(sorted(changedfiles)):
            filerevlog = repo.file(fname)
            if not filerevlog:
                raise error.Abort(_("empty or missing file data for %s") %
                                  fname)

            # Per-file mapping; reset so stale entries from the previous
            # file can't leak into this one.
            clrevtolocalrev.clear()

            linkrevnodes = linknodes(filerevlog, fname)
            # Lookup for filenodes, we collected the linkrev nodes above in the
            # fastpath case and with lookupmf in the slowpath case.
            def lookupfilelog(x):
                return linkrevnodes[x]

            frev, flr = filerevlog.rev, filerevlog.linkrev
            filenodes = [n for n in linkrevnodes
                         if flr(frev(n)) not in commonrevs]

            if filenodes:
                if self._ellipses:
                    revs = _sortnodesellipsis(filerevlog, filenodes,
                                              cl, lookupfilelog)
                else:
                    revs = _sortnodesnormal(filerevlog, filenodes,
                                            self._reorder)

                progress.update(i + 1, item=fname)

                deltas = deltagroup(
                    self._repo, revs, filerevlog, False, lookupfilelog,
                    self._deltaparentfn,
                    ellipses=self._ellipses,
                    clrevtolocalrev=clrevtolocalrev,
                    fullclnodes=self._fullclnodes,
                    precomputedellipsis=self._precomputedellipsis)

                yield fname, deltas

        progress.complete()
1187 1189
1188 1190 def _deltaparentprev(store, rev, p1, p2, prev):
1189 1191 """Resolve a delta parent to the previous revision.
1190 1192
1191 1193 Used for version 1 changegroups, which don't support generaldelta.
1192 1194 """
1193 1195 return prev
1194 1196
def _deltaparentgeneraldelta(store, rev, p1, p2, prev):
    """Resolve a delta parent when general deltas are supported."""
    dp = store.deltaparent(rev)
    if dp == nullrev:
        # The store holds a full snapshot here. If the store builds delta
        # chains, prefer deltaing against prev rather than sending a full
        # revision: p1 would often be smaller, but resolving p1's raw text
        # could be expensive, while prev is likely in the revlog caches.
        # There is probably room for a flag/config knob to tune this.
        # If the store deliberately avoids delta chains, stay with a full
        # snapshot.
        base = prev if store.storedeltachains else nullrev
    elif dp in (p1, p2, prev):
        base = dp
    else:
        # The remote may not have the delta base; fall back to prev.
        return prev

    # Refuse bases the storage layer says we cannot delta against.
    if base != nullrev and not store.candelta(base, rev):
        return nullrev

    return base
1221 1223
1222 1224 def _deltaparentellipses(store, rev, p1, p2, prev):
1223 1225 """Resolve a delta parent when in ellipses mode."""
1224 1226 # TODO: send better deltas when in narrow mode.
1225 1227 #
1226 1228 # changegroup.group() loops over revisions to send,
1227 1229 # including revisions we'll skip. What this means is that
1228 1230 # `prev` will be a potentially useless delta base for all
1229 1231 # ellipsis nodes, as the client likely won't have it. In
1230 1232 # the future we should do bookkeeping about which nodes
1231 1233 # have been sent to the client, and try to be
1232 1234 # significantly smarter about delta bases. This is
1233 1235 # slightly tricky because this same code has to work for
1234 1236 # all revlogs, and we don't have the linkrev/linknode here.
1235 1237 return p1
1236 1238
def _makecg1packer(repo, filematcher, bundlecaps, ellipses=False,
                   shallow=False, ellipsisroots=None, fullnodes=None):
    """Construct a packer emitting version 01 changegroups."""
    def builddeltaheader(d):
        # cg1 headers carry no delta base: the base is implicitly prev.
        return _CHANGEGROUPV1_DELTA_HEADER.pack(
            d.node, d.p1node, d.p2node, d.linknode)

    return cgpacker(repo, filematcher, b'01',
                    deltaparentfn=_deltaparentprev,
                    allowreorder=None,
                    builddeltaheader=builddeltaheader,
                    manifestsend=b'',
                    bundlecaps=bundlecaps,
                    ellipses=ellipses,
                    shallow=shallow,
                    ellipsisroots=ellipsisroots,
                    fullnodes=fullnodes)
1252 1254
def _makecg2packer(repo, filematcher, bundlecaps, ellipses=False,
                   shallow=False, ellipsisroots=None, fullnodes=None):
    """Construct a packer emitting version 02 changegroups."""
    def builddeltaheader(d):
        # cg2 extends cg1 by encoding an explicit delta base node.
        return _CHANGEGROUPV2_DELTA_HEADER.pack(
            d.node, d.p1node, d.p2node, d.basenode, d.linknode)

    # Since generaldelta is directly supported by cg2, reordering
    # generally doesn't help, so we disable it by default (treating
    # bundle.reorder=auto just like bundle.reorder=False).
    return cgpacker(repo, filematcher, b'02',
                    deltaparentfn=_deltaparentgeneraldelta,
                    allowreorder=False,
                    builddeltaheader=builddeltaheader,
                    manifestsend=b'',
                    bundlecaps=bundlecaps,
                    ellipses=ellipses,
                    shallow=shallow,
                    ellipsisroots=ellipsisroots,
                    fullnodes=fullnodes)
1271 1273
def _makecg3packer(repo, filematcher, bundlecaps, ellipses=False,
                   shallow=False, ellipsisroots=None, fullnodes=None):
    """Construct a packer emitting version 03 changegroups."""
    def builddeltaheader(d):
        # cg3 extends cg2 by also carrying the revision flags.
        return _CHANGEGROUPV3_DELTA_HEADER.pack(
            d.node, d.p1node, d.p2node, d.basenode, d.linknode, d.flags)

    # Ellipsis serving needs its own delta-base policy (always p1).
    if ellipses:
        deltaparentfn = _deltaparentellipses
    else:
        deltaparentfn = _deltaparentgeneraldelta

    return cgpacker(repo, filematcher, b'03',
                    deltaparentfn=deltaparentfn,
                    allowreorder=False,
                    builddeltaheader=builddeltaheader,
                    manifestsend=closechunk(),
                    bundlecaps=bundlecaps,
                    ellipses=ellipses,
                    shallow=shallow,
                    ellipsisroots=ellipsisroots,
                    fullnodes=fullnodes)
1290 1292
# Maps changegroup version identifier to (packer factory, unpacker class).
_packermap = {'01': (_makecg1packer, cg1unpacker),
             # cg2 adds support for exchanging generaldelta
             '02': (_makecg2packer, cg2unpacker),
             # cg3 adds support for exchanging revlog flags and treemanifests
             '03': (_makecg3packer, cg3unpacker),
}
1297 1299
def allsupportedversions(repo):
    """Obtain the set of changegroup versions this code knows about."""
    versions = set(_packermap)
    # cg3 stays opt-in unless the repo configuration or its requirements
    # already depend on it.
    wantscg3 = (repo.ui.configbool('experimental', 'changegroup3')
                or repo.ui.configbool('experimental', 'treemanifest')
                or 'treemanifest' in repo.requirements)
    if not wantscg3:
        versions.discard('03')
    return versions
1305 1307
def supportedincomingversions(repo):
    """Changegroup versions that can be applied to the repo."""
    return allsupportedversions(repo)
1309 1311
def supportedoutgoingversions(repo):
    """Changegroup versions that can be created from the repo."""
    versions = allsupportedversions(repo)
    reqs = repo.requirements
    if 'treemanifest' in reqs:
        # Versions 01 and 02 support only flat manifests and it's just too
        # expensive to convert between the flat manifest and tree manifest on
        # the fly. Since tree manifests are hashed differently, all of history
        # would have to be converted. Instead, we simply don't even pretend to
        # support versions 01 and 02.
        versions.difference_update(('01', '02'))
    if repository.NARROW_REQUIREMENT in reqs:
        # Versions 01 and 02 don't support revlog flags, and we need to
        # support that for stripping and unbundling to work.
        versions.difference_update(('01', '02'))
    if LFS_REQUIREMENT in reqs:
        # Versions 01 and 02 don't support revlog flags, and we need to
        # mark LFS entries with REVIDX_EXTSTORED.
        versions.difference_update(('01', '02'))

    return versions
1333 1335
def localversion(repo):
    """Best changegroup version for locally-consumed bundles.

    Used for bundles from strip and shelve, and temporary bundles.
    """
    return max(supportedoutgoingversions(repo))
1338 1340
def safeversion(repo):
    """Smallest changegroup version all repo clients can be assumed to support.

    For example, every hg version that supports generaldelta also
    supports changegroup 02, so 01 can be dropped in that case.
    """
    versions = supportedoutgoingversions(repo)
    if 'generaldelta' in repo.requirements:
        versions -= {'01'}
    assert versions
    return min(versions)
1348 1350
def getbundler(version, repo, bundlecaps=None, filematcher=None,
               ellipses=False, shallow=False, ellipsisroots=None,
               fullnodes=None):
    """Obtain a changegroup packer for ``version``, validating arguments."""
    assert version in supportedoutgoingversions(repo)

    if filematcher is None:
        filematcher = matchmod.alwaysmatcher(repo.root, '')

    # Sparse matchers need per-file headers, which only cg2+ can express.
    if version == '01' and not filematcher.always():
        raise error.ProgrammingError('version 01 changegroups do not support '
                                     'sparse file matchers')

    if ellipses and version in (b'01', b'02'):
        raise error.Abort(
            _('ellipsis nodes require at least cg3 on client and server, '
              'but negotiated version %s') % version)

    # Requested files could include files not in the local store. So
    # filter those out.
    filematcher = matchmod.intersectmatchers(repo.narrowmatch(),
                                             filematcher)

    makepacker = _packermap[version][0]
    return makepacker(repo, filematcher, bundlecaps,
                      ellipses=ellipses,
                      shallow=shallow,
                      ellipsisroots=ellipsisroots,
                      fullnodes=fullnodes)
1375 1377
def getunbundler(version, fh, alg, extras=None):
    """Obtain an unpacker for ``version`` reading from file handle ``fh``."""
    unpackercls = _packermap[version][1]
    return unpackercls(fh, alg, extras=extras)
1378 1380
def _changegroupinfo(repo, nodes, source):
    """Report how many (and, when debugging, which) changesets are sent."""
    ui = repo.ui
    if ui.verbose or source == 'bundle':
        ui.status(_("%d changesets found\n") % len(nodes))
    if ui.debugflag:
        ui.debug("list of changesets:\n")
        for node in nodes:
            ui.debug("%s\n" % hex(node))
1386 1388
def makechangegroup(repo, outgoing, version, source, fastpath=False,
                    bundlecaps=None):
    """Build a changegroup stream and wrap it in an unbundler object."""
    cgstream = makestream(repo, outgoing, version, source,
                          fastpath=fastpath, bundlecaps=bundlecaps)
    return getunbundler(version, util.chunkbuffer(cgstream), None,
                        {'clcount': len(outgoing.missing)})
1393 1395
def makestream(repo, outgoing, version, source, fastpath=False,
               bundlecaps=None, filematcher=None):
    """Produce a changegroup byte stream for the ``outgoing`` revisions."""
    bundler = getbundler(version, repo, bundlecaps=bundlecaps,
                         filematcher=filematcher)

    repo = repo.unfiltered()
    common = outgoing.common
    csets = outgoing.missing
    heads = outgoing.missingheads
    # We go through the fast path if we get told to, or if all unfiltered
    # heads have been requested (since we then know all linkrevs will be
    # pulled by the client).
    heads.sort()
    fastpathlinkrev = fastpath or (repo.filtername is None
                                   and heads == sorted(repo.heads()))

    repo.hook('preoutgoing', throw=True, source=source)
    _changegroupinfo(repo, csets, source)
    return bundler.generate(common, csets, fastpathlinkrev, source)
1413 1415
def _addchangegroupfiles(repo, source, revmap, trp, expectedfiles, needfiles):
    """Apply the per-file revision groups from an incoming changegroup.

    ``source`` yields file headers and delta groups; ``revmap`` maps
    linkrev nodes to local revisions, ``trp`` is the transaction proxy.
    ``needfiles`` maps path -> set of file nodes the changesets require;
    entries are checked off as revisions arrive, and anything left over
    afterwards is verified to already exist locally.

    Returns a ``(revisions, files)`` tuple of how much was added.
    Raises error.Abort on empty, spurious, censored-base, or missing
    file revisions.
    """
    revisions = 0
    files = 0
    progress = repo.ui.makeprogress(_('files'), unit=_('files'),
                                    total=expectedfiles)
    # filelogheader() returns {} once all file groups are consumed; iter()
    # with that sentinel ends the loop there.
    for chunkdata in iter(source.filelogheader, {}):
        files += 1
        f = chunkdata["filename"]
        repo.ui.debug("adding %s revisions\n" % f)
        progress.increment()
        fl = repo.file(f)
        o = len(fl)  # revision count before applying, to count additions
        try:
            deltas = source.deltaiter()
            if not fl.addgroup(deltas, revmap, trp):
                raise error.Abort(_("received file revlog group is empty"))
        except error.CensoredBaseError as e:
            raise error.Abort(_("received delta base is censored: %s") % e)
        revisions += len(fl) - o
        if f in needfiles:
            needs = needfiles[f]
            # Check off each newly added node; anything unexpected aborts.
            for new in pycompat.xrange(o, len(fl)):
                n = fl.node(new)
                if n in needs:
                    needs.remove(n)
                else:
                    raise error.Abort(
                        _("received spurious file revlog entry"))
            if not needs:
                del needfiles[f]
    progress.complete()

    # Whatever the changegroup did not deliver must already be present
    # locally; otherwise the repository would be left incomplete.
    for f, needs in needfiles.iteritems():
        fl = repo.file(f)
        for n in needs:
            try:
                fl.rev(n)
            except error.LookupError:
                raise error.Abort(
                    _('missing file data for %s:%s - run hg verify') %
                    (f, hex(n)))

    return revisions, files
General Comments 0
You need to be logged in to leave comments. Login now