##// END OF EJS Templates
changegroup: pass clrevtolocalrev to each group...
Gregory Szorc -
r39037:a6e1ff40 default
parent child Browse files
Show More
@@ -1,1420 +1,1423 b''
1 1 # changegroup.py - Mercurial changegroup manipulation functions
2 2 #
3 3 # Copyright 2006 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import os
11 11 import struct
12 12 import weakref
13 13
14 14 from .i18n import _
15 15 from .node import (
16 16 hex,
17 17 nullid,
18 18 nullrev,
19 19 short,
20 20 )
21 21
22 22 from .thirdparty import (
23 23 attr,
24 24 )
25 25
26 26 from . import (
27 27 dagutil,
28 28 error,
29 29 manifest,
30 30 match as matchmod,
31 31 mdiff,
32 32 phases,
33 33 pycompat,
34 34 repository,
35 35 revlog,
36 36 util,
37 37 )
38 38
39 39 from .utils import (
40 40 stringutil,
41 41 )
42 42
# Wire headers for a single delta entry in each changegroup version.
# v1 carries: node, p1, p2, cs (the linking changeset node); the delta
# base is implicit (see cg1unpacker._deltaheader).
_CHANGEGROUPV1_DELTA_HEADER = struct.Struct("20s20s20s20s")
# v2 adds an explicit delta base node between p2 and cs.
_CHANGEGROUPV2_DELTA_HEADER = struct.Struct("20s20s20s20s20s")
# v3 additionally carries 16 bits of revlog flags (big-endian).
_CHANGEGROUPV3_DELTA_HEADER = struct.Struct(">20s20s20s20s20sH")

LFS_REQUIREMENT = 'lfs'

# Local alias: readexactly(stream, n) returns exactly n bytes or aborts.
readexactly = util.readexactly
50 50
def getchunk(stream):
    """Return the payload of the next length-prefixed chunk on ``stream``.

    The 4-byte big-endian prefix counts itself, so a prefix of 0 marks a
    terminator chunk and yields an empty string. Prefixes between 1 and 4
    (or negative) are malformed and abort.
    """
    prefix = readexactly(stream, 4)
    total = struct.unpack(">l", prefix)[0]
    if total > 4:
        return readexactly(stream, total - 4)
    if total:
        raise error.Abort(_("invalid chunk length %d") % total)
    return ""
60 60
def chunkheader(length):
    """Return the chunk header (string) for a payload of ``length`` bytes.

    The emitted 4-byte big-endian count includes the header itself.
    """
    return struct.pack(">l", 4 + length)
64 64
def closechunk():
    """Return the terminator chunk header: a zero-length (all-zero) prefix."""
    # Equivalent to struct.pack(">l", 0).
    return b"\x00\x00\x00\x00"
68 68
def _fileheader(path):
    """Obtain a changegroup chunk announcing the named ``path``.

    The chunk is a standard length prefix followed by the path bytes.
    """
    header = chunkheader(len(path))
    return header + path
72 72
def writechunks(ui, chunks, filename, vfs=None):
    """Write chunks to a file and return its filename.

    The stream is assumed to be a bundle file.
    Existing files will not be overwritten.
    If no filename is specified, a temporary file is created.
    """
    fh = None
    cleanup = None
    try:
        if not filename:
            fd, filename = pycompat.mkstemp(prefix="hg-bundle-", suffix=".hg")
            fh = os.fdopen(fd, r"wb")
        elif vfs:
            fh = vfs.open(filename, "wb")
        else:
            # The default buffer is usually small (4k is common on Linux);
            # use a bigger one for bundle-sized writes.
            fh = open(filename, "wb", 131072)
        # From here on a failure must remove the partially written file.
        cleanup = filename
        for chunk in chunks:
            fh.write(chunk)
        cleanup = None
        return filename
    finally:
        if fh is not None:
            fh.close()
        if cleanup is not None:
            if filename and vfs:
                vfs.unlink(cleanup)
            else:
                os.unlink(cleanup)
106 106
class cg1unpacker(object):
    """Unpacker for cg1 changegroup streams.

    A changegroup unpacker handles the framing of the revision data in
    the wire format. Most consumers will want to use the apply()
    method to add the changes from the changegroup to a repository.

    If you're forwarding a changegroup unmodified to another consumer,
    use getchunks(), which returns an iterator of changegroup
    chunks. This is mostly useful for cases where you need to know the
    data stream has ended by observing the end of the changegroup.

    deltachunk() is useful only if you're applying delta data. Most
    consumers should prefer apply() instead.

    A few other public methods exist. Those are used only for
    bundlerepo and some debug commands - their use is discouraged.
    """
    # Struct describing one delta header on the wire for this version.
    deltaheader = _CHANGEGROUPV1_DELTA_HEADER
    deltaheadersize = deltaheader.size
    version = '01'
    _grouplistcount = 1 # One list of files after the manifests

    def __init__(self, fh, alg, extras=None):
        # No compression type means "uncompressed".
        if alg is None:
            alg = 'UN'
        if alg not in util.compengines.supportedbundletypes:
            raise error.Abort(_('unknown stream compression type: %s')
                             % alg)
        if alg == 'BZ':
            # NOTE(review): '_truncatedBZ' appears to be an engine variant
            # for bzip2 streams whose magic bytes were already consumed by
            # bundle header parsing -- confirm against util.compengines.
            alg = '_truncatedBZ'

        compengine = util.compengines.forbundletype(alg)
        self._stream = compengine.decompressorreader(fh)
        self._type = alg
        self.extras = extras or {}
        # When set (see _unpackmanifests/apply), invoked once per chunk
        # read in _chunklength -- used for progress reporting.
        self.callback = None

    # These methods (compressed, read, seek, tell) all appear to only
    # be used by bundlerepo, but it's a little hard to tell.
    def compressed(self):
        return self._type is not None and self._type != 'UN'
    def read(self, l):
        return self._stream.read(l)
    def seek(self, pos):
        return self._stream.seek(pos)
    def tell(self):
        return self._stream.tell()
    def close(self):
        return self._stream.close()

    def _chunklength(self):
        """Read the next chunk's length prefix and return the payload size.

        Returns 0 for a terminator (empty) chunk; aborts on a malformed
        prefix. Fires self.callback (if set) for every non-empty chunk.
        """
        d = readexactly(self._stream, 4)
        l = struct.unpack(">l", d)[0]
        if l <= 4:
            if l:
                raise error.Abort(_("invalid chunk length %d") % l)
            return 0
        if self.callback:
            self.callback()
        # The prefix counts its own 4 bytes.
        return l - 4

    def changelogheader(self):
        """v10 does not have a changelog header chunk"""
        return {}

    def manifestheader(self):
        """v10 does not have a manifest header chunk"""
        return {}

    def filelogheader(self):
        """return the header of the filelogs chunk, v10 only has the filename"""
        l = self._chunklength()
        if not l:
            return {}
        fname = readexactly(self._stream, l)
        return {'filename': fname}

    def _deltaheader(self, headertuple, prevnode):
        """Resolve a parsed delta header into its full 6-tuple form.

        v1 headers carry no explicit delta base: the base is the previous
        node in the group, or p1 for the first entry of a group.
        """
        node, p1, p2, cs = headertuple
        if prevnode is None:
            deltabase = p1
        else:
            deltabase = prevnode
        flags = 0
        return node, p1, p2, deltabase, cs, flags

    def deltachunk(self, prevnode):
        """Read one delta entry from the stream.

        Returns {} at the end of a group, otherwise the 7-tuple
        (node, p1, p2, cs, deltabase, delta, flags).
        """
        l = self._chunklength()
        if not l:
            return {}
        headerdata = readexactly(self._stream, self.deltaheadersize)
        header = self.deltaheader.unpack(headerdata)
        delta = readexactly(self._stream, l - self.deltaheadersize)
        node, p1, p2, deltabase, cs, flags = self._deltaheader(header, prevnode)
        return (node, p1, p2, cs, deltabase, delta, flags)

    def getchunks(self):
        """returns all the chunks contains in the bundle

        Used when you need to forward the binary stream to a file or another
        network API. To do so, it parse the changegroup data, otherwise it will
        block in case of sshrepo because it don't know the end of the stream.
        """
        # For changegroup 1 and 2, we expect 3 parts: changelog, manifestlog,
        # and a list of filelogs. For changegroup 3, we expect 4 parts:
        # changelog, manifestlog, a list of tree manifestlogs, and a list of
        # filelogs.
        #
        # Changelog and manifestlog parts are terminated with empty chunks. The
        # tree and file parts are a list of entry sections. Each entry section
        # is a series of chunks terminating in an empty chunk. The list of these
        # entry sections is terminated in yet another empty chunk, so we know
        # we've reached the end of the tree/file list when we reach an empty
        # chunk that was proceeded by no non-empty chunks.

        parts = 0
        while parts < 2 + self._grouplistcount:
            noentries = True
            while True:
                chunk = getchunk(self)
                if not chunk:
                    # The first two empty chunks represent the end of the
                    # changelog and the manifestlog portions. The remaining
                    # empty chunks represent either A) the end of individual
                    # tree or file entries in the file list, or B) the end of
                    # the entire list. It's the end of the entire list if there
                    # were no entries (i.e. noentries is True).
                    if parts < 2:
                        parts += 1
                    elif noentries:
                        parts += 1
                    break
                noentries = False
                yield chunkheader(len(chunk))
                pos = 0
                # Re-emit payloads in at most 1 MiB slices.
                while pos < len(chunk):
                    next = pos + 2**20
                    yield chunk[pos:next]
                    pos = next
            yield closechunk()

    def _unpackmanifests(self, repo, revmap, trp, prog):
        """Consume the manifest portion of the stream into the manifest log."""
        self.callback = prog.increment
        # no need to check for empty manifest group here:
        # if the result of the merge of 1 and 2 is the same in 3 and 4,
        # no new manifest will be created and the manifest group will
        # be empty during the pull
        self.manifestheader()
        deltas = self.deltaiter()
        repo.manifestlog.addgroup(deltas, revmap, trp)
        prog.complete()
        self.callback = None

    def apply(self, repo, tr, srctype, url, targetphase=phases.draft,
              expectedtotal=None):
        """Add the changegroup returned by source.read() to this repo.
        srctype is a string like 'push', 'pull', or 'unbundle'. url is
        the URL of the repo where this changegroup is coming from.

        Return an integer summarizing the change to this repo:
        - nothing changed or no source: 0
        - more heads than before: 1+added heads (2..n)
        - fewer heads than before: -1-removed heads (-2..-n)
        - number of heads stays the same: 1
        """
        repo = repo.unfiltered()
        # Linkrev callback for the changelog itself: each incoming changeset
        # links to the revision about to be appended.
        def csmap(x):
            repo.ui.debug("add changeset %s\n" % short(x))
            return len(cl)

        def revmap(x):
            return cl.rev(x)

        changesets = files = revisions = 0

        try:
            # The transaction may already carry source information. In this
            # case we use the top level data. We overwrite the argument
            # because we need to use the top level value (if they exist)
            # in this function.
            srctype = tr.hookargs.setdefault('source', srctype)
            url = tr.hookargs.setdefault('url', url)
            repo.hook('prechangegroup',
                      throw=True, **pycompat.strkwargs(tr.hookargs))

            # write changelog data to temp files so concurrent readers
            # will not see an inconsistent view
            cl = repo.changelog
            cl.delayupdate(tr)
            oldheads = set(cl.heads())

            trp = weakref.proxy(tr)
            # pull off the changeset group
            repo.ui.status(_("adding changesets\n"))
            clstart = len(cl)
            progress = repo.ui.makeprogress(_('changesets'), unit=_('chunks'),
                                            total=expectedtotal)
            self.callback = progress.increment

            # Collect the union of files touched by incoming changesets,
            # used below to size the file-adding pass.
            efiles = set()
            def onchangelog(cl, node):
                efiles.update(cl.readfiles(node))

            self.changelogheader()
            deltas = self.deltaiter()
            cgnodes = cl.addgroup(deltas, csmap, trp, addrevisioncb=onchangelog)
            efiles = len(efiles)

            if not cgnodes:
                repo.ui.develwarn('applied empty changegroup',
                                  config='warn-empty-changegroup')
            clend = len(cl)
            changesets = clend - clstart
            progress.complete()
            self.callback = None

            # pull off the manifest group
            repo.ui.status(_("adding manifests\n"))
            # We know that we'll never have more manifests than we had
            # changesets.
            progress = repo.ui.makeprogress(_('manifests'), unit=_('chunks'),
                                            total=changesets)
            self._unpackmanifests(repo, revmap, trp, progress)

            needfiles = {}
            if repo.ui.configbool('server', 'validate'):
                cl = repo.changelog
                ml = repo.manifestlog
                # validate incoming csets have their manifests
                for cset in pycompat.xrange(clstart, clend):
                    mfnode = cl.changelogrevision(cset).manifest
                    mfest = ml[mfnode].readdelta()
                    # store file cgnodes we must see
                    for f, n in mfest.iteritems():
                        needfiles.setdefault(f, set()).add(n)

            # process the files
            repo.ui.status(_("adding file changes\n"))
            newrevs, newfiles = _addchangegroupfiles(
                repo, self, revmap, trp, efiles, needfiles)
            revisions += newrevs
            files += newfiles

            deltaheads = 0
            if oldheads:
                heads = cl.heads()
                deltaheads = len(heads) - len(oldheads)
                for h in heads:
                    # New heads on closed branches don't count.
                    if h not in oldheads and repo[h].closesbranch():
                        deltaheads -= 1
            htext = ""
            if deltaheads:
                htext = _(" (%+d heads)") % deltaheads

            repo.ui.status(_("added %d changesets"
                             " with %d changes to %d files%s\n")
                           % (changesets, revisions, files, htext))
            repo.invalidatevolatilesets()

            if changesets > 0:
                if 'node' not in tr.hookargs:
                    tr.hookargs['node'] = hex(cl.node(clstart))
                    tr.hookargs['node_last'] = hex(cl.node(clend - 1))
                    hookargs = dict(tr.hookargs)
                else:
                    hookargs = dict(tr.hookargs)
                    hookargs['node'] = hex(cl.node(clstart))
                    hookargs['node_last'] = hex(cl.node(clend - 1))
                repo.hook('pretxnchangegroup',
                          throw=True, **pycompat.strkwargs(hookargs))

            added = [cl.node(r) for r in pycompat.xrange(clstart, clend)]
            phaseall = None
            if srctype in ('push', 'serve'):
                # Old servers can not push the boundary themselves.
                # New servers won't push the boundary if changeset already
                # exists locally as secret
                #
                # We should not use added here but the list of all change in
                # the bundle
                if repo.publishing():
                    targetphase = phaseall = phases.public
                else:
                    # closer target phase computation

                    # Those changesets have been pushed from the
                    # outside, their phases are going to be pushed
                    # alongside. Therefor `targetphase` is
                    # ignored.
                    targetphase = phaseall = phases.draft
            if added:
                phases.registernew(repo, tr, targetphase, added)
            if phaseall is not None:
                phases.advanceboundary(repo, tr, phaseall, cgnodes)

            if changesets > 0:

                def runhooks():
                    # These hooks run when the lock releases, not when the
                    # transaction closes. So it's possible for the changelog
                    # to have changed since we last saw it.
                    if clstart >= len(repo):
                        return

                    repo.hook("changegroup", **pycompat.strkwargs(hookargs))

                    for n in added:
                        args = hookargs.copy()
                        args['node'] = hex(n)
                        del args['node_last']
                        repo.hook("incoming", **pycompat.strkwargs(args))

                    newheads = [h for h in repo.heads()
                                if h not in oldheads]
                    repo.ui.log("incoming",
                                "%d incoming changes - new heads: %s\n",
                                len(added),
                                ', '.join([hex(c[:6]) for c in newheads]))

                tr.addpostclose('changegroup-runhooks-%020i' % clstart,
                                lambda tr: repo._afterlock(runhooks))
        finally:
            repo.ui.flush()
        # never return 0 here:
        if deltaheads < 0:
            ret = deltaheads - 1
        else:
            ret = deltaheads + 1
        return ret

    def deltaiter(self):
        """
        returns an iterator of the deltas in this changegroup

        Useful for passing to the underlying storage system to be stored.
        """
        chain = None
        for chunkdata in iter(lambda: self.deltachunk(chain), {}):
            # Chunkdata: (node, p1, p2, cs, deltabase, delta, flags)
            yield chunkdata
            chain = chunkdata[0]
449 449
class cg2unpacker(cg1unpacker):
    """Unpacker for cg2 streams.

    cg2 streams add support for generaldelta, so the delta header
    format is slightly different. All other features about the data
    remain the same.
    """
    deltaheader = _CHANGEGROUPV2_DELTA_HEADER
    deltaheadersize = deltaheader.size
    version = '02'

    def _deltaheader(self, headertuple, prevnode):
        # The v2 wire header names the delta base explicitly, so
        # ``prevnode`` is never consulted. Flags are always 0 in cg2.
        node, p1, p2, deltabase, cs = headertuple
        return node, p1, p2, deltabase, cs, 0
465 465
class cg3unpacker(cg2unpacker):
    """Unpacker for cg3 streams.

    cg3 streams add support for exchanging treemanifests and revlog
    flags. It adds the revlog flags to the delta header and an empty chunk
    separating manifests and files.
    """
    deltaheader = _CHANGEGROUPV3_DELTA_HEADER
    deltaheadersize = deltaheader.size
    version = '03'
    _grouplistcount = 2 # One list of manifests and one list of files

    def _deltaheader(self, headertuple, prevnode):
        # The v3 header already carries all six fields, flags included.
        node, p1, p2, deltabase, cs, flags = headertuple
        return node, p1, p2, deltabase, cs, flags

    def _unpackmanifests(self, repo, revmap, trp, prog):
        super(cg3unpacker, self)._unpackmanifests(repo, revmap, trp, prog)
        # Any named groups that follow the root manifests are directory
        # (tree) manifests; an empty header terminates the list.
        for chunkdata in iter(self.filelogheader, {}):
            dirname = chunkdata["filename"]
            repo.ui.debug("adding %s revisions\n" % dirname)
            dirlog = repo.manifestlog._revlog.dirlog(dirname)
            deltas = self.deltaiter()
            if not dirlog.addgroup(deltas, revmap, trp):
                raise error.Abort(_("received dir revlog group is empty"))
492 492
class headerlessfixup(object):
    """File-like wrapper that replays an already-consumed header.

    ``h`` holds bytes previously peeked off ``fh``; reads drain that
    replay buffer first and then fall through to the real stream.
    """
    def __init__(self, fh, h):
        self._h = h
        self._fh = fh

    def read(self, n):
        if not self._h:
            return readexactly(self._fh, n)
        buffered, self._h = self._h[:n], self._h[n:]
        if len(buffered) < n:
            # Buffer exhausted mid-read; top up from the underlying stream.
            buffered += readexactly(self._fh, n - len(buffered))
        return buffered
504 504
@attr.s(slots=True, frozen=True)
class revisiondelta(object):
    """Describes a delta entry in a changegroup.

    Captured data is sufficient to serialize the delta into multiple
    formats.
    """
    # 20 byte node of this revision.
    node = attr.ib()
    # 20 byte nodes of parent revisions.
    p1node = attr.ib()
    p2node = attr.ib()
    # 20 byte node this delta is against (the delta base).
    basenode = attr.ib()
    # 20 byte node of changeset revision this delta is associated with.
    linknode = attr.ib()
    # 2 bytes of flags to apply to revision data.
    flags = attr.ib()
    # Iterable of chunks holding raw delta data.
    deltachunks = attr.ib()
525 525
526 526 def _sortnodesnormal(store, nodes, reorder):
527 527 """Sort nodes for changegroup generation and turn into revnums."""
528 528 # for generaldelta revlogs, we linearize the revs; this will both be
529 529 # much quicker and generate a much smaller bundle
530 530 if (store._generaldelta and reorder is None) or reorder:
531 531 dag = dagutil.revlogdag(store)
532 532 return dag.linearize(set(store.rev(n) for n in nodes))
533 533 else:
534 534 return sorted([store.rev(n) for n in nodes])
535 535
536 536 def _sortnodesellipsis(store, nodes, cl, lookup):
537 537 """Sort nodes for changegroup generation and turn into revnums."""
538 538 # Ellipses serving mode.
539 539 #
540 540 # In a perfect world, we'd generate better ellipsis-ified graphs
541 541 # for non-changelog revlogs. In practice, we haven't started doing
542 542 # that yet, so the resulting DAGs for the manifestlog and filelogs
543 543 # are actually full of bogus parentage on all the ellipsis
544 544 # nodes. This has the side effect that, while the contents are
545 545 # correct, the individual DAGs might be completely out of whack in
546 546 # a case like 882681bc3166 and its ancestors (back about 10
547 547 # revisions or so) in the main hg repo.
548 548 #
549 549 # The one invariant we *know* holds is that the new (potentially
550 550 # bogus) DAG shape will be valid if we order the nodes in the
551 551 # order that they're introduced in dramatis personae by the
552 552 # changelog, so what we do is we sort the non-changelog histories
553 553 # by the order in which they are used by the changelog.
554 554 key = lambda n: cl.rev(lookup(n))
555 555 return [store.rev(n) for n in sorted(nodes, key=key)]
556 556
def _revisiondeltanormal(store, rev, prev, linknode, deltaparentfn):
    """Construct a revision delta for non-ellipses changegroup generation.

    ``deltaparentfn`` picks the revision to delta against; censored
    revisions and null-based deltas are shipped as full texts with a
    synthesized diff header.
    """
    node = store.node(rev)
    p1rev, p2rev = store.parentrevs(rev)
    basisrev = deltaparentfn(store, rev, p1rev, p2rev, prev)

    header = ''
    if store.iscensored(basisrev) or store.iscensored(rev):
        # Censored data cannot be delta'd against: send the full
        # (possibly tombstoned) text wrapped in a diff header.
        try:
            payload = store.revision(node, raw=True)
        except error.CensoredNodeError as e:
            payload = e.tombstone
        if basisrev == nullrev:
            header = mdiff.trivialdiffheader(len(payload))
        else:
            header = mdiff.replacediffheader(store.rawsize(basisrev),
                                             len(payload))
    elif basisrev == nullrev:
        payload = store.revision(node, raw=True)
        header = mdiff.trivialdiffheader(len(payload))
    else:
        payload = store.revdiff(basisrev, rev)

    p1node, p2node = store.parents(node)

    return revisiondelta(
        node=node,
        p1node=p1node,
        p2node=p2node,
        basenode=store.node(basisrev),
        linknode=linknode,
        flags=store.flags(rev),
        deltachunks=(header, payload),
    )
590 590
591 591 class cgpacker(object):
    def __init__(self, repo, filematcher, version, allowreorder,
                 deltaparentfn, builddeltaheader, manifestsend,
                 bundlecaps=None, ellipses=False,
                 shallow=False, ellipsisroots=None, fullnodes=None):
        """Given a source repo, construct a bundler.

        filematcher is a matcher that matches on files to include in the
        changegroup. Used to facilitate sparse changegroups.

        allowreorder controls whether reordering of revisions is allowed.
        This value is used when ``bundle.reorder`` is ``auto`` or isn't
        set.

        deltaparentfn is a callable that resolves the delta parent for
        a specific revision.

        builddeltaheader is a callable that constructs the header for a group
        delta.

        manifestsend is a chunk to send after manifests have been fully emitted.

        ellipses indicates whether ellipsis serving mode is enabled.

        bundlecaps is optional and can be used to specify the set of
        capabilities which can be used to build the bundle. While bundlecaps is
        unused in core Mercurial, extensions rely on this feature to communicate
        capabilities to customize the changegroup packer.

        shallow indicates whether shallow data might be sent. The packer may
        need to pack file contents not introduced by the changes being packed.

        fullnodes is the set of changelog nodes which should not be ellipsis
        nodes. We store this rather than the set of nodes that should be
        ellipsis because for very large histories we expect this to be
        significantly smaller.
        """
        assert filematcher
        self._filematcher = filematcher

        self.version = version
        self._deltaparentfn = deltaparentfn
        self._builddeltaheader = builddeltaheader
        self._manifestsend = manifestsend
        self._ellipses = ellipses

        # Set of capabilities we can use to build the bundle.
        if bundlecaps is None:
            bundlecaps = set()
        self._bundlecaps = bundlecaps
        self._isshallow = shallow
        self._fullclnodes = fullnodes

        # Maps ellipsis revs to their roots at the changelog level.
        self._precomputedellipsis = ellipsisroots

        # experimental config: bundle.reorder
        # An explicit user setting overrides the caller-supplied default.
        reorder = repo.ui.config('bundle', 'reorder')
        if reorder == 'auto':
            self._reorder = allowreorder
        else:
            self._reorder = stringutil.parsebool(reorder)

        self._repo = repo

        # In verbose (but not debug) mode, emit per-section size notes;
        # otherwise the notes are silently dropped.
        if self._repo.ui.verbose and not self._repo.ui.debugflag:
            self._verbosenote = self._repo.ui.note
        else:
            self._verbosenote = lambda s: None
660 660
661 # Maps CL revs to per-revlog revisions. Cleared in close() at
662 # the end of each group.
663 self._clrevtolocalrev = {}
664
665 661 def _close(self):
666 # Ellipses serving mode.
667 self._clrevtolocalrev.clear()
668
669 662 return closechunk()
670 663
671 def group(self, revs, store, ischangelog, lookup, units=None):
664 def group(self, revs, store, ischangelog, lookup, units=None,
665 clrevtolocalrev=None):
672 666 """Calculate a delta group, yielding a sequence of changegroup chunks
673 667 (strings).
674 668
675 669 Given a list of changeset revs, return a set of deltas and
676 670 metadata corresponding to nodes. The first delta is
677 671 first parent(nodelist[0]) -> nodelist[0], the receiver is
678 672 guaranteed to have this parent as it has all history before
679 673 these changesets. In the case firstparent is nullrev the
680 674 changegroup starts with a full revision.
681 675
682 676 If units is not None, progress detail will be generated, units specifies
683 677 the type of revlog that is touched (changelog, manifest, etc.).
684 678 """
685 679 # if we don't have any revisions touched by these changesets, bail
686 680 if len(revs) == 0:
687 681 yield self._close()
688 682 return
689 683
690 684 cl = self._repo.changelog
691 685
692 686 # add the parent of the first rev
693 687 p = store.parentrevs(revs[0])[0]
694 688 revs.insert(0, p)
695 689
696 690 # build deltas
697 691 progress = None
698 692 if units is not None:
699 693 progress = self._repo.ui.makeprogress(_('bundling'), unit=units,
700 694 total=(len(revs) - 1))
701 695 for r in pycompat.xrange(len(revs) - 1):
702 696 if progress:
703 697 progress.update(r + 1)
704 698 prev, curr = revs[r], revs[r + 1]
705 699 linknode = lookup(store.node(curr))
706 700
707 701 if self._ellipses:
708 702 linkrev = cl.rev(linknode)
709 self._clrevtolocalrev[linkrev] = curr
703 clrevtolocalrev[linkrev] = curr
710 704
711 705 # This is a node to send in full, because the changeset it
712 706 # corresponds to was a full changeset.
713 707 if linknode in self._fullclnodes:
714 708 delta = _revisiondeltanormal(store, curr, prev, linknode,
715 709 self._deltaparentfn)
716 710 elif linkrev not in self._precomputedellipsis:
717 711 delta = None
718 712 else:
719 713 delta = self._revisiondeltanarrow(store, ischangelog,
720 curr, linkrev, linknode)
714 curr, linkrev, linknode,
715 clrevtolocalrev)
721 716 else:
722 717 delta = _revisiondeltanormal(store, curr, prev, linknode,
723 718 self._deltaparentfn)
724 719
725 720 if not delta:
726 721 continue
727 722
728 723 meta = self._builddeltaheader(delta)
729 724 l = len(meta) + sum(len(x) for x in delta.deltachunks)
730 725 yield chunkheader(l)
731 726 yield meta
732 727 for x in delta.deltachunks:
733 728 yield x
734 729
735 730 if progress:
736 731 progress.complete()
737 732 yield self._close()
738 733
739 734 # filter any nodes that claim to be part of the known set
740 735 def _prune(self, store, missing, commonrevs):
741 736 # TODO this violates storage abstraction for manifests.
742 737 if isinstance(store, manifest.manifestrevlog):
743 738 if not self._filematcher.visitdir(store._dir[:-1] or '.'):
744 739 return []
745 740
746 741 rr, rl = store.rev, store.linkrev
747 742 return [n for n in missing if rl(rr(n)) not in commonrevs]
748 743
749 def _packmanifests(self, dir, dirlog, revs, lookuplinknode):
744 def _packmanifests(self, dir, dirlog, revs, lookuplinknode,
745 clrevtolocalrev):
750 746 """Pack manifests into a changegroup stream.
751 747
752 748 Encodes the directory name in the output so multiple manifests
753 749 can be sent. Multiple manifests is not supported by cg1 and cg2.
754 750 """
755 751 if dir:
756 752 assert self.version == b'03'
757 753 yield _fileheader(dir)
758 754
759 755 for chunk in self.group(revs, dirlog, False, lookuplinknode,
760 units=_('manifests')):
756 units=_('manifests'),
757 clrevtolocalrev=clrevtolocalrev):
761 758 yield chunk
762 759
763 760 def generate(self, commonrevs, clnodes, fastpathlinkrev, source):
764 761 """Yield a sequence of changegroup byte chunks."""
765 762
766 763 repo = self._repo
767 764 cl = repo.changelog
768 765
769 766 self._verbosenote(_('uncompressed size of bundle content:\n'))
770 767 size = 0
771 768
772 769 clstate, chunks = self._generatechangelog(cl, clnodes)
773 770 for chunk in chunks:
774 771 size += len(chunk)
775 772 yield chunk
776 773
777 774 self._verbosenote(_('%8.i (changelog)\n') % size)
778 775
779 776 clrevorder = clstate['clrevorder']
780 777 mfs = clstate['mfs']
781 778 changedfiles = clstate['changedfiles']
782 779
783 if self._ellipses:
784 self._clrevtolocalrev = clstate['clrevtomanifestrev']
785
786 780 # We need to make sure that the linkrev in the changegroup refers to
787 781 # the first changeset that introduced the manifest or file revision.
788 782 # The fastpath is usually safer than the slowpath, because the filelogs
789 783 # are walked in revlog order.
790 784 #
791 785 # When taking the slowpath with reorder=None and the manifest revlog
792 786 # uses generaldelta, the manifest may be walked in the "wrong" order.
793 787 # Without 'clrevorder', we would get an incorrect linkrev (see fix in
794 788 # cc0ff93d0c0c).
795 789 #
796 790 # When taking the fastpath, we are only vulnerable to reordering
797 791 # of the changelog itself. The changelog never uses generaldelta, so
798 792 # it is only reordered when reorder=True. To handle this case, we
799 793 # simply take the slowpath, which already has the 'clrevorder' logic.
800 794 # This was also fixed in cc0ff93d0c0c.
801 795 fastpathlinkrev = fastpathlinkrev and not self._reorder
802 796 # Treemanifests don't work correctly with fastpathlinkrev
803 797 # either, because we don't discover which directory nodes to
804 798 # send along with files. This could probably be fixed.
805 799 fastpathlinkrev = fastpathlinkrev and (
806 800 'treemanifest' not in repo.requirements)
807 801
808 802 fnodes = {} # needed file nodes
809 803
810 804 for chunk in self.generatemanifests(commonrevs, clrevorder,
811 fastpathlinkrev, mfs, fnodes, source):
805 fastpathlinkrev, mfs, fnodes, source,
806 clstate['clrevtomanifestrev']):
812 807 yield chunk
813 808
814 809 mfdicts = None
815 810 if self._ellipses and self._isshallow:
816 811 mfdicts = [(self._repo.manifestlog[n].read(), lr)
817 812 for (n, lr) in mfs.iteritems()]
818 813
819 814 mfs.clear()
820 815 clrevs = set(cl.rev(x) for x in clnodes)
821 816
822 817 for chunk in self.generatefiles(changedfiles, commonrevs,
823 818 source, mfdicts, fastpathlinkrev,
824 819 fnodes, clrevs):
825 820 yield chunk
826 821
827 822 yield self._close()
828 823
829 824 if clnodes:
830 825 repo.hook('outgoing', node=hex(clnodes[0]), source=source)
831 826
832 827 def _generatechangelog(self, cl, nodes):
833 828 """Generate data for changelog chunks.
834 829
835 830 Returns a 2-tuple of a dict containing state and an iterable of
836 831 byte chunks. The state will not be fully populated until the
837 832 chunk stream has been fully consumed.
838 833 """
839 834 clrevorder = {}
840 835 mfs = {} # needed manifests
841 836 mfl = self._repo.manifestlog
842 837 # TODO violates storage abstraction.
843 838 mfrevlog = mfl._revlog
844 839 changedfiles = set()
845 840 clrevtomanifestrev = {}
846 841
847 842 # Callback for the changelog, used to collect changed files and
848 843 # manifest nodes.
849 844 # Returns the linkrev node (identity in the changelog case).
850 845 def lookupcl(x):
851 846 c = cl.read(x)
852 847 clrevorder[x] = len(clrevorder)
853 848
854 849 if self._ellipses:
855 850 # Only update mfs if x is going to be sent. Otherwise we
856 851 # end up with bogus linkrevs specified for manifests and
857 852 # we skip some manifest nodes that we should otherwise
858 853 # have sent.
859 854 if (x in self._fullclnodes
860 855 or cl.rev(x) in self._precomputedellipsis):
861 856 n = c[0]
862 857 # Record the first changeset introducing this manifest
863 858 # version.
864 859 mfs.setdefault(n, x)
865 860 # Set this narrow-specific dict so we have the lowest
866 861 # manifest revnum to look up for this cl revnum. (Part of
867 862 # mapping changelog ellipsis parents to manifest ellipsis
868 863 # parents)
869 864 clrevtomanifestrev.setdefault(cl.rev(x), mfrevlog.rev(n))
870 865 # We can't trust the changed files list in the changeset if the
871 866 # client requested a shallow clone.
872 867 if self._isshallow:
873 868 changedfiles.update(mfl[c[0]].read().keys())
874 869 else:
875 870 changedfiles.update(c[3])
876 871 else:
877 872
878 873 n = c[0]
879 874 # record the first changeset introducing this manifest version
880 875 mfs.setdefault(n, x)
881 876 # Record a complete list of potentially-changed files in
882 877 # this manifest.
883 878 changedfiles.update(c[3])
884 879
885 880 return x
886 881
887 882 # Changelog doesn't benefit from reordering revisions. So send out
888 883 # revisions in store order.
889 884 revs = sorted(cl.rev(n) for n in nodes)
890 885
891 886 state = {
892 887 'clrevorder': clrevorder,
893 888 'mfs': mfs,
894 889 'changedfiles': changedfiles,
895 890 'clrevtomanifestrev': clrevtomanifestrev,
896 891 }
897 892
898 gen = self.group(revs, cl, True, lookupcl, units=_('changesets'))
893 gen = self.group(revs, cl, True, lookupcl, units=_('changesets'),
894 clrevtolocalrev={})
899 895
900 896 return state, gen
901 897
902 898 def generatemanifests(self, commonrevs, clrevorder, fastpathlinkrev, mfs,
903 fnodes, source):
899 fnodes, source, clrevtolocalrev):
904 900 """Returns an iterator of changegroup chunks containing manifests.
905 901
906 902 `source` is unused here, but is used by extensions like remotefilelog to
907 903 change what is sent based in pulls vs pushes, etc.
908 904 """
909 905 repo = self._repo
910 906 cl = repo.changelog
911 907 mfl = repo.manifestlog
912 908 dirlog = mfl._revlog.dirlog
913 909 tmfnodes = {'': mfs}
914 910
915 911 # Callback for the manifest, used to collect linkrevs for filelog
916 912 # revisions.
917 913 # Returns the linkrev node (collected in lookupcl).
918 914 def makelookupmflinknode(dir, nodes):
919 915 if fastpathlinkrev:
920 916 assert not dir
921 917 return mfs.__getitem__
922 918
923 919 def lookupmflinknode(x):
924 920 """Callback for looking up the linknode for manifests.
925 921
926 922 Returns the linkrev node for the specified manifest.
927 923
928 924 SIDE EFFECT:
929 925
930 926 1) fclnodes gets populated with the list of relevant
931 927 file nodes if we're not using fastpathlinkrev
932 928 2) When treemanifests are in use, collects treemanifest nodes
933 929 to send
934 930
935 931 Note that this means manifests must be completely sent to
936 932 the client before you can trust the list of files and
937 933 treemanifests to send.
938 934 """
939 935 clnode = nodes[x]
940 936 mdata = mfl.get(dir, x).readfast(shallow=True)
941 937 for p, n, fl in mdata.iterentries():
942 938 if fl == 't': # subdirectory manifest
943 939 subdir = dir + p + '/'
944 940 tmfclnodes = tmfnodes.setdefault(subdir, {})
945 941 tmfclnode = tmfclnodes.setdefault(n, clnode)
946 942 if clrevorder[clnode] < clrevorder[tmfclnode]:
947 943 tmfclnodes[n] = clnode
948 944 else:
949 945 f = dir + p
950 946 fclnodes = fnodes.setdefault(f, {})
951 947 fclnode = fclnodes.setdefault(n, clnode)
952 948 if clrevorder[clnode] < clrevorder[fclnode]:
953 949 fclnodes[n] = clnode
954 950 return clnode
955 951 return lookupmflinknode
956 952
957 953 size = 0
958 954 while tmfnodes:
959 955 dir, nodes = tmfnodes.popitem()
960 956 store = dirlog(dir)
961 957 prunednodes = self._prune(store, nodes, commonrevs)
962 958 if not dir or prunednodes:
963 959 lookupfn = makelookupmflinknode(dir, nodes)
964 960
965 961 if self._ellipses:
966 962 revs = _sortnodesellipsis(store, prunednodes, cl,
967 963 lookupfn)
968 964 else:
969 965 revs = _sortnodesnormal(store, prunednodes,
970 966 self._reorder)
971 967
972 for x in self._packmanifests(dir, store, revs, lookupfn):
968 for x in self._packmanifests(dir, store, revs, lookupfn,
969 clrevtolocalrev):
973 970 size += len(x)
974 971 yield x
975 972 self._verbosenote(_('%8.i (manifests)\n') % size)
976 973 yield self._manifestsend
977 974
978 975 # The 'source' parameter is useful for extensions
979 976 def generatefiles(self, changedfiles, commonrevs, source,
980 977 mfdicts, fastpathlinkrev, fnodes, clrevs):
981 978 changedfiles = list(filter(self._filematcher, changedfiles))
982 979
983 980 if not fastpathlinkrev:
984 981 def normallinknodes(unused, fname):
985 982 return fnodes.get(fname, {})
986 983 else:
987 984 cln = self._repo.changelog.node
988 985
989 986 def normallinknodes(store, fname):
990 987 flinkrev = store.linkrev
991 988 fnode = store.node
992 989 revs = ((r, flinkrev(r)) for r in store)
993 990 return dict((fnode(r), cln(lr))
994 991 for r, lr in revs if lr in clrevs)
995 992
993 clrevtolocalrev = {}
994
996 995 if self._isshallow:
997 996 # In a shallow clone, the linknodes callback needs to also include
998 997 # those file nodes that are in the manifests we sent but weren't
999 998 # introduced by those manifests.
1000 999 commonctxs = [self._repo[c] for c in commonrevs]
1001 1000 clrev = self._repo.changelog.rev
1002 1001
1003 1002 # Defining this function has a side-effect of overriding the
1004 1003 # function of the same name that was passed in as an argument.
1005 1004 # TODO have caller pass in appropriate function.
1006 1005 def linknodes(flog, fname):
1007 1006 for c in commonctxs:
1008 1007 try:
1009 1008 fnode = c.filenode(fname)
1010 self._clrevtolocalrev[c.rev()] = flog.rev(fnode)
1009 clrevtolocalrev[c.rev()] = flog.rev(fnode)
1011 1010 except error.ManifestLookupError:
1012 1011 pass
1013 1012 links = normallinknodes(flog, fname)
1014 1013 if len(links) != len(mfdicts):
1015 1014 for mf, lr in mfdicts:
1016 1015 fnode = mf.get(fname, None)
1017 1016 if fnode in links:
1018 1017 links[fnode] = min(links[fnode], lr, key=clrev)
1019 1018 elif fnode:
1020 1019 links[fnode] = lr
1021 1020 return links
1022 1021 else:
1023 1022 linknodes = normallinknodes
1024 1023
1025 1024 repo = self._repo
1026 1025 cl = repo.changelog
1027 1026 progress = repo.ui.makeprogress(_('bundling'), unit=_('files'),
1028 1027 total=len(changedfiles))
1029 1028 for i, fname in enumerate(sorted(changedfiles)):
1030 1029 filerevlog = repo.file(fname)
1031 1030 if not filerevlog:
1032 1031 raise error.Abort(_("empty or missing file data for %s") %
1033 1032 fname)
1034 1033
1034 clrevtolocalrev.clear()
1035
1035 1036 linkrevnodes = linknodes(filerevlog, fname)
1036 1037 # Lookup for filenodes, we collected the linkrev nodes above in the
1037 1038 # fastpath case and with lookupmf in the slowpath case.
1038 1039 def lookupfilelog(x):
1039 1040 return linkrevnodes[x]
1040 1041
1041 1042 filenodes = self._prune(filerevlog, linkrevnodes, commonrevs)
1042 1043 if filenodes:
1043 1044 if self._ellipses:
1044 1045 revs = _sortnodesellipsis(filerevlog, filenodes,
1045 1046 cl, lookupfilelog)
1046 1047 else:
1047 1048 revs = _sortnodesnormal(filerevlog, filenodes,
1048 1049 self._reorder)
1049 1050
1050 1051 progress.update(i + 1, item=fname)
1051 1052 h = _fileheader(fname)
1052 1053 size = len(h)
1053 1054 yield h
1054 for chunk in self.group(revs, filerevlog, False, lookupfilelog):
1055 for chunk in self.group(revs, filerevlog, False, lookupfilelog,
1056 clrevtolocalrev=clrevtolocalrev):
1055 1057 size += len(chunk)
1056 1058 yield chunk
1057 1059 self._verbosenote(_('%8.i %s\n') % (size, fname))
1058 1060 progress.complete()
1059 1061
1060 def _revisiondeltanarrow(self, store, ischangelog, rev, linkrev, linknode):
1062 def _revisiondeltanarrow(self, store, ischangelog, rev, linkrev, linknode,
1063 clrevtolocalrev):
1061 1064 linkparents = self._precomputedellipsis[linkrev]
1062 1065 def local(clrev):
1063 1066 """Turn a changelog revnum into a local revnum.
1064 1067
1065 1068 The ellipsis dag is stored as revnums on the changelog,
1066 1069 but when we're producing ellipsis entries for
1067 1070 non-changelog revlogs, we need to turn those numbers into
1068 1071 something local. This does that for us, and during the
1069 1072 changelog sending phase will also expand the stored
1070 1073 mappings as needed.
1071 1074 """
1072 1075 if clrev == nullrev:
1073 1076 return nullrev
1074 1077
1075 1078 if ischangelog:
1076 1079 return clrev
1077 1080
1078 1081 # Walk the ellipsis-ized changelog breadth-first looking for a
1079 1082 # change that has been linked from the current revlog.
1080 1083 #
1081 1084 # For a flat manifest revlog only a single step should be necessary
1082 1085 # as all relevant changelog entries are relevant to the flat
1083 1086 # manifest.
1084 1087 #
1085 1088 # For a filelog or tree manifest dirlog however not every changelog
1086 1089 # entry will have been relevant, so we need to skip some changelog
1087 1090 # nodes even after ellipsis-izing.
1088 1091 walk = [clrev]
1089 1092 while walk:
1090 1093 p = walk[0]
1091 1094 walk = walk[1:]
1092 if p in self._clrevtolocalrev:
1093 return self._clrevtolocalrev[p]
1095 if p in clrevtolocalrev:
1096 return clrevtolocalrev[p]
1094 1097 elif p in self._fullclnodes:
1095 1098 walk.extend([pp for pp in self._repo.changelog.parentrevs(p)
1096 1099 if pp != nullrev])
1097 1100 elif p in self._precomputedellipsis:
1098 1101 walk.extend([pp for pp in self._precomputedellipsis[p]
1099 1102 if pp != nullrev])
1100 1103 else:
1101 1104 # In this case, we've got an ellipsis with parents
1102 1105 # outside the current bundle (likely an
1103 1106 # incremental pull). We "know" that we can use the
1104 1107 # value of this same revlog at whatever revision
1105 1108 # is pointed to by linknode. "Know" is in scare
1106 1109 # quotes because I haven't done enough examination
1107 1110 # of edge cases to convince myself this is really
1108 1111 # a fact - it works for all the (admittedly
1109 1112 # thorough) cases in our testsuite, but I would be
1110 1113 # somewhat unsurprised to find a case in the wild
1111 1114 # where this breaks down a bit. That said, I don't
1112 1115 # know if it would hurt anything.
1113 1116 for i in pycompat.xrange(rev, 0, -1):
1114 1117 if store.linkrev(i) == clrev:
1115 1118 return i
1116 1119 # We failed to resolve a parent for this node, so
1117 1120 # we crash the changegroup construction.
1118 1121 raise error.Abort(
1119 1122 'unable to resolve parent while packing %r %r'
1120 1123 ' for changeset %r' % (store.indexfile, rev, clrev))
1121 1124
1122 1125 return nullrev
1123 1126
1124 1127 if not linkparents or (
1125 1128 store.parentrevs(rev) == (nullrev, nullrev)):
1126 1129 p1, p2 = nullrev, nullrev
1127 1130 elif len(linkparents) == 1:
1128 1131 p1, = sorted(local(p) for p in linkparents)
1129 1132 p2 = nullrev
1130 1133 else:
1131 1134 p1, p2 = sorted(local(p) for p in linkparents)
1132 1135
1133 1136 n = store.node(rev)
1134 1137 p1n, p2n = store.node(p1), store.node(p2)
1135 1138 flags = store.flags(rev)
1136 1139 flags |= revlog.REVIDX_ELLIPSIS
1137 1140
1138 1141 # TODO: try and actually send deltas for ellipsis data blocks
1139 1142 data = store.revision(n)
1140 1143 diffheader = mdiff.trivialdiffheader(len(data))
1141 1144
1142 1145 return revisiondelta(
1143 1146 node=n,
1144 1147 p1node=p1n,
1145 1148 p2node=p2n,
1146 1149 basenode=nullid,
1147 1150 linknode=linknode,
1148 1151 flags=flags,
1149 1152 deltachunks=(diffheader, data),
1150 1153 )
1151 1154
1152 1155 def _deltaparentprev(store, rev, p1, p2, prev):
1153 1156 """Resolve a delta parent to the previous revision.
1154 1157
1155 1158 Used for version 1 changegroups, which don't support generaldelta.
1156 1159 """
1157 1160 return prev
1158 1161
def _deltaparentgeneraldelta(store, rev, p1, p2, prev):
    """Resolve a delta parent when general deltas are supported."""
    dp = store.deltaparent(rev)
    if dp == nullrev and store.storedeltachains:
        # Avoid sending full revisions when delta parent is null. Pick prev
        # in that case. It's tempting to pick p1 in this case, as p1 will
        # be smaller in the common case. However, computing a delta against
        # p1 may require resolving the raw text of p1, which could be
        # expensive. The revlog caches should have prev cached, meaning
        # less CPU for changegroup generation. There is likely room to add
        # a flag and/or config option to control this behavior.
        base = prev
    elif dp == nullrev:
        # revlog is configured to use full snapshot for a reason,
        # stick to full snapshot.
        base = nullrev
    elif dp not in (p1, p2, prev):
        # Pick prev when we can't be sure remote has the base revision.
        return prev
    else:
        base = dp

    # Fall back to a full snapshot when a delta against the chosen base
    # is not permitted by the store.
    if base != nullrev and not store.candelta(base, rev):
        base = nullrev

    return base
1185 1188
1186 1189 def _deltaparentellipses(store, rev, p1, p2, prev):
1187 1190 """Resolve a delta parent when in ellipses mode."""
1188 1191 # TODO: send better deltas when in narrow mode.
1189 1192 #
1190 1193 # changegroup.group() loops over revisions to send,
1191 1194 # including revisions we'll skip. What this means is that
1192 1195 # `prev` will be a potentially useless delta base for all
1193 1196 # ellipsis nodes, as the client likely won't have it. In
1194 1197 # the future we should do bookkeeping about which nodes
1195 1198 # have been sent to the client, and try to be
1196 1199 # significantly smarter about delta bases. This is
1197 1200 # slightly tricky because this same code has to work for
1198 1201 # all revlogs, and we don't have the linkrev/linknode here.
1199 1202 return p1
1200 1203
def _makecg1packer(repo, filematcher, bundlecaps, ellipses=False,
                   shallow=False, ellipsisroots=None, fullnodes=None):
    # cg1 headers carry no delta base node and no flags.
    def builddeltaheader(d):
        return _CHANGEGROUPV1_DELTA_HEADER.pack(
            d.node, d.p1node, d.p2node, d.linknode)

    return cgpacker(repo, filematcher, b'01',
                    deltaparentfn=_deltaparentprev,
                    allowreorder=None,
                    builddeltaheader=builddeltaheader,
                    manifestsend=b'',
                    bundlecaps=bundlecaps,
                    ellipses=ellipses,
                    shallow=shallow,
                    ellipsisroots=ellipsisroots,
                    fullnodes=fullnodes)
1216 1219
def _makecg2packer(repo, filematcher, bundlecaps, ellipses=False,
                   shallow=False, ellipsisroots=None, fullnodes=None):
    # cg2 headers add the delta base node.
    def builddeltaheader(d):
        return _CHANGEGROUPV2_DELTA_HEADER.pack(
            d.node, d.p1node, d.p2node, d.basenode, d.linknode)

    # Since generaldelta is directly supported by cg2, reordering
    # generally doesn't help, so we disable it by default (treating
    # bundle.reorder=auto just like bundle.reorder=False).
    return cgpacker(repo, filematcher, b'02',
                    deltaparentfn=_deltaparentgeneraldelta,
                    allowreorder=False,
                    builddeltaheader=builddeltaheader,
                    manifestsend=b'',
                    bundlecaps=bundlecaps,
                    ellipses=ellipses,
                    shallow=shallow,
                    ellipsisroots=ellipsisroots,
                    fullnodes=fullnodes)
1235 1238
def _makecg3packer(repo, filematcher, bundlecaps, ellipses=False,
                   shallow=False, ellipsisroots=None, fullnodes=None):
    # cg3 headers add revision flags on top of the cg2 fields.
    def builddeltaheader(d):
        return _CHANGEGROUPV3_DELTA_HEADER.pack(
            d.node, d.p1node, d.p2node, d.basenode, d.linknode, d.flags)

    deltaparentfn = (_deltaparentellipses if ellipses
                     else _deltaparentgeneraldelta)

    return cgpacker(repo, filematcher, b'03',
                    deltaparentfn=deltaparentfn,
                    allowreorder=False,
                    builddeltaheader=builddeltaheader,
                    manifestsend=closechunk(),
                    bundlecaps=bundlecaps,
                    ellipses=ellipses,
                    shallow=shallow,
                    ellipsisroots=ellipsisroots,
                    fullnodes=fullnodes)
1254 1257
# Maps changegroup version identifier to (packer factory, unpacker class).
_packermap = {'01': (_makecg1packer, cg1unpacker),
              # cg2 adds support for exchanging generaldelta
              '02': (_makecg2packer, cg2unpacker),
              # cg3 adds support for exchanging revlog flags and treemanifests
              '03': (_makecg3packer, cg3unpacker),
}
1261 1264
def allsupportedversions(repo):
    """Return the set of all changegroup versions this repo knows about.

    '03' is only advertised when tree manifests or the experimental
    changegroup3/treemanifest config knobs are in play.
    """
    versions = set(_packermap.keys())
    if not (repo.ui.configbool('experimental', 'changegroup3') or
            repo.ui.configbool('experimental', 'treemanifest') or
            'treemanifest' in repo.requirements):
        versions.discard('03')
    return versions
1269 1272
# Changegroup versions that can be applied to the repo
def supportedincomingversions(repo):
    """Return changegroup versions that can be applied to this repo."""
    return allsupportedversions(repo)
1273 1276
# Changegroup versions that can be created from the repo
def supportedoutgoingversions(repo):
    """Return changegroup versions that can be created from this repo."""
    versions = allsupportedversions(repo)
    if 'treemanifest' in repo.requirements:
        # Versions 01 and 02 support only flat manifests and it's just too
        # expensive to convert between the flat manifest and tree manifest on
        # the fly. Since tree manifests are hashed differently, all of history
        # would have to be converted. Instead, we simply don't even pretend to
        # support versions 01 and 02.
        versions.discard('01')
        versions.discard('02')
    if repository.NARROW_REQUIREMENT in repo.requirements:
        # Versions 01 and 02 don't support revlog flags, and we need to
        # support that for stripping and unbundling to work.
        versions.discard('01')
        versions.discard('02')
    if LFS_REQUIREMENT in repo.requirements:
        # Versions 01 and 02 don't support revlog flags, and we need to
        # mark LFS entries with REVIDX_EXTSTORED.
        versions.discard('01')
        versions.discard('02')

    return versions
1297 1300
def localversion(repo):
    # Finds the best version to use for bundles that are meant to be used
    # locally, such as those from strip and shelve, and temporary bundles.
    return max(supportedoutgoingversions(repo))
1302 1305
def safeversion(repo):
    # Finds the smallest version that it's safe to assume clients of the repo
    # will support. For example, all hg versions that support generaldelta also
    # support changegroup 02.
    versions = supportedoutgoingversions(repo)
    if 'generaldelta' in repo.requirements:
        versions.discard('01')
    assert versions
    return min(versions)
1312 1315
def getbundler(version, repo, bundlecaps=None, filematcher=None,
               ellipses=False, shallow=False, ellipsisroots=None,
               fullnodes=None):
    """Instantiate the changegroup packer for ``version``.

    Raises if the requested feature combination (sparse matchers on 01,
    ellipsis nodes below cg3) is unsupported.
    """
    assert version in supportedoutgoingversions(repo)

    if filematcher is None:
        filematcher = matchmod.alwaysmatcher(repo.root, '')

    if version == '01' and not filematcher.always():
        raise error.ProgrammingError('version 01 changegroups do not support '
                                     'sparse file matchers')

    if ellipses and version in (b'01', b'02'):
        raise error.Abort(
            _('ellipsis nodes require at least cg3 on client and server, '
              'but negotiated version %s') % version)

    # Requested files could include files not in the local store. So
    # filter those out.
    filematcher = matchmod.intersectmatchers(repo.narrowmatch(),
                                             filematcher)

    fn = _packermap[version][0]
    return fn(repo, filematcher, bundlecaps, ellipses=ellipses,
              shallow=shallow, ellipsisroots=ellipsisroots,
              fullnodes=fullnodes)
1339 1342
def getunbundler(version, fh, alg, extras=None):
    """Instantiate the changegroup unpacker for ``version`` over ``fh``."""
    return _packermap[version][1](fh, alg, extras=extras)
1342 1345
def _changegroupinfo(repo, nodes, source):
    """Report how many (and, when debugging, which) changesets were found."""
    if repo.ui.verbose or source == 'bundle':
        repo.ui.status(_("%d changesets found\n") % len(nodes))
    if repo.ui.debugflag:
        repo.ui.debug("list of changesets:\n")
        for n in nodes:
            repo.ui.debug("%s\n" % hex(n))
1350 1353
def makechangegroup(repo, outgoing, version, source, fastpath=False,
                    bundlecaps=None):
    """Build a changegroup and wrap it in the matching unbundler."""
    cgstream = makestream(repo, outgoing, version, source,
                          fastpath=fastpath, bundlecaps=bundlecaps)
    return getunbundler(version, util.chunkbuffer(cgstream), None,
                        {'clcount': len(outgoing.missing) })
1357 1360
def makestream(repo, outgoing, version, source, fastpath=False,
               bundlecaps=None, filematcher=None):
    """Return an iterable of changegroup chunks for ``outgoing``."""
    bundler = getbundler(version, repo, bundlecaps=bundlecaps,
                         filematcher=filematcher)

    repo = repo.unfiltered()
    commonrevs = outgoing.common
    csets = outgoing.missing
    heads = outgoing.missingheads
    # We go through the fast path if we get told to, or if all (unfiltered
    # heads have been requested (since we then know there all linkrevs will
    # be pulled by the client).
    heads.sort()
    fastpathlinkrev = fastpath or (
        repo.filtername is None and heads == sorted(repo.heads()))

    repo.hook('preoutgoing', throw=True, source=source)
    _changegroupinfo(repo, csets, source)
    return bundler.generate(commonrevs, csets, fastpathlinkrev, source)
1377 1380
def _addchangegroupfiles(repo, source, revmap, trp, expectedfiles, needfiles):
    """Apply incoming filelog groups and verify needed nodes arrived.

    Returns a (revisions, files) tuple counting what was added. Aborts on
    empty groups, censored delta bases, spurious entries, or nodes that
    were expected but never received.
    """
    revisions = 0
    files = 0
    progress = repo.ui.makeprogress(_('files'), unit=_('files'),
                                    total=expectedfiles)
    for chunkdata in iter(source.filelogheader, {}):
        files += 1
        f = chunkdata["filename"]
        repo.ui.debug("adding %s revisions\n" % f)
        progress.increment()
        fl = repo.file(f)
        o = len(fl)
        try:
            deltas = source.deltaiter()
            if not fl.addgroup(deltas, revmap, trp):
                raise error.Abort(_("received file revlog group is empty"))
        except error.CensoredBaseError as e:
            raise error.Abort(_("received delta base is censored: %s") % e)
        revisions += len(fl) - o
        if f in needfiles:
            needs = needfiles[f]
            for new in pycompat.xrange(o, len(fl)):
                n = fl.node(new)
                if n in needs:
                    needs.remove(n)
                else:
                    raise error.Abort(
                        _("received spurious file revlog entry"))
            if not needs:
                del needfiles[f]
    progress.complete()

    # Anything left in needfiles never arrived in the stream.
    for f, needs in needfiles.iteritems():
        fl = repo.file(f)
        for n in needs:
            try:
                fl.rev(n)
            except error.LookupError:
                raise error.Abort(
                    _('missing file data for %s:%s - run hg verify') %
                    (f, hex(n)))

    return revisions, files
General Comments 0
You need to be logged in to leave comments. Login now