changegroup: pass mfdicts properly...
Gregory Szorc
r39019:fbbda9ff default
@@ -1,1449 +1,1438
1 1 # changegroup.py - Mercurial changegroup manipulation functions
2 2 #
3 3 # Copyright 2006 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import os
11 11 import struct
12 12 import weakref
13 13
14 14 from .i18n import _
15 15 from .node import (
16 16 hex,
17 17 nullid,
18 18 nullrev,
19 19 short,
20 20 )
21 21
22 22 from .thirdparty import (
23 23 attr,
24 24 )
25 25
26 26 from . import (
27 27 dagutil,
28 28 error,
29 29 manifest,
30 30 match as matchmod,
31 31 mdiff,
32 32 phases,
33 33 pycompat,
34 34 repository,
35 35 revlog,
36 36 util,
37 37 )
38 38
39 39 from .utils import (
40 40 stringutil,
41 41 )
42 42
43 43 _CHANGEGROUPV1_DELTA_HEADER = struct.Struct("20s20s20s20s")
44 44 _CHANGEGROUPV2_DELTA_HEADER = struct.Struct("20s20s20s20s20s")
45 45 _CHANGEGROUPV3_DELTA_HEADER = struct.Struct(">20s20s20s20s20sH")
46 46
47 47 LFS_REQUIREMENT = 'lfs'
48 48
49 49 readexactly = util.readexactly
50 50
51 51 def getchunk(stream):
52 52 """return the next chunk from stream as a string"""
53 53 d = readexactly(stream, 4)
54 54 l = struct.unpack(">l", d)[0]
55 55 if l <= 4:
56 56 if l:
57 57 raise error.Abort(_("invalid chunk length %d") % l)
58 58 return ""
59 59 return readexactly(stream, l - 4)
60 60
61 61 def chunkheader(length):
62 62 """return a changegroup chunk header (string)"""
63 63 return struct.pack(">l", length + 4)
64 64
65 65 def closechunk():
66 66 """return a changegroup chunk header (string) for a zero-length chunk"""
67 67 return struct.pack(">l", 0)
68 68
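# Hedged, self-contained demo of the framing implemented by getchunk(),
# chunkheader() and closechunk() above (illustrative only; the helper name
# is hypothetical and not part of this change): a chunk is a 4-byte
# big-endian length that counts the length field itself, followed by the
# payload, and a zero length closes a group.
def _demochunkframing():
    import io
    payload = b'hello'
    stream = io.BytesIO(chunkheader(len(payload)) + payload + closechunk())
    assert getchunk(stream) == b'hello'  # payload round-trips
    assert getchunk(stream) == ""        # zero-length chunk ends the group
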
69 69 def _fileheader(path):
70 70 """Obtain a changegroup chunk header for a named path."""
71 71 return chunkheader(len(path)) + path
72 72
73 73 def writechunks(ui, chunks, filename, vfs=None):
74 74 """Write chunks to a file and return its filename.
75 75
76 76 The stream is assumed to be a bundle file.
77 77 Existing files will not be overwritten.
78 78 If no filename is specified, a temporary file is created.
79 79 """
80 80 fh = None
81 81 cleanup = None
82 82 try:
83 83 if filename:
84 84 if vfs:
85 85 fh = vfs.open(filename, "wb")
86 86 else:
87 87 # Increase default buffer size because default is usually
88 88 # small (4k is common on Linux).
89 89 fh = open(filename, "wb", 131072)
90 90 else:
91 91 fd, filename = pycompat.mkstemp(prefix="hg-bundle-", suffix=".hg")
92 92 fh = os.fdopen(fd, r"wb")
93 93 cleanup = filename
94 94 for c in chunks:
95 95 fh.write(c)
96 96 cleanup = None
97 97 return filename
98 98 finally:
99 99 if fh is not None:
100 100 fh.close()
101 101 if cleanup is not None:
102 102 if filename and vfs:
103 103 vfs.unlink(cleanup)
104 104 else:
105 105 os.unlink(cleanup)
106 106
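# Hedged usage sketch for writechunks() (the 'cgstream' iterable and the
# output name are hypothetical):
#
#     fname = writechunks(repo.ui, cgstream, 'out.hg')
#
# Passing filename=None writes to a temporary hg-bundle-*.hg file and
# returns its generated name.
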
107 107 class cg1unpacker(object):
108 108 """Unpacker for cg1 changegroup streams.
109 109
110 110 A changegroup unpacker handles the framing of the revision data in
111 111 the wire format. Most consumers will want to use the apply()
112 112 method to add the changes from the changegroup to a repository.
113 113
114 114 If you're forwarding a changegroup unmodified to another consumer,
115 115 use getchunks(), which returns an iterator of changegroup
116 116 chunks. This is mostly useful for cases where you need to know the
117 117 data stream has ended by observing the end of the changegroup.
118 118
119 119 deltachunk() is useful only if you're applying delta data. Most
120 120 consumers should prefer apply() instead.
121 121
122 122 A few other public methods exist. Those are used only for
123 123 bundlerepo and some debug commands - their use is discouraged.
124 124 """
125 125 deltaheader = _CHANGEGROUPV1_DELTA_HEADER
126 126 deltaheadersize = deltaheader.size
127 127 version = '01'
128 128 _grouplistcount = 1 # One list of files after the manifests
129 129
130 130 def __init__(self, fh, alg, extras=None):
131 131 if alg is None:
132 132 alg = 'UN'
133 133 if alg not in util.compengines.supportedbundletypes:
134 134 raise error.Abort(_('unknown stream compression type: %s')
135 135 % alg)
136 136 if alg == 'BZ':
137 137 alg = '_truncatedBZ'
138 138
139 139 compengine = util.compengines.forbundletype(alg)
140 140 self._stream = compengine.decompressorreader(fh)
141 141 self._type = alg
142 142 self.extras = extras or {}
143 143 self.callback = None
144 144
145 145 # These methods (compressed, read, seek, tell) all appear to only
146 146 # be used by bundlerepo, but it's a little hard to tell.
147 147 def compressed(self):
148 148 return self._type is not None and self._type != 'UN'
149 149 def read(self, l):
150 150 return self._stream.read(l)
151 151 def seek(self, pos):
152 152 return self._stream.seek(pos)
153 153 def tell(self):
154 154 return self._stream.tell()
155 155 def close(self):
156 156 return self._stream.close()
157 157
158 158 def _chunklength(self):
159 159 d = readexactly(self._stream, 4)
160 160 l = struct.unpack(">l", d)[0]
161 161 if l <= 4:
162 162 if l:
163 163 raise error.Abort(_("invalid chunk length %d") % l)
164 164 return 0
165 165 if self.callback:
166 166 self.callback()
167 167 return l - 4
168 168
169 169 def changelogheader(self):
170 170 """v10 does not have a changelog header chunk"""
171 171 return {}
172 172
173 173 def manifestheader(self):
174 174 """v10 does not have a manifest header chunk"""
175 175 return {}
176 176
177 177 def filelogheader(self):
178 178 """return the header of the filelogs chunk, v10 only has the filename"""
179 179 l = self._chunklength()
180 180 if not l:
181 181 return {}
182 182 fname = readexactly(self._stream, l)
183 183 return {'filename': fname}
184 184
185 185 def _deltaheader(self, headertuple, prevnode):
186 186 node, p1, p2, cs = headertuple
187 187 if prevnode is None:
188 188 deltabase = p1
189 189 else:
190 190 deltabase = prevnode
191 191 flags = 0
192 192 return node, p1, p2, deltabase, cs, flags
193 193
194 194 def deltachunk(self, prevnode):
195 195 l = self._chunklength()
196 196 if not l:
197 197 return {}
198 198 headerdata = readexactly(self._stream, self.deltaheadersize)
199 199 header = self.deltaheader.unpack(headerdata)
200 200 delta = readexactly(self._stream, l - self.deltaheadersize)
201 201 node, p1, p2, deltabase, cs, flags = self._deltaheader(header, prevnode)
202 202 return (node, p1, p2, cs, deltabase, delta, flags)
203 203
204 204 def getchunks(self):
205 205 """returns all the chunks contains in the bundle
206 206
207 207 Used when you need to forward the binary stream to a file or another
208 208 network API. To do so, it parses the changegroup data; otherwise it
209 209 would block on an sshrepo stream, not knowing where the stream ends.
210 210 """
211 211 # For changegroup 1 and 2, we expect 3 parts: changelog, manifestlog,
212 212 # and a list of filelogs. For changegroup 3, we expect 4 parts:
213 213 # changelog, manifestlog, a list of tree manifestlogs, and a list of
214 214 # filelogs.
215 215 #
216 216 # Changelog and manifestlog parts are terminated with empty chunks. The
217 217 # tree and file parts are a list of entry sections. Each entry section
218 218 # is a series of chunks terminating in an empty chunk. The list of these
219 219 # entry sections is terminated in yet another empty chunk, so we know
220 220 # we've reached the end of the tree/file list when we reach an empty
221 221 # chunk that was preceded by no non-empty chunks.
222 222
223 223 parts = 0
224 224 while parts < 2 + self._grouplistcount:
225 225 noentries = True
226 226 while True:
227 227 chunk = getchunk(self)
228 228 if not chunk:
229 229 # The first two empty chunks represent the end of the
230 230 # changelog and the manifestlog portions. The remaining
231 231 # empty chunks represent either A) the end of individual
232 232 # tree or file entries in the file list, or B) the end of
233 233 # the entire list. It's the end of the entire list if there
234 234 # were no entries (i.e. noentries is True).
235 235 if parts < 2:
236 236 parts += 1
237 237 elif noentries:
238 238 parts += 1
239 239 break
240 240 noentries = False
241 241 yield chunkheader(len(chunk))
242 242 pos = 0
243 243 while pos < len(chunk):
244 244 next = pos + 2**20
245 245 yield chunk[pos:next]
246 246 pos = next
247 247 yield closechunk()
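
    # Hedged forwarding sketch: getchunks() preserves the wire framing,
    # so the stream can be written out verbatim ('ofp' is a hypothetical
    # output file object):
    #
    #     for chunk in unpacker.getchunks():
    #         ofp.write(chunk)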
248 248
249 249 def _unpackmanifests(self, repo, revmap, trp, prog):
250 250 self.callback = prog.increment
251 251 # no need to check for empty manifest group here:
252 252 # if the result of the merge of 1 and 2 is the same in 3 and 4,
253 253 # no new manifest will be created and the manifest group will
254 254 # be empty during the pull
255 255 self.manifestheader()
256 256 deltas = self.deltaiter()
257 257 repo.manifestlog.addgroup(deltas, revmap, trp)
258 258 prog.complete()
259 259 self.callback = None
260 260
261 261 def apply(self, repo, tr, srctype, url, targetphase=phases.draft,
262 262 expectedtotal=None):
263 263 """Add the changegroup returned by source.read() to this repo.
264 264 srctype is a string like 'push', 'pull', or 'unbundle'. url is
265 265 the URL of the repo where this changegroup is coming from.
266 266
267 267 Return an integer summarizing the change to this repo:
268 268 - nothing changed or no source: 0
269 269 - more heads than before: 1+added heads (2..n)
270 270 - fewer heads than before: -1-removed heads (-2..-n)
271 271 - number of heads stays the same: 1
272 272 """
273 273 repo = repo.unfiltered()
274 274 def csmap(x):
275 275 repo.ui.debug("add changeset %s\n" % short(x))
276 276 return len(cl)
277 277
278 278 def revmap(x):
279 279 return cl.rev(x)
280 280
281 281 changesets = files = revisions = 0
282 282
283 283 try:
284 284 # The transaction may already carry source information. In this
285 285 # case we use the top level data. We overwrite the argument
286 286 # because we need to use the top level value (if they exist)
287 287 # in this function.
288 288 srctype = tr.hookargs.setdefault('source', srctype)
289 289 url = tr.hookargs.setdefault('url', url)
290 290 repo.hook('prechangegroup',
291 291 throw=True, **pycompat.strkwargs(tr.hookargs))
292 292
293 293 # write changelog data to temp files so concurrent readers
294 294 # will not see an inconsistent view
295 295 cl = repo.changelog
296 296 cl.delayupdate(tr)
297 297 oldheads = set(cl.heads())
298 298
299 299 trp = weakref.proxy(tr)
300 300 # pull off the changeset group
301 301 repo.ui.status(_("adding changesets\n"))
302 302 clstart = len(cl)
303 303 progress = repo.ui.makeprogress(_('changesets'), unit=_('chunks'),
304 304 total=expectedtotal)
305 305 self.callback = progress.increment
306 306
307 307 efiles = set()
308 308 def onchangelog(cl, node):
309 309 efiles.update(cl.readfiles(node))
310 310
311 311 self.changelogheader()
312 312 deltas = self.deltaiter()
313 313 cgnodes = cl.addgroup(deltas, csmap, trp, addrevisioncb=onchangelog)
314 314 efiles = len(efiles)
315 315
316 316 if not cgnodes:
317 317 repo.ui.develwarn('applied empty changegroup',
318 318 config='warn-empty-changegroup')
319 319 clend = len(cl)
320 320 changesets = clend - clstart
321 321 progress.complete()
322 322 self.callback = None
323 323
324 324 # pull off the manifest group
325 325 repo.ui.status(_("adding manifests\n"))
326 326 # We know that we'll never have more manifests than we had
327 327 # changesets.
328 328 progress = repo.ui.makeprogress(_('manifests'), unit=_('chunks'),
329 329 total=changesets)
330 330 self._unpackmanifests(repo, revmap, trp, progress)
331 331
332 332 needfiles = {}
333 333 if repo.ui.configbool('server', 'validate'):
334 334 cl = repo.changelog
335 335 ml = repo.manifestlog
336 336 # validate incoming csets have their manifests
337 337 for cset in pycompat.xrange(clstart, clend):
338 338 mfnode = cl.changelogrevision(cset).manifest
339 339 mfest = ml[mfnode].readdelta()
340 340 # store file cgnodes we must see
341 341 for f, n in mfest.iteritems():
342 342 needfiles.setdefault(f, set()).add(n)
343 343
344 344 # process the files
345 345 repo.ui.status(_("adding file changes\n"))
346 346 newrevs, newfiles = _addchangegroupfiles(
347 347 repo, self, revmap, trp, efiles, needfiles)
348 348 revisions += newrevs
349 349 files += newfiles
350 350
351 351 deltaheads = 0
352 352 if oldheads:
353 353 heads = cl.heads()
354 354 deltaheads = len(heads) - len(oldheads)
355 355 for h in heads:
356 356 if h not in oldheads and repo[h].closesbranch():
357 357 deltaheads -= 1
358 358 htext = ""
359 359 if deltaheads:
360 360 htext = _(" (%+d heads)") % deltaheads
361 361
362 362 repo.ui.status(_("added %d changesets"
363 363 " with %d changes to %d files%s\n")
364 364 % (changesets, revisions, files, htext))
365 365 repo.invalidatevolatilesets()
366 366
367 367 if changesets > 0:
368 368 if 'node' not in tr.hookargs:
369 369 tr.hookargs['node'] = hex(cl.node(clstart))
370 370 tr.hookargs['node_last'] = hex(cl.node(clend - 1))
371 371 hookargs = dict(tr.hookargs)
372 372 else:
373 373 hookargs = dict(tr.hookargs)
374 374 hookargs['node'] = hex(cl.node(clstart))
375 375 hookargs['node_last'] = hex(cl.node(clend - 1))
376 376 repo.hook('pretxnchangegroup',
377 377 throw=True, **pycompat.strkwargs(hookargs))
378 378
379 379 added = [cl.node(r) for r in pycompat.xrange(clstart, clend)]
380 380 phaseall = None
381 381 if srctype in ('push', 'serve'):
382 382 # Old servers can not push the boundary themselves.
383 383 # New servers won't push the boundary if changeset already
384 384 # exists locally as secret
385 385 #
386 386 # We should not use added here but the list of all changes in
387 387 # the bundle
388 388 if repo.publishing():
389 389 targetphase = phaseall = phases.public
390 390 else:
391 391 # closer target phase computation
392 392
393 393 # Those changesets have been pushed from the
394 394 # outside, their phases are going to be pushed
395 395 # alongside. Therefore `targetphase` is
396 396 # ignored.
397 397 targetphase = phaseall = phases.draft
398 398 if added:
399 399 phases.registernew(repo, tr, targetphase, added)
400 400 if phaseall is not None:
401 401 phases.advanceboundary(repo, tr, phaseall, cgnodes)
402 402
403 403 if changesets > 0:
404 404
405 405 def runhooks():
406 406 # These hooks run when the lock releases, not when the
407 407 # transaction closes. So it's possible for the changelog
408 408 # to have changed since we last saw it.
409 409 if clstart >= len(repo):
410 410 return
411 411
412 412 repo.hook("changegroup", **pycompat.strkwargs(hookargs))
413 413
414 414 for n in added:
415 415 args = hookargs.copy()
416 416 args['node'] = hex(n)
417 417 del args['node_last']
418 418 repo.hook("incoming", **pycompat.strkwargs(args))
419 419
420 420 newheads = [h for h in repo.heads()
421 421 if h not in oldheads]
422 422 repo.ui.log("incoming",
423 423 "%d incoming changes - new heads: %s\n",
424 424 len(added),
425 425 ', '.join([hex(c[:6]) for c in newheads]))
426 426
427 427 tr.addpostclose('changegroup-runhooks-%020i' % clstart,
428 428 lambda tr: repo._afterlock(runhooks))
429 429 finally:
430 430 repo.ui.flush()
431 431 # never return 0 here:
432 432 if deltaheads < 0:
433 433 ret = deltaheads - 1
434 434 else:
435 435 ret = deltaheads + 1
436 436 return ret
437 437
438 438 def deltaiter(self):
439 439 """
440 440 returns an iterator of the deltas in this changegroup
441 441
442 442 Useful for passing to the underlying storage system to be stored.
443 443 """
444 444 chain = None
445 445 for chunkdata in iter(lambda: self.deltachunk(chain), {}):
446 446 # Chunkdata: (node, p1, p2, cs, deltabase, delta, flags)
447 447 yield chunkdata
448 448 chain = chunkdata[0]
449 449
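# Hedged usage sketch for the unpacker (the bundle path is hypothetical;
# 'UN' means uncompressed):
#
#     fh = open('bundle.hg', 'rb')
#     unpacker = cg1unpacker(fh, 'UN')
#     with repo.lock(), repo.transaction('unbundle') as tr:
#         ret = unpacker.apply(repo, tr, 'unbundle', 'bundle:bundle.hg')
#
# apply() returns a summary integer as described in its docstring
# (positive when heads were added, negative when heads were removed).
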
450 450 class cg2unpacker(cg1unpacker):
451 451 """Unpacker for cg2 streams.
452 452
453 453 cg2 streams add support for generaldelta, so the delta header
454 454 format is slightly different. All other features about the data
455 455 remain the same.
456 456 """
457 457 deltaheader = _CHANGEGROUPV2_DELTA_HEADER
458 458 deltaheadersize = deltaheader.size
459 459 version = '02'
460 460
461 461 def _deltaheader(self, headertuple, prevnode):
462 462 node, p1, p2, deltabase, cs = headertuple
463 463 flags = 0
464 464 return node, p1, p2, deltabase, cs, flags
465 465
466 466 class cg3unpacker(cg2unpacker):
467 467 """Unpacker for cg3 streams.
468 468
469 469 cg3 streams add support for exchanging treemanifests and revlog
470 470 flags. It adds the revlog flags to the delta header and an empty chunk
471 471 separating manifests and files.
472 472 """
473 473 deltaheader = _CHANGEGROUPV3_DELTA_HEADER
474 474 deltaheadersize = deltaheader.size
475 475 version = '03'
476 476 _grouplistcount = 2 # One list of manifests and one list of files
477 477
478 478 def _deltaheader(self, headertuple, prevnode):
479 479 node, p1, p2, deltabase, cs, flags = headertuple
480 480 return node, p1, p2, deltabase, cs, flags
481 481
482 482 def _unpackmanifests(self, repo, revmap, trp, prog):
483 483 super(cg3unpacker, self)._unpackmanifests(repo, revmap, trp, prog)
484 484 for chunkdata in iter(self.filelogheader, {}):
485 485 # If we get here, there are directory manifests in the changegroup
486 486 d = chunkdata["filename"]
487 487 repo.ui.debug("adding %s revisions\n" % d)
488 488 dirlog = repo.manifestlog._revlog.dirlog(d)
489 489 deltas = self.deltaiter()
490 490 if not dirlog.addgroup(deltas, revmap, trp):
491 491 raise error.Abort(_("received dir revlog group is empty"))
492 492
493 493 class headerlessfixup(object):
494 494 def __init__(self, fh, h):
495 495 self._h = h
496 496 self._fh = fh
497 497 def read(self, n):
498 498 if self._h:
499 499 d, self._h = self._h[:n], self._h[n:]
500 500 if len(d) < n:
501 501 d += readexactly(self._fh, n - len(d))
502 502 return d
503 503 return readexactly(self._fh, n)
504 504
505 505 @attr.s(slots=True, frozen=True)
506 506 class revisiondelta(object):
507 507 """Describes a delta entry in a changegroup.
508 508
509 509 Captured data is sufficient to serialize the delta into multiple
510 510 formats.
511 511 """
512 512 # 20 byte node of this revision.
513 513 node = attr.ib()
514 514 # 20 byte nodes of parent revisions.
515 515 p1node = attr.ib()
516 516 p2node = attr.ib()
517 517 # 20 byte node of the revision this delta is against.
518 518 basenode = attr.ib()
519 519 # 20 byte node of changeset revision this delta is associated with.
520 520 linknode = attr.ib()
521 521 # 2 bytes of flags to apply to revision data.
522 522 flags = attr.ib()
523 523 # Iterable of chunks holding raw delta data.
524 524 deltachunks = attr.ib()
525 525
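# Hedged illustration: a full-text entry (a "delta" against the null
# revision), as built by the packers below, carries an mdiff header plus
# the raw payload in deltachunks (flags=0 here is illustrative):
#
#     revisiondelta(node=n, p1node=p1n, p2node=p2n, basenode=nullid,
#                   linknode=linknode, flags=0,
#                   deltachunks=(mdiff.trivialdiffheader(len(text)), text))
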
526 526 def _sortnodesnormal(store, nodes, reorder):
527 527 """Sort nodes for changegroup generation and turn into revnums."""
528 528 # for generaldelta revlogs, we linearize the revs; this will both be
529 529 # much quicker and generate a much smaller bundle
530 530 if (store._generaldelta and reorder is None) or reorder:
531 531 dag = dagutil.revlogdag(store)
532 532 return dag.linearize(set(store.rev(n) for n in nodes))
533 533 else:
534 534 return sorted([store.rev(n) for n in nodes])
535 535
536 536 def _sortnodesellipsis(store, nodes, clnodetorev, lookup):
537 537 """Sort nodes for changegroup generation and turn into revnums."""
538 538 # Ellipses serving mode.
539 539 #
540 540 # In a perfect world, we'd generate better ellipsis-ified graphs
541 541 # for non-changelog revlogs. In practice, we haven't started doing
542 542 # that yet, so the resulting DAGs for the manifestlog and filelogs
543 543 # are actually full of bogus parentage on all the ellipsis
544 544 # nodes. This has the side effect that, while the contents are
545 545 # correct, the individual DAGs might be completely out of whack in
546 546 # a case like 882681bc3166 and its ancestors (back about 10
547 547 # revisions or so) in the main hg repo.
548 548 #
549 549 # The one invariant we *know* holds is that the new (potentially
550 550 # bogus) DAG shape will be valid if we order the nodes in the
551 551 # order that they're introduced in dramatis personae by the
552 552 # changelog, so what we do is we sort the non-changelog histories
553 553 # by the order in which they are used by the changelog.
554 554 key = lambda n: clnodetorev[lookup(n)]
555 555 return [store.rev(n) for n in sorted(nodes, key=key)]
556 556
557 557 class cgpacker(object):
558 558 def __init__(self, repo, filematcher, version, allowreorder,
559 559 deltaparentfn, builddeltaheader, manifestsend,
560 560 bundlecaps=None, ellipses=False,
561 561 shallow=False, ellipsisroots=None, fullnodes=None):
562 562 """Given a source repo, construct a bundler.
563 563
564 564 filematcher is a matcher that matches on files to include in the
565 565 changegroup. Used to facilitate sparse changegroups.
566 566
567 567 allowreorder controls whether reordering of revisions is allowed.
568 568 This value is used when ``bundle.reorder`` is ``auto`` or isn't
569 569 set.
570 570
571 571 deltaparentfn is a callable that resolves the delta parent for
572 572 a specific revision.
573 573
574 574 builddeltaheader is a callable that constructs the header for a group
575 575 delta.
576 576
577 577 manifestsend is a chunk to send after manifests have been fully emitted.
578 578
579 579 ellipses indicates whether ellipsis serving mode is enabled.
580 580
581 581 bundlecaps is optional and can be used to specify the set of
582 582 capabilities which can be used to build the bundle. While bundlecaps is
583 583 unused in core Mercurial, extensions rely on this feature to communicate
584 584 capabilities to customize the changegroup packer.
585 585
586 586 shallow indicates whether shallow data might be sent. The packer may
587 587 need to pack file contents not introduced by the changes being packed.
588 588
589 589 fullnodes is the list of nodes which should not be ellipsis nodes. We
590 590 store this rather than the set of nodes that should be ellipsis because
591 591 for very large histories we expect this to be significantly smaller.
592 592 """
593 593 assert filematcher
594 594 self._filematcher = filematcher
595 595
596 596 self.version = version
597 597 self._deltaparentfn = deltaparentfn
598 598 self._builddeltaheader = builddeltaheader
599 599 self._manifestsend = manifestsend
600 600 self._ellipses = ellipses
601 601
602 602 # Set of capabilities we can use to build the bundle.
603 603 if bundlecaps is None:
604 604 bundlecaps = set()
605 605 self._bundlecaps = bundlecaps
606 606 self._isshallow = shallow
607 607 self._fullnodes = fullnodes
608 608
609 609 # Maps ellipsis revs to their roots at the changelog level.
610 610 self._precomputedellipsis = ellipsisroots
611 611
612 612 # experimental config: bundle.reorder
613 613 reorder = repo.ui.config('bundle', 'reorder')
614 614 if reorder == 'auto':
615 615 self._reorder = allowreorder
616 616 else:
617 617 self._reorder = stringutil.parsebool(reorder)
618 618
619 619 self._repo = repo
620 620
621 621 if self._repo.ui.verbose and not self._repo.ui.debugflag:
622 622 self._verbosenote = self._repo.ui.note
623 623 else:
624 624 self._verbosenote = lambda s: None
625 625
626 626 # Maps CL revs to per-revlog revisions. Cleared in close() at
627 627 # the end of each group.
628 628 self._clrevtolocalrev = {}
629 629 self._nextclrevtolocalrev = {}
630 630
631 631 # Maps changelog nodes to changelog revs. Filled in once
632 632 # during changelog stage and then left unmodified.
633 633 self._clnodetorev = {}
634 634
635 635 def _close(self):
636 636 # Ellipses serving mode.
637 637 self._clrevtolocalrev.clear()
638 638 if self._nextclrevtolocalrev is not None:
639 639 self._clrevtolocalrev = self._nextclrevtolocalrev
640 640 self._nextclrevtolocalrev = None
641 641
642 642 return closechunk()
643 643
644 644 def group(self, revs, store, ischangelog, lookup, units=None):
645 645 """Calculate a delta group, yielding a sequence of changegroup chunks
646 646 (strings).
647 647
648 648 Given a list of changeset revs, return a set of deltas and
649 649 metadata corresponding to nodes. The first delta is
650 650 first parent(revs[0]) -> revs[0]; the receiver is
651 651 guaranteed to have this parent as it has all history before
652 652 these changesets. In the case the first parent is nullrev, the
653 653 changegroup starts with a full revision.
654 654
655 655 If units is not None, progress detail will be generated, units specifies
656 656 the type of revlog that is touched (changelog, manifest, etc.).
657 657 """
658 658 # if we don't have any revisions touched by these changesets, bail
659 659 if len(revs) == 0:
660 660 yield self._close()
661 661 return
662 662
663 663 # add the parent of the first rev
664 664 p = store.parentrevs(revs[0])[0]
665 665 revs.insert(0, p)
666 666
667 667 # build deltas
668 668 progress = None
669 669 if units is not None:
670 670 progress = self._repo.ui.makeprogress(_('bundling'), unit=units,
671 671 total=(len(revs) - 1))
672 672 for r in pycompat.xrange(len(revs) - 1):
673 673 if progress:
674 674 progress.update(r + 1)
675 675 prev, curr = revs[r], revs[r + 1]
676 676 linknode = lookup(store.node(curr))
677 677 for c in self._revchunk(store, ischangelog, curr, prev, linknode):
678 678 yield c
679 679
680 680 if progress:
681 681 progress.complete()
682 682 yield self._close()
683 683
684 684 # filter any nodes that claim to be part of the known set
685 685 def _prune(self, store, missing, commonrevs):
686 686 # TODO this violates storage abstraction for manifests.
687 687 if isinstance(store, manifest.manifestrevlog):
688 688 if not self._filematcher.visitdir(store._dir[:-1] or '.'):
689 689 return []
690 690
691 691 rr, rl = store.rev, store.linkrev
692 692 return [n for n in missing if rl(rr(n)) not in commonrevs]
693 693
694 694 def _packmanifests(self, dir, dirlog, revs, lookuplinknode):
695 695 """Pack manifests into a changegroup stream.
696 696
697 697 Encodes the directory name in the output so multiple manifests
698 698 can be sent. Multiple manifests is not supported by cg1 and cg2.
699 699 """
700 700 if dir:
701 701 assert self.version == b'03'
702 702 yield _fileheader(dir)
703 703
704 704 for chunk in self.group(revs, dirlog, False, lookuplinknode,
705 705 units=_('manifests')):
706 706 yield chunk
707 707
708 708 def generate(self, commonrevs, clnodes, fastpathlinkrev, source):
709 709 """Yield a sequence of changegroup byte chunks."""
710 710
711 711 repo = self._repo
712 712 cl = repo.changelog
713 713
714 714 self._verbosenote(_('uncompressed size of bundle content:\n'))
715 715 size = 0
716 716
717 717 clstate, chunks = self._generatechangelog(cl, clnodes)
718 718 for chunk in chunks:
719 719 size += len(chunk)
720 720 yield chunk
721 721
722 722 self._verbosenote(_('%8.i (changelog)\n') % size)
723 723
724 724 clrevorder = clstate['clrevorder']
725 725 mfs = clstate['mfs']
726 726 changedfiles = clstate['changedfiles']
727 727
728 728 # We need to make sure that the linkrev in the changegroup refers to
729 729 # the first changeset that introduced the manifest or file revision.
730 730 # The fastpath is usually safer than the slowpath, because the filelogs
731 731 # are walked in revlog order.
732 732 #
733 733 # When taking the slowpath with reorder=None and the manifest revlog
734 734 # uses generaldelta, the manifest may be walked in the "wrong" order.
735 735 # Without 'clrevorder', we would get an incorrect linkrev (see fix in
736 736 # cc0ff93d0c0c).
737 737 #
738 738 # When taking the fastpath, we are only vulnerable to reordering
739 739 # of the changelog itself. The changelog never uses generaldelta, so
740 740 # it is only reordered when reorder=True. To handle this case, we
741 741 # simply take the slowpath, which already has the 'clrevorder' logic.
742 742 # This was also fixed in cc0ff93d0c0c.
743 743 fastpathlinkrev = fastpathlinkrev and not self._reorder
744 744 # Treemanifests don't work correctly with fastpathlinkrev
745 745 # either, because we don't discover which directory nodes to
746 746 # send along with files. This could probably be fixed.
747 747 fastpathlinkrev = fastpathlinkrev and (
748 748 'treemanifest' not in repo.requirements)
749 749
750 750 fnodes = {} # needed file nodes
751 751
752 752 for chunk in self.generatemanifests(commonrevs, clrevorder,
753 753 fastpathlinkrev, mfs, fnodes, source):
754 754 yield chunk
755 755
756 if self._ellipses:
757 mfdicts = None
758 if self._isshallow:
759 mfdicts = [(self._repo.manifestlog[n].read(), lr)
760 for (n, lr) in mfs.iteritems()]
756 mfdicts = None
757 if self._ellipses and self._isshallow:
758 mfdicts = [(self._repo.manifestlog[n].read(), lr)
759 for (n, lr) in mfs.iteritems()]
761 760
762 761 mfs.clear()
763 762 clrevs = set(cl.rev(x) for x in clnodes)
764 763
765 764 if not fastpathlinkrev:
766 765 def linknodes(unused, fname):
767 766 return fnodes.get(fname, {})
768 767 else:
769 768 cln = cl.node
770 769 def linknodes(filerevlog, fname):
771 770 llr = filerevlog.linkrev
772 771 fln = filerevlog.node
773 772 revs = ((r, llr(r)) for r in filerevlog)
774 773 return dict((fln(r), cln(lr)) for r, lr in revs if lr in clrevs)
775 774
776 if self._ellipses:
777 # We need to pass the mfdicts variable down into
778 # generatefiles(), but more than one command might have
779 # wrapped generatefiles so we can't modify the function
780 # signature. Instead, we pass the data to ourselves using an
781 # instance attribute. I'm sorry.
782 self._mfdicts = mfdicts
783
784 775 for chunk in self.generatefiles(changedfiles, linknodes, commonrevs,
785 source):
776 source, mfdicts):
786 777 yield chunk
787 778
788 779 yield self._close()
789 780
790 781 if clnodes:
791 782 repo.hook('outgoing', node=hex(clnodes[0]), source=source)
792 783
793 784 def _generatechangelog(self, cl, nodes):
794 785 """Generate data for changelog chunks.
795 786
796 787 Returns a 2-tuple of a dict containing state and an iterable of
797 788 byte chunks. The state will not be fully populated until the
798 789 chunk stream has been fully consumed.
799 790 """
800 791 clrevorder = {}
801 792 mfs = {} # needed manifests
802 793 mfl = self._repo.manifestlog
803 794 # TODO violates storage abstraction.
804 795 mfrevlog = mfl._revlog
805 796 changedfiles = set()
806 797
807 798 # Callback for the changelog, used to collect changed files and
808 799 # manifest nodes.
809 800 # Returns the linkrev node (identity in the changelog case).
810 801 def lookupcl(x):
811 802 c = cl.read(x)
812 803 clrevorder[x] = len(clrevorder)
813 804
814 805 if self._ellipses:
815 806 # Only update mfs if x is going to be sent. Otherwise we
816 807 # end up with bogus linkrevs specified for manifests and
817 808 # we skip some manifest nodes that we should otherwise
818 809 # have sent.
819 810 if (x in self._fullnodes
820 811 or cl.rev(x) in self._precomputedellipsis):
821 812 n = c[0]
822 813 # Record the first changeset introducing this manifest
823 814 # version.
824 815 mfs.setdefault(n, x)
825 816 # Set this narrow-specific dict so we have the lowest
826 817 # manifest revnum to look up for this cl revnum. (Part of
827 818 # mapping changelog ellipsis parents to manifest ellipsis
828 819 # parents)
829 820 self._nextclrevtolocalrev.setdefault(cl.rev(x),
830 821 mfrevlog.rev(n))
831 822 # We can't trust the changed files list in the changeset if the
832 823 # client requested a shallow clone.
833 824 if self._isshallow:
834 825 changedfiles.update(mfl[c[0]].read().keys())
835 826 else:
836 827 changedfiles.update(c[3])
837 828 else:
838 829
839 830 n = c[0]
840 831 # record the first changeset introducing this manifest version
841 832 mfs.setdefault(n, x)
842 833 # Record a complete list of potentially-changed files in
843 834 # this manifest.
844 835 changedfiles.update(c[3])
845 836
846 837 return x
847 838
848 839 # Changelog doesn't benefit from reordering revisions. So send out
849 840 # revisions in store order.
850 841 revs = sorted(cl.rev(n) for n in nodes)
851 842
852 843 state = {
853 844 'clrevorder': clrevorder,
854 845 'mfs': mfs,
855 846 'changedfiles': changedfiles,
856 847 }
857 848
858 849 gen = self.group(revs, cl, True, lookupcl, units=_('changesets'))
859 850
860 851 return state, gen
861 852
862 853 def generatemanifests(self, commonrevs, clrevorder, fastpathlinkrev, mfs,
863 854 fnodes, source):
864 855 """Returns an iterator of changegroup chunks containing manifests.
865 856
866 857 `source` is unused here, but is used by extensions like remotefilelog to
867 858 change what is sent based on pulls vs pushes, etc.
868 859 """
869 860 repo = self._repo
870 861 mfl = repo.manifestlog
871 862 dirlog = mfl._revlog.dirlog
872 863 tmfnodes = {'': mfs}
873 864
874 865 # Callback for the manifest, used to collect linkrevs for filelog
875 866 # revisions.
876 867 # Returns the linkrev node (collected in lookupcl).
877 868 def makelookupmflinknode(dir, nodes):
878 869 if fastpathlinkrev:
879 870 assert not dir
880 871 return mfs.__getitem__
881 872
882 873 def lookupmflinknode(x):
883 874 """Callback for looking up the linknode for manifests.
884 875
885 876 Returns the linkrev node for the specified manifest.
886 877
887 878 SIDE EFFECT:
888 879
889 880 1) fclnodes gets populated with the list of relevant
890 881 file nodes if we're not using fastpathlinkrev
891 882 2) When treemanifests are in use, collects treemanifest nodes
892 883 to send
893 884
894 885 Note that this means manifests must be completely sent to
895 886 the client before you can trust the list of files and
896 887 treemanifests to send.
897 888 """
898 889 clnode = nodes[x]
899 890 mdata = mfl.get(dir, x).readfast(shallow=True)
900 891 for p, n, fl in mdata.iterentries():
901 892 if fl == 't': # subdirectory manifest
902 893 subdir = dir + p + '/'
903 894 tmfclnodes = tmfnodes.setdefault(subdir, {})
904 895 tmfclnode = tmfclnodes.setdefault(n, clnode)
905 896 if clrevorder[clnode] < clrevorder[tmfclnode]:
906 897 tmfclnodes[n] = clnode
907 898 else:
908 899 f = dir + p
909 900 fclnodes = fnodes.setdefault(f, {})
910 901 fclnode = fclnodes.setdefault(n, clnode)
911 902 if clrevorder[clnode] < clrevorder[fclnode]:
912 903 fclnodes[n] = clnode
913 904 return clnode
914 905 return lookupmflinknode
915 906
916 907 size = 0
917 908 while tmfnodes:
918 909 dir, nodes = tmfnodes.popitem()
919 910 store = dirlog(dir)
920 911 prunednodes = self._prune(store, nodes, commonrevs)
921 912 if not dir or prunednodes:
922 913 lookupfn = makelookupmflinknode(dir, nodes)
923 914
924 915 if self._ellipses:
925 916 revs = _sortnodesellipsis(store, prunednodes,
926 917 self._clnodetorev, lookupfn)
927 918 else:
928 919 revs = _sortnodesnormal(store, prunednodes,
929 920 self._reorder)
930 921
931 922 for x in self._packmanifests(dir, store, revs, lookupfn):
932 923 size += len(x)
933 924 yield x
934 925 self._verbosenote(_('%8.i (manifests)\n') % size)
935 926 yield self._manifestsend
936 927
937 928 # The 'source' parameter is useful for extensions
938 def generatefiles(self, changedfiles, linknodes, commonrevs, source):
929 def generatefiles(self, changedfiles, linknodes, commonrevs, source,
930 mfdicts):
939 931 changedfiles = list(filter(self._filematcher, changedfiles))
940 932
941 933 if self._isshallow:
942 # See comment in generate() for why this sadness is a thing.
943 mfdicts = self._mfdicts
944 del self._mfdicts
945 934 # In a shallow clone, the linknodes callback needs to also include
946 935 # those file nodes that are in the manifests we sent but weren't
947 936 # introduced by those manifests.
948 937 commonctxs = [self._repo[c] for c in commonrevs]
949 938 oldlinknodes = linknodes
950 939 clrev = self._repo.changelog.rev
951 940
952 941 # Defining this function has a side-effect of overriding the
953 942 # function of the same name that was passed in as an argument.
954 943 # TODO have caller pass in appropriate function.
955 944 def linknodes(flog, fname):
956 945 for c in commonctxs:
957 946 try:
958 947 fnode = c.filenode(fname)
959 948 self._clrevtolocalrev[c.rev()] = flog.rev(fnode)
960 949 except error.ManifestLookupError:
961 950 pass
962 951 links = oldlinknodes(flog, fname)
963 952 if len(links) != len(mfdicts):
964 953 for mf, lr in mfdicts:
965 954 fnode = mf.get(fname, None)
966 955 if fnode in links:
967 956 links[fnode] = min(links[fnode], lr, key=clrev)
968 957 elif fnode:
969 958 links[fnode] = lr
970 959 return links
971 960
972 961 return self._generatefiles(changedfiles, linknodes, commonrevs, source)
973 962
974 963 def _generatefiles(self, changedfiles, linknodes, commonrevs, source):
975 964 repo = self._repo
976 965 progress = repo.ui.makeprogress(_('bundling'), unit=_('files'),
977 966 total=len(changedfiles))
978 967 for i, fname in enumerate(sorted(changedfiles)):
979 968 filerevlog = repo.file(fname)
980 969 if not filerevlog:
981 970 raise error.Abort(_("empty or missing file data for %s") %
982 971 fname)
983 972
984 973 linkrevnodes = linknodes(filerevlog, fname)
985 974 # Lookup for filenodes, we collected the linkrev nodes above in the
986 975 # fastpath case and with lookupmf in the slowpath case.
987 976 def lookupfilelog(x):
988 977 return linkrevnodes[x]
989 978
990 979 filenodes = self._prune(filerevlog, linkrevnodes, commonrevs)
991 980 if filenodes:
992 981 if self._ellipses:
993 982 revs = _sortnodesellipsis(filerevlog, filenodes,
994 983 self._clnodetorev, lookupfilelog)
995 984 else:
996 985 revs = _sortnodesnormal(filerevlog, filenodes,
997 986 self._reorder)
998 987
999 988 progress.update(i + 1, item=fname)
1000 989 h = _fileheader(fname)
1001 990 size = len(h)
1002 991 yield h
1003 992 for chunk in self.group(revs, filerevlog, False, lookupfilelog):
1004 993 size += len(chunk)
1005 994 yield chunk
1006 995 self._verbosenote(_('%8.i %s\n') % (size, fname))
1007 996 progress.complete()
1008 997
1009 998 def _revchunk(self, store, ischangelog, rev, prev, linknode):
1010 999 if self._ellipses:
1011 1000 fn = self._revisiondeltanarrow
1012 1001 else:
1013 1002 fn = self._revisiondeltanormal
1014 1003
1015 1004 delta = fn(store, ischangelog, rev, prev, linknode)
1016 1005 if not delta:
1017 1006 return
1018 1007
1019 1008 meta = self._builddeltaheader(delta)
1020 1009 l = len(meta) + sum(len(x) for x in delta.deltachunks)
1021 1010
1022 1011 yield chunkheader(l)
1023 1012 yield meta
1024 1013 for x in delta.deltachunks:
1025 1014 yield x
1026 1015
1027 1016 def _revisiondeltanormal(self, store, ischangelog, rev, prev, linknode):
1028 1017 node = store.node(rev)
1029 1018 p1, p2 = store.parentrevs(rev)
1030 1019 base = self._deltaparentfn(store, rev, p1, p2, prev)
1031 1020
1032 1021 prefix = ''
1033 1022 if store.iscensored(base) or store.iscensored(rev):
1034 1023 try:
1035 1024 delta = store.revision(node, raw=True)
1036 1025 except error.CensoredNodeError as e:
1037 1026 delta = e.tombstone
1038 1027 if base == nullrev:
1039 1028 prefix = mdiff.trivialdiffheader(len(delta))
1040 1029 else:
1041 1030 baselen = store.rawsize(base)
1042 1031 prefix = mdiff.replacediffheader(baselen, len(delta))
1043 1032 elif base == nullrev:
1044 1033 delta = store.revision(node, raw=True)
1045 1034 prefix = mdiff.trivialdiffheader(len(delta))
1046 1035 else:
1047 1036 delta = store.revdiff(base, rev)
1048 1037 p1n, p2n = store.parents(node)
1049 1038
1050 1039 return revisiondelta(
1051 1040 node=node,
1052 1041 p1node=p1n,
1053 1042 p2node=p2n,
1054 1043 basenode=store.node(base),
1055 1044 linknode=linknode,
1056 1045 flags=store.flags(rev),
1057 1046 deltachunks=(prefix, delta),
1058 1047 )
1059 1048
1060 1049 def _revisiondeltanarrow(self, store, ischangelog, rev, prev, linknode):
1061 1050 # build up some mapping information that's useful later. See
1062 1051 # the local() nested function below.
1063 1052 if ischangelog:
1064 1053 self._clnodetorev[linknode] = rev
1065 1054 linkrev = rev
1066 1055 self._clrevtolocalrev[linkrev] = rev
1067 1056 else:
1068 1057 linkrev = self._clnodetorev[linknode]
1069 1058 self._clrevtolocalrev[linkrev] = rev
1070 1059
1071 1060 # This is a node to send in full, because the changeset it
1072 1061 # corresponds to was a full changeset.
1073 1062 if linknode in self._fullnodes:
1074 1063 return self._revisiondeltanormal(store, ischangelog, rev, prev,
1075 1064 linknode)
1076 1065
1077 1066 # At this point, a node can either be one we should skip or an
1078 1067 # ellipsis. If it's not an ellipsis, bail immediately.
1079 1068 if linkrev not in self._precomputedellipsis:
1080 1069 return
1081 1070
1082 1071 linkparents = self._precomputedellipsis[linkrev]
1083 1072 def local(clrev):
1084 1073 """Turn a changelog revnum into a local revnum.
1085 1074
1086 1075 The ellipsis dag is stored as revnums on the changelog,
1087 1076 but when we're producing ellipsis entries for
1088 1077 non-changelog revlogs, we need to turn those numbers into
1089 1078 something local. This does that for us, and during the
1090 1079 changelog sending phase will also expand the stored
1091 1080 mappings as needed.
1092 1081 """
1093 1082 if clrev == nullrev:
1094 1083 return nullrev
1095 1084
1096 1085 if ischangelog:
1097 1086 # If we're doing the changelog, it's possible that we
1098 1087 # have a parent that is already on the client, and we
1099 1088 # need to store some extra mapping information so that
1100 1089 # our contained ellipsis nodes will be able to resolve
1101 1090 # their parents.
1102 1091 if clrev not in self._clrevtolocalrev:
1103 1092 clnode = store.node(clrev)
1104 1093 self._clnodetorev[clnode] = clrev
1105 1094 return clrev
1106 1095
1107 1096 # Walk the ellipsis-ized changelog breadth-first looking for a
1108 1097 # change that has been linked from the current revlog.
1109 1098 #
1110 1099 # For a flat manifest revlog only a single step should be necessary
1111 1100 # as all relevant changelog entries are relevant to the flat
1112 1101 # manifest.
1113 1102 #
1114 1103 # For a filelog or tree manifest dirlog however not every changelog
1115 1104 # entry will have been relevant, so we need to skip some changelog
1116 1105 # nodes even after ellipsis-izing.
1117 1106 walk = [clrev]
1118 1107 while walk:
1119 1108 p = walk[0]
1120 1109 walk = walk[1:]
1121 1110 if p in self._clrevtolocalrev:
1122 1111 return self._clrevtolocalrev[p]
1123 1112 elif p in self._fullnodes:
1124 1113 walk.extend([pp for pp in self._repo.changelog.parentrevs(p)
1125 1114 if pp != nullrev])
1126 1115 elif p in self._precomputedellipsis:
1127 1116 walk.extend([pp for pp in self._precomputedellipsis[p]
1128 1117 if pp != nullrev])
1129 1118 else:
1130 1119 # In this case, we've got an ellipsis with parents
1131 1120 # outside the current bundle (likely an
1132 1121 # incremental pull). We "know" that we can use the
1133 1122 # value of this same revlog at whatever revision
1134 1123 # is pointed to by linknode. "Know" is in scare
1135 1124 # quotes because I haven't done enough examination
1136 1125 # of edge cases to convince myself this is really
1137 1126 # a fact - it works for all the (admittedly
1138 1127 # thorough) cases in our testsuite, but I would be
1139 1128 # somewhat unsurprised to find a case in the wild
1140 1129 # where this breaks down a bit. That said, I don't
1141 1130 # know if it would hurt anything.
1142 1131 for i in pycompat.xrange(rev, 0, -1):
1143 1132 if store.linkrev(i) == clrev:
1144 1133 return i
1145 1134 # We failed to resolve a parent for this node, so
1146 1135 # we crash the changegroup construction.
1147 1136 raise error.Abort(
1148 1137 'unable to resolve parent while packing %r %r'
1149 1138 ' for changeset %r' % (store.indexfile, rev, clrev))
1150 1139
1151 1140 return nullrev
1152 1141
1153 1142 if not linkparents or (
1154 1143 store.parentrevs(rev) == (nullrev, nullrev)):
1155 1144 p1, p2 = nullrev, nullrev
1156 1145 elif len(linkparents) == 1:
1157 1146 p1, = sorted(local(p) for p in linkparents)
1158 1147 p2 = nullrev
1159 1148 else:
1160 1149 p1, p2 = sorted(local(p) for p in linkparents)
1161 1150
1162 1151 n = store.node(rev)
1163 1152 p1n, p2n = store.node(p1), store.node(p2)
1164 1153 flags = store.flags(rev)
1165 1154 flags |= revlog.REVIDX_ELLIPSIS
1166 1155
1167 1156 # TODO: try and actually send deltas for ellipsis data blocks
1168 1157 data = store.revision(n)
1169 1158 diffheader = mdiff.trivialdiffheader(len(data))
1170 1159
1171 1160 return revisiondelta(
1172 1161 node=n,
1173 1162 p1node=p1n,
1174 1163 p2node=p2n,
1175 1164 basenode=nullid,
1176 1165 linknode=linknode,
1177 1166 flags=flags,
1178 1167 deltachunks=(diffheader, data),
1179 1168 )
1180 1169
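# Hedged framing recap (mirrors _revchunk above): each revision entry is
# emitted on the wire as
#
#     chunkheader(len(header) + sum(len(c) for c in delta.deltachunks))
#     header
#     *delta.deltachunks
#
# where header comes from the version-specific builddeltaheader callable.
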
1181 1170 def _deltaparentprev(store, rev, p1, p2, prev):
1182 1171 """Resolve a delta parent to the previous revision.
1183 1172
1184 1173 Used for version 1 changegroups, which don't support generaldelta.
1185 1174 """
1186 1175 return prev
1187 1176
1188 1177 def _deltaparentgeneraldelta(store, rev, p1, p2, prev):
1189 1178 """Resolve a delta parent when general deltas are supported."""
1190 1179 dp = store.deltaparent(rev)
1191 1180 if dp == nullrev and store.storedeltachains:
1192 1181 # Avoid sending full revisions when delta parent is null. Pick prev
1193 1182 # in that case. It's tempting to pick p1 in this case, as p1 will
1194 1183 # be smaller in the common case. However, computing a delta against
1195 1184 # p1 may require resolving the raw text of p1, which could be
1196 1185 # expensive. The revlog caches should have prev cached, meaning
1197 1186 # less CPU for changegroup generation. There is likely room to add
1198 1187 # a flag and/or config option to control this behavior.
1199 1188 base = prev
1200 1189 elif dp == nullrev:
1201 1190 # revlog is configured to use full snapshot for a reason,
1202 1191 # stick to full snapshot.
1203 1192 base = nullrev
1204 1193 elif dp not in (p1, p2, prev):
1205 1194 # Pick prev when we can't be sure remote has the base revision.
1206 1195 return prev
1207 1196 else:
1208 1197 base = dp
1209 1198
1210 1199 if base != nullrev and not store.candelta(base, rev):
1211 1200 base = nullrev
1212 1201
1213 1202 return base
1214 1203
1215 1204 def _deltaparentellipses(store, rev, p1, p2, prev):
1216 1205 """Resolve a delta parent when in ellipses mode."""
1217 1206 # TODO: send better deltas when in narrow mode.
1218 1207 #
1219 1208 # changegroup.group() loops over revisions to send,
1220 1209 # including revisions we'll skip. What this means is that
1221 1210 # `prev` will be a potentially useless delta base for all
1222 1211 # ellipsis nodes, as the client likely won't have it. In
1223 1212 # the future we should do bookkeeping about which nodes
1224 1213 # have been sent to the client, and try to be
1225 1214 # significantly smarter about delta bases. This is
1226 1215 # slightly tricky because this same code has to work for
1227 1216 # all revlogs, and we don't have the linkrev/linknode here.
1228 1217 return p1
1229 1218
1230 1219 def _makecg1packer(repo, filematcher, bundlecaps, ellipses=False,
1231 1220 shallow=False, ellipsisroots=None, fullnodes=None):
1232 1221 builddeltaheader = lambda d: _CHANGEGROUPV1_DELTA_HEADER.pack(
1233 1222 d.node, d.p1node, d.p2node, d.linknode)
1234 1223
1235 1224 return cgpacker(repo, filematcher, b'01',
1236 1225 deltaparentfn=_deltaparentprev,
1237 1226 allowreorder=None,
1238 1227 builddeltaheader=builddeltaheader,
1239 1228 manifestsend=b'',
1240 1229 bundlecaps=bundlecaps,
1241 1230 ellipses=ellipses,
1242 1231 shallow=shallow,
1243 1232 ellipsisroots=ellipsisroots,
1244 1233 fullnodes=fullnodes)
1245 1234
1246 1235 def _makecg2packer(repo, filematcher, bundlecaps, ellipses=False,
1247 1236 shallow=False, ellipsisroots=None, fullnodes=None):
1248 1237 builddeltaheader = lambda d: _CHANGEGROUPV2_DELTA_HEADER.pack(
1249 1238 d.node, d.p1node, d.p2node, d.basenode, d.linknode)
1250 1239
1251 1240 # Since generaldelta is directly supported by cg2, reordering
1252 1241 # generally doesn't help, so we disable it by default (treating
1253 1242 # bundle.reorder=auto just like bundle.reorder=False).
1254 1243 return cgpacker(repo, filematcher, b'02',
1255 1244 deltaparentfn=_deltaparentgeneraldelta,
1256 1245 allowreorder=False,
1257 1246 builddeltaheader=builddeltaheader,
1258 1247 manifestsend=b'',
1259 1248 bundlecaps=bundlecaps,
1260 1249 ellipses=ellipses,
1261 1250 shallow=shallow,
1262 1251 ellipsisroots=ellipsisroots,
1263 1252 fullnodes=fullnodes)
1264 1253
1265 1254 def _makecg3packer(repo, filematcher, bundlecaps, ellipses=False,
1266 1255 shallow=False, ellipsisroots=None, fullnodes=None):
1267 1256 builddeltaheader = lambda d: _CHANGEGROUPV3_DELTA_HEADER.pack(
1268 1257 d.node, d.p1node, d.p2node, d.basenode, d.linknode, d.flags)
1269 1258
1270 1259 deltaparentfn = (_deltaparentellipses if ellipses
1271 1260 else _deltaparentgeneraldelta)
1272 1261
1273 1262 return cgpacker(repo, filematcher, b'03',
1274 1263 deltaparentfn=deltaparentfn,
1275 1264 allowreorder=False,
1276 1265 builddeltaheader=builddeltaheader,
1277 1266 manifestsend=closechunk(),
1278 1267 bundlecaps=bundlecaps,
1279 1268 ellipses=ellipses,
1280 1269 shallow=shallow,
1281 1270 ellipsisroots=ellipsisroots,
1282 1271 fullnodes=fullnodes)
1283 1272
1284 1273 _packermap = {'01': (_makecg1packer, cg1unpacker),
1285 1274 # cg2 adds support for exchanging generaldelta
1286 1275 '02': (_makecg2packer, cg2unpacker),
1287 1276 # cg3 adds support for exchanging revlog flags and treemanifests
1288 1277 '03': (_makecg3packer, cg3unpacker),
1289 1278 }
1290 1279
1291 1280 def allsupportedversions(repo):
1292 1281 versions = set(_packermap.keys())
1293 1282 if not (repo.ui.configbool('experimental', 'changegroup3') or
1294 1283 repo.ui.configbool('experimental', 'treemanifest') or
1295 1284 'treemanifest' in repo.requirements):
1296 1285 versions.discard('03')
1297 1286 return versions
1298 1287
1299 1288 # Changegroup versions that can be applied to the repo
1300 1289 def supportedincomingversions(repo):
1301 1290 return allsupportedversions(repo)
1302 1291
1303 1292 # Changegroup versions that can be created from the repo
1304 1293 def supportedoutgoingversions(repo):
1305 1294 versions = allsupportedversions(repo)
1306 1295 if 'treemanifest' in repo.requirements:
1307 1296 # Versions 01 and 02 support only flat manifests and it's just too
1308 1297 # expensive to convert between the flat manifest and tree manifest on
1309 1298 # the fly. Since tree manifests are hashed differently, all of history
1310 1299 # would have to be converted. Instead, we simply don't even pretend to
1311 1300 # support versions 01 and 02.
1312 1301 versions.discard('01')
1313 1302 versions.discard('02')
1314 1303 if repository.NARROW_REQUIREMENT in repo.requirements:
1315 1304 # Versions 01 and 02 don't support revlog flags, and we need to
1316 1305 # support that for stripping and unbundling to work.
1317 1306 versions.discard('01')
1318 1307 versions.discard('02')
1319 1308 if LFS_REQUIREMENT in repo.requirements:
1320 1309 # Versions 01 and 02 don't support revlog flags, and we need to
1321 1310 # mark LFS entries with REVIDX_EXTSTORED.
1322 1311 versions.discard('01')
1323 1312 versions.discard('02')
1324 1313
1325 1314 return versions
1326 1315
1327 1316 def localversion(repo):
1328 1317 # Finds the best version to use for bundles that are meant to be used
1329 1318 # locally, such as those from strip and shelve, and temporary bundles.
1330 1319 return max(supportedoutgoingversions(repo))
1331 1320
1332 1321 def safeversion(repo):
1333 1322 # Finds the smallest version that it's safe to assume clients of the repo
1334 1323 # will support. For example, all hg versions that support generaldelta also
1335 1324 # support changegroup 02.
1336 1325 versions = supportedoutgoingversions(repo)
1337 1326 if 'generaldelta' in repo.requirements:
1338 1327 versions.discard('01')
1339 1328 assert versions
1340 1329 return min(versions)
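
# Hedged example of version selection (repo is any localrepo):
#
#     version = safeversion(repo)   # e.g. '02' on a generaldelta repo
#     assert version in supportedoutgoingversions(repo)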
1341 1330
1342 1331 def getbundler(version, repo, bundlecaps=None, filematcher=None,
1343 1332 ellipses=False, shallow=False, ellipsisroots=None,
1344 1333 fullnodes=None):
1345 1334 assert version in supportedoutgoingversions(repo)
1346 1335
1347 1336 if filematcher is None:
1348 1337 filematcher = matchmod.alwaysmatcher(repo.root, '')
1349 1338
1350 1339 if version == '01' and not filematcher.always():
1351 1340 raise error.ProgrammingError('version 01 changegroups do not support '
1352 1341 'sparse file matchers')
1353 1342
1354 1343 if ellipses and version in (b'01', b'02'):
1355 1344 raise error.Abort(
1356 1345 _('ellipsis nodes require at least cg3 on client and server, '
1357 1346 'but negotiated version %s') % version)
1358 1347
1359 1348 # Requested files could include files not in the local store. So
1360 1349 # filter those out.
1361 1350 filematcher = matchmod.intersectmatchers(repo.narrowmatch(),
1362 1351 filematcher)
1363 1352
1364 1353 fn = _packermap[version][0]
1365 1354 return fn(repo, filematcher, bundlecaps, ellipses=ellipses,
1366 1355 shallow=shallow, ellipsisroots=ellipsisroots,
1367 1356 fullnodes=fullnodes)
1368 1357
1369 1358 def getunbundler(version, fh, alg, extras=None):
1370 1359 return _packermap[version][1](fh, alg, extras=extras)
1371 1360
1372 1361 def _changegroupinfo(repo, nodes, source):
1373 1362 if repo.ui.verbose or source == 'bundle':
1374 1363 repo.ui.status(_("%d changesets found\n") % len(nodes))
1375 1364 if repo.ui.debugflag:
1376 1365 repo.ui.debug("list of changesets:\n")
1377 1366 for node in nodes:
1378 1367 repo.ui.debug("%s\n" % hex(node))
1379 1368
1380 1369 def makechangegroup(repo, outgoing, version, source, fastpath=False,
1381 1370 bundlecaps=None):
1382 1371 cgstream = makestream(repo, outgoing, version, source,
1383 1372 fastpath=fastpath, bundlecaps=bundlecaps)
1384 1373 return getunbundler(version, util.chunkbuffer(cgstream), None,
1385 1374 {'clcount': len(outgoing.missing) })
1386 1375
1387 1376 def makestream(repo, outgoing, version, source, fastpath=False,
1388 1377 bundlecaps=None, filematcher=None):
1389 1378 bundler = getbundler(version, repo, bundlecaps=bundlecaps,
1390 1379 filematcher=filematcher)
1391 1380
1392 1381 repo = repo.unfiltered()
1393 1382 commonrevs = outgoing.common
1394 1383 csets = outgoing.missing
1395 1384 heads = outgoing.missingheads
1396 1385 # We go through the fast path if we get told to, or if all (unfiltered)
1397 1386 # heads have been requested (since we then know that all linkrevs will
1398 1387 # be pulled by the client).
1399 1388 heads.sort()
1400 1389 fastpathlinkrev = fastpath or (
1401 1390 repo.filtername is None and heads == sorted(repo.heads()))
1402 1391
1403 1392 repo.hook('preoutgoing', throw=True, source=source)
1404 1393 _changegroupinfo(repo, csets, source)
1405 1394 return bundler.generate(commonrevs, csets, fastpathlinkrev, source)
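
# Hedged end-to-end sketch ('outgoing' is assumed to be a
# discovery.outgoing instance computed by the caller):
#
#     cgstream = makestream(repo, outgoing, '02', 'push')
#     fname = writechunks(repo.ui, cgstream, 'out.hg')
#
# makechangegroup() above wraps the same stream in an unpacker instead.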
1406 1395
1407 1396 def _addchangegroupfiles(repo, source, revmap, trp, expectedfiles, needfiles):
1408 1397 revisions = 0
1409 1398 files = 0
1410 1399 progress = repo.ui.makeprogress(_('files'), unit=_('files'),
1411 1400 total=expectedfiles)
1412 1401 for chunkdata in iter(source.filelogheader, {}):
1413 1402 files += 1
1414 1403 f = chunkdata["filename"]
1415 1404 repo.ui.debug("adding %s revisions\n" % f)
1416 1405 progress.increment()
1417 1406 fl = repo.file(f)
1418 1407 o = len(fl)
1419 1408 try:
1420 1409 deltas = source.deltaiter()
1421 1410 if not fl.addgroup(deltas, revmap, trp):
1422 1411 raise error.Abort(_("received file revlog group is empty"))
1423 1412 except error.CensoredBaseError as e:
1424 1413 raise error.Abort(_("received delta base is censored: %s") % e)
1425 1414 revisions += len(fl) - o
1426 1415 if f in needfiles:
1427 1416 needs = needfiles[f]
1428 1417 for new in pycompat.xrange(o, len(fl)):
1429 1418 n = fl.node(new)
1430 1419 if n in needs:
1431 1420 needs.remove(n)
1432 1421 else:
1433 1422 raise error.Abort(
1434 1423 _("received spurious file revlog entry"))
1435 1424 if not needs:
1436 1425 del needfiles[f]
1437 1426 progress.complete()
1438 1427
1439 1428 for f, needs in needfiles.iteritems():
1440 1429 fl = repo.file(f)
1441 1430 for n in needs:
1442 1431 try:
1443 1432 fl.rev(n)
1444 1433 except error.LookupError:
1445 1434 raise error.Abort(
1446 1435 _('missing file data for %s:%s - run hg verify') %
1447 1436 (f, hex(n)))
1448 1437
1449 1438 return revisions, files