##// END OF EJS Templates
changegroup: rename _fullnodes to _fullclnodes...
Gregory Szorc -
r39032:5baafb8f default
parent child Browse files
Show More
@@ -1,1428 +1,1429 b''
1 1 # changegroup.py - Mercurial changegroup manipulation functions
2 2 #
3 3 # Copyright 2006 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import os
11 11 import struct
12 12 import weakref
13 13
14 14 from .i18n import _
15 15 from .node import (
16 16 hex,
17 17 nullid,
18 18 nullrev,
19 19 short,
20 20 )
21 21
22 22 from .thirdparty import (
23 23 attr,
24 24 )
25 25
26 26 from . import (
27 27 dagutil,
28 28 error,
29 29 manifest,
30 30 match as matchmod,
31 31 mdiff,
32 32 phases,
33 33 pycompat,
34 34 repository,
35 35 revlog,
36 36 util,
37 37 )
38 38
39 39 from .utils import (
40 40 stringutil,
41 41 )
42 42
43 43 _CHANGEGROUPV1_DELTA_HEADER = struct.Struct("20s20s20s20s")
44 44 _CHANGEGROUPV2_DELTA_HEADER = struct.Struct("20s20s20s20s20s")
45 45 _CHANGEGROUPV3_DELTA_HEADER = struct.Struct(">20s20s20s20s20sH")
46 46
47 47 LFS_REQUIREMENT = 'lfs'
48 48
49 49 readexactly = util.readexactly
50 50
51 51 def getchunk(stream):
52 52 """return the next chunk from stream as a string"""
53 53 d = readexactly(stream, 4)
54 54 l = struct.unpack(">l", d)[0]
55 55 if l <= 4:
56 56 if l:
57 57 raise error.Abort(_("invalid chunk length %d") % l)
58 58 return ""
59 59 return readexactly(stream, l - 4)
60 60
61 61 def chunkheader(length):
62 62 """return a changegroup chunk header (string)"""
63 63 return struct.pack(">l", length + 4)
64 64
65 65 def closechunk():
66 66 """return a changegroup chunk header (string) for a zero-length chunk"""
67 67 return struct.pack(">l", 0)
68 68
69 69 def _fileheader(path):
70 70 """Obtain a changegroup chunk header for a named path."""
71 71 return chunkheader(len(path)) + path
72 72
73 73 def writechunks(ui, chunks, filename, vfs=None):
74 74 """Write chunks to a file and return its filename.
75 75
76 76 The stream is assumed to be a bundle file.
77 77 Existing files will not be overwritten.
78 78 If no filename is specified, a temporary file is created.
79 79 """
80 80 fh = None
81 81 cleanup = None
82 82 try:
83 83 if filename:
84 84 if vfs:
85 85 fh = vfs.open(filename, "wb")
86 86 else:
87 87 # Increase default buffer size because default is usually
88 88 # small (4k is common on Linux).
89 89 fh = open(filename, "wb", 131072)
90 90 else:
91 91 fd, filename = pycompat.mkstemp(prefix="hg-bundle-", suffix=".hg")
92 92 fh = os.fdopen(fd, r"wb")
93 93 cleanup = filename
94 94 for c in chunks:
95 95 fh.write(c)
96 96 cleanup = None
97 97 return filename
98 98 finally:
99 99 if fh is not None:
100 100 fh.close()
101 101 if cleanup is not None:
102 102 if filename and vfs:
103 103 vfs.unlink(cleanup)
104 104 else:
105 105 os.unlink(cleanup)
106 106
107 107 class cg1unpacker(object):
108 108 """Unpacker for cg1 changegroup streams.
109 109
110 110 A changegroup unpacker handles the framing of the revision data in
111 111 the wire format. Most consumers will want to use the apply()
112 112 method to add the changes from the changegroup to a repository.
113 113
114 114 If you're forwarding a changegroup unmodified to another consumer,
115 115 use getchunks(), which returns an iterator of changegroup
116 116 chunks. This is mostly useful for cases where you need to know the
117 117 data stream has ended by observing the end of the changegroup.
118 118
119 119 deltachunk() is useful only if you're applying delta data. Most
120 120 consumers should prefer apply() instead.
121 121
122 122 A few other public methods exist. Those are used only for
123 123 bundlerepo and some debug commands - their use is discouraged.
124 124 """
125 125 deltaheader = _CHANGEGROUPV1_DELTA_HEADER
126 126 deltaheadersize = deltaheader.size
127 127 version = '01'
128 128 _grouplistcount = 1 # One list of files after the manifests
129 129
130 130 def __init__(self, fh, alg, extras=None):
131 131 if alg is None:
132 132 alg = 'UN'
133 133 if alg not in util.compengines.supportedbundletypes:
134 134 raise error.Abort(_('unknown stream compression type: %s')
135 135 % alg)
136 136 if alg == 'BZ':
137 137 alg = '_truncatedBZ'
138 138
139 139 compengine = util.compengines.forbundletype(alg)
140 140 self._stream = compengine.decompressorreader(fh)
141 141 self._type = alg
142 142 self.extras = extras or {}
143 143 self.callback = None
144 144
145 145 # These methods (compressed, read, seek, tell) all appear to only
146 146 # be used by bundlerepo, but it's a little hard to tell.
147 147 def compressed(self):
148 148 return self._type is not None and self._type != 'UN'
149 149 def read(self, l):
150 150 return self._stream.read(l)
151 151 def seek(self, pos):
152 152 return self._stream.seek(pos)
153 153 def tell(self):
154 154 return self._stream.tell()
155 155 def close(self):
156 156 return self._stream.close()
157 157
158 158 def _chunklength(self):
159 159 d = readexactly(self._stream, 4)
160 160 l = struct.unpack(">l", d)[0]
161 161 if l <= 4:
162 162 if l:
163 163 raise error.Abort(_("invalid chunk length %d") % l)
164 164 return 0
165 165 if self.callback:
166 166 self.callback()
167 167 return l - 4
168 168
169 169 def changelogheader(self):
170 170 """v10 does not have a changelog header chunk"""
171 171 return {}
172 172
173 173 def manifestheader(self):
174 174 """v10 does not have a manifest header chunk"""
175 175 return {}
176 176
177 177 def filelogheader(self):
178 178 """return the header of the filelogs chunk, v10 only has the filename"""
179 179 l = self._chunklength()
180 180 if not l:
181 181 return {}
182 182 fname = readexactly(self._stream, l)
183 183 return {'filename': fname}
184 184
185 185 def _deltaheader(self, headertuple, prevnode):
186 186 node, p1, p2, cs = headertuple
187 187 if prevnode is None:
188 188 deltabase = p1
189 189 else:
190 190 deltabase = prevnode
191 191 flags = 0
192 192 return node, p1, p2, deltabase, cs, flags
193 193
194 194 def deltachunk(self, prevnode):
195 195 l = self._chunklength()
196 196 if not l:
197 197 return {}
198 198 headerdata = readexactly(self._stream, self.deltaheadersize)
199 199 header = self.deltaheader.unpack(headerdata)
200 200 delta = readexactly(self._stream, l - self.deltaheadersize)
201 201 node, p1, p2, deltabase, cs, flags = self._deltaheader(header, prevnode)
202 202 return (node, p1, p2, cs, deltabase, delta, flags)
203 203
204 204 def getchunks(self):
205 205 """returns all the chunks contained in the bundle
206 206
207 207 Used when you need to forward the binary stream to a file or another
208 208 network API. To do so, it parses the changegroup data, otherwise it will
209 209 block in case of sshrepo because it doesn't know the end of the stream.
210 210 """
211 211 # For changegroup 1 and 2, we expect 3 parts: changelog, manifestlog,
212 212 # and a list of filelogs. For changegroup 3, we expect 4 parts:
213 213 # changelog, manifestlog, a list of tree manifestlogs, and a list of
214 214 # filelogs.
215 215 #
216 216 # Changelog and manifestlog parts are terminated with empty chunks. The
217 217 # tree and file parts are a list of entry sections. Each entry section
218 218 # is a series of chunks terminating in an empty chunk. The list of these
219 219 # entry sections is terminated in yet another empty chunk, so we know
220 220 # we've reached the end of the tree/file list when we reach an empty
221 221 # chunk that was preceded by no non-empty chunks.
222 222
223 223 parts = 0
224 224 while parts < 2 + self._grouplistcount:
225 225 noentries = True
226 226 while True:
227 227 chunk = getchunk(self)
228 228 if not chunk:
229 229 # The first two empty chunks represent the end of the
230 230 # changelog and the manifestlog portions. The remaining
231 231 # empty chunks represent either A) the end of individual
232 232 # tree or file entries in the file list, or B) the end of
233 233 # the entire list. It's the end of the entire list if there
234 234 # were no entries (i.e. noentries is True).
235 235 if parts < 2:
236 236 parts += 1
237 237 elif noentries:
238 238 parts += 1
239 239 break
240 240 noentries = False
241 241 yield chunkheader(len(chunk))
242 242 pos = 0
243 243 while pos < len(chunk):
244 244 next = pos + 2**20
245 245 yield chunk[pos:next]
246 246 pos = next
247 247 yield closechunk()
248 248
249 249 def _unpackmanifests(self, repo, revmap, trp, prog):
250 250 self.callback = prog.increment
251 251 # no need to check for empty manifest group here:
252 252 # if the result of the merge of 1 and 2 is the same in 3 and 4,
253 253 # no new manifest will be created and the manifest group will
254 254 # be empty during the pull
255 255 self.manifestheader()
256 256 deltas = self.deltaiter()
257 257 repo.manifestlog.addgroup(deltas, revmap, trp)
258 258 prog.complete()
259 259 self.callback = None
260 260
261 261 def apply(self, repo, tr, srctype, url, targetphase=phases.draft,
262 262 expectedtotal=None):
263 263 """Add the changegroup returned by source.read() to this repo.
264 264 srctype is a string like 'push', 'pull', or 'unbundle'. url is
265 265 the URL of the repo where this changegroup is coming from.
266 266
267 267 Return an integer summarizing the change to this repo:
268 268 - nothing changed or no source: 0
269 269 - more heads than before: 1+added heads (2..n)
270 270 - fewer heads than before: -1-removed heads (-2..-n)
271 271 - number of heads stays the same: 1
272 272 """
273 273 repo = repo.unfiltered()
274 274 def csmap(x):
275 275 repo.ui.debug("add changeset %s\n" % short(x))
276 276 return len(cl)
277 277
278 278 def revmap(x):
279 279 return cl.rev(x)
280 280
281 281 changesets = files = revisions = 0
282 282
283 283 try:
284 284 # The transaction may already carry source information. In this
285 285 # case we use the top level data. We overwrite the argument
286 286 # because we need to use the top level value (if they exist)
287 287 # in this function.
288 288 srctype = tr.hookargs.setdefault('source', srctype)
289 289 url = tr.hookargs.setdefault('url', url)
290 290 repo.hook('prechangegroup',
291 291 throw=True, **pycompat.strkwargs(tr.hookargs))
292 292
293 293 # write changelog data to temp files so concurrent readers
294 294 # will not see an inconsistent view
295 295 cl = repo.changelog
296 296 cl.delayupdate(tr)
297 297 oldheads = set(cl.heads())
298 298
299 299 trp = weakref.proxy(tr)
300 300 # pull off the changeset group
301 301 repo.ui.status(_("adding changesets\n"))
302 302 clstart = len(cl)
303 303 progress = repo.ui.makeprogress(_('changesets'), unit=_('chunks'),
304 304 total=expectedtotal)
305 305 self.callback = progress.increment
306 306
307 307 efiles = set()
308 308 def onchangelog(cl, node):
309 309 efiles.update(cl.readfiles(node))
310 310
311 311 self.changelogheader()
312 312 deltas = self.deltaiter()
313 313 cgnodes = cl.addgroup(deltas, csmap, trp, addrevisioncb=onchangelog)
314 314 efiles = len(efiles)
315 315
316 316 if not cgnodes:
317 317 repo.ui.develwarn('applied empty changegroup',
318 318 config='warn-empty-changegroup')
319 319 clend = len(cl)
320 320 changesets = clend - clstart
321 321 progress.complete()
322 322 self.callback = None
323 323
324 324 # pull off the manifest group
325 325 repo.ui.status(_("adding manifests\n"))
326 326 # We know that we'll never have more manifests than we had
327 327 # changesets.
328 328 progress = repo.ui.makeprogress(_('manifests'), unit=_('chunks'),
329 329 total=changesets)
330 330 self._unpackmanifests(repo, revmap, trp, progress)
331 331
332 332 needfiles = {}
333 333 if repo.ui.configbool('server', 'validate'):
334 334 cl = repo.changelog
335 335 ml = repo.manifestlog
336 336 # validate incoming csets have their manifests
337 337 for cset in pycompat.xrange(clstart, clend):
338 338 mfnode = cl.changelogrevision(cset).manifest
339 339 mfest = ml[mfnode].readdelta()
340 340 # store file cgnodes we must see
341 341 for f, n in mfest.iteritems():
342 342 needfiles.setdefault(f, set()).add(n)
343 343
344 344 # process the files
345 345 repo.ui.status(_("adding file changes\n"))
346 346 newrevs, newfiles = _addchangegroupfiles(
347 347 repo, self, revmap, trp, efiles, needfiles)
348 348 revisions += newrevs
349 349 files += newfiles
350 350
351 351 deltaheads = 0
352 352 if oldheads:
353 353 heads = cl.heads()
354 354 deltaheads = len(heads) - len(oldheads)
355 355 for h in heads:
356 356 if h not in oldheads and repo[h].closesbranch():
357 357 deltaheads -= 1
358 358 htext = ""
359 359 if deltaheads:
360 360 htext = _(" (%+d heads)") % deltaheads
361 361
362 362 repo.ui.status(_("added %d changesets"
363 363 " with %d changes to %d files%s\n")
364 364 % (changesets, revisions, files, htext))
365 365 repo.invalidatevolatilesets()
366 366
367 367 if changesets > 0:
368 368 if 'node' not in tr.hookargs:
369 369 tr.hookargs['node'] = hex(cl.node(clstart))
370 370 tr.hookargs['node_last'] = hex(cl.node(clend - 1))
371 371 hookargs = dict(tr.hookargs)
372 372 else:
373 373 hookargs = dict(tr.hookargs)
374 374 hookargs['node'] = hex(cl.node(clstart))
375 375 hookargs['node_last'] = hex(cl.node(clend - 1))
376 376 repo.hook('pretxnchangegroup',
377 377 throw=True, **pycompat.strkwargs(hookargs))
378 378
379 379 added = [cl.node(r) for r in pycompat.xrange(clstart, clend)]
380 380 phaseall = None
381 381 if srctype in ('push', 'serve'):
382 382 # Old servers can not push the boundary themselves.
383 383 # New servers won't push the boundary if changeset already
384 384 # exists locally as secret
385 385 #
386 386 # We should not use added here but the list of all changes in
387 387 # the bundle
388 388 if repo.publishing():
389 389 targetphase = phaseall = phases.public
390 390 else:
391 391 # closer target phase computation
392 392
393 393 # Those changesets have been pushed from the
394 394 # outside, their phases are going to be pushed
395 395 # alongside. Therefore `targetphase` is
396 396 # ignored.
397 397 targetphase = phaseall = phases.draft
398 398 if added:
399 399 phases.registernew(repo, tr, targetphase, added)
400 400 if phaseall is not None:
401 401 phases.advanceboundary(repo, tr, phaseall, cgnodes)
402 402
403 403 if changesets > 0:
404 404
405 405 def runhooks():
406 406 # These hooks run when the lock releases, not when the
407 407 # transaction closes. So it's possible for the changelog
408 408 # to have changed since we last saw it.
409 409 if clstart >= len(repo):
410 410 return
411 411
412 412 repo.hook("changegroup", **pycompat.strkwargs(hookargs))
413 413
414 414 for n in added:
415 415 args = hookargs.copy()
416 416 args['node'] = hex(n)
417 417 del args['node_last']
418 418 repo.hook("incoming", **pycompat.strkwargs(args))
419 419
420 420 newheads = [h for h in repo.heads()
421 421 if h not in oldheads]
422 422 repo.ui.log("incoming",
423 423 "%d incoming changes - new heads: %s\n",
424 424 len(added),
425 425 ', '.join([hex(c[:6]) for c in newheads]))
426 426
427 427 tr.addpostclose('changegroup-runhooks-%020i' % clstart,
428 428 lambda tr: repo._afterlock(runhooks))
429 429 finally:
430 430 repo.ui.flush()
431 431 # never return 0 here:
432 432 if deltaheads < 0:
433 433 ret = deltaheads - 1
434 434 else:
435 435 ret = deltaheads + 1
436 436 return ret
437 437
438 438 def deltaiter(self):
439 439 """
440 440 returns an iterator of the deltas in this changegroup
441 441
442 442 Useful for passing to the underlying storage system to be stored.
443 443 """
444 444 chain = None
445 445 for chunkdata in iter(lambda: self.deltachunk(chain), {}):
446 446 # Chunkdata: (node, p1, p2, cs, deltabase, delta, flags)
447 447 yield chunkdata
448 448 chain = chunkdata[0]
449 449
450 450 class cg2unpacker(cg1unpacker):
451 451 """Unpacker for cg2 streams.
452 452
453 453 cg2 streams add support for generaldelta, so the delta header
454 454 format is slightly different. All other features about the data
455 455 remain the same.
456 456 """
457 457 deltaheader = _CHANGEGROUPV2_DELTA_HEADER
458 458 deltaheadersize = deltaheader.size
459 459 version = '02'
460 460
461 461 def _deltaheader(self, headertuple, prevnode):
462 462 node, p1, p2, deltabase, cs = headertuple
463 463 flags = 0
464 464 return node, p1, p2, deltabase, cs, flags
465 465
466 466 class cg3unpacker(cg2unpacker):
467 467 """Unpacker for cg3 streams.
468 468
469 469 cg3 streams add support for exchanging treemanifests and revlog
470 470 flags. It adds the revlog flags to the delta header and an empty chunk
471 471 separating manifests and files.
472 472 """
473 473 deltaheader = _CHANGEGROUPV3_DELTA_HEADER
474 474 deltaheadersize = deltaheader.size
475 475 version = '03'
476 476 _grouplistcount = 2 # One list of manifests and one list of files
477 477
478 478 def _deltaheader(self, headertuple, prevnode):
479 479 node, p1, p2, deltabase, cs, flags = headertuple
480 480 return node, p1, p2, deltabase, cs, flags
481 481
482 482 def _unpackmanifests(self, repo, revmap, trp, prog):
483 483 super(cg3unpacker, self)._unpackmanifests(repo, revmap, trp, prog)
484 484 for chunkdata in iter(self.filelogheader, {}):
485 485 # If we get here, there are directory manifests in the changegroup
486 486 d = chunkdata["filename"]
487 487 repo.ui.debug("adding %s revisions\n" % d)
488 488 dirlog = repo.manifestlog._revlog.dirlog(d)
489 489 deltas = self.deltaiter()
490 490 if not dirlog.addgroup(deltas, revmap, trp):
491 491 raise error.Abort(_("received dir revlog group is empty"))
492 492
493 493 class headerlessfixup(object):
494 494 def __init__(self, fh, h):
495 495 self._h = h
496 496 self._fh = fh
497 497 def read(self, n):
498 498 if self._h:
499 499 d, self._h = self._h[:n], self._h[n:]
500 500 if len(d) < n:
501 501 d += readexactly(self._fh, n - len(d))
502 502 return d
503 503 return readexactly(self._fh, n)
504 504
505 505 @attr.s(slots=True, frozen=True)
506 506 class revisiondelta(object):
507 507 """Describes a delta entry in a changegroup.
508 508
509 509 Captured data is sufficient to serialize the delta into multiple
510 510 formats.
511 511 """
512 512 # 20 byte node of this revision.
513 513 node = attr.ib()
514 514 # 20 byte nodes of parent revisions.
515 515 p1node = attr.ib()
516 516 p2node = attr.ib()
517 517 # 20 byte node of node this delta is against.
518 518 basenode = attr.ib()
519 519 # 20 byte node of changeset revision this delta is associated with.
520 520 linknode = attr.ib()
521 521 # 2 bytes of flags to apply to revision data.
522 522 flags = attr.ib()
523 523 # Iterable of chunks holding raw delta data.
524 524 deltachunks = attr.ib()
525 525
526 526 def _sortnodesnormal(store, nodes, reorder):
527 527 """Sort nodes for changegroup generation and turn into revnums."""
528 528 # for generaldelta revlogs, we linearize the revs; this will both be
529 529 # much quicker and generate a much smaller bundle
530 530 if (store._generaldelta and reorder is None) or reorder:
531 531 dag = dagutil.revlogdag(store)
532 532 return dag.linearize(set(store.rev(n) for n in nodes))
533 533 else:
534 534 return sorted([store.rev(n) for n in nodes])
535 535
536 536 def _sortnodesellipsis(store, nodes, clnodetorev, lookup):
537 537 """Sort nodes for changegroup generation and turn into revnums."""
538 538 # Ellipses serving mode.
539 539 #
540 540 # In a perfect world, we'd generate better ellipsis-ified graphs
541 541 # for non-changelog revlogs. In practice, we haven't started doing
542 542 # that yet, so the resulting DAGs for the manifestlog and filelogs
543 543 # are actually full of bogus parentage on all the ellipsis
544 544 # nodes. This has the side effect that, while the contents are
545 545 # correct, the individual DAGs might be completely out of whack in
546 546 # a case like 882681bc3166 and its ancestors (back about 10
547 547 # revisions or so) in the main hg repo.
548 548 #
549 549 # The one invariant we *know* holds is that the new (potentially
550 550 # bogus) DAG shape will be valid if we order the nodes in the
551 551 # order that they're introduced in dramatis personae by the
552 552 # changelog, so what we do is we sort the non-changelog histories
553 553 # by the order in which they are used by the changelog.
554 554 key = lambda n: clnodetorev[lookup(n)]
555 555 return [store.rev(n) for n in sorted(nodes, key=key)]
556 556
557 557 def _revisiondeltanormal(store, rev, prev, linknode, deltaparentfn):
558 558 """Construct a revision delta for non-ellipses changegroup generation."""
559 559 node = store.node(rev)
560 560 p1, p2 = store.parentrevs(rev)
561 561 base = deltaparentfn(store, rev, p1, p2, prev)
562 562
563 563 prefix = ''
564 564 if store.iscensored(base) or store.iscensored(rev):
565 565 try:
566 566 delta = store.revision(node, raw=True)
567 567 except error.CensoredNodeError as e:
568 568 delta = e.tombstone
569 569 if base == nullrev:
570 570 prefix = mdiff.trivialdiffheader(len(delta))
571 571 else:
572 572 baselen = store.rawsize(base)
573 573 prefix = mdiff.replacediffheader(baselen, len(delta))
574 574 elif base == nullrev:
575 575 delta = store.revision(node, raw=True)
576 576 prefix = mdiff.trivialdiffheader(len(delta))
577 577 else:
578 578 delta = store.revdiff(base, rev)
579 579 p1n, p2n = store.parents(node)
580 580
581 581 return revisiondelta(
582 582 node=node,
583 583 p1node=p1n,
584 584 p2node=p2n,
585 585 basenode=store.node(base),
586 586 linknode=linknode,
587 587 flags=store.flags(rev),
588 588 deltachunks=(prefix, delta),
589 589 )
590 590
591 591 class cgpacker(object):
592 592 def __init__(self, repo, filematcher, version, allowreorder,
593 593 deltaparentfn, builddeltaheader, manifestsend,
594 594 bundlecaps=None, ellipses=False,
595 595 shallow=False, ellipsisroots=None, fullnodes=None):
596 596 """Given a source repo, construct a bundler.
597 597
598 598 filematcher is a matcher that matches on files to include in the
599 599 changegroup. Used to facilitate sparse changegroups.
600 600
601 601 allowreorder controls whether reordering of revisions is allowed.
602 602 This value is used when ``bundle.reorder`` is ``auto`` or isn't
603 603 set.
604 604
605 605 deltaparentfn is a callable that resolves the delta parent for
606 606 a specific revision.
607 607
608 608 builddeltaheader is a callable that constructs the header for a group
609 609 delta.
610 610
611 611 manifestsend is a chunk to send after manifests have been fully emitted.
612 612
613 613 ellipses indicates whether ellipsis serving mode is enabled.
614 614
615 615 bundlecaps is optional and can be used to specify the set of
616 616 capabilities which can be used to build the bundle. While bundlecaps is
617 617 unused in core Mercurial, extensions rely on this feature to communicate
618 618 capabilities to customize the changegroup packer.
619 619
620 620 shallow indicates whether shallow data might be sent. The packer may
621 621 need to pack file contents not introduced by the changes being packed.
622 622
623 fullnodes is the list of nodes which should not be ellipsis nodes. We
624 store this rather than the set of nodes that should be ellipsis because
625 for very large histories we expect this to be significantly smaller.
623 fullnodes is the set of changelog nodes which should not be ellipsis
624 nodes. We store this rather than the set of nodes that should be
625 ellipsis because for very large histories we expect this to be
626 significantly smaller.
626 627 """
627 628 assert filematcher
628 629 self._filematcher = filematcher
629 630
630 631 self.version = version
631 632 self._deltaparentfn = deltaparentfn
632 633 self._builddeltaheader = builddeltaheader
633 634 self._manifestsend = manifestsend
634 635 self._ellipses = ellipses
635 636
636 637 # Set of capabilities we can use to build the bundle.
637 638 if bundlecaps is None:
638 639 bundlecaps = set()
639 640 self._bundlecaps = bundlecaps
640 641 self._isshallow = shallow
641 self._fullnodes = fullnodes
642 self._fullclnodes = fullnodes
642 643
643 644 # Maps ellipsis revs to their roots at the changelog level.
644 645 self._precomputedellipsis = ellipsisroots
645 646
646 647 # experimental config: bundle.reorder
647 648 reorder = repo.ui.config('bundle', 'reorder')
648 649 if reorder == 'auto':
649 650 self._reorder = allowreorder
650 651 else:
651 652 self._reorder = stringutil.parsebool(reorder)
652 653
653 654 self._repo = repo
654 655
655 656 if self._repo.ui.verbose and not self._repo.ui.debugflag:
656 657 self._verbosenote = self._repo.ui.note
657 658 else:
658 659 self._verbosenote = lambda s: None
659 660
660 661 # Maps CL revs to per-revlog revisions. Cleared in close() at
661 662 # the end of each group.
662 663 self._clrevtolocalrev = {}
663 664 self._nextclrevtolocalrev = {}
664 665
665 666 # Maps changelog nodes to changelog revs. Filled in once
666 667 # during changelog stage and then left unmodified.
667 668 self._clnodetorev = {}
668 669
669 670 def _close(self):
670 671 # Ellipses serving mode.
671 672 self._clrevtolocalrev.clear()
672 673 if self._nextclrevtolocalrev is not None:
673 674 self._clrevtolocalrev = self._nextclrevtolocalrev
674 675 self._nextclrevtolocalrev = None
675 676
676 677 return closechunk()
677 678
678 679 def group(self, revs, store, ischangelog, lookup, units=None):
679 680 """Calculate a delta group, yielding a sequence of changegroup chunks
680 681 (strings).
681 682
682 683 Given a list of changeset revs, return a set of deltas and
683 684 metadata corresponding to nodes. The first delta is
684 685 first parent(nodelist[0]) -> nodelist[0], the receiver is
685 686 guaranteed to have this parent as it has all history before
686 687 these changesets. In the case firstparent is nullrev the
687 688 changegroup starts with a full revision.
688 689
689 690 If units is not None, progress detail will be generated, units specifies
690 691 the type of revlog that is touched (changelog, manifest, etc.).
691 692 """
692 693 # if we don't have any revisions touched by these changesets, bail
693 694 if len(revs) == 0:
694 695 yield self._close()
695 696 return
696 697
697 698 # add the parent of the first rev
698 699 p = store.parentrevs(revs[0])[0]
699 700 revs.insert(0, p)
700 701
701 702 # build deltas
702 703 progress = None
703 704 if units is not None:
704 705 progress = self._repo.ui.makeprogress(_('bundling'), unit=units,
705 706 total=(len(revs) - 1))
706 707 for r in pycompat.xrange(len(revs) - 1):
707 708 if progress:
708 709 progress.update(r + 1)
709 710 prev, curr = revs[r], revs[r + 1]
710 711 linknode = lookup(store.node(curr))
711 712
712 713 if self._ellipses:
713 714 linkrev = self._clnodetorev[linknode]
714 715 self._clrevtolocalrev[linkrev] = curr
715 716
716 717 # This is a node to send in full, because the changeset it
717 718 # corresponds to was a full changeset.
718 if linknode in self._fullnodes:
719 if linknode in self._fullclnodes:
719 720 delta = _revisiondeltanormal(store, curr, prev, linknode,
720 721 self._deltaparentfn)
721 722 elif linkrev not in self._precomputedellipsis:
722 723 delta = None
723 724 else:
724 725 delta = self._revisiondeltanarrow(store, ischangelog,
725 726 curr, linkrev, linknode)
726 727 else:
727 728 delta = _revisiondeltanormal(store, curr, prev, linknode,
728 729 self._deltaparentfn)
729 730
730 731 if not delta:
731 732 continue
732 733
733 734 meta = self._builddeltaheader(delta)
734 735 l = len(meta) + sum(len(x) for x in delta.deltachunks)
735 736 yield chunkheader(l)
736 737 yield meta
737 738 for x in delta.deltachunks:
738 739 yield x
739 740
740 741 if progress:
741 742 progress.complete()
742 743 yield self._close()
743 744
744 745 # filter any nodes that claim to be part of the known set
745 746 def _prune(self, store, missing, commonrevs):
746 747 # TODO this violates storage abstraction for manifests.
747 748 if isinstance(store, manifest.manifestrevlog):
748 749 if not self._filematcher.visitdir(store._dir[:-1] or '.'):
749 750 return []
750 751
751 752 rr, rl = store.rev, store.linkrev
752 753 return [n for n in missing if rl(rr(n)) not in commonrevs]
753 754
754 755 def _packmanifests(self, dir, dirlog, revs, lookuplinknode):
755 756 """Pack manifests into a changegroup stream.
756 757
757 758 Encodes the directory name in the output so multiple manifests
758 759 can be sent. Multiple manifests is not supported by cg1 and cg2.
759 760 """
760 761 if dir:
761 762 assert self.version == b'03'
762 763 yield _fileheader(dir)
763 764
764 765 for chunk in self.group(revs, dirlog, False, lookuplinknode,
765 766 units=_('manifests')):
766 767 yield chunk
767 768
768 769 def generate(self, commonrevs, clnodes, fastpathlinkrev, source):
769 770 """Yield a sequence of changegroup byte chunks."""
770 771
771 772 repo = self._repo
772 773 cl = repo.changelog
773 774
774 775 self._verbosenote(_('uncompressed size of bundle content:\n'))
775 776 size = 0
776 777
777 778 clstate, chunks = self._generatechangelog(cl, clnodes)
778 779 for chunk in chunks:
779 780 size += len(chunk)
780 781 yield chunk
781 782
782 783 self._verbosenote(_('%8.i (changelog)\n') % size)
783 784
784 785 clrevorder = clstate['clrevorder']
785 786 mfs = clstate['mfs']
786 787 changedfiles = clstate['changedfiles']
787 788
788 789 # We need to make sure that the linkrev in the changegroup refers to
789 790 # the first changeset that introduced the manifest or file revision.
790 791 # The fastpath is usually safer than the slowpath, because the filelogs
791 792 # are walked in revlog order.
792 793 #
793 794 # When taking the slowpath with reorder=None and the manifest revlog
794 795 # uses generaldelta, the manifest may be walked in the "wrong" order.
795 796 # Without 'clrevorder', we would get an incorrect linkrev (see fix in
796 797 # cc0ff93d0c0c).
797 798 #
798 799 # When taking the fastpath, we are only vulnerable to reordering
799 800 # of the changelog itself. The changelog never uses generaldelta, so
800 801 # it is only reordered when reorder=True. To handle this case, we
801 802 # simply take the slowpath, which already has the 'clrevorder' logic.
802 803 # This was also fixed in cc0ff93d0c0c.
803 804 fastpathlinkrev = fastpathlinkrev and not self._reorder
804 805 # Treemanifests don't work correctly with fastpathlinkrev
805 806 # either, because we don't discover which directory nodes to
806 807 # send along with files. This could probably be fixed.
807 808 fastpathlinkrev = fastpathlinkrev and (
808 809 'treemanifest' not in repo.requirements)
809 810
810 811 fnodes = {} # needed file nodes
811 812
812 813 for chunk in self.generatemanifests(commonrevs, clrevorder,
813 814 fastpathlinkrev, mfs, fnodes, source):
814 815 yield chunk
815 816
816 817 mfdicts = None
817 818 if self._ellipses and self._isshallow:
818 819 mfdicts = [(self._repo.manifestlog[n].read(), lr)
819 820 for (n, lr) in mfs.iteritems()]
820 821
821 822 mfs.clear()
822 823 clrevs = set(cl.rev(x) for x in clnodes)
823 824
824 825 if not fastpathlinkrev:
825 826 def linknodes(unused, fname):
826 827 return fnodes.get(fname, {})
827 828 else:
828 829 cln = cl.node
829 830 def linknodes(filerevlog, fname):
830 831 llr = filerevlog.linkrev
831 832 fln = filerevlog.node
832 833 revs = ((r, llr(r)) for r in filerevlog)
833 834 return dict((fln(r), cln(lr)) for r, lr in revs if lr in clrevs)
834 835
835 836 for chunk in self.generatefiles(changedfiles, linknodes, commonrevs,
836 837 source, mfdicts):
837 838 yield chunk
838 839
839 840 yield self._close()
840 841
841 842 if clnodes:
842 843 repo.hook('outgoing', node=hex(clnodes[0]), source=source)
843 844
    def _generatechangelog(self, cl, nodes):
        """Generate data for changelog chunks.

        ``cl`` is the changelog; ``nodes`` are the changelog nodes to send.

        Returns a 2-tuple of a dict containing state and an iterable of
        byte chunks. The state will not be fully populated until the
        chunk stream has been fully consumed.
        """
        # Maps changelog node -> its position in the emission order; used
        # later to pick the earliest introducing changeset for each
        # manifest/file node.
        clrevorder = {}
        mfs = {} # needed manifests
        mfl = self._repo.manifestlog
        # TODO violates storage abstraction.
        mfrevlog = mfl._revlog
        changedfiles = set()

        # Callback for the changelog, used to collect changed files and
        # manifest nodes.
        # Returns the linkrev node (identity in the changelog case).
        def lookupcl(x):
            c = cl.read(x)
            clrevorder[x] = len(clrevorder)

            if self._ellipses:
                self._clnodetorev[x] = cl.rev(x)

                # Only update mfs if x is going to be sent. Otherwise we
                # end up with bogus linkrevs specified for manifests and
                # we skip some manifest nodes that we should otherwise
                # have sent.
                if (x in self._fullclnodes
                    or cl.rev(x) in self._precomputedellipsis):
                    n = c[0]
                    # Record the first changeset introducing this manifest
                    # version.
                    mfs.setdefault(n, x)
                    # Set this narrow-specific dict so we have the lowest
                    # manifest revnum to look up for this cl revnum. (Part of
                    # mapping changelog ellipsis parents to manifest ellipsis
                    # parents)
                    self._nextclrevtolocalrev.setdefault(cl.rev(x),
                                                         mfrevlog.rev(n))
                # We can't trust the changed files list in the changeset if the
                # client requested a shallow clone.
                if self._isshallow:
                    changedfiles.update(mfl[c[0]].read().keys())
                else:
                    changedfiles.update(c[3])
            else:

                n = c[0]
                # record the first changeset introducing this manifest version
                mfs.setdefault(n, x)
                # Record a complete list of potentially-changed files in
                # this manifest.
                changedfiles.update(c[3])

            return x

        # Changelog doesn't benefit from reordering revisions. So send out
        # revisions in store order.
        revs = sorted(cl.rev(n) for n in nodes)

        state = {
            'clrevorder': clrevorder,
            'mfs': mfs,
            'changedfiles': changedfiles,
        }

        # The generator is lazy: state above is only filled in as chunks
        # are consumed, because lookupcl() runs during iteration.
        gen = self.group(revs, cl, True, lookupcl, units=_('changesets'))

        return state, gen
914 915
    def generatemanifests(self, commonrevs, clrevorder, fastpathlinkrev, mfs,
                          fnodes, source):
        """Returns an iterator of changegroup chunks containing manifests.

        `source` is unused here, but is used by extensions like remotefilelog to
        change what is sent based in pulls vs pushes, etc.
        """
        repo = self._repo
        mfl = repo.manifestlog
        dirlog = mfl._revlog.dirlog
        # Worklist of {dir: {node: clnode}}; '' is the root manifest. Tree
        # manifest traversal below pushes subdirectory entries onto it.
        tmfnodes = {'': mfs}

        # Callback for the manifest, used to collect linkrevs for filelog
        # revisions.
        # Returns the linkrev node (collected in lookupcl).
        def makelookupmflinknode(dir, nodes):
            if fastpathlinkrev:
                assert not dir
                return mfs.__getitem__

            def lookupmflinknode(x):
                """Callback for looking up the linknode for manifests.

                Returns the linkrev node for the specified manifest.

                SIDE EFFECT:

                1) fclnodes gets populated with the list of relevant
                   file nodes if we're not using fastpathlinkrev
                2) When treemanifests are in use, collects treemanifest nodes
                   to send

                Note that this means manifests must be completely sent to
                the client before you can trust the list of files and
                treemanifests to send.
                """
                clnode = nodes[x]
                mdata = mfl.get(dir, x).readfast(shallow=True)
                for p, n, fl in mdata.iterentries():
                    if fl == 't': # subdirectory manifest
                        subdir = dir + p + '/'
                        tmfclnodes = tmfnodes.setdefault(subdir, {})
                        tmfclnode = tmfclnodes.setdefault(n, clnode)
                        # Keep the earliest (in clrevorder) introducing
                        # changeset for each node.
                        if clrevorder[clnode] < clrevorder[tmfclnode]:
                            tmfclnodes[n] = clnode
                    else:
                        f = dir + p
                        fclnodes = fnodes.setdefault(f, {})
                        fclnode = fclnodes.setdefault(n, clnode)
                        if clrevorder[clnode] < clrevorder[fclnode]:
                            fclnodes[n] = clnode
                return clnode
            return lookupmflinknode

        size = 0
        while tmfnodes:
            dir, nodes = tmfnodes.popitem()
            store = dirlog(dir)
            prunednodes = self._prune(store, nodes, commonrevs)
            # The root manifest ('') is always emitted, even when empty.
            if not dir or prunednodes:
                lookupfn = makelookupmflinknode(dir, nodes)

                if self._ellipses:
                    revs = _sortnodesellipsis(store, prunednodes,
                                              self._clnodetorev, lookupfn)
                else:
                    revs = _sortnodesnormal(store, prunednodes,
                                            self._reorder)

                for x in self._packmanifests(dir, store, revs, lookupfn):
                    size += len(x)
                    yield x
        self._verbosenote(_('%8.i (manifests)\n') % size)
        yield self._manifestsend
989 990
    # The 'source' parameter is useful for extensions
    def generatefiles(self, changedfiles, linknodes, commonrevs, source,
                      mfdicts):
        """Prepare file chunk generation, adjusting linknodes for shallow
        clones, then delegate to ``_generatefiles``.

        ``linknodes`` is a callable ``(filerevlog, fname) -> {fnode: clnode}``.
        ``mfdicts`` is only consulted in the shallow case (list of
        ``(manifestdict, linkrev-node)`` pairs).
        """
        changedfiles = list(filter(self._filematcher, changedfiles))

        if self._isshallow:
            # In a shallow clone, the linknodes callback needs to also include
            # those file nodes that are in the manifests we sent but weren't
            # introduced by those manifests.
            commonctxs = [self._repo[c] for c in commonrevs]
            oldlinknodes = linknodes
            clrev = self._repo.changelog.rev

            # Defining this function has a side-effect of overriding the
            # function of the same name that was passed in as an argument.
            # TODO have caller pass in appropriate function.
            def linknodes(flog, fname):
                for c in commonctxs:
                    try:
                        fnode = c.filenode(fname)
                        self._clrevtolocalrev[c.rev()] = flog.rev(fnode)
                    except error.ManifestLookupError:
                        # File absent from this common changeset; skip it.
                        pass
                links = oldlinknodes(flog, fname)
                if len(links) != len(mfdicts):
                    for mf, lr in mfdicts:
                        fnode = mf.get(fname, None)
                        if fnode in links:
                            # Prefer the earliest linkrev (by changelog rev).
                            links[fnode] = min(links[fnode], lr, key=clrev)
                        elif fnode:
                            links[fnode] = lr
                return links

        return self._generatefiles(changedfiles, linknodes, commonrevs, source)
1024 1025
    def _generatefiles(self, changedfiles, linknodes, commonrevs, source):
        """Yield changegroup chunks (header + deltas) for each changed file.

        Files with no nodes left after pruning common revisions are skipped
        entirely (no header is emitted for them).
        """
        repo = self._repo
        progress = repo.ui.makeprogress(_('bundling'), unit=_('files'),
                                        total=len(changedfiles))
        for i, fname in enumerate(sorted(changedfiles)):
            filerevlog = repo.file(fname)
            if not filerevlog:
                raise error.Abort(_("empty or missing file data for %s") %
                                  fname)

            linkrevnodes = linknodes(filerevlog, fname)
            # Lookup for filenodes, we collected the linkrev nodes above in the
            # fastpath case and with lookupmf in the slowpath case.
            def lookupfilelog(x):
                return linkrevnodes[x]

            filenodes = self._prune(filerevlog, linkrevnodes, commonrevs)
            if filenodes:
                if self._ellipses:
                    revs = _sortnodesellipsis(filerevlog, filenodes,
                                              self._clnodetorev, lookupfilelog)
                else:
                    revs = _sortnodesnormal(filerevlog, filenodes,
                                            self._reorder)

                progress.update(i + 1, item=fname)
                h = _fileheader(fname)
                size = len(h)
                yield h
                for chunk in self.group(revs, filerevlog, False, lookupfilelog):
                    size += len(chunk)
                    yield chunk
                self._verbosenote(_('%8.i %s\n') % (size, fname))
        progress.complete()
1059 1060
    def _revisiondeltanarrow(self, store, ischangelog, rev, linkrev, linknode):
        """Build a ``revisiondelta`` for ``rev`` in ellipsis (narrow) mode.

        The revision's parents are remapped to the precomputed ellipsis
        parents, the REVIDX_ELLIPSIS flag is set, and the full revision text
        is sent (as a trivial diff against the null base) instead of a real
        delta.
        """
        linkparents = self._precomputedellipsis[linkrev]
        def local(clrev):
            """Turn a changelog revnum into a local revnum.

            The ellipsis dag is stored as revnums on the changelog,
            but when we're producing ellipsis entries for
            non-changelog revlogs, we need to turn those numbers into
            something local. This does that for us, and during the
            changelog sending phase will also expand the stored
            mappings as needed.
            """
            if clrev == nullrev:
                return nullrev

            if ischangelog:
                # If we're doing the changelog, it's possible that we
                # have a parent that is already on the client, and we
                # need to store some extra mapping information so that
                # our contained ellipsis nodes will be able to resolve
                # their parents.
                if clrev not in self._clrevtolocalrev:
                    clnode = store.node(clrev)
                    self._clnodetorev[clnode] = clrev
                return clrev

            # Walk the ellipsis-ized changelog breadth-first looking for a
            # change that has been linked from the current revlog.
            #
            # For a flat manifest revlog only a single step should be necessary
            # as all relevant changelog entries are relevant to the flat
            # manifest.
            #
            # For a filelog or tree manifest dirlog however not every changelog
            # entry will have been relevant, so we need to skip some changelog
            # nodes even after ellipsis-izing.
            walk = [clrev]
            while walk:
                p = walk[0]
                walk = walk[1:]
                if p in self._clrevtolocalrev:
                    return self._clrevtolocalrev[p]
                elif p in self._fullclnodes:
                    walk.extend([pp for pp in self._repo.changelog.parentrevs(p)
                                 if pp != nullrev])
                elif p in self._precomputedellipsis:
                    walk.extend([pp for pp in self._precomputedellipsis[p]
                                 if pp != nullrev])
                else:
                    # In this case, we've got an ellipsis with parents
                    # outside the current bundle (likely an
                    # incremental pull). We "know" that we can use the
                    # value of this same revlog at whatever revision
                    # is pointed to by linknode. "Know" is in scare
                    # quotes because I haven't done enough examination
                    # of edge cases to convince myself this is really
                    # a fact - it works for all the (admittedly
                    # thorough) cases in our testsuite, but I would be
                    # somewhat unsurprised to find a case in the wild
                    # where this breaks down a bit. That said, I don't
                    # know if it would hurt anything.
                    for i in pycompat.xrange(rev, 0, -1):
                        if store.linkrev(i) == clrev:
                            return i
                    # We failed to resolve a parent for this node, so
                    # we crash the changegroup construction.
                    raise error.Abort(
                        'unable to resolve parent while packing %r %r'
                        ' for changeset %r' % (store.indexfile, rev, clrev))

            return nullrev

        if not linkparents or (
            store.parentrevs(rev) == (nullrev, nullrev)):
            p1, p2 = nullrev, nullrev
        elif len(linkparents) == 1:
            p1, = sorted(local(p) for p in linkparents)
            p2 = nullrev
        else:
            p1, p2 = sorted(local(p) for p in linkparents)

        n = store.node(rev)
        p1n, p2n = store.node(p1), store.node(p2)
        flags = store.flags(rev)
        flags |= revlog.REVIDX_ELLIPSIS

        # TODO: try and actually send deltas for ellipsis data blocks
        data = store.revision(n)
        diffheader = mdiff.trivialdiffheader(len(data))

        return revisiondelta(
            node=n,
            p1node=p1n,
            p2node=p2n,
            basenode=nullid,
            linknode=linknode,
            flags=flags,
            deltachunks=(diffheader, data),
        )
1159 1160
1160 1161 def _deltaparentprev(store, rev, p1, p2, prev):
1161 1162 """Resolve a delta parent to the previous revision.
1162 1163
1163 1164 Used for version 1 changegroups, which don't support generaldelta.
1164 1165 """
1165 1166 return prev
1166 1167
def _deltaparentgeneraldelta(store, rev, p1, p2, prev):
    """Resolve a delta parent when general deltas are supported."""
    dp = store.deltaparent(rev)

    if dp == nullrev:
        # The stored revision has no delta parent (full snapshot). When the
        # store uses delta chains, still emit a delta against prev: prev is
        # likely cached by the revlog, whereas deltaing against p1 may force
        # resolving p1's raw text. (There is likely room for a knob here.)
        # When the store deliberately keeps full snapshots, respect that and
        # send a full revision.
        base = prev if store.storedeltachains else nullrev
    elif dp not in (p1, p2, prev):
        # We can't be sure the remote has the stored delta base; prev is the
        # only revision guaranteed to have been transmitted already.
        return prev
    else:
        base = dp

    # Even a valid base is rejected if the store can't delta against it.
    if base != nullrev and not store.candelta(base, rev):
        base = nullrev

    return base
1193 1194
1194 1195 def _deltaparentellipses(store, rev, p1, p2, prev):
1195 1196 """Resolve a delta parent when in ellipses mode."""
1196 1197 # TODO: send better deltas when in narrow mode.
1197 1198 #
1198 1199 # changegroup.group() loops over revisions to send,
1199 1200 # including revisions we'll skip. What this means is that
1200 1201 # `prev` will be a potentially useless delta base for all
1201 1202 # ellipsis nodes, as the client likely won't have it. In
1202 1203 # the future we should do bookkeeping about which nodes
1203 1204 # have been sent to the client, and try to be
1204 1205 # significantly smarter about delta bases. This is
1205 1206 # slightly tricky because this same code has to work for
1206 1207 # all revlogs, and we don't have the linkrev/linknode here.
1207 1208 return p1
1208 1209
def _makecg1packer(repo, filematcher, bundlecaps, ellipses=False,
                   shallow=False, ellipsisroots=None, fullnodes=None):
    """Construct a changegroup version '01' packer."""
    # cg1 delta headers carry no explicit delta base (it is implicitly
    # the previously sent revision) and no flags.
    def builddeltaheader(d):
        return _CHANGEGROUPV1_DELTA_HEADER.pack(
            d.node, d.p1node, d.p2node, d.linknode)

    return cgpacker(repo, filematcher, b'01',
                    deltaparentfn=_deltaparentprev,
                    allowreorder=None,
                    builddeltaheader=builddeltaheader,
                    manifestsend=b'',
                    bundlecaps=bundlecaps,
                    ellipses=ellipses,
                    shallow=shallow,
                    ellipsisroots=ellipsisroots,
                    fullnodes=fullnodes)
1224 1225
def _makecg2packer(repo, filematcher, bundlecaps, ellipses=False,
                   shallow=False, ellipsisroots=None, fullnodes=None):
    """Construct a changegroup version '02' packer."""
    # cg2 delta headers add an explicit base node after the parents.
    def builddeltaheader(d):
        return _CHANGEGROUPV2_DELTA_HEADER.pack(
            d.node, d.p1node, d.p2node, d.basenode, d.linknode)

    # Since generaldelta is directly supported by cg2, reordering
    # generally doesn't help, so we disable it by default (treating
    # bundle.reorder=auto just like bundle.reorder=False).
    return cgpacker(repo, filematcher, b'02',
                    deltaparentfn=_deltaparentgeneraldelta,
                    allowreorder=False,
                    builddeltaheader=builddeltaheader,
                    manifestsend=b'',
                    bundlecaps=bundlecaps,
                    ellipses=ellipses,
                    shallow=shallow,
                    ellipsisroots=ellipsisroots,
                    fullnodes=fullnodes)
1243 1244
def _makecg3packer(repo, filematcher, bundlecaps, ellipses=False,
                   shallow=False, ellipsisroots=None, fullnodes=None):
    """Construct a changegroup version '03' packer."""
    # cg3 delta headers additionally carry the revision flags.
    def builddeltaheader(d):
        return _CHANGEGROUPV3_DELTA_HEADER.pack(
            d.node, d.p1node, d.p2node, d.basenode, d.linknode, d.flags)

    # Ellipsis mode needs its own delta-parent policy (prev is useless to
    # narrow clients); otherwise the generaldelta policy applies.
    deltaparentfn = (_deltaparentellipses if ellipses
                     else _deltaparentgeneraldelta)

    return cgpacker(repo, filematcher, b'03',
                    deltaparentfn=deltaparentfn,
                    allowreorder=False,
                    builddeltaheader=builddeltaheader,
                    manifestsend=closechunk(),
                    bundlecaps=bundlecaps,
                    ellipses=ellipses,
                    shallow=shallow,
                    ellipsisroots=ellipsisroots,
                    fullnodes=fullnodes)
1262 1263
# Maps a two-character changegroup wire version to a (packer factory,
# unpacker class) pair.
_packermap = {'01': (_makecg1packer, cg1unpacker),
             # cg2 adds support for exchanging generaldelta
             '02': (_makecg2packer, cg2unpacker),
             # cg3 adds support for exchanging revlog flags and treemanifests
             '03': (_makecg3packer, cg3unpacker),
}
1269 1270
def allsupportedversions(repo):
    """Return the set of all changegroup versions this code knows about.

    '03' is only included when the repository or its configuration opts in
    to changegroup3/treemanifest support.
    """
    versions = set(_packermap.keys())
    cg3allowed = (repo.ui.configbool('experimental', 'changegroup3') or
                  repo.ui.configbool('experimental', 'treemanifest') or
                  'treemanifest' in repo.requirements)
    if not cg3allowed:
        versions.discard('03')
    return versions
1277 1278
# Changegroup versions that can be applied to the repo
def supportedincomingversions(repo):
    """Return the changegroup versions that can be applied to ``repo``.

    Currently identical to the full supported set; a separate entry point
    is kept so incoming and outgoing support can diverge.
    """
    return allsupportedversions(repo)
1281 1282
# Changegroup versions that can be created from the repo
def supportedoutgoingversions(repo):
    """Return the changegroup versions that can be produced from ``repo``."""
    versions = allsupportedversions(repo)

    if 'treemanifest' in repo.requirements:
        # Versions 01 and 02 only understand flat manifests, and converting
        # between flat and tree manifests on the fly is far too expensive
        # (tree manifests hash differently, so all of history would need
        # converting). So we don't even pretend to support them.
        versions.difference_update({'01', '02'})

    if repository.NARROW_REQUIREMENT in repo.requirements:
        # Stripping and unbundling in a narrow repo need revlog flags,
        # which versions 01 and 02 cannot carry.
        versions.difference_update({'01', '02'})

    if LFS_REQUIREMENT in repo.requirements:
        # LFS entries must be marked with REVIDX_EXTSTORED, again requiring
        # revlog flag support.
        versions.difference_update({'01', '02'})

    return versions
1305 1306
def localversion(repo):
    """Return the best version for purely local bundles.

    Used for bundles that never leave this host (strip/shelve backups,
    temporary bundles), so the newest producible version is always safe.
    """
    versions = supportedoutgoingversions(repo)
    return max(versions)
1310 1311
def safeversion(repo):
    """Return the smallest version all clients of this repo should support.

    For example, every hg version that supports generaldelta also supports
    changegroup 02, so '01' can be dropped for generaldelta repos.
    """
    versions = supportedoutgoingversions(repo)
    if 'generaldelta' in repo.requirements:
        versions.discard('01')
    # There must always be at least one usable version.
    assert versions
    return min(versions)
1320 1321
def getbundler(version, repo, bundlecaps=None, filematcher=None,
               ellipses=False, shallow=False, ellipsisroots=None,
               fullnodes=None):
    """Obtain a changegroup packer for ``version``.

    Validates the version/feature combination (version '01' cannot carry a
    sparse file matcher; ellipsis nodes require at least cg3), then
    delegates to the per-version factory registered in ``_packermap``.
    """
    assert version in supportedoutgoingversions(repo)

    if filematcher is None:
        filematcher = matchmod.alwaysmatcher(repo.root, '')

    if version == '01' and not filematcher.always():
        raise error.ProgrammingError('version 01 changegroups do not support '
                                     'sparse file matchers')

    if ellipses and version in (b'01', b'02'):
        raise error.Abort(
            _('ellipsis nodes require at least cg3 on client and server, '
              'but negotiated version %s') % version)

    # Requested files could include files not in the local store. So
    # filter those out.
    filematcher = matchmod.intersectmatchers(repo.narrowmatch(),
                                             filematcher)

    fn = _packermap[version][0]
    return fn(repo, filematcher, bundlecaps, ellipses=ellipses,
              shallow=shallow, ellipsisroots=ellipsisroots,
              fullnodes=fullnodes)
1347 1348
def getunbundler(version, fh, alg, extras=None):
    """Instantiate the unpacker for ``version`` over stream ``fh``.

    ``alg`` is the compression algorithm identifier and ``extras`` are
    passed through to the unpacker constructor.
    """
    _packer, unpacker = _packermap[version]
    return unpacker(fh, alg, extras=extras)
1350 1351
def _changegroupinfo(repo, nodes, source):
    """Emit status/debug output describing the outgoing changesets."""
    ui = repo.ui
    if ui.verbose or source == 'bundle':
        ui.status(_("%d changesets found\n") % len(nodes))
        if ui.debugflag:
            ui.debug("list of changesets:\n")
            for node in nodes:
                ui.debug("%s\n" % hex(node))
1358 1359
def makechangegroup(repo, outgoing, version, source, fastpath=False,
                    bundlecaps=None):
    """Create an unbundler wrapping a freshly generated changegroup stream."""
    cgstream = makestream(repo, outgoing, version, source,
                          fastpath=fastpath, bundlecaps=bundlecaps)
    extras = {'clcount': len(outgoing.missing)}
    return getunbundler(version, util.chunkbuffer(cgstream), None, extras)
1365 1366
def makestream(repo, outgoing, version, source, fastpath=False,
               bundlecaps=None, filematcher=None):
    """Return a changegroup chunk stream for ``outgoing``.

    Fires the 'preoutgoing' hook (which may abort) and emits changeset
    info before handing generation to the version-appropriate packer.
    """
    bundler = getbundler(version, repo, bundlecaps=bundlecaps,
                         filematcher=filematcher)

    repo = repo.unfiltered()
    commonrevs = outgoing.common
    csets = outgoing.missing
    heads = outgoing.missingheads
    # We go through the fast path if we get told to, or if all (unfiltered)
    # heads have been requested (since we then know that all linkrevs will
    # be pulled by the client).
    heads.sort()
    fastpathlinkrev = fastpath or (
        repo.filtername is None and heads == sorted(repo.heads()))

    repo.hook('preoutgoing', throw=True, source=source)
    _changegroupinfo(repo, csets, source)
    return bundler.generate(commonrevs, csets, fastpathlinkrev, source)
1385 1386
def _addchangegroupfiles(repo, source, revmap, trp, expectedfiles, needfiles):
    """Apply the file portion of a changegroup stream to the repository.

    ``source`` is the unbundler to read filelog chunks from; ``revmap`` and
    ``trp`` are passed through to ``filelog.addgroup``. ``needfiles`` maps a
    filename to the set of filenodes the received changesets require; every
    expected node must arrive or we abort.

    Returns a ``(revisions, files)`` tuple of how much was added.
    """
    revisions = 0
    files = 0
    progress = repo.ui.makeprogress(_('files'), unit=_('files'),
                                    total=expectedfiles)
    # filelogheader() returns {} at the end of the file stream.
    for chunkdata in iter(source.filelogheader, {}):
        files += 1
        f = chunkdata["filename"]
        repo.ui.debug("adding %s revisions\n" % f)
        progress.increment()
        fl = repo.file(f)
        o = len(fl)
        try:
            deltas = source.deltaiter()
            if not fl.addgroup(deltas, revmap, trp):
                raise error.Abort(_("received file revlog group is empty"))
        except error.CensoredBaseError as e:
            raise error.Abort(_("received delta base is censored: %s") % e)
        revisions += len(fl) - o
        if f in needfiles:
            needs = needfiles[f]
            # Tick off every newly added node; anything we didn't ask for
            # is an error.
            for new in pycompat.xrange(o, len(fl)):
                n = fl.node(new)
                if n in needs:
                    needs.remove(n)
                else:
                    raise error.Abort(
                        _("received spurious file revlog entry"))
            if not needs:
                del needfiles[f]
    progress.complete()

    # Anything still listed in needfiles never arrived; verify it is at
    # least already present locally, otherwise abort.
    for f, needs in needfiles.iteritems():
        fl = repo.file(f)
        for n in needs:
            try:
                fl.rev(n)
            except error.LookupError:
                raise error.Abort(
                    _('missing file data for %s:%s - run hg verify') %
                    (f, hex(n)))

    return revisions, files
General Comments 0
You need to be logged in to leave comments. Login now