##// END OF EJS Templates
changegroup: move revision maps to cgpacker...
Gregory Szorc -
r38942:0548f696 default
parent child Browse files
Show More
@@ -1,1411 +1,1413 b''
1 1 # changegroup.py - Mercurial changegroup manipulation functions
2 2 #
3 3 # Copyright 2006 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import os
11 11 import struct
12 12 import weakref
13 13
14 14 from .i18n import _
15 15 from .node import (
16 16 hex,
17 17 nullid,
18 18 nullrev,
19 19 short,
20 20 )
21 21
22 22 from .thirdparty import (
23 23 attr,
24 24 )
25 25
26 26 from . import (
27 27 dagutil,
28 28 error,
29 29 manifest,
30 30 match as matchmod,
31 31 mdiff,
32 32 phases,
33 33 pycompat,
34 34 repository,
35 35 revlog,
36 36 util,
37 37 )
38 38
39 39 from .utils import (
40 40 stringutil,
41 41 )
42 42
# Pre-compiled struct formats for the per-revision delta header of each
# changegroup wire version. cg1/cg2 are four/five fixed 20-byte node
# fields; cg3 adds a trailing 16-bit flags field (note the explicit
# big-endian '>' marker).
_CHANGEGROUPV1_DELTA_HEADER = struct.Struct("20s20s20s20s")
_CHANGEGROUPV2_DELTA_HEADER = struct.Struct("20s20s20s20s20s")
_CHANGEGROUPV3_DELTA_HEADER = struct.Struct(">20s20s20s20s20sH")

# Repository requirement advertised when LFS-flagged revisions are present.
LFS_REQUIREMENT = 'lfs'

# Local alias; raises on short reads instead of returning partial data.
readexactly = util.readexactly
50 50
def getchunk(stream):
    """return the next chunk from stream as a string"""
    # A chunk is a 4-byte big-endian length (counting the header itself)
    # followed by the payload; a zero length is a group terminator.
    header = readexactly(stream, 4)
    length = struct.unpack(">l", header)[0]
    if length > 4:
        return readexactly(stream, length - 4)
    if length:
        raise error.Abort(_("invalid chunk length %d") % length)
    return ""
60 60
def chunkheader(length):
    """return a changegroup chunk header (string)"""
    # The on-the-wire length includes the 4-byte header itself.
    return struct.pack(">l", 4 + length)
64 64
def closechunk():
    """return a changegroup chunk header (string) for a zero-length chunk"""
    # Equivalent to struct.pack(">l", 0): four zero bytes terminate a group.
    return b"\0\0\0\0"
68 68
def writechunks(ui, chunks, filename, vfs=None):
    """Write chunks to a file and return its filename.

    The stream is assumed to be a bundle file.
    Existing files will not be overwritten.
    If no filename is specified, a temporary file is created.
    """
    fh = None
    cleanup = None
    try:
        if not filename:
            fd, filename = pycompat.mkstemp(prefix="hg-bundle-", suffix=".hg")
            fh = os.fdopen(fd, r"wb")
            # Only the temporary file is ours to delete on failure.
            cleanup = filename
        elif vfs:
            fh = vfs.open(filename, "wb")
        else:
            # Increase default buffer size because default is usually
            # small (4k is common on Linux).
            fh = open(filename, "wb", 131072)
        for chunk in chunks:
            fh.write(chunk)
        # Success: keep the file.
        cleanup = None
        return filename
    finally:
        if fh is not None:
            fh.close()
        if cleanup is not None:
            if filename and vfs:
                vfs.unlink(cleanup)
            else:
                os.unlink(cleanup)
102 102
class cg1unpacker(object):
    """Unpacker for cg1 changegroup streams.

    A changegroup unpacker handles the framing of the revision data in
    the wire format. Most consumers will want to use the apply()
    method to add the changes from the changegroup to a repository.

    If you're forwarding a changegroup unmodified to another consumer,
    use getchunks(), which returns an iterator of changegroup
    chunks. This is mostly useful for cases where you need to know the
    data stream has ended by observing the end of the changegroup.

    deltachunk() is useful only if you're applying delta data. Most
    consumers should prefer apply() instead.

    A few other public methods exist. Those are used only for
    bundlerepo and some debug commands - their use is discouraged.
    """
    # Struct describing this version's fixed-width per-delta header.
    deltaheader = _CHANGEGROUPV1_DELTA_HEADER
    deltaheadersize = deltaheader.size
    # Wire-format version string.
    version = '01'
    _grouplistcount = 1 # One list of files after the manifests
125 125
    def __init__(self, fh, alg, extras=None):
        """Wrap stream ``fh`` compressed with bundle engine ``alg``.

        alg may be None (treated as 'UN', uncompressed). extras is an
        optional dict of additional metadata exposed as self.extras.
        Raises error.Abort for an unknown compression type.
        """
        if alg is None:
            alg = 'UN'
        if alg not in util.compengines.supportedbundletypes:
            raise error.Abort(_('unknown stream compression type: %s')
                              % alg)
        if alg == 'BZ':
            alg = '_truncatedBZ'
            # NOTE(review): '_truncatedBZ' presumably handles a bz2 stream
            # whose magic was already consumed by header sniffing — confirm
            # against util.compengines.

        compengine = util.compengines.forbundletype(alg)
        self._stream = compengine.decompressorreader(fh)
        self._type = alg
        self.extras = extras or {}
        # Optional zero-arg progress callback invoked per non-empty chunk.
        self.callback = None
140 140
    # These methods (compressed, read, seek, tell) all appear to only
    # be used by bundlerepo, but it's a little hard to tell.
    def compressed(self):
        # True unless the stream is known to be uncompressed ('UN').
        return self._type is not None and self._type != 'UN'
    def read(self, l):
        # Raw read from the (decompressed) underlying stream.
        return self._stream.read(l)
    def seek(self, pos):
        return self._stream.seek(pos)
    def tell(self):
        return self._stream.tell()
    def close(self):
        return self._stream.close()
153 153
154 154 def _chunklength(self):
155 155 d = readexactly(self._stream, 4)
156 156 l = struct.unpack(">l", d)[0]
157 157 if l <= 4:
158 158 if l:
159 159 raise error.Abort(_("invalid chunk length %d") % l)
160 160 return 0
161 161 if self.callback:
162 162 self.callback()
163 163 return l - 4
164 164
165 165 def changelogheader(self):
166 166 """v10 does not have a changelog header chunk"""
167 167 return {}
168 168
169 169 def manifestheader(self):
170 170 """v10 does not have a manifest header chunk"""
171 171 return {}
172 172
173 173 def filelogheader(self):
174 174 """return the header of the filelogs chunk, v10 only has the filename"""
175 175 l = self._chunklength()
176 176 if not l:
177 177 return {}
178 178 fname = readexactly(self._stream, l)
179 179 return {'filename': fname}
180 180
181 181 def _deltaheader(self, headertuple, prevnode):
182 182 node, p1, p2, cs = headertuple
183 183 if prevnode is None:
184 184 deltabase = p1
185 185 else:
186 186 deltabase = prevnode
187 187 flags = 0
188 188 return node, p1, p2, deltabase, cs, flags
189 189
    def deltachunk(self, prevnode):
        """Read one delta entry from the stream.

        Returns {} at the end of the current group, otherwise the 7-tuple
        (node, p1, p2, cs, deltabase, delta, flags).
        """
        l = self._chunklength()
        if not l:
            return {}
        headerdata = readexactly(self._stream, self.deltaheadersize)
        header = self.deltaheader.unpack(headerdata)
        # Remainder of the chunk after the fixed header is the delta payload.
        delta = readexactly(self._stream, l - self.deltaheadersize)
        node, p1, p2, deltabase, cs, flags = self._deltaheader(header, prevnode)
        return (node, p1, p2, cs, deltabase, delta, flags)
199 199
    def getchunks(self):
        """returns all the chunks contained in the bundle

        Used when you need to forward the binary stream to a file or another
        network API. To do so, it parses the changegroup data, otherwise it
        would block in case of sshrepo because it doesn't know the end of the
        stream.
        """
        # For changegroup 1 and 2, we expect 3 parts: changelog, manifestlog,
        # and a list of filelogs. For changegroup 3, we expect 4 parts:
        # changelog, manifestlog, a list of tree manifestlogs, and a list of
        # filelogs.
        #
        # Changelog and manifestlog parts are terminated with empty chunks. The
        # tree and file parts are a list of entry sections. Each entry section
        # is a series of chunks terminating in an empty chunk. The list of these
        # entry sections is terminated in yet another empty chunk, so we know
        # we've reached the end of the tree/file list when we reach an empty
        # chunk that was preceded by no non-empty chunks.

        parts = 0
        while parts < 2 + self._grouplistcount:
            noentries = True
            while True:
                chunk = getchunk(self)
                if not chunk:
                    # The first two empty chunks represent the end of the
                    # changelog and the manifestlog portions. The remaining
                    # empty chunks represent either A) the end of individual
                    # tree or file entries in the file list, or B) the end of
                    # the entire list. It's the end of the entire list if there
                    # were no entries (i.e. noentries is True).
                    if parts < 2:
                        parts += 1
                    elif noentries:
                        parts += 1
                    break
                noentries = False
                # Re-emit the framing header, then the payload in <=1MB slices
                # to bound memory use for very large chunks.
                yield chunkheader(len(chunk))
                pos = 0
                while pos < len(chunk):
                    next = pos + 2**20
                    yield chunk[pos:next]
                    pos = next
            yield closechunk()
244 244
    def _unpackmanifests(self, repo, revmap, trp, prog):
        """Apply the manifest portion of the stream to ``repo``.

        prog is a progress object; its increment method is installed as
        the per-chunk callback for the duration of the group.
        """
        self.callback = prog.increment
        # no need to check for empty manifest group here:
        # if the result of the merge of 1 and 2 is the same in 3 and 4,
        # no new manifest will be created and the manifest group will
        # be empty during the pull
        self.manifestheader()
        deltas = self.deltaiter()
        repo.manifestlog.addgroup(deltas, revmap, trp)
        prog.complete()
        self.callback = None
256 256
    def apply(self, repo, tr, srctype, url, targetphase=phases.draft,
              expectedtotal=None):
        """Add the changegroup returned by source.read() to this repo.
        srctype is a string like 'push', 'pull', or 'unbundle'. url is
        the URL of the repo where this changegroup is coming from.

        Return an integer summarizing the change to this repo:
        - nothing changed or no source: 0
        - more heads than before: 1+added heads (2..n)
        - fewer heads than before: -1-removed heads (-2..-n)
        - number of heads stays the same: 1
        """
        repo = repo.unfiltered()
        def csmap(x):
            repo.ui.debug("add changeset %s\n" % short(x))
            return len(cl)

        def revmap(x):
            return cl.rev(x)

        changesets = files = revisions = 0

        try:
            # The transaction may already carry source information. In this
            # case we use the top level data. We overwrite the argument
            # because we need to use the top level value (if they exist)
            # in this function.
            srctype = tr.hookargs.setdefault('source', srctype)
            url = tr.hookargs.setdefault('url', url)
            repo.hook('prechangegroup',
                      throw=True, **pycompat.strkwargs(tr.hookargs))

            # write changelog data to temp files so concurrent readers
            # will not see an inconsistent view
            cl = repo.changelog
            cl.delayupdate(tr)
            oldheads = set(cl.heads())

            trp = weakref.proxy(tr)
            # pull off the changeset group
            repo.ui.status(_("adding changesets\n"))
            clstart = len(cl)
            progress = repo.ui.makeprogress(_('changesets'), unit=_('chunks'),
                                            total=expectedtotal)
            self.callback = progress.increment

            # Collect every file touched by incoming changesets so we can
            # size the "adding file changes" progress below.
            efiles = set()
            def onchangelog(cl, node):
                efiles.update(cl.readfiles(node))

            self.changelogheader()
            deltas = self.deltaiter()
            cgnodes = cl.addgroup(deltas, csmap, trp, addrevisioncb=onchangelog)
            # Rebind from set to count: only the cardinality is needed.
            efiles = len(efiles)

            if not cgnodes:
                repo.ui.develwarn('applied empty changegroup',
                                  config='warn-empty-changegroup')
            clend = len(cl)
            changesets = clend - clstart
            progress.complete()
            self.callback = None

            # pull off the manifest group
            repo.ui.status(_("adding manifests\n"))
            # We know that we'll never have more manifests than we had
            # changesets.
            progress = repo.ui.makeprogress(_('manifests'), unit=_('chunks'),
                                            total=changesets)
            self._unpackmanifests(repo, revmap, trp, progress)

            needfiles = {}
            if repo.ui.configbool('server', 'validate'):
                cl = repo.changelog
                ml = repo.manifestlog
                # validate incoming csets have their manifests
                for cset in pycompat.xrange(clstart, clend):
                    mfnode = cl.changelogrevision(cset).manifest
                    mfest = ml[mfnode].readdelta()
                    # store file cgnodes we must see
                    for f, n in mfest.iteritems():
                        needfiles.setdefault(f, set()).add(n)

            # process the files
            repo.ui.status(_("adding file changes\n"))
            newrevs, newfiles = _addchangegroupfiles(
                repo, self, revmap, trp, efiles, needfiles)
            revisions += newrevs
            files += newfiles

            deltaheads = 0
            if oldheads:
                heads = cl.heads()
                deltaheads = len(heads) - len(oldheads)
                for h in heads:
                    # Closed branch heads don't count toward the delta.
                    if h not in oldheads and repo[h].closesbranch():
                        deltaheads -= 1
            htext = ""
            if deltaheads:
                htext = _(" (%+d heads)") % deltaheads

            repo.ui.status(_("added %d changesets"
                             " with %d changes to %d files%s\n")
                           % (changesets, revisions, files, htext))
            repo.invalidatevolatilesets()

            if changesets > 0:
                if 'node' not in tr.hookargs:
                    tr.hookargs['node'] = hex(cl.node(clstart))
                    tr.hookargs['node_last'] = hex(cl.node(clend - 1))
                    hookargs = dict(tr.hookargs)
                else:
                    # 'node' already set by an earlier changegroup in this
                    # transaction; don't clobber the transaction-level args.
                    hookargs = dict(tr.hookargs)
                    hookargs['node'] = hex(cl.node(clstart))
                    hookargs['node_last'] = hex(cl.node(clend - 1))
                repo.hook('pretxnchangegroup',
                          throw=True, **pycompat.strkwargs(hookargs))

            added = [cl.node(r) for r in pycompat.xrange(clstart, clend)]
            phaseall = None
            if srctype in ('push', 'serve'):
                # Old servers can not push the boundary themselves.
                # New servers won't push the boundary if changeset already
                # exists locally as secret
                #
                # We should not use added here but the list of all change in
                # the bundle
                if repo.publishing():
                    targetphase = phaseall = phases.public
                else:
                    # closer target phase computation

                    # Those changesets have been pushed from the
                    # outside, their phases are going to be pushed
                    # alongside. Therefor `targetphase` is
                    # ignored.
                    targetphase = phaseall = phases.draft
            if added:
                phases.registernew(repo, tr, targetphase, added)
            if phaseall is not None:
                phases.advanceboundary(repo, tr, phaseall, cgnodes)

            if changesets > 0:

                def runhooks():
                    # These hooks run when the lock releases, not when the
                    # transaction closes. So it's possible for the changelog
                    # to have changed since we last saw it.
                    if clstart >= len(repo):
                        return

                    repo.hook("changegroup", **pycompat.strkwargs(hookargs))

                    for n in added:
                        args = hookargs.copy()
                        args['node'] = hex(n)
                        del args['node_last']
                        repo.hook("incoming", **pycompat.strkwargs(args))

                    newheads = [h for h in repo.heads()
                                if h not in oldheads]
                    repo.ui.log("incoming",
                                "%d incoming changes - new heads: %s\n",
                                len(added),
                                ', '.join([hex(c[:6]) for c in newheads]))

                tr.addpostclose('changegroup-runhooks-%020i' % clstart,
                                lambda tr: repo._afterlock(runhooks))
        finally:
            repo.ui.flush()
        # never return 0 here:
        if deltaheads < 0:
            ret = deltaheads - 1
        else:
            ret = deltaheads + 1
        return ret
433 433
434 434 def deltaiter(self):
435 435 """
436 436 returns an iterator of the deltas in this changegroup
437 437
438 438 Useful for passing to the underlying storage system to be stored.
439 439 """
440 440 chain = None
441 441 for chunkdata in iter(lambda: self.deltachunk(chain), {}):
442 442 # Chunkdata: (node, p1, p2, cs, deltabase, delta, flags)
443 443 yield chunkdata
444 444 chain = chunkdata[0]
445 445
class cg2unpacker(cg1unpacker):
    """Unpacker for cg2 streams.

    cg2 streams add support for generaldelta, so the delta header
    format is slightly different. All other features about the data
    remain the same.
    """
    deltaheader = _CHANGEGROUPV2_DELTA_HEADER
    deltaheadersize = deltaheader.size
    version = '02'

    def _deltaheader(self, headertuple, prevnode):
        # cg2 encodes the delta base explicitly, so prevnode is not
        # consulted; flags are always 0 for this version.
        node, p1, p2, deltabase, cs = headertuple
        return node, p1, p2, deltabase, cs, 0
461 461
class cg3unpacker(cg2unpacker):
    """Unpacker for cg3 streams.

    cg3 streams add support for exchanging treemanifests and revlog
    flags. It adds the revlog flags to the delta header and an empty chunk
    separating manifests and files.
    """
    deltaheader = _CHANGEGROUPV3_DELTA_HEADER
    deltaheadersize = deltaheader.size
    version = '03'
    _grouplistcount = 2 # One list of manifests and one list of files

    def _deltaheader(self, headertuple, prevnode):
        # cg3 carries the delta base and a 16-bit flags field explicitly.
        node, p1, p2, deltabase, cs, flags = headertuple
        return node, p1, p2, deltabase, cs, flags

    def _unpackmanifests(self, repo, revmap, trp, prog):
        # Flat manifests first (handled by the parent class), then any
        # directory manifest sections, each announced by a filename header
        # and terminated by an empty header.
        super(cg3unpacker, self)._unpackmanifests(repo, revmap, trp, prog)
        for chunkdata in iter(self.filelogheader, {}):
            # If we get here, there are directory manifests in the changegroup
            d = chunkdata["filename"]
            repo.ui.debug("adding %s revisions\n" % d)
            dirlog = repo.manifestlog._revlog.dirlog(d)
            deltas = self.deltaiter()
            if not dirlog.addgroup(deltas, revmap, trp):
                raise error.Abort(_("received dir revlog group is empty"))
488 488
class headerlessfixup(object):
    """Replay already-consumed header bytes ahead of a stream.

    Wraps file-like ``fh`` so that the bytes ``h`` (read off the stream
    earlier, e.g. while sniffing) are returned first, then reads fall
    through to the underlying stream.
    """
    def __init__(self, fh, h):
        self._h = h
        self._fh = fh

    def read(self, n):
        if not self._h:
            return readexactly(self._fh, n)
        d, self._h = self._h[:n], self._h[n:]
        if len(d) < n:
            # Buffered header exhausted mid-read; top up from the stream.
            d += readexactly(self._fh, n - len(d))
        return d
500 500
@attr.s(slots=True, frozen=True)
class revisiondelta(object):
    """Describes a delta entry in a changegroup.

    Captured data is sufficient to serialize the delta into multiple
    formats.
    """
    # 20 byte node of this revision.
    node = attr.ib()
    # 20 byte nodes of parent revisions.
    p1node = attr.ib()
    p2node = attr.ib()
    # 20 byte node of node this delta is against.
    basenode = attr.ib()
    # 20 byte node of changeset revision this delta is associated with.
    linknode = attr.ib()
    # 2 bytes of flags to apply to revision data.
    flags = attr.ib()
    # Iterable of chunks holding raw delta data.
    # NOTE(review): presumably consumed once by the packer; treat as a
    # one-shot iterable rather than a reusable sequence — confirm at
    # call sites.
    deltachunks = attr.ib()
521 521
class cgpacker(object):
    def __init__(self, repo, filematcher, version, allowreorder,
                 useprevdelta, builddeltaheader, manifestsend,
                 sendtreemanifests, bundlecaps=None, shallow=False):
        """Given a source repo, construct a bundler.

        filematcher is a matcher that matches on files to include in the
        changegroup. Used to facilitate sparse changegroups.

        allowreorder controls whether reordering of revisions is allowed.
        This value is used when ``bundle.reorder`` is ``auto`` or isn't
        set.

        useprevdelta controls whether revisions should always delta against
        the previous revision in the changegroup.

        builddeltaheader is a callable that constructs the header for a group
        delta.

        manifestsend is a chunk to send after manifests have been fully emitted.

        sendtreemanifests indicates whether tree manifests should be emitted.

        bundlecaps is optional and can be used to specify the set of
        capabilities which can be used to build the bundle. While bundlecaps is
        unused in core Mercurial, extensions rely on this feature to communicate
        capabilities to customize the changegroup packer.

        shallow indicates whether shallow data might be sent. The packer may
        need to pack file contents not introduced by the changes being packed.
        """
        assert filematcher
        self._filematcher = filematcher

        self.version = version
        self._useprevdelta = useprevdelta
        self._builddeltaheader = builddeltaheader
        self._manifestsend = manifestsend
        self._sendtreemanifests = sendtreemanifests

        # Set of capabilities we can use to build the bundle.
        if bundlecaps is None:
            bundlecaps = set()
        self._bundlecaps = bundlecaps
        self._isshallow = shallow

        # experimental config: bundle.reorder
        reorder = repo.ui.config('bundle', 'reorder')
        if reorder == 'auto':
            self._reorder = allowreorder
        else:
            self._reorder = stringutil.parsebool(reorder)

        self._repo = repo

        if self._repo.ui.verbose and not self._repo.ui.debugflag:
            self._verbosenote = self._repo.ui.note
        else:
            self._verbosenote = lambda s: None

        # TODO the functionality keyed off of this should probably be
        # controlled via arguments to group() that influence behavior.
        self._changelogdone = False

        # Maps CL revs to per-revlog revisions. Cleared in close() at
        # the end of each group.
        self._clrevtolocalrev = {}
        self._nextclrevtolocalrev = {}

        # Maps changelog nodes to changelog revs. Filled in once
        # during changelog stage and then left unmodified.
        self._clnodetorev = {}
586 595 def _close(self):
587 596 # Ellipses serving mode.
588 getattr(self, '_clrev_to_localrev', {}).clear()
589 if getattr(self, '_next_clrev_to_localrev', {}):
590 self._clrev_to_localrev = self._next_clrev_to_localrev
591 del self._next_clrev_to_localrev
597 self._clrevtolocalrev.clear()
598 if self._nextclrevtolocalrev:
599 self.clrevtolocalrev = self._nextclrevtolocalrev
600 self._nextclrevtolocalrev.clear()
592 601 self._changelogdone = True
593 602
594 603 return closechunk()
595 604
596 605 def _fileheader(self, fname):
597 606 return chunkheader(len(fname)) + fname
598 607
    # Extracted both for clarity and for overriding in extensions.
    def _sortgroup(self, store, nodelist, lookup):
        """Sort nodes for change group and turn them into revnums."""
        # Ellipses serving mode.
        #
        # In a perfect world, we'd generate better ellipsis-ified graphs
        # for non-changelog revlogs. In practice, we haven't started doing
        # that yet, so the resulting DAGs for the manifestlog and filelogs
        # are actually full of bogus parentage on all the ellipsis
        # nodes. This has the side effect that, while the contents are
        # correct, the individual DAGs might be completely out of whack in
        # a case like 882681bc3166 and its ancestors (back about 10
        # revisions or so) in the main hg repo.
        #
        # The one invariant we *know* holds is that the new (potentially
        # bogus) DAG shape will be valid if we order the nodes in the
        # order that they're introduced in dramatis personae by the
        # changelog, so what we do is we sort the non-changelog histories
        # by the order in which they are used by the changelog.
        # NOTE(review): '_full_nodes' is presumably set externally by the
        # narrow/ellipses machinery; its presence toggles this mode —
        # confirm against the narrow extension.
        if util.safehasattr(self, '_full_nodes') and self._clnodetorev:
            key = lambda n: self._clnodetorev[lookup(n)]
            return [store.rev(n) for n in sorted(nodelist, key=key)]

        # for generaldelta revlogs, we linearize the revs; this will both be
        # much quicker and generate a much smaller bundle
        if (store._generaldelta and self._reorder is None) or self._reorder:
            dag = dagutil.revlogdag(store)
            return dag.linearize(set(store.rev(n) for n in nodelist))
        else:
            return sorted([store.rev(n) for n in nodelist])
629 638
    def group(self, nodelist, store, lookup, units=None):
        """Calculate a delta group, yielding a sequence of changegroup chunks
        (strings).

        Given a list of changeset revs, return a set of deltas and
        metadata corresponding to nodes. The first delta is
        first parent(nodelist[0]) -> nodelist[0], the receiver is
        guaranteed to have this parent as it has all history before
        these changesets. In the case firstparent is nullrev the
        changegroup starts with a full revision.

        If units is not None, progress detail will be generated, units specifies
        the type of revlog that is touched (changelog, manifest, etc.).
        """
        # if we don't have any revisions touched by these changesets, bail
        if len(nodelist) == 0:
            yield self._close()
            return

        revs = self._sortgroup(store, nodelist, lookup)

        # add the parent of the first rev
        p = store.parentrevs(revs[0])[0]
        revs.insert(0, p)

        # build deltas
        progress = None
        if units is not None:
            progress = self._repo.ui.makeprogress(_('bundling'), unit=units,
                                                  total=(len(revs) - 1))
        # Each revision is emitted as a delta against the revision that
        # precedes it in the sorted order.
        for r in pycompat.xrange(len(revs) - 1):
            if progress:
                progress.update(r + 1)
            prev, curr = revs[r], revs[r + 1]
            linknode = lookup(store.node(curr))
            for c in self._revchunk(store, curr, prev, linknode):
                yield c

        if progress:
            progress.complete()
        yield self._close()
671 680
672 681 # filter any nodes that claim to be part of the known set
673 682 def _prune(self, store, missing, commonrevs):
674 683 # TODO this violates storage abstraction for manifests.
675 684 if isinstance(store, manifest.manifestrevlog):
676 685 if not self._filematcher.visitdir(store._dir[:-1] or '.'):
677 686 return []
678 687
679 688 rr, rl = store.rev, store.linkrev
680 689 return [n for n in missing if rl(rr(n)) not in commonrevs]
681 690
682 691 def _packmanifests(self, dir, mfnodes, lookuplinknode):
683 692 """Pack flat manifests into a changegroup stream."""
684 693 assert not dir
685 694 for chunk in self.group(mfnodes, self._repo.manifestlog._revlog,
686 695 lookuplinknode, units=_('manifests')):
687 696 yield chunk
688 697
    def _packtreemanifests(self, dir, mfnodes, lookuplinknode):
        """Version of _packmanifests that operates on directory manifests.

        Encodes the directory name in the output so multiple manifests
        can be sent.
        """
        # Tree manifests only exist in changegroup version 3.
        assert self.version == b'03'

        if dir:
            yield self._fileheader(dir)

        # TODO violates storage abstractions by assuming revlogs.
        dirlog = self._repo.manifestlog._revlog.dirlog(dir)
        for chunk in self.group(mfnodes, dirlog, lookuplinknode,
                                units=_('manifests')):
            yield chunk
705 714
    def generate(self, commonrevs, clnodes, fastpathlinkrev, source):
        '''yield a sequence of changegroup chunks (strings)'''
        repo = self._repo
        cl = repo.changelog

        clrevorder = {}
        mfs = {} # needed manifests
        fnodes = {} # needed file nodes
        mfl = repo.manifestlog
        # TODO violates storage abstraction.
        mfrevlog = mfl._revlog
        changedfiles = set()

        # '_full_nodes' is set by the narrow/ellipses machinery; its mere
        # presence switches this packer into ellipses serving mode.
        ellipsesmode = util.safehasattr(self, '_full_nodes')

        # Callback for the changelog, used to collect changed files and
        # manifest nodes.
        # Returns the linkrev node (identity in the changelog case).
        def lookupcl(x):
            c = cl.read(x)
            clrevorder[x] = len(clrevorder)

            if ellipsesmode:
                # Only update mfs if x is going to be sent. Otherwise we
                # end up with bogus linkrevs specified for manifests and
                # we skip some manifest nodes that we should otherwise
                # have sent.
                if (x in self._full_nodes
                    or cl.rev(x) in self._precomputed_ellipsis):
                    n = c[0]
                    # Record the first changeset introducing this manifest
                    # version.
                    mfs.setdefault(n, x)
                    # Set this narrow-specific dict so we have the lowest
                    # manifest revnum to look up for this cl revnum. (Part of
                    # mapping changelog ellipsis parents to manifest ellipsis
                    # parents)
                    self._nextclrevtolocalrev.setdefault(cl.rev(x),
                                                         mfrevlog.rev(n))
                # We can't trust the changed files list in the changeset if the
                # client requested a shallow clone.
                if self._isshallow:
                    changedfiles.update(mfl[c[0]].read().keys())
                else:
                    changedfiles.update(c[3])
            else:

                n = c[0]
                # record the first changeset introducing this manifest version
                mfs.setdefault(n, x)
                # Record a complete list of potentially-changed files in
                # this manifest.
                changedfiles.update(c[3])

            return x

        self._verbosenote(_('uncompressed size of bundle content:\n'))
        size = 0
        for chunk in self.group(clnodes, cl, lookupcl, units=_('changesets')):
            size += len(chunk)
            yield chunk
        self._verbosenote(_('%8.i (changelog)\n') % size)

        # We need to make sure that the linkrev in the changegroup refers to
        # the first changeset that introduced the manifest or file revision.
        # The fastpath is usually safer than the slowpath, because the filelogs
        # are walked in revlog order.
        #
        # When taking the slowpath with reorder=None and the manifest revlog
        # uses generaldelta, the manifest may be walked in the "wrong" order.
        # Without 'clrevorder', we would get an incorrect linkrev (see fix in
        # cc0ff93d0c0c).
        #
        # When taking the fastpath, we are only vulnerable to reordering
        # of the changelog itself. The changelog never uses generaldelta, so
        # it is only reordered when reorder=True. To handle this case, we
        # simply take the slowpath, which already has the 'clrevorder' logic.
        # This was also fixed in cc0ff93d0c0c.
        fastpathlinkrev = fastpathlinkrev and not self._reorder
        # Treemanifests don't work correctly with fastpathlinkrev
        # either, because we don't discover which directory nodes to
        # send along with files. This could probably be fixed.
        fastpathlinkrev = fastpathlinkrev and (
            'treemanifest' not in repo.requirements)

        for chunk in self.generatemanifests(commonrevs, clrevorder,
                fastpathlinkrev, mfs, fnodes, source):
            yield chunk

        if ellipsesmode:
            mfdicts = None
            if self._isshallow:
                mfdicts = [(self._repo.manifestlog[n].read(), lr)
                           for (n, lr) in mfs.iteritems()]

        mfs.clear()
        clrevs = set(cl.rev(x) for x in clnodes)

        if not fastpathlinkrev:
            def linknodes(unused, fname):
                return fnodes.get(fname, {})
        else:
            cln = cl.node
            def linknodes(filerevlog, fname):
                llr = filerevlog.linkrev
                fln = filerevlog.node
                revs = ((r, llr(r)) for r in filerevlog)
                return dict((fln(r), cln(lr)) for r, lr in revs if lr in clrevs)

        if ellipsesmode:
            # We need to pass the mfdicts variable down into
            # generatefiles(), but more than one command might have
            # wrapped generatefiles so we can't modify the function
            # signature. Instead, we pass the data to ourselves using an
            # instance attribute. I'm sorry.
            self._mfdicts = mfdicts

        for chunk in self.generatefiles(changedfiles, linknodes, commonrevs,
                                        source):
            yield chunk

        yield self._close()

        if clnodes:
            repo.hook('outgoing', node=hex(clnodes[0]), source=source)
831 840
    def generatemanifests(self, commonrevs, clrevorder, fastpathlinkrev, mfs,
                          fnodes, source):
        """Returns an iterator of changegroup chunks containing manifests.

        `source` is unused here, but is used by extensions like remotefilelog to
        change what is sent based in pulls vs pushes, etc.
        """
        repo = self._repo
        mfl = repo.manifestlog
        dirlog = mfl._revlog.dirlog
        # Work queue mapping directory path ('' is the root manifest) to a
        # dict of {manifest node: changelog linknode}. Sub-directory entries
        # are added lazily by lookupmflinknode() as tree manifests are read.
        tmfnodes = {'': mfs}

        # Callback for the manifest, used to collect linkrevs for filelog
        # revisions.
        # Returns the linkrev node (collected in lookupcl).
        def makelookupmflinknode(dir, nodes):
            if fastpathlinkrev:
                # Fast path only applies to the root manifest (no tree
                # manifests), where mfs already maps node -> linknode.
                assert not dir
                return mfs.__getitem__

            def lookupmflinknode(x):
                """Callback for looking up the linknode for manifests.

                Returns the linkrev node for the specified manifest.

                SIDE EFFECT:

                  1) fclnodes gets populated with the list of relevant
                     file nodes if we're not using fastpathlinkrev
                  2) When treemanifests are in use, collects treemanifest nodes
                     to send

                Note that this means manifests must be completely sent to
                the client before you can trust the list of files and
                treemanifests to send.
                """
                clnode = nodes[x]
                # shallow=True avoids recursing into sub-manifests here; the
                # 't' entries below queue them for their own pass instead.
                mdata = mfl.get(dir, x).readfast(shallow=True)
                for p, n, fl in mdata.iterentries():
                    if fl == 't': # subdirectory manifest
                        subdir = dir + p + '/'
                        tmfclnodes = tmfnodes.setdefault(subdir, {})
                        tmfclnode = tmfclnodes.setdefault(n, clnode)
                        # Always associate a node with its earliest linkrev.
                        if clrevorder[clnode] < clrevorder[tmfclnode]:
                            tmfclnodes[n] = clnode
                    else:
                        f = dir + p
                        fclnodes = fnodes.setdefault(f, {})
                        fclnode = fclnodes.setdefault(n, clnode)
                        if clrevorder[clnode] < clrevorder[fclnode]:
                            fclnodes[n] = clnode
                return clnode
            return lookupmflinknode

        fn = (self._packtreemanifests if self._sendtreemanifests
              else self._packmanifests)
        size = 0
        # Drain the work queue; lookupmflinknode() may add new directories
        # while we iterate, which is why this is a while loop over popitem().
        while tmfnodes:
            dir, nodes = tmfnodes.popitem()
            prunednodes = self._prune(dirlog(dir), nodes, commonrevs)
            if not dir or prunednodes:
                for x in fn(dir, prunednodes, makelookupmflinknode(dir, nodes)):
                    size += len(x)
                    yield x
        self._verbosenote(_('%8.i (manifests)\n') % size)
        yield self._manifestsend
898 907
    # The 'source' parameter is useful for extensions
    def generatefiles(self, changedfiles, linknodes, commonrevs, source):
        """Emit changegroup chunks for the changed files.

        `changedfiles` is filtered through the packer's file matcher, and in
        shallow mode the `linknodes` callback is augmented to also cover file
        nodes reachable from manifests we sent but did not introduce.
        """
        changedfiles = list(filter(self._filematcher, changedfiles))

        if self._isshallow:
            # See comment in generate() for why this sadness is a thing.
            mfdicts = self._mfdicts
            del self._mfdicts
            # In a shallow clone, the linknodes callback needs to also include
            # those file nodes that are in the manifests we sent but weren't
            # introduced by those manifests.
            commonctxs = [self._repo[c] for c in commonrevs]
            oldlinknodes = linknodes
            clrev = self._repo.changelog.rev

            # Defining this function has a side-effect of overriding the
            # function of the same name that was passed in as an argument.
            # TODO have caller pass in appropriate function.
            def linknodes(flog, fname):
                for c in commonctxs:
                    try:
                        fnode = c.filenode(fname)
                        self._clrevtolocalrev[c.rev()] = flog.rev(fnode)
                    except error.ManifestLookupError:
                        # File not present in this common changeset; skip.
                        pass
                links = oldlinknodes(flog, fname)
                if len(links) != len(mfdicts):
                    for mf, lr in mfdicts:
                        fnode = mf.get(fname, None)
                        if fnode in links:
                            # Prefer the earliest changelog rev as linkrev.
                            links[fnode] = min(links[fnode], lr, key=clrev)
                        elif fnode:
                            links[fnode] = lr
                return links

        return self._generatefiles(changedfiles, linknodes, commonrevs, source)
935 944
    def _generatefiles(self, changedfiles, linknodes, commonrevs, source):
        """Yield a file header chunk plus delta group for each changed file.

        Files are emitted in sorted name order; revisions already known to
        the receiver (per `commonrevs`) are pruned before grouping.
        """
        repo = self._repo
        progress = repo.ui.makeprogress(_('bundling'), unit=_('files'),
                                        total=len(changedfiles))
        for i, fname in enumerate(sorted(changedfiles)):
            filerevlog = repo.file(fname)
            if not filerevlog:
                raise error.Abort(_("empty or missing file data for %s") %
                                  fname)

            linkrevnodes = linknodes(filerevlog, fname)
            # Lookup for filenodes, we collected the linkrev nodes above in the
            # fastpath case and with lookupmf in the slowpath case.
            def lookupfilelog(x):
                return linkrevnodes[x]

            filenodes = self._prune(filerevlog, linkrevnodes, commonrevs)
            if filenodes:
                progress.update(i + 1, item=fname)
                h = self._fileheader(fname)
                size = len(h)
                yield h
                for chunk in self.group(filenodes, filerevlog, lookupfilelog):
                    size += len(chunk)
                    yield chunk
                self._verbosenote(_('%8.i %s\n') % (size, fname))
        progress.complete()
963 972
    def _deltaparent(self, store, rev, p1, p2, prev):
        """Choose the revision to delta against when sending `rev`.

        Returns a revision number in `store`; nullrev means "send a full
        snapshot". `prev` is the previously emitted revision, `p1`/`p2`
        are the parents of `rev`.
        """
        if self._useprevdelta:
            # cg1 wire format has no explicit delta base field, so the
            # delta must always be against the previously sent revision.
            if not store.candelta(prev, rev):
                raise error.ProgrammingError(
                    'cg1 should not be used in this case')
            return prev

        # Narrow ellipses mode.
        if util.safehasattr(self, '_full_nodes'):
            # TODO: send better deltas when in narrow mode.
            #
            # changegroup.group() loops over revisions to send,
            # including revisions we'll skip. What this means is that
            # `prev` will be a potentially useless delta base for all
            # ellipsis nodes, as the client likely won't have it. In
            # the future we should do bookkeeping about which nodes
            # have been sent to the client, and try to be
            # significantly smarter about delta bases. This is
            # slightly tricky because this same code has to work for
            # all revlogs, and we don't have the linkrev/linknode here.
            return p1

        dp = store.deltaparent(rev)
        if dp == nullrev and store.storedeltachains:
            # Avoid sending full revisions when delta parent is null. Pick prev
            # in that case. It's tempting to pick p1 in this case, as p1 will
            # be smaller in the common case. However, computing a delta against
            # p1 may require resolving the raw text of p1, which could be
            # expensive. The revlog caches should have prev cached, meaning
            # less CPU for changegroup generation. There is likely room to add
            # a flag and/or config option to control this behavior.
            base = prev
        elif dp == nullrev:
            # revlog is configured to use full snapshot for a reason,
            # stick to full snapshot.
            base = nullrev
        elif dp not in (p1, p2, prev):
            # Pick prev when we can't be sure remote has the base revision.
            # NOTE: returns directly, bypassing the candelta() check below;
            # prev is assumed deltable since it was just sent.
            return prev
        else:
            base = dp

        if base != nullrev and not store.candelta(base, rev):
            # Flag-processed content may forbid deltas; fall back to full.
            base = nullrev

        return base
1010 1019
1011 1020 def _revchunk(self, store, rev, prev, linknode):
1012 1021 if util.safehasattr(self, '_full_nodes'):
1013 1022 fn = self._revisiondeltanarrow
1014 1023 else:
1015 1024 fn = self._revisiondeltanormal
1016 1025
1017 1026 delta = fn(store, rev, prev, linknode)
1018 1027 if not delta:
1019 1028 return
1020 1029
1021 1030 meta = self._builddeltaheader(delta)
1022 1031 l = len(meta) + sum(len(x) for x in delta.deltachunks)
1023 1032
1024 1033 yield chunkheader(l)
1025 1034 yield meta
1026 1035 for x in delta.deltachunks:
1027 1036 yield x
1028 1037
    def _revisiondeltanormal(self, store, rev, prev, linknode):
        """Build a revisiondelta for `rev` in the normal (non-ellipsis) case.

        Handles censored revisions by substituting the tombstone data and
        patching the diff header so the result still applies cleanly.
        """
        node = store.node(rev)
        p1, p2 = store.parentrevs(rev)
        base = self._deltaparent(store, rev, p1, p2, prev)

        prefix = ''
        if store.iscensored(base) or store.iscensored(rev):
            try:
                delta = store.revision(node, raw=True)
            except error.CensoredNodeError as e:
                # The tombstone stands in for the censored content.
                delta = e.tombstone
            if base == nullrev:
                prefix = mdiff.trivialdiffheader(len(delta))
            else:
                baselen = store.rawsize(base)
                prefix = mdiff.replacediffheader(baselen, len(delta))
        elif base == nullrev:
            # Full snapshot: "diff" is the raw text with a trivial header.
            delta = store.revision(node, raw=True)
            prefix = mdiff.trivialdiffheader(len(delta))
        else:
            delta = store.revdiff(base, rev)
        p1n, p2n = store.parents(node)

        return revisiondelta(
            node=node,
            p1node=p1n,
            p2node=p2n,
            basenode=store.node(base),
            linknode=linknode,
            flags=store.flags(rev),
            deltachunks=(prefix, delta),
        )
1061 1070
    def _revisiondeltanarrow(self, store, rev, prev, linknode):
        """Build a revisiondelta for `rev` in narrow ellipsis mode.

        Returns None for revisions that should be skipped entirely, a normal
        delta for full nodes, and a full-text REVIDX_ELLIPSIS revision (with
        rewritten parents) for ellipsis nodes.
        """
        # build up some mapping information that's useful later. See
        # the local() nested function below.
        if not self._changelogdone:
            self._clnodetorev[linknode] = rev
            linkrev = rev
            self._clrevtolocalrev[linkrev] = rev
        else:
            linkrev = self._clnodetorev[linknode]
            self._clrevtolocalrev[linkrev] = rev

        # This is a node to send in full, because the changeset it
        # corresponds to was a full changeset.
        if linknode in self._full_nodes:
            return self._revisiondeltanormal(store, rev, prev, linknode)

        # At this point, a node can either be one we should skip or an
        # ellipsis. If it's not an ellipsis, bail immediately.
        if linkrev not in self._precomputed_ellipsis:
            return

        linkparents = self._precomputed_ellipsis[linkrev]
        def local(clrev):
            """Turn a changelog revnum into a local revnum.

            The ellipsis dag is stored as revnums on the changelog,
            but when we're producing ellipsis entries for
            non-changelog revlogs, we need to turn those numbers into
            something local. This does that for us, and during the
            changelog sending phase will also expand the stored
            mappings as needed.
            """
            if clrev == nullrev:
                return nullrev

            if not self._changelogdone:
                # If we're doing the changelog, it's possible that we
                # have a parent that is already on the client, and we
                # need to store some extra mapping information so that
                # our contained ellipsis nodes will be able to resolve
                # their parents.
                if clrev not in self._clrevtolocalrev:
                    clnode = store.node(clrev)
                    self._clnodetorev[clnode] = clrev
                return clrev

            # Walk the ellipsis-ized changelog breadth-first looking for a
            # change that has been linked from the current revlog.
            #
            # For a flat manifest revlog only a single step should be necessary
            # as all relevant changelog entries are relevant to the flat
            # manifest.
            #
            # For a filelog or tree manifest dirlog however not every changelog
            # entry will have been relevant, so we need to skip some changelog
            # nodes even after ellipsis-izing.
            walk = [clrev]
            while walk:
                p = walk[0]
                walk = walk[1:]
                if p in self._clrevtolocalrev:
                    return self._clrevtolocalrev[p]
                elif p in self._full_nodes:
                    walk.extend([pp for pp in self._repo.changelog.parentrevs(p)
                                 if pp != nullrev])
                elif p in self._precomputed_ellipsis:
                    walk.extend([pp for pp in self._precomputed_ellipsis[p]
                                 if pp != nullrev])
                else:
                    # In this case, we've got an ellipsis with parents
                    # outside the current bundle (likely an
                    # incremental pull). We "know" that we can use the
                    # value of this same revlog at whatever revision
                    # is pointed to by linknode. "Know" is in scare
                    # quotes because I haven't done enough examination
                    # of edge cases to convince myself this is really
                    # a fact - it works for all the (admittedly
                    # thorough) cases in our testsuite, but I would be
                    # somewhat unsurprised to find a case in the wild
                    # where this breaks down a bit. That said, I don't
                    # know if it would hurt anything.
                    for i in pycompat.xrange(rev, 0, -1):
                        if store.linkrev(i) == clrev:
                            return i
                    # We failed to resolve a parent for this node, so
                    # we crash the changegroup construction.
                    raise error.Abort(
                        'unable to resolve parent while packing %r %r'
                        ' for changeset %r' % (store.indexfile, rev, clrev))

            return nullrev

        if not linkparents or (
            store.parentrevs(rev) == (nullrev, nullrev)):
            p1, p2 = nullrev, nullrev
        elif len(linkparents) == 1:
            p1, = sorted(local(p) for p in linkparents)
            p2 = nullrev
        else:
            p1, p2 = sorted(local(p) for p in linkparents)

        n = store.node(rev)
        p1n, p2n = store.node(p1), store.node(p2)
        flags = store.flags(rev)
        flags |= revlog.REVIDX_ELLIPSIS

        # TODO: try and actually send deltas for ellipsis data blocks
        data = store.revision(n)
        diffheader = mdiff.trivialdiffheader(len(data))

        return revisiondelta(
            node=n,
            p1node=p1n,
            p2node=p2n,
            basenode=nullid,
            linknode=linknode,
            flags=flags,
            deltachunks=(diffheader, data),
        )
1181 1190
def _makecg1packer(repo, filematcher, bundlecaps, shallow=False):
    """Construct a packer for version '01' changegroups."""
    def builddeltaheader(d):
        # cg1 headers carry no explicit delta base; the base is always the
        # previously sent revision.
        return _CHANGEGROUPV1_DELTA_HEADER.pack(
            d.node, d.p1node, d.p2node, d.linknode)

    return cgpacker(repo, filematcher, b'01',
                    useprevdelta=True,
                    allowreorder=None,
                    builddeltaheader=builddeltaheader,
                    manifestsend=b'',
                    sendtreemanifests=False,
                    bundlecaps=bundlecaps,
                    shallow=shallow)
1194 1203
def _makecg2packer(repo, filematcher, bundlecaps, shallow=False):
    """Construct a packer for version '02' changegroups."""
    def builddeltaheader(d):
        # cg2 adds an explicit delta base node to the header.
        return _CHANGEGROUPV2_DELTA_HEADER.pack(
            d.node, d.p1node, d.p2node, d.basenode, d.linknode)

    # Since generaldelta is directly supported by cg2, reordering
    # generally doesn't help, so we disable it by default (treating
    # bundle.reorder=auto just like bundle.reorder=False).
    return cgpacker(repo, filematcher, b'02',
                    useprevdelta=False,
                    allowreorder=False,
                    builddeltaheader=builddeltaheader,
                    manifestsend=b'',
                    sendtreemanifests=False,
                    bundlecaps=bundlecaps,
                    shallow=shallow)
1210 1219
def _makecg3packer(repo, filematcher, bundlecaps, shallow=False):
    """Construct a packer for version '03' changegroups."""
    def builddeltaheader(d):
        # cg3 extends the cg2 header with the revision flags field.
        return _CHANGEGROUPV3_DELTA_HEADER.pack(
            d.node, d.p1node, d.p2node, d.basenode, d.linknode, d.flags)

    return cgpacker(repo, filematcher, b'03',
                    useprevdelta=False,
                    allowreorder=False,
                    builddeltaheader=builddeltaheader,
                    manifestsend=closechunk(),
                    sendtreemanifests=True,
                    bundlecaps=bundlecaps,
                    shallow=shallow)
1223 1232
# Maps changegroup version identifier to a (packer factory, unpacker class)
# pair. Factories take (repo, filematcher, bundlecaps, shallow=...).
_packermap = {'01': (_makecg1packer, cg1unpacker),
             # cg2 adds support for exchanging generaldelta
             '02': (_makecg2packer, cg2unpacker),
             # cg3 adds support for exchanging revlog flags and treemanifests
             '03': (_makecg3packer, cg3unpacker),
}
1230 1239
def allsupportedversions(repo):
    """Return the set of changegroup versions this code supports for repo."""
    versions = set(_packermap.keys())
    # '03' is only offered when the repo opts in via changegroup3,
    # treemanifest config, or an existing treemanifest requirement.
    wantv03 = (repo.ui.configbool('experimental', 'changegroup3') or
               repo.ui.configbool('experimental', 'treemanifest') or
               'treemanifest' in repo.requirements)
    if not wantv03:
        versions.discard('03')
    return versions
1238 1247
# Changegroup versions that can be applied to the repo
def supportedincomingversions(repo):
    """Return changegroup versions that can be applied to `repo`.

    Currently every supported version can be applied, so this is simply
    allsupportedversions().
    """
    return allsupportedversions(repo)
1242 1251
# Changegroup versions that can be created from the repo
def supportedoutgoingversions(repo):
    """Return changegroup versions that can be produced from `repo`."""
    versions = allsupportedversions(repo)
    if 'treemanifest' in repo.requirements:
        # Versions 01 and 02 support only flat manifests and it's just too
        # expensive to convert between the flat manifest and tree manifest on
        # the fly. Since tree manifests are hashed differently, all of history
        # would have to be converted. Instead, we simply don't even pretend to
        # support versions 01 and 02.
        versions -= {'01', '02'}
    if repository.NARROW_REQUIREMENT in repo.requirements:
        # Versions 01 and 02 don't support revlog flags, and we need to
        # support that for stripping and unbundling to work.
        versions -= {'01', '02'}
    if LFS_REQUIREMENT in repo.requirements:
        # Versions 01 and 02 don't support revlog flags, and we need to
        # mark LFS entries with REVIDX_EXTSTORED.
        versions -= {'01', '02'}

    return versions
1266 1275
def localversion(repo):
    """Return the best changegroup version for local-only bundles.

    Bundles meant to be consumed by this same repository (strip backups,
    shelve, temporary bundles) never cross the wire, so the newest
    producible version is always the right choice.
    """
    candidates = supportedoutgoingversions(repo)
    return max(candidates)
1271 1280
def safeversion(repo):
    """Return the smallest version clients of this repo can be assumed to read.

    For example, all hg versions that support generaldelta also support
    changegroup 02, so a generaldelta repo never needs to fall back to 01.
    """
    candidates = supportedoutgoingversions(repo)
    if 'generaldelta' in repo.requirements:
        candidates.discard('01')
    assert candidates
    return min(candidates)
1281 1290
def getbundler(version, repo, bundlecaps=None, filematcher=None,
               shallow=False):
    """Return a changegroup packer for `version`.

    `filematcher` restricts which files are bundled (defaults to everything);
    version '01' only supports the always-matching case. The matcher is
    further intersected with the repo's narrow matcher so files absent from
    the local store are never requested.
    """
    assert version in supportedoutgoingversions(repo)

    if filematcher is None:
        filematcher = matchmod.alwaysmatcher(repo.root, '')

    if version == '01' and not filematcher.always():
        raise error.ProgrammingError('version 01 changegroups do not support '
                                     'sparse file matchers')

    # Requested files could include files not in the local store. So
    # filter those out.
    filematcher = matchmod.intersectmatchers(repo.narrowmatch(),
                                             filematcher)

    fn = _packermap[version][0]
    return fn(repo, filematcher, bundlecaps, shallow=shallow)
1300 1309
def getunbundler(version, fh, alg, extras=None):
    """Instantiate the unpacker for changegroup `version` over stream `fh`."""
    unpackercls = _packermap[version][1]
    return unpackercls(fh, alg, extras=extras)
1303 1312
def _changegroupinfo(repo, nodes, source):
    """Report the outgoing changeset count (and list, when debugging)."""
    ui = repo.ui
    if ui.verbose or source == 'bundle':
        ui.status(_("%d changesets found\n") % len(nodes))
    if ui.debugflag:
        ui.debug("list of changesets:\n")
        for node in nodes:
            ui.debug("%s\n" % hex(node))
1311 1320
def makechangegroup(repo, outgoing, version, source, fastpath=False,
                    bundlecaps=None):
    """Build a changegroup for `outgoing` and wrap it in an unpacker.

    Convenience wrapper around makestream() for callers that want to apply
    or inspect the changegroup rather than write raw bytes.
    """
    cgstream = makestream(repo, outgoing, version, source,
                          fastpath=fastpath, bundlecaps=bundlecaps)
    return getunbundler(version, util.chunkbuffer(cgstream), None,
                        {'clcount': len(outgoing.missing) })
1318 1327
def makestream(repo, outgoing, version, source, fastpath=False,
               bundlecaps=None, filematcher=None):
    """Return an iterator of raw changegroup chunks for `outgoing`.

    Fires the 'preoutgoing' hook (which may abort) before generation
    starts; the 'outgoing' hook is fired by the packer itself.
    """
    bundler = getbundler(version, repo, bundlecaps=bundlecaps,
                         filematcher=filematcher)

    repo = repo.unfiltered()
    commonrevs = outgoing.common
    csets = outgoing.missing
    heads = outgoing.missingheads
    # We go through the fast path if we get told to, or if all (unfiltered)
    # heads have been requested (since we then know that all linkrevs will
    # be pulled by the client).
    heads.sort()
    fastpathlinkrev = fastpath or (
        repo.filtername is None and heads == sorted(repo.heads()))

    repo.hook('preoutgoing', throw=True, source=source)
    _changegroupinfo(repo, csets, source)
    return bundler.generate(commonrevs, csets, fastpathlinkrev, source)
1338 1347
def _addchangegroupfiles(repo, source, revmap, trp, expectedfiles, needfiles):
    """Apply the file portion of a changegroup stream `source` to `repo`.

    `revmap` maps nodes to linkrevs for addgroup(), `trp` is the transaction
    (proxy), `expectedfiles` sizes the progress bar, and `needfiles` maps
    filename -> set of nodes the received manifests require. Entries are
    removed from `needfiles` as they arrive; whatever remains is verified
    against the existing store at the end.

    Returns a (revisions added, files touched) tuple. Raises error.Abort on
    empty groups, censored bases, spurious entries, or missing file data.
    """
    revisions = 0
    files = 0
    progress = repo.ui.makeprogress(_('files'), unit=_('files'),
                                    total=expectedfiles)
    # iter(..., {}) stops at the empty-dict sentinel marking end of files.
    for chunkdata in iter(source.filelogheader, {}):
        files += 1
        f = chunkdata["filename"]
        repo.ui.debug("adding %s revisions\n" % f)
        progress.increment()
        fl = repo.file(f)
        o = len(fl)
        try:
            deltas = source.deltaiter()
            if not fl.addgroup(deltas, revmap, trp):
                raise error.Abort(_("received file revlog group is empty"))
        except error.CensoredBaseError as e:
            raise error.Abort(_("received delta base is censored: %s") % e)
        revisions += len(fl) - o
        if f in needfiles:
            needs = needfiles[f]
            # Every newly added node must be one the manifests asked for.
            for new in pycompat.xrange(o, len(fl)):
                n = fl.node(new)
                if n in needs:
                    needs.remove(n)
                else:
                    raise error.Abort(
                        _("received spurious file revlog entry"))
            if not needs:
                del needfiles[f]
    progress.complete()

    # Anything still needed must already exist locally, or the bundle was
    # incomplete.
    for f, needs in needfiles.iteritems():
        fl = repo.file(f)
        for n in needs:
            try:
                fl.rev(n)
            except error.LookupError:
                raise error.Abort(
                    _('missing file data for %s:%s - run hg verify') %
                    (f, hex(n)))

    return revisions, files
1382 1391
def _packellipsischangegroup(repo, common, match, relevant_nodes,
                             ellipsisroots, visitnodes, depth, source, version):
    """Produce an ellipsis (narrow) changegroup stream.

    `relevant_nodes` are changesets to send in full; everything else in
    `visitnodes` becomes an ellipsis node. `ellipsisroots` maps ellipsis
    changelog revs to their roots. Requires changegroup version 03+.
    """
    if version in ('01', '02'):
        raise error.Abort(
            'ellipsis nodes require at least cg3 on client and server, '
            'but negotiated version %s' % version)
    # We wrap cg1packer.revchunk, using a side channel to pass
    # relevant_nodes into that area. Then if linknode isn't in the
    # set, we know we have an ellipsis node and we should defer
    # sending that node's data. We override close() to detect
    # pending ellipsis nodes and flush them.
    # NOTE(review): comment predates the packer refactor — the hooks now
    # live on cgpacker (_revisiondeltanarrow), not cg1packer.revchunk.
    packer = getbundler(version, repo, filematcher=match,
                        shallow=depth is not None)
    # Give the packer the list of nodes which should not be
    # ellipsis nodes. We store this rather than the set of nodes
    # that should be an ellipsis because for very large histories
    # we expect this to be significantly smaller.
    packer._full_nodes = relevant_nodes
    # Maps ellipsis revs to their roots at the changelog level.
    packer._precomputed_ellipsis = ellipsisroots

    return packer.generate(common, visitnodes, False, source)
General Comments 0
You need to be logged in to leave comments. Login now