changegroup: control reordering via constructor argument...
Gregory Szorc
r38936:6e999a2d default
@@ -1,1388 +1,1387 b''
1 1 # changegroup.py - Mercurial changegroup manipulation functions
2 2 #
3 3 # Copyright 2006 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import os
11 11 import struct
12 12 import weakref
13 13
14 14 from .i18n import _
15 15 from .node import (
16 16 hex,
17 17 nullid,
18 18 nullrev,
19 19 short,
20 20 )
21 21
22 22 from .thirdparty import (
23 23 attr,
24 24 )
25 25
26 26 from . import (
27 27 dagutil,
28 28 error,
29 29 manifest,
30 30 match as matchmod,
31 31 mdiff,
32 32 phases,
33 33 pycompat,
34 34 repository,
35 35 revlog,
36 36 util,
37 37 )
38 38
39 39 from .utils import (
40 40 stringutil,
41 41 )
42 42
43 43 _CHANGEGROUPV1_DELTA_HEADER = struct.Struct("20s20s20s20s")
44 44 _CHANGEGROUPV2_DELTA_HEADER = struct.Struct("20s20s20s20s20s")
45 45 _CHANGEGROUPV3_DELTA_HEADER = struct.Struct(">20s20s20s20s20sH")
46 46
47 47 LFS_REQUIREMENT = 'lfs'
48 48
49 49 readexactly = util.readexactly
50 50
51 51 def getchunk(stream):
52 52 """return the next chunk from stream as a string"""
53 53 d = readexactly(stream, 4)
54 54 l = struct.unpack(">l", d)[0]
55 55 if l <= 4:
56 56 if l:
57 57 raise error.Abort(_("invalid chunk length %d") % l)
58 58 return ""
59 59 return readexactly(stream, l - 4)
60 60
61 61 def chunkheader(length):
62 62 """return a changegroup chunk header (string)"""
63 63 return struct.pack(">l", length + 4)
64 64
65 65 def closechunk():
66 66 """return a changegroup chunk header (string) for a zero-length chunk"""
67 67 return struct.pack(">l", 0)
68 68
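
Taken together, getchunk(), chunkheader() and closechunk() define the chunk framing: a 4-byte big-endian length that counts itself, followed by the payload, with a zero length acting as terminator. A minimal self-contained sketch of that framing (local stand-in helpers, standard library only, not the module's API):

    import io
    import struct

    def write_chunk(fh, payload):
        # like chunkheader(): the length prefix counts its own 4 bytes
        fh.write(struct.pack(">l", len(payload) + 4))
        fh.write(payload)

    def read_chunk(fh):
        # like getchunk(): returns b'' at the zero-length terminator
        l = struct.unpack(">l", fh.read(4))[0]
        if l <= 4:
            return b""
        return fh.read(l - 4)

    buf = io.BytesIO()
    write_chunk(buf, b"hello")
    buf.write(struct.pack(">l", 0))   # closechunk()
    buf.seek(0)
    assert read_chunk(buf) == b"hello"
    assert read_chunk(buf) == b""     # terminator reached
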
69 69 def writechunks(ui, chunks, filename, vfs=None):
70 70 """Write chunks to a file and return its filename.
71 71
72 72 The stream is assumed to be a bundle file.
73 73 Existing files will not be overwritten.
74 74 If no filename is specified, a temporary file is created.
75 75 """
76 76 fh = None
77 77 cleanup = None
78 78 try:
79 79 if filename:
80 80 if vfs:
81 81 fh = vfs.open(filename, "wb")
82 82 else:
83 83 # Increase default buffer size because default is usually
84 84 # small (4k is common on Linux).
85 85 fh = open(filename, "wb", 131072)
86 86 else:
87 87 fd, filename = pycompat.mkstemp(prefix="hg-bundle-", suffix=".hg")
88 88 fh = os.fdopen(fd, r"wb")
89 89 cleanup = filename
90 90 for c in chunks:
91 91 fh.write(c)
92 92 cleanup = None
93 93 return filename
94 94 finally:
95 95 if fh is not None:
96 96 fh.close()
97 97 if cleanup is not None:
98 98 if filename and vfs:
99 99 vfs.unlink(cleanup)
100 100 else:
101 101 os.unlink(cleanup)
102 102
103 103 class cg1unpacker(object):
104 104 """Unpacker for cg1 changegroup streams.
105 105
106 106 A changegroup unpacker handles the framing of the revision data in
107 107 the wire format. Most consumers will want to use the apply()
108 108 method to add the changes from the changegroup to a repository.
109 109
110 110 If you're forwarding a changegroup unmodified to another consumer,
111 111 use getchunks(), which returns an iterator of changegroup
112 112 chunks. This is mostly useful for cases where you need to know the
113 113 data stream has ended by observing the end of the changegroup.
114 114
115 115 deltachunk() is useful only if you're applying delta data. Most
116 116 consumers should prefer apply() instead.
117 117
118 118 A few other public methods exist. Those are used only for
119 119 bundlerepo and some debug commands - their use is discouraged.
120 120 """
121 121 deltaheader = _CHANGEGROUPV1_DELTA_HEADER
122 122 deltaheadersize = deltaheader.size
123 123 version = '01'
124 124 _grouplistcount = 1 # One list of files after the manifests
125 125
126 126 def __init__(self, fh, alg, extras=None):
127 127 if alg is None:
128 128 alg = 'UN'
129 129 if alg not in util.compengines.supportedbundletypes:
130 130 raise error.Abort(_('unknown stream compression type: %s')
131 131 % alg)
132 132 if alg == 'BZ':
133 133 alg = '_truncatedBZ'
134 134
135 135 compengine = util.compengines.forbundletype(alg)
136 136 self._stream = compengine.decompressorreader(fh)
137 137 self._type = alg
138 138 self.extras = extras or {}
139 139 self.callback = None
140 140
141 141 # These methods (compressed, read, seek, tell) all appear to only
142 142 # be used by bundlerepo, but it's a little hard to tell.
143 143 def compressed(self):
144 144 return self._type is not None and self._type != 'UN'
145 145 def read(self, l):
146 146 return self._stream.read(l)
147 147 def seek(self, pos):
148 148 return self._stream.seek(pos)
149 149 def tell(self):
150 150 return self._stream.tell()
151 151 def close(self):
152 152 return self._stream.close()
153 153
154 154 def _chunklength(self):
155 155 d = readexactly(self._stream, 4)
156 156 l = struct.unpack(">l", d)[0]
157 157 if l <= 4:
158 158 if l:
159 159 raise error.Abort(_("invalid chunk length %d") % l)
160 160 return 0
161 161 if self.callback:
162 162 self.callback()
163 163 return l - 4
164 164
165 165 def changelogheader(self):
166 166 """v10 does not have a changelog header chunk"""
167 167 return {}
168 168
169 169 def manifestheader(self):
170 170 """v10 does not have a manifest header chunk"""
171 171 return {}
172 172
173 173 def filelogheader(self):
174 174 """return the header of the filelogs chunk, v10 only has the filename"""
175 175 l = self._chunklength()
176 176 if not l:
177 177 return {}
178 178 fname = readexactly(self._stream, l)
179 179 return {'filename': fname}
180 180
181 181 def _deltaheader(self, headertuple, prevnode):
182 182 node, p1, p2, cs = headertuple
183 183 if prevnode is None:
184 184 deltabase = p1
185 185 else:
186 186 deltabase = prevnode
187 187 flags = 0
188 188 return node, p1, p2, deltabase, cs, flags
189 189
190 190 def deltachunk(self, prevnode):
191 191 l = self._chunklength()
192 192 if not l:
193 193 return {}
194 194 headerdata = readexactly(self._stream, self.deltaheadersize)
195 195 header = self.deltaheader.unpack(headerdata)
196 196 delta = readexactly(self._stream, l - self.deltaheadersize)
197 197 node, p1, p2, deltabase, cs, flags = self._deltaheader(header, prevnode)
198 198 return (node, p1, p2, cs, deltabase, delta, flags)
199 199
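
The three delta header structs at the top of the file differ only in what they make explicit: cg1 infers the delta base (prevnode, or p1 for the first chunk), cg2 adds an explicit 20-byte base node, and cg3 appends two bytes of revlog flags. A sketch unpacking a synthetic cg3 header:

    import struct

    V1 = struct.Struct("20s20s20s20s")       # node, p1, p2, linknode (cs)
    V2 = struct.Struct("20s20s20s20s20s")    # ... plus explicit deltabase
    V3 = struct.Struct(">20s20s20s20s20sH")  # ... plus 2 bytes of flags

    raw = (b"N" * 20 + b"1" * 20 + b"2" * 20 +
           b"B" * 20 + b"L" * 20 + b"\x00\x04")
    node, p1, p2, deltabase, cs, flags = V3.unpack(raw)
    assert (V1.size, V2.size, V3.size) == (80, 100, 102)
    assert flags == 4
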
200 200 def getchunks(self):
201 201 """returns all the chunks contained in the bundle
202 202
203 203 Used when you need to forward the binary stream to a file or another
204 204 network API. To do so, it parses the changegroup data, as otherwise it
205 205 would block for sshrepo because it doesn't know the end of the stream.
206 206 """
207 207 # For changegroup 1 and 2, we expect 3 parts: changelog, manifestlog,
208 208 # and a list of filelogs. For changegroup 3, we expect 4 parts:
209 209 # changelog, manifestlog, a list of tree manifestlogs, and a list of
210 210 # filelogs.
211 211 #
212 212 # Changelog and manifestlog parts are terminated with empty chunks. The
213 213 # tree and file parts are a list of entry sections. Each entry section
214 214 # is a series of chunks terminating in an empty chunk. The list of these
215 215 # entry sections is terminated in yet another empty chunk, so we know
216 216 # we've reached the end of the tree/file list when we reach an empty
217 217 # chunk that was preceded by no non-empty chunks.
218 218
219 219 parts = 0
220 220 while parts < 2 + self._grouplistcount:
221 221 noentries = True
222 222 while True:
223 223 chunk = getchunk(self)
224 224 if not chunk:
225 225 # The first two empty chunks represent the end of the
226 226 # changelog and the manifestlog portions. The remaining
227 227 # empty chunks represent either A) the end of individual
228 228 # tree or file entries in the file list, or B) the end of
229 229 # the entire list. It's the end of the entire list if there
230 230 # were no entries (i.e. noentries is True).
231 231 if parts < 2:
232 232 parts += 1
233 233 elif noentries:
234 234 parts += 1
235 235 break
236 236 noentries = False
237 237 yield chunkheader(len(chunk))
238 238 pos = 0
239 239 while pos < len(chunk):
240 240 next = pos + 2**20
241 241 yield chunk[pos:next]
242 242 pos = next
243 243 yield closechunk()
244 244
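
To make those termination rules concrete, here is a self-contained simulation of the part counting in getchunks(), assuming a cg1-style stream (one file list, so 2 + 1 parts) and stand-in framing helpers:

    import io
    import struct

    def chunk(data):
        return struct.pack(">l", len(data) + 4) + data

    EMPTY = struct.pack(">l", 0)

    # changelog part, manifest part, then a file list with one entry
    stream = io.BytesIO(
        chunk(b"cl-rev") + EMPTY +                   # changelog
        chunk(b"mf-rev") + EMPTY +                   # manifestlog
        chunk(b"name") + chunk(b"f-rev") + EMPTY +   # one file entry
        EMPTY                                        # end of file list
    )

    def read_chunk(fh):
        l = struct.unpack(">l", fh.read(4))[0]
        return fh.read(l - 4) if l > 4 else b""

    parts = 0
    while parts < 3:              # 2 + _grouplistcount for cg1/cg2
        noentries = True
        while True:
            c = read_chunk(stream)
            if not c:
                if parts < 2 or noentries:
                    parts += 1
                break
            noentries = False
    assert stream.read() == b""   # the whole stream was consumed
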
245 245 def _unpackmanifests(self, repo, revmap, trp, prog):
246 246 self.callback = prog.increment
247 247 # no need to check for empty manifest group here:
248 248 # if the result of the merge of 1 and 2 is the same in 3 and 4,
249 249 # no new manifest will be created and the manifest group will
250 250 # be empty during the pull
251 251 self.manifestheader()
252 252 deltas = self.deltaiter()
253 253 repo.manifestlog.addgroup(deltas, revmap, trp)
254 254 prog.complete()
255 255 self.callback = None
256 256
257 257 def apply(self, repo, tr, srctype, url, targetphase=phases.draft,
258 258 expectedtotal=None):
259 259 """Add the changegroup returned by source.read() to this repo.
260 260 srctype is a string like 'push', 'pull', or 'unbundle'. url is
261 261 the URL of the repo where this changegroup is coming from.
262 262
263 263 Return an integer summarizing the change to this repo:
264 264 - nothing changed or no source: 0
265 265 - more heads than before: 1+added heads (2..n)
266 266 - fewer heads than before: -1-removed heads (-2..-n)
267 267 - number of heads stays the same: 1
268 268 """
269 269 repo = repo.unfiltered()
270 270 def csmap(x):
271 271 repo.ui.debug("add changeset %s\n" % short(x))
272 272 return len(cl)
273 273
274 274 def revmap(x):
275 275 return cl.rev(x)
276 276
277 277 changesets = files = revisions = 0
278 278
279 279 try:
280 280 # The transaction may already carry source information. In this
281 281 # case we use the top level data. We overwrite the argument
282 282 # because we need to use the top level values (if they exist)
283 283 # in this function.
284 284 srctype = tr.hookargs.setdefault('source', srctype)
285 285 url = tr.hookargs.setdefault('url', url)
286 286 repo.hook('prechangegroup',
287 287 throw=True, **pycompat.strkwargs(tr.hookargs))
288 288
289 289 # write changelog data to temp files so concurrent readers
290 290 # will not see an inconsistent view
291 291 cl = repo.changelog
292 292 cl.delayupdate(tr)
293 293 oldheads = set(cl.heads())
294 294
295 295 trp = weakref.proxy(tr)
296 296 # pull off the changeset group
297 297 repo.ui.status(_("adding changesets\n"))
298 298 clstart = len(cl)
299 299 progress = repo.ui.makeprogress(_('changesets'), unit=_('chunks'),
300 300 total=expectedtotal)
301 301 self.callback = progress.increment
302 302
303 303 efiles = set()
304 304 def onchangelog(cl, node):
305 305 efiles.update(cl.readfiles(node))
306 306
307 307 self.changelogheader()
308 308 deltas = self.deltaiter()
309 309 cgnodes = cl.addgroup(deltas, csmap, trp, addrevisioncb=onchangelog)
310 310 efiles = len(efiles)
311 311
312 312 if not cgnodes:
313 313 repo.ui.develwarn('applied empty changegroup',
314 314 config='warn-empty-changegroup')
315 315 clend = len(cl)
316 316 changesets = clend - clstart
317 317 progress.complete()
318 318 self.callback = None
319 319
320 320 # pull off the manifest group
321 321 repo.ui.status(_("adding manifests\n"))
322 322 # We know that we'll never have more manifests than we had
323 323 # changesets.
324 324 progress = repo.ui.makeprogress(_('manifests'), unit=_('chunks'),
325 325 total=changesets)
326 326 self._unpackmanifests(repo, revmap, trp, progress)
327 327
328 328 needfiles = {}
329 329 if repo.ui.configbool('server', 'validate'):
330 330 cl = repo.changelog
331 331 ml = repo.manifestlog
332 332 # validate incoming csets have their manifests
333 333 for cset in pycompat.xrange(clstart, clend):
334 334 mfnode = cl.changelogrevision(cset).manifest
335 335 mfest = ml[mfnode].readdelta()
336 336 # store file cgnodes we must see
337 337 for f, n in mfest.iteritems():
338 338 needfiles.setdefault(f, set()).add(n)
339 339
340 340 # process the files
341 341 repo.ui.status(_("adding file changes\n"))
342 342 newrevs, newfiles = _addchangegroupfiles(
343 343 repo, self, revmap, trp, efiles, needfiles)
344 344 revisions += newrevs
345 345 files += newfiles
346 346
347 347 deltaheads = 0
348 348 if oldheads:
349 349 heads = cl.heads()
350 350 deltaheads = len(heads) - len(oldheads)
351 351 for h in heads:
352 352 if h not in oldheads and repo[h].closesbranch():
353 353 deltaheads -= 1
354 354 htext = ""
355 355 if deltaheads:
356 356 htext = _(" (%+d heads)") % deltaheads
357 357
358 358 repo.ui.status(_("added %d changesets"
359 359 " with %d changes to %d files%s\n")
360 360 % (changesets, revisions, files, htext))
361 361 repo.invalidatevolatilesets()
362 362
363 363 if changesets > 0:
364 364 if 'node' not in tr.hookargs:
365 365 tr.hookargs['node'] = hex(cl.node(clstart))
366 366 tr.hookargs['node_last'] = hex(cl.node(clend - 1))
367 367 hookargs = dict(tr.hookargs)
368 368 else:
369 369 hookargs = dict(tr.hookargs)
370 370 hookargs['node'] = hex(cl.node(clstart))
371 371 hookargs['node_last'] = hex(cl.node(clend - 1))
372 372 repo.hook('pretxnchangegroup',
373 373 throw=True, **pycompat.strkwargs(hookargs))
374 374
375 375 added = [cl.node(r) for r in pycompat.xrange(clstart, clend)]
376 376 phaseall = None
377 377 if srctype in ('push', 'serve'):
378 378 # Old servers can not push the boundary themselves.
379 379 # New servers won't push the boundary if changeset already
380 380 # exists locally as secret
381 381 #
382 382 # We should not use added here but the list of all changes in
383 383 # the bundle
384 384 if repo.publishing():
385 385 targetphase = phaseall = phases.public
386 386 else:
387 387 # closer target phase computation
388 388
389 389 # Those changesets have been pushed from the
390 390 # outside, their phases are going to be pushed
391 391 # alongside. Therefore `targetphase` is
392 392 # ignored.
393 393 targetphase = phaseall = phases.draft
394 394 if added:
395 395 phases.registernew(repo, tr, targetphase, added)
396 396 if phaseall is not None:
397 397 phases.advanceboundary(repo, tr, phaseall, cgnodes)
398 398
399 399 if changesets > 0:
400 400
401 401 def runhooks():
402 402 # These hooks run when the lock releases, not when the
403 403 # transaction closes. So it's possible for the changelog
404 404 # to have changed since we last saw it.
405 405 if clstart >= len(repo):
406 406 return
407 407
408 408 repo.hook("changegroup", **pycompat.strkwargs(hookargs))
409 409
410 410 for n in added:
411 411 args = hookargs.copy()
412 412 args['node'] = hex(n)
413 413 del args['node_last']
414 414 repo.hook("incoming", **pycompat.strkwargs(args))
415 415
416 416 newheads = [h for h in repo.heads()
417 417 if h not in oldheads]
418 418 repo.ui.log("incoming",
419 419 "%d incoming changes - new heads: %s\n",
420 420 len(added),
421 421 ', '.join([hex(c[:6]) for c in newheads]))
422 422
423 423 tr.addpostclose('changegroup-runhooks-%020i' % clstart,
424 424 lambda tr: repo._afterlock(runhooks))
425 425 finally:
426 426 repo.ui.flush()
427 427 # never return 0 here:
428 428 if deltaheads < 0:
429 429 ret = deltaheads - 1
430 430 else:
431 431 ret = deltaheads + 1
432 432 return ret
433 433
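
The "never return 0" convention at the end of apply() can be summarized in a short sketch:

    def retcode(deltaheads):
        # mirrors the final clause of apply() above
        return deltaheads - 1 if deltaheads < 0 else deltaheads + 1

    assert retcode(0) == 1     # head count unchanged
    assert retcode(2) == 3     # two heads added
    assert retcode(-1) == -2   # one head removed
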
434 434 def deltaiter(self):
435 435 """
436 436 returns an iterator of the deltas in this changegroup
437 437
438 438 Useful for passing to the underlying storage system to be stored.
439 439 """
440 440 chain = None
441 441 for chunkdata in iter(lambda: self.deltachunk(chain), {}):
442 442 # Chunkdata: (node, p1, p2, cs, deltabase, delta, flags)
443 443 yield chunkdata
444 444 chain = chunkdata[0]
445 445
446 446 class cg2unpacker(cg1unpacker):
447 447 """Unpacker for cg2 streams.
448 448
449 449 cg2 streams add support for generaldelta, so the delta header
450 450 format is slightly different. All other features about the data
451 451 remain the same.
452 452 """
453 453 deltaheader = _CHANGEGROUPV2_DELTA_HEADER
454 454 deltaheadersize = deltaheader.size
455 455 version = '02'
456 456
457 457 def _deltaheader(self, headertuple, prevnode):
458 458 node, p1, p2, deltabase, cs = headertuple
459 459 flags = 0
460 460 return node, p1, p2, deltabase, cs, flags
461 461
462 462 class cg3unpacker(cg2unpacker):
463 463 """Unpacker for cg3 streams.
464 464
465 465 cg3 streams add support for exchanging treemanifests and revlog
466 466 flags. It adds the revlog flags to the delta header and an empty chunk
467 467 separating manifests and files.
468 468 """
469 469 deltaheader = _CHANGEGROUPV3_DELTA_HEADER
470 470 deltaheadersize = deltaheader.size
471 471 version = '03'
472 472 _grouplistcount = 2 # One list of manifests and one list of files
473 473
474 474 def _deltaheader(self, headertuple, prevnode):
475 475 node, p1, p2, deltabase, cs, flags = headertuple
476 476 return node, p1, p2, deltabase, cs, flags
477 477
478 478 def _unpackmanifests(self, repo, revmap, trp, prog):
479 479 super(cg3unpacker, self)._unpackmanifests(repo, revmap, trp, prog)
480 480 for chunkdata in iter(self.filelogheader, {}):
481 481 # If we get here, there are directory manifests in the changegroup
482 482 d = chunkdata["filename"]
483 483 repo.ui.debug("adding %s revisions\n" % d)
484 484 dirlog = repo.manifestlog._revlog.dirlog(d)
485 485 deltas = self.deltaiter()
486 486 if not dirlog.addgroup(deltas, revmap, trp):
487 487 raise error.Abort(_("received dir revlog group is empty"))
488 488
489 489 class headerlessfixup(object):
490 490 def __init__(self, fh, h):
491 491 self._h = h
492 492 self._fh = fh
493 493 def read(self, n):
494 494 if self._h:
495 495 d, self._h = self._h[:n], self._h[n:]
496 496 if len(d) < n:
497 497 d += readexactly(self._fh, n - len(d))
498 498 return d
499 499 return readexactly(self._fh, n)
500 500
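
headerlessfixup exists so that header bytes consumed while sniffing a stream's compression type can be re-attached in front of the remaining data. A usage sketch (this assumes the module-level readexactly helper is available alongside the class, as it is in this file):

    import io

    fh = io.BytesIO(b"HG10UNrest-of-stream")
    h = fh.read(6)                      # sniffed header: b'HG10UN'
    fixed = headerlessfixup(fh, h)
    assert fixed.read(4) == b"HG10"     # served from the saved header
    assert fixed.read(6) == b"UNrest"   # header tail, then real stream
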
501 501 @attr.s(slots=True, frozen=True)
502 502 class revisiondelta(object):
503 503 """Describes a delta entry in a changegroup.
504 504
505 505 Captured data is sufficient to serialize the delta into multiple
506 506 formats.
507 507 """
508 508 # 20 byte node of this revision.
509 509 node = attr.ib()
510 510 # 20 byte nodes of parent revisions.
511 511 p1node = attr.ib()
512 512 p2node = attr.ib()
513 513 # 20 byte node of the revision this delta is against.
514 514 basenode = attr.ib()
515 515 # 20 byte node of changeset revision this delta is associated with.
516 516 linknode = attr.ib()
517 517 # 2 bytes of flags to apply to revision data.
518 518 flags = attr.ib()
519 519 # Iterable of chunks holding raw delta data.
520 520 deltachunks = attr.ib()
521 521
522 522 class cg1packer(object):
523 def __init__(self, repo, filematcher, version, builddeltaheader,
524 manifestsend, sendtreemanifests,
523 def __init__(self, repo, filematcher, version, allowreorder,
524 builddeltaheader, manifestsend, sendtreemanifests,
525 525 bundlecaps=None):
526 526 """Given a source repo, construct a bundler.
527 527
528 528 filematcher is a matcher that matches on files to include in the
529 529 changegroup. Used to facilitate sparse changegroups.
530 530
531 allowreorder controls whether reordering of revisions is allowed.
532 This value is used when ``bundle.reorder`` is ``auto`` or isn't
533 set.
534
531 535 builddeltaheader is a callable that constructs the header for a group
532 536 delta.
533 537
534 538 manifestsend is a chunk to send after manifests have been fully emitted.
535 539
536 540 sendtreemanifests indicates whether tree manifests should be emitted.
537 541
538 542 bundlecaps is optional and can be used to specify the set of
539 543 capabilities which can be used to build the bundle. While bundlecaps is
540 544 unused in core Mercurial, extensions rely on this feature to communicate
541 545 capabilities to customize the changegroup packer.
542 546 """
543 547 assert filematcher
544 548 self._filematcher = filematcher
545 549
546 550 self.version = version
547 551 self._builddeltaheader = builddeltaheader
548 552 self._manifestsend = manifestsend
549 553 self._sendtreemanifests = sendtreemanifests
550 554
551 555 # Set of capabilities we can use to build the bundle.
552 556 if bundlecaps is None:
553 557 bundlecaps = set()
554 558 self._bundlecaps = bundlecaps
559
555 560 # experimental config: bundle.reorder
556 561 reorder = repo.ui.config('bundle', 'reorder')
557 562 if reorder == 'auto':
558 reorder = None
563 self._reorder = allowreorder
559 564 else:
560 reorder = stringutil.parsebool(reorder)
565 self._reorder = stringutil.parsebool(reorder)
566
561 567 self._repo = repo
562 self._reorder = reorder
568
563 569 if self._repo.ui.verbose and not self._repo.ui.debugflag:
564 570 self._verbosenote = self._repo.ui.note
565 571 else:
566 572 self._verbosenote = lambda s: None
567 573
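
The net effect of the new allowreorder argument is that bundle.reorder=auto now defers to a per-version constructor default instead of a hardcoded None. A sketch of the resolution; parsebool below is a simplified stand-in for stringutil.parsebool:

    def parsebool(s):
        # stand-in: recognized booleans only, None otherwise
        return {'1': True, 'yes': True, 'true': True,
                '0': False, 'no': False, 'false': False}.get(s.lower())

    def resolve_reorder(configvalue, allowreorder):
        if configvalue == 'auto':
            return allowreorder        # per-version default
        return parsebool(configvalue)  # explicit user setting wins

    assert resolve_reorder('auto', None) is None    # cg1: decide per revlog
    assert resolve_reorder('auto', False) is False  # cg2/cg3: no reordering
    assert resolve_reorder('yes', False) is True    # config overrides
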
568 574 def close(self):
569 575 # Ellipses serving mode.
570 576 getattr(self, 'clrev_to_localrev', {}).clear()
571 577 if getattr(self, 'next_clrev_to_localrev', {}):
572 578 self.clrev_to_localrev = self.next_clrev_to_localrev
573 579 del self.next_clrev_to_localrev
574 580 self.changelog_done = True
575 581
576 582 return closechunk()
577 583
578 584 def fileheader(self, fname):
579 585 return chunkheader(len(fname)) + fname
580 586
581 587 # Extracted both for clarity and for overriding in extensions.
582 588 def _sortgroup(self, store, nodelist, lookup):
583 589 """Sort nodes for change group and turn them into revnums."""
584 590 # Ellipses serving mode.
585 591 #
586 592 # In a perfect world, we'd generate better ellipsis-ified graphs
587 593 # for non-changelog revlogs. In practice, we haven't started doing
588 594 # that yet, so the resulting DAGs for the manifestlog and filelogs
589 595 # are actually full of bogus parentage on all the ellipsis
590 596 # nodes. This has the side effect that, while the contents are
591 597 # correct, the individual DAGs might be completely out of whack in
592 598 # a case like 882681bc3166 and its ancestors (back about 10
593 599 # revisions or so) in the main hg repo.
594 600 #
595 601 # The one invariant we *know* holds is that the new (potentially
596 602 # bogus) DAG shape will be valid if we order the nodes in the
597 603 # order that they're introduced in dramatis personae by the
598 604 # changelog, so what we do is we sort the non-changelog histories
599 605 # by the order in which they are used by the changelog.
600 606 if util.safehasattr(self, 'full_nodes') and self.clnode_to_rev:
601 607 key = lambda n: self.clnode_to_rev[lookup(n)]
602 608 return [store.rev(n) for n in sorted(nodelist, key=key)]
603 609
604 610 # for generaldelta revlogs, we linearize the revs; this will both be
605 611 # much quicker and generate a much smaller bundle
606 612 if (store._generaldelta and self._reorder is None) or self._reorder:
607 613 dag = dagutil.revlogdag(store)
608 614 return dag.linearize(set(store.rev(n) for n in nodelist))
609 615 else:
610 616 return sorted([store.rev(n) for n in nodelist])
611 617
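
The linearize-versus-sort decision in _sortgroup() reduces to a small predicate over the revlog's generaldelta flag and the resolved reorder value; a sketch covering the interesting cases:

    def should_linearize(generaldelta, reorder):
        # mirrors the condition guarding dag.linearize() above
        return bool((generaldelta and reorder is None) or reorder)

    assert should_linearize(True, None)        # auto + generaldelta
    assert not should_linearize(False, None)   # auto + plain revlog
    assert should_linearize(False, True)       # reordering forced on
    assert not should_linearize(True, False)   # forced off (cg2/cg3 default)
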
612 618 def group(self, nodelist, store, lookup, units=None):
613 619 """Calculate a delta group, yielding a sequence of changegroup chunks
614 620 (strings).
615 621
616 622 Given a list of changeset revs, return a set of deltas and
617 623 metadata corresponding to nodes. The first delta is
618 624 first parent(nodelist[0]) -> nodelist[0], the receiver is
619 625 guaranteed to have this parent as it has all history before
620 626 these changesets. In the case firstparent is nullrev the
621 627 changegroup starts with a full revision.
622 628
623 629 If units is not None, progress detail will be generated, units specifies
624 630 the type of revlog that is touched (changelog, manifest, etc.).
625 631 """
626 632 # if we don't have any revisions touched by these changesets, bail
627 633 if len(nodelist) == 0:
628 634 yield self.close()
629 635 return
630 636
631 637 revs = self._sortgroup(store, nodelist, lookup)
632 638
633 639 # add the parent of the first rev
634 640 p = store.parentrevs(revs[0])[0]
635 641 revs.insert(0, p)
636 642
637 643 # build deltas
638 644 progress = None
639 645 if units is not None:
640 646 progress = self._repo.ui.makeprogress(_('bundling'), unit=units,
641 647 total=(len(revs) - 1))
642 648 for r in pycompat.xrange(len(revs) - 1):
643 649 if progress:
644 650 progress.update(r + 1)
645 651 prev, curr = revs[r], revs[r + 1]
646 652 linknode = lookup(store.node(curr))
647 653 for c in self.revchunk(store, curr, prev, linknode):
648 654 yield c
649 655
650 656 if progress:
651 657 progress.complete()
652 658 yield self.close()
653 659
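
The delta pairing inside group() can be shown in isolation: after the parent of the first rev is prepended, consecutive pairs become the (deltabase, rev) chunks that revchunk() emits. A sketch with made-up revision numbers:

    revs = [5, 6, 9]       # sorted revs to send
    revs.insert(0, 4)      # parent of the first rev (known to receiver)
    pairs = [(revs[r], revs[r + 1]) for r in range(len(revs) - 1)]
    assert pairs == [(4, 5), (5, 6), (6, 9)]
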
654 660 # filter any nodes that claim to be part of the known set
655 661 def prune(self, store, missing, commonrevs):
656 662 # TODO this violates storage abstraction for manifests.
657 663 if isinstance(store, manifest.manifestrevlog):
658 664 if not self._filematcher.visitdir(store._dir[:-1] or '.'):
659 665 return []
660 666
661 667 rr, rl = store.rev, store.linkrev
662 668 return [n for n in missing if rl(rr(n)) not in commonrevs]
663 669
664 670 def _packmanifests(self, dir, mfnodes, lookuplinknode):
665 671 """Pack flat manifests into a changegroup stream."""
666 672 assert not dir
667 673 for chunk in self.group(mfnodes, self._repo.manifestlog._revlog,
668 674 lookuplinknode, units=_('manifests')):
669 675 yield chunk
670 676
671 677 def _packtreemanifests(self, dir, mfnodes, lookuplinknode):
672 678 """Version of _packmanifests that operates on directory manifests.
673 679
674 680 Encodes the directory name in the output so multiple manifests
675 681 can be sent.
676 682 """
677 683 assert self.version == b'03'
678 684
679 685 if dir:
680 686 yield self.fileheader(dir)
681 687
682 688 # TODO violates storage abstractions by assuming revlogs.
683 689 dirlog = self._repo.manifestlog._revlog.dirlog(dir)
684 690 for chunk in self.group(mfnodes, dirlog, lookuplinknode,
685 691 units=_('manifests')):
686 692 yield chunk
687 693
688 694 def generate(self, commonrevs, clnodes, fastpathlinkrev, source):
689 695 '''yield a sequence of changegroup chunks (strings)'''
690 696 repo = self._repo
691 697 cl = repo.changelog
692 698
693 699 clrevorder = {}
694 700 mfs = {} # needed manifests
695 701 fnodes = {} # needed file nodes
696 702 mfl = repo.manifestlog
697 703 # TODO violates storage abstraction.
698 704 mfrevlog = mfl._revlog
699 705 changedfiles = set()
700 706
701 707 ellipsesmode = util.safehasattr(self, 'full_nodes')
702 708
703 709 # Callback for the changelog, used to collect changed files and
704 710 # manifest nodes.
705 711 # Returns the linkrev node (identity in the changelog case).
706 712 def lookupcl(x):
707 713 c = cl.read(x)
708 714 clrevorder[x] = len(clrevorder)
709 715
710 716 if ellipsesmode:
711 717 # Only update mfs if x is going to be sent. Otherwise we
712 718 # end up with bogus linkrevs specified for manifests and
713 719 # we skip some manifest nodes that we should otherwise
714 720 # have sent.
715 721 if (x in self.full_nodes
716 722 or cl.rev(x) in self.precomputed_ellipsis):
717 723 n = c[0]
718 724 # Record the first changeset introducing this manifest
719 725 # version.
720 726 mfs.setdefault(n, x)
721 727 # Set this narrow-specific dict so we have the lowest
722 728 # manifest revnum to look up for this cl revnum. (Part of
723 729 # mapping changelog ellipsis parents to manifest ellipsis
724 730 # parents)
725 731 self.next_clrev_to_localrev.setdefault(cl.rev(x),
726 732 mfrevlog.rev(n))
727 733 # We can't trust the changed files list in the changeset if the
728 734 # client requested a shallow clone.
729 735 if self.is_shallow:
730 736 changedfiles.update(mfl[c[0]].read().keys())
731 737 else:
732 738 changedfiles.update(c[3])
733 739 else:
734 740
735 741 n = c[0]
736 742 # record the first changeset introducing this manifest version
737 743 mfs.setdefault(n, x)
738 744 # Record a complete list of potentially-changed files in
739 745 # this manifest.
740 746 changedfiles.update(c[3])
741 747
742 748 return x
743 749
744 750 self._verbosenote(_('uncompressed size of bundle content:\n'))
745 751 size = 0
746 752 for chunk in self.group(clnodes, cl, lookupcl, units=_('changesets')):
747 753 size += len(chunk)
748 754 yield chunk
749 755 self._verbosenote(_('%8.i (changelog)\n') % size)
750 756
751 757 # We need to make sure that the linkrev in the changegroup refers to
752 758 # the first changeset that introduced the manifest or file revision.
753 759 # The fastpath is usually safer than the slowpath, because the filelogs
754 760 # are walked in revlog order.
755 761 #
756 762 # When taking the slowpath with reorder=None and the manifest revlog
757 763 # uses generaldelta, the manifest may be walked in the "wrong" order.
758 764 # Without 'clrevorder', we would get an incorrect linkrev (see fix in
759 765 # cc0ff93d0c0c).
760 766 #
761 767 # When taking the fastpath, we are only vulnerable to reordering
762 768 # of the changelog itself. The changelog never uses generaldelta, so
763 769 # it is only reordered when reorder=True. To handle this case, we
764 770 # simply take the slowpath, which already has the 'clrevorder' logic.
765 771 # This was also fixed in cc0ff93d0c0c.
766 772 fastpathlinkrev = fastpathlinkrev and not self._reorder
767 773 # Treemanifests don't work correctly with fastpathlinkrev
768 774 # either, because we don't discover which directory nodes to
769 775 # send along with files. This could probably be fixed.
770 776 fastpathlinkrev = fastpathlinkrev and (
771 777 'treemanifest' not in repo.requirements)
772 778
773 779 for chunk in self.generatemanifests(commonrevs, clrevorder,
774 780 fastpathlinkrev, mfs, fnodes, source):
775 781 yield chunk
776 782
777 783 if ellipsesmode:
778 784 mfdicts = None
779 785 if self.is_shallow:
780 786 mfdicts = [(self._repo.manifestlog[n].read(), lr)
781 787 for (n, lr) in mfs.iteritems()]
782 788
783 789 mfs.clear()
784 790 clrevs = set(cl.rev(x) for x in clnodes)
785 791
786 792 if not fastpathlinkrev:
787 793 def linknodes(unused, fname):
788 794 return fnodes.get(fname, {})
789 795 else:
790 796 cln = cl.node
791 797 def linknodes(filerevlog, fname):
792 798 llr = filerevlog.linkrev
793 799 fln = filerevlog.node
794 800 revs = ((r, llr(r)) for r in filerevlog)
795 801 return dict((fln(r), cln(lr)) for r, lr in revs if lr in clrevs)
796 802
797 803 if ellipsesmode:
798 804 # We need to pass the mfdicts variable down into
799 805 # generatefiles(), but more than one command might have
800 806 # wrapped generatefiles so we can't modify the function
801 807 # signature. Instead, we pass the data to ourselves using an
802 808 # instance attribute. I'm sorry.
803 809 self._mfdicts = mfdicts
804 810
805 811 for chunk in self.generatefiles(changedfiles, linknodes, commonrevs,
806 812 source):
807 813 yield chunk
808 814
809 815 yield self.close()
810 816
811 817 if clnodes:
812 818 repo.hook('outgoing', node=hex(clnodes[0]), source=source)
813 819
814 820 def generatemanifests(self, commonrevs, clrevorder, fastpathlinkrev, mfs,
815 821 fnodes, source):
816 822 """Returns an iterator of changegroup chunks containing manifests.
817 823
818 824 `source` is unused here, but is used by extensions like remotefilelog to
819 825 change what is sent based on pulls vs pushes, etc.
820 826 """
821 827 repo = self._repo
822 828 mfl = repo.manifestlog
823 829 dirlog = mfl._revlog.dirlog
824 830 tmfnodes = {'': mfs}
825 831
826 832 # Callback for the manifest, used to collect linkrevs for filelog
827 833 # revisions.
828 834 # Returns the linkrev node (collected in lookupcl).
829 835 def makelookupmflinknode(dir, nodes):
830 836 if fastpathlinkrev:
831 837 assert not dir
832 838 return mfs.__getitem__
833 839
834 840 def lookupmflinknode(x):
835 841 """Callback for looking up the linknode for manifests.
836 842
837 843 Returns the linkrev node for the specified manifest.
838 844
839 845 SIDE EFFECT:
840 846
841 847 1) fclnodes gets populated with the list of relevant
842 848 file nodes if we're not using fastpathlinkrev
843 849 2) When treemanifests are in use, collects treemanifest nodes
844 850 to send
845 851
846 852 Note that this means manifests must be completely sent to
847 853 the client before you can trust the list of files and
848 854 treemanifests to send.
849 855 """
850 856 clnode = nodes[x]
851 857 mdata = mfl.get(dir, x).readfast(shallow=True)
852 858 for p, n, fl in mdata.iterentries():
853 859 if fl == 't': # subdirectory manifest
854 860 subdir = dir + p + '/'
855 861 tmfclnodes = tmfnodes.setdefault(subdir, {})
856 862 tmfclnode = tmfclnodes.setdefault(n, clnode)
857 863 if clrevorder[clnode] < clrevorder[tmfclnode]:
858 864 tmfclnodes[n] = clnode
859 865 else:
860 866 f = dir + p
861 867 fclnodes = fnodes.setdefault(f, {})
862 868 fclnode = fclnodes.setdefault(n, clnode)
863 869 if clrevorder[clnode] < clrevorder[fclnode]:
864 870 fclnodes[n] = clnode
865 871 return clnode
866 872 return lookupmflinknode
867 873
868 874 fn = (self._packtreemanifests if self._sendtreemanifests
869 875 else self._packmanifests)
870 876 size = 0
871 877 while tmfnodes:
872 878 dir, nodes = tmfnodes.popitem()
873 879 prunednodes = self.prune(dirlog(dir), nodes, commonrevs)
874 880 if not dir or prunednodes:
875 881 for x in fn(dir, prunednodes, makelookupmflinknode(dir, nodes)):
876 882 size += len(x)
877 883 yield x
878 884 self._verbosenote(_('%8.i (manifests)\n') % size)
879 885 yield self._manifestsend
880 886
881 887 # The 'source' parameter is useful for extensions
882 888 def generatefiles(self, changedfiles, linknodes, commonrevs, source):
883 889 changedfiles = list(filter(self._filematcher, changedfiles))
884 890
885 891 if getattr(self, 'is_shallow', False):
886 892 # See comment in generate() for why this sadness is a thing.
887 893 mfdicts = self._mfdicts
888 894 del self._mfdicts
889 895 # In a shallow clone, the linknodes callback needs to also include
890 896 # those file nodes that are in the manifests we sent but weren't
891 897 # introduced by those manifests.
892 898 commonctxs = [self._repo[c] for c in commonrevs]
893 899 oldlinknodes = linknodes
894 900 clrev = self._repo.changelog.rev
895 901
896 902 # Defining this function has a side-effect of overriding the
897 903 # function of the same name that was passed in as an argument.
898 904 # TODO have caller pass in appropriate function.
899 905 def linknodes(flog, fname):
900 906 for c in commonctxs:
901 907 try:
902 908 fnode = c.filenode(fname)
903 909 self.clrev_to_localrev[c.rev()] = flog.rev(fnode)
904 910 except error.ManifestLookupError:
905 911 pass
906 912 links = oldlinknodes(flog, fname)
907 913 if len(links) != len(mfdicts):
908 914 for mf, lr in mfdicts:
909 915 fnode = mf.get(fname, None)
910 916 if fnode in links:
911 917 links[fnode] = min(links[fnode], lr, key=clrev)
912 918 elif fnode:
913 919 links[fnode] = lr
914 920 return links
915 921
916 922 return self._generatefiles(changedfiles, linknodes, commonrevs, source)
917 923
918 924 def _generatefiles(self, changedfiles, linknodes, commonrevs, source):
919 925 repo = self._repo
920 926 progress = repo.ui.makeprogress(_('bundling'), unit=_('files'),
921 927 total=len(changedfiles))
922 928 for i, fname in enumerate(sorted(changedfiles)):
923 929 filerevlog = repo.file(fname)
924 930 if not filerevlog:
925 931 raise error.Abort(_("empty or missing file data for %s") %
926 932 fname)
927 933
928 934 linkrevnodes = linknodes(filerevlog, fname)
929 935 # Lookup for filenodes, we collected the linkrev nodes above in the
930 936 # fastpath case and with lookupmf in the slowpath case.
931 937 def lookupfilelog(x):
932 938 return linkrevnodes[x]
933 939
934 940 filenodes = self.prune(filerevlog, linkrevnodes, commonrevs)
935 941 if filenodes:
936 942 progress.update(i + 1, item=fname)
937 943 h = self.fileheader(fname)
938 944 size = len(h)
939 945 yield h
940 946 for chunk in self.group(filenodes, filerevlog, lookupfilelog):
941 947 size += len(chunk)
942 948 yield chunk
943 949 self._verbosenote(_('%8.i %s\n') % (size, fname))
944 950 progress.complete()
945 951
946 952 def deltaparent(self, store, rev, p1, p2, prev):
947 953 if not store.candelta(prev, rev):
948 954 raise error.ProgrammingError('cg1 should not be used in this case')
949 955 return prev
950 956
951 957 def revchunk(self, store, rev, prev, linknode):
952 958 if util.safehasattr(self, 'full_nodes'):
953 959 fn = self._revisiondeltanarrow
954 960 else:
955 961 fn = self._revisiondeltanormal
956 962
957 963 delta = fn(store, rev, prev, linknode)
958 964 if not delta:
959 965 return
960 966
961 967 meta = self._builddeltaheader(delta)
962 968 l = len(meta) + sum(len(x) for x in delta.deltachunks)
963 969
964 970 yield chunkheader(l)
965 971 yield meta
966 972 for x in delta.deltachunks:
967 973 yield x
968 974
969 975 def _revisiondeltanormal(self, store, rev, prev, linknode):
970 976 node = store.node(rev)
971 977 p1, p2 = store.parentrevs(rev)
972 978 base = self.deltaparent(store, rev, p1, p2, prev)
973 979
974 980 prefix = ''
975 981 if store.iscensored(base) or store.iscensored(rev):
976 982 try:
977 983 delta = store.revision(node, raw=True)
978 984 except error.CensoredNodeError as e:
979 985 delta = e.tombstone
980 986 if base == nullrev:
981 987 prefix = mdiff.trivialdiffheader(len(delta))
982 988 else:
983 989 baselen = store.rawsize(base)
984 990 prefix = mdiff.replacediffheader(baselen, len(delta))
985 991 elif base == nullrev:
986 992 delta = store.revision(node, raw=True)
987 993 prefix = mdiff.trivialdiffheader(len(delta))
988 994 else:
989 995 delta = store.revdiff(base, rev)
990 996 p1n, p2n = store.parents(node)
991 997
992 998 return revisiondelta(
993 999 node=node,
994 1000 p1node=p1n,
995 1001 p2node=p2n,
996 1002 basenode=store.node(base),
997 1003 linknode=linknode,
998 1004 flags=store.flags(rev),
999 1005 deltachunks=(prefix, delta),
1000 1006 )
1001 1007
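
The prefix strings above are bdiff hunk headers: three big-endian longs (start, end, newlength). A hedged sketch mirroring what mdiff.trivialdiffheader and mdiff.replacediffheader produce: a "trivial" header turns the full revision text into a delta inserting everything at offset 0, and a "replace" header replaces the whole base text:

    import struct

    def trivialdiffheader(length):           # like mdiff.trivialdiffheader
        return struct.pack(">lll", 0, 0, length)

    def replacediffheader(oldlen, newlen):   # like mdiff.replacediffheader
        return struct.pack(">lll", 0, oldlen, newlen)

    full = b"new file contents"
    assert struct.unpack(">lll", trivialdiffheader(len(full))) == \
        (0, 0, len(full))
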
1002 1008 def _revisiondeltanarrow(self, store, rev, prev, linknode):
1003 1009 # build up some mapping information that's useful later. See
1004 1010 # the local() nested function below.
1005 1011 if not self.changelog_done:
1006 1012 self.clnode_to_rev[linknode] = rev
1007 1013 linkrev = rev
1008 1014 self.clrev_to_localrev[linkrev] = rev
1009 1015 else:
1010 1016 linkrev = self.clnode_to_rev[linknode]
1011 1017 self.clrev_to_localrev[linkrev] = rev
1012 1018
1013 1019 # This is a node to send in full, because the changeset it
1014 1020 # corresponds to was a full changeset.
1015 1021 if linknode in self.full_nodes:
1016 1022 return self._revisiondeltanormal(store, rev, prev, linknode)
1017 1023
1018 1024 # At this point, a node can either be one we should skip or an
1019 1025 # ellipsis. If it's not an ellipsis, bail immediately.
1020 1026 if linkrev not in self.precomputed_ellipsis:
1021 1027 return
1022 1028
1023 1029 linkparents = self.precomputed_ellipsis[linkrev]
1024 1030 def local(clrev):
1025 1031 """Turn a changelog revnum into a local revnum.
1026 1032
1027 1033 The ellipsis dag is stored as revnums on the changelog,
1028 1034 but when we're producing ellipsis entries for
1029 1035 non-changelog revlogs, we need to turn those numbers into
1030 1036 something local. This does that for us, and during the
1031 1037 changelog sending phase will also expand the stored
1032 1038 mappings as needed.
1033 1039 """
1034 1040 if clrev == nullrev:
1035 1041 return nullrev
1036 1042
1037 1043 if not self.changelog_done:
1038 1044 # If we're doing the changelog, it's possible that we
1039 1045 # have a parent that is already on the client, and we
1040 1046 # need to store some extra mapping information so that
1041 1047 # our contained ellipsis nodes will be able to resolve
1042 1048 # their parents.
1043 1049 if clrev not in self.clrev_to_localrev:
1044 1050 clnode = store.node(clrev)
1045 1051 self.clnode_to_rev[clnode] = clrev
1046 1052 return clrev
1047 1053
1048 1054 # Walk the ellipsis-ized changelog breadth-first looking for a
1049 1055 # change that has been linked from the current revlog.
1050 1056 #
1051 1057 # For a flat manifest revlog only a single step should be necessary
1052 1058 # as all relevant changelog entries are relevant to the flat
1053 1059 # manifest.
1054 1060 #
1055 1061 # For a filelog or tree manifest dirlog however not every changelog
1056 1062 # entry will have been relevant, so we need to skip some changelog
1057 1063 # nodes even after ellipsis-izing.
1058 1064 walk = [clrev]
1059 1065 while walk:
1060 1066 p = walk[0]
1061 1067 walk = walk[1:]
1062 1068 if p in self.clrev_to_localrev:
1063 1069 return self.clrev_to_localrev[p]
1064 1070 elif p in self.full_nodes:
1065 1071 walk.extend([pp for pp in self._repo.changelog.parentrevs(p)
1066 1072 if pp != nullrev])
1067 1073 elif p in self.precomputed_ellipsis:
1068 1074 walk.extend([pp for pp in self.precomputed_ellipsis[p]
1069 1075 if pp != nullrev])
1070 1076 else:
1071 1077 # In this case, we've got an ellipsis with parents
1072 1078 # outside the current bundle (likely an
1073 1079 # incremental pull). We "know" that we can use the
1074 1080 # value of this same revlog at whatever revision
1075 1081 # is pointed to by linknode. "Know" is in scare
1076 1082 # quotes because I haven't done enough examination
1077 1083 # of edge cases to convince myself this is really
1078 1084 # a fact - it works for all the (admittedly
1079 1085 # thorough) cases in our testsuite, but I would be
1080 1086 # somewhat unsurprised to find a case in the wild
1081 1087 # where this breaks down a bit. That said, I don't
1082 1088 # know if it would hurt anything.
1083 1089 for i in pycompat.xrange(rev, 0, -1):
1084 1090 if store.linkrev(i) == clrev:
1085 1091 return i
1086 1092 # We failed to resolve a parent for this node, so
1087 1093 # we crash the changegroup construction.
1088 1094 raise error.Abort(
1089 1095 'unable to resolve parent while packing %r %r'
1090 1096 ' for changeset %r' % (store.indexfile, rev, clrev))
1091 1097
1092 1098 return nullrev
1093 1099
1094 1100 if not linkparents or (
1095 1101 store.parentrevs(rev) == (nullrev, nullrev)):
1096 1102 p1, p2 = nullrev, nullrev
1097 1103 elif len(linkparents) == 1:
1098 1104 p1, = sorted(local(p) for p in linkparents)
1099 1105 p2 = nullrev
1100 1106 else:
1101 1107 p1, p2 = sorted(local(p) for p in linkparents)
1102 1108
1103 1109 n = store.node(rev)
1104 1110 p1n, p2n = store.node(p1), store.node(p2)
1105 1111 flags = store.flags(rev)
1106 1112 flags |= revlog.REVIDX_ELLIPSIS
1107 1113
1108 1114 # TODO: try and actually send deltas for ellipsis data blocks
1109 1115 data = store.revision(n)
1110 1116 diffheader = mdiff.trivialdiffheader(len(data))
1111 1117
1112 1118 return revisiondelta(
1113 1119 node=n,
1114 1120 p1node=p1n,
1115 1121 p2node=p2n,
1116 1122 basenode=nullid,
1117 1123 linknode=linknode,
1118 1124 flags=flags,
1119 1125 deltachunks=(diffheader, data),
1120 1126 )
1121 1127
1122 1128 class cg2packer(cg1packer):
1123 def __init__(self, repo, filematcher, version, builddeltaheader,
1124 manifestsend, sendtreemanifests, bundlecaps=None):
1125 super(cg2packer, self).__init__(repo, filematcher, version,
1126 builddeltaheader, manifestsend,
1127 sendtreemanifests,
1128 bundlecaps=bundlecaps)
1129
1130 if self._reorder is None:
1131 # Since generaldelta is directly supported by cg2, reordering
1132 # generally doesn't help, so we disable it by default (treating
1133 # bundle.reorder=auto just like bundle.reorder=False).
1134 self._reorder = False
1135
1136 1129 def deltaparent(self, store, rev, p1, p2, prev):
1137 1130 # Narrow ellipses mode.
1138 1131 if util.safehasattr(self, 'full_nodes'):
1139 1132 # TODO: send better deltas when in narrow mode.
1140 1133 #
1141 1134 # changegroup.group() loops over revisions to send,
1142 1135 # including revisions we'll skip. What this means is that
1143 1136 # `prev` will be a potentially useless delta base for all
1144 1137 # ellipsis nodes, as the client likely won't have it. In
1145 1138 # the future we should do bookkeeping about which nodes
1146 1139 # have been sent to the client, and try to be
1147 1140 # significantly smarter about delta bases. This is
1148 1141 # slightly tricky because this same code has to work for
1149 1142 # all revlogs, and we don't have the linkrev/linknode here.
1150 1143 return p1
1151 1144
1152 1145 dp = store.deltaparent(rev)
1153 1146 if dp == nullrev and store.storedeltachains:
1154 1147 # Avoid sending full revisions when delta parent is null. Pick prev
1155 1148 # in that case. It's tempting to pick p1 in this case, as p1 will
1156 1149 # be smaller in the common case. However, computing a delta against
1157 1150 # p1 may require resolving the raw text of p1, which could be
1158 1151 # expensive. The revlog caches should have prev cached, meaning
1159 1152 # less CPU for changegroup generation. There is likely room to add
1160 1153 # a flag and/or config option to control this behavior.
1161 1154 base = prev
1162 1155 elif dp == nullrev:
1163 1156 # revlog is configured to use full snapshot for a reason,
1164 1157 # stick to full snapshot.
1165 1158 base = nullrev
1166 1159 elif dp not in (p1, p2, prev):
1167 1160 # Pick prev when we can't be sure remote has the base revision.
1168 1161 return prev
1169 1162 else:
1170 1163 base = dp
1171 1164 if base != nullrev and not store.candelta(base, rev):
1172 1165 base = nullrev
1173 1166 return base
1174 1167
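
The base-selection logic in the cg2 deltaparent() above amounts to a small decision function; a sketch (nullrev is -1, candelta is a stand-in for store.candelta):

    nullrev = -1

    def choose_base(dp, p1, p2, prev, storedeltachains, candelta):
        if dp == nullrev and storedeltachains:
            base = prev              # avoid sending a full revision
        elif dp == nullrev:
            base = nullrev           # revlog wants a full snapshot
        elif dp not in (p1, p2, prev):
            return prev              # remote may not have dp
        else:
            base = dp
        if base != nullrev and not candelta(base):
            base = nullrev
        return base

    ok = lambda b: True
    assert choose_base(-1, 2, -1, 7, True, ok) == 7   # prev over full text
    assert choose_base(3, 2, -1, 7, True, ok) == 7    # dp not known remote
    assert choose_base(2, 2, -1, 7, True, ok) == 2    # dp is p1: use it
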
1175 1168 def _makecg1packer(repo, filematcher, bundlecaps):
1176 1169 builddeltaheader = lambda d: _CHANGEGROUPV1_DELTA_HEADER.pack(
1177 1170 d.node, d.p1node, d.p2node, d.linknode)
1178 1171
1179 return cg1packer(repo, filematcher, b'01', builddeltaheader,
1172 return cg1packer(repo, filematcher, b'01', allowreorder=None,
1173 builddeltaheader=builddeltaheader,
1180 1174 manifestsend=b'', sendtreemanifests=False,
1181 1175 bundlecaps=bundlecaps)
1182 1176
1183 1177 def _makecg2packer(repo, filematcher, bundlecaps):
1184 1178 builddeltaheader = lambda d: _CHANGEGROUPV2_DELTA_HEADER.pack(
1185 1179 d.node, d.p1node, d.p2node, d.basenode, d.linknode)
1186 1180
1187 return cg2packer(repo, filematcher, b'02', builddeltaheader,
1181 # Since generaldelta is directly supported by cg2, reordering
1182 # generally doesn't help, so we disable it by default (treating
1183 # bundle.reorder=auto just like bundle.reorder=False).
1184 return cg2packer(repo, filematcher, b'02', allowreorder=False,
1185 builddeltaheader=builddeltaheader,
1188 1186 manifestsend=b'', sendtreemanifests=False,
1189 1187 bundlecaps=bundlecaps)
1190 1188
1191 1189 def _makecg3packer(repo, filematcher, bundlecaps):
1192 1190 builddeltaheader = lambda d: _CHANGEGROUPV3_DELTA_HEADER.pack(
1193 1191 d.node, d.p1node, d.p2node, d.basenode, d.linknode, d.flags)
1194 1192
1195 return cg2packer(repo, filematcher, b'03', builddeltaheader,
1193 return cg2packer(repo, filematcher, b'03', allowreorder=False,
1194 builddeltaheader=builddeltaheader,
1196 1195 manifestsend=closechunk(), sendtreemanifests=True,
1197 1196 bundlecaps=bundlecaps)
1198 1197
1199 1198 _packermap = {'01': (_makecg1packer, cg1unpacker),
1200 1199 # cg2 adds support for exchanging generaldelta
1201 1200 '02': (_makecg2packer, cg2unpacker),
1202 1201 # cg3 adds support for exchanging revlog flags and treemanifests
1203 1202 '03': (_makecg3packer, cg3unpacker),
1204 1203 }
1205 1204
1206 1205 def allsupportedversions(repo):
1207 1206 versions = set(_packermap.keys())
1208 1207 if not (repo.ui.configbool('experimental', 'changegroup3') or
1209 1208 repo.ui.configbool('experimental', 'treemanifest') or
1210 1209 'treemanifest' in repo.requirements):
1211 1210 versions.discard('03')
1212 1211 return versions
1213 1212
1214 1213 # Changegroup versions that can be applied to the repo
1215 1214 def supportedincomingversions(repo):
1216 1215 return allsupportedversions(repo)
1217 1216
1218 1217 # Changegroup versions that can be created from the repo
1219 1218 def supportedoutgoingversions(repo):
1220 1219 versions = allsupportedversions(repo)
1221 1220 if 'treemanifest' in repo.requirements:
1222 1221 # Versions 01 and 02 support only flat manifests and it's just too
1223 1222 # expensive to convert between the flat manifest and tree manifest on
1224 1223 # the fly. Since tree manifests are hashed differently, all of history
1225 1224 # would have to be converted. Instead, we simply don't even pretend to
1226 1225 # support versions 01 and 02.
1227 1226 versions.discard('01')
1228 1227 versions.discard('02')
1229 1228 if repository.NARROW_REQUIREMENT in repo.requirements:
1230 1229 # Versions 01 and 02 don't support revlog flags, and we need to
1231 1230 # support that for stripping and unbundling to work.
1232 1231 versions.discard('01')
1233 1232 versions.discard('02')
1234 1233 if LFS_REQUIREMENT in repo.requirements:
1235 1234 # Versions 01 and 02 don't support revlog flags, and we need to
1236 1235 # mark LFS entries with REVIDX_EXTSTORED.
1237 1236 versions.discard('01')
1238 1237 versions.discard('02')
1239 1238
1240 1239 return versions
1241 1240
1242 1241 def localversion(repo):
1243 1242 # Finds the best version to use for bundles that are meant to be used
1244 1243 # locally, such as those from strip and shelve, and temporary bundles.
1245 1244 return max(supportedoutgoingversions(repo))
1246 1245
1247 1246 def safeversion(repo):
1248 1247 # Finds the smallest version that it's safe to assume clients of the repo
1249 1248 # will support. For example, all hg versions that support generaldelta also
1250 1249 # support changegroup 02.
1251 1250 versions = supportedoutgoingversions(repo)
1252 1251 if 'generaldelta' in repo.requirements:
1253 1252 versions.discard('01')
1254 1253 assert versions
1255 1254 return min(versions)
1256 1255
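
localversion() and safeversion() operate on the same capability set, picking opposite ends of it; a sketch, with string comparison doing the ordering ('01' < '02' < '03'):

    versions = {'01', '02', '03'}
    versions.discard('01')        # e.g. repo requires generaldelta
    assert max(versions) == '03'  # localversion(): newest, for local bundles
    assert min(versions) == '02'  # safeversion(): oldest all clients speak
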
1257 1256 def getbundler(version, repo, bundlecaps=None, filematcher=None):
1258 1257 assert version in supportedoutgoingversions(repo)
1259 1258
1260 1259 if filematcher is None:
1261 1260 filematcher = matchmod.alwaysmatcher(repo.root, '')
1262 1261
1263 1262 if version == '01' and not filematcher.always():
1264 1263 raise error.ProgrammingError('version 01 changegroups do not support '
1265 1264 'sparse file matchers')
1266 1265
1267 1266 # Requested files could include files not in the local store. So
1268 1267 # filter those out.
1269 1268 filematcher = matchmod.intersectmatchers(repo.narrowmatch(),
1270 1269 filematcher)
1271 1270
1272 1271 fn = _packermap[version][0]
1273 1272 return fn(repo, filematcher, bundlecaps)
1274 1273
1275 1274 def getunbundler(version, fh, alg, extras=None):
1276 1275 return _packermap[version][1](fh, alg, extras=extras)
1277 1276
1278 1277 def _changegroupinfo(repo, nodes, source):
1279 1278 if repo.ui.verbose or source == 'bundle':
1280 1279 repo.ui.status(_("%d changesets found\n") % len(nodes))
1281 1280 if repo.ui.debugflag:
1282 1281 repo.ui.debug("list of changesets:\n")
1283 1282 for node in nodes:
1284 1283 repo.ui.debug("%s\n" % hex(node))
1285 1284
1286 1285 def makechangegroup(repo, outgoing, version, source, fastpath=False,
1287 1286 bundlecaps=None):
1288 1287 cgstream = makestream(repo, outgoing, version, source,
1289 1288 fastpath=fastpath, bundlecaps=bundlecaps)
1290 1289 return getunbundler(version, util.chunkbuffer(cgstream), None,
1291 1290 {'clcount': len(outgoing.missing) })
1292 1291
1293 1292 def makestream(repo, outgoing, version, source, fastpath=False,
1294 1293 bundlecaps=None, filematcher=None):
1295 1294 bundler = getbundler(version, repo, bundlecaps=bundlecaps,
1296 1295 filematcher=filematcher)
1297 1296
1298 1297 repo = repo.unfiltered()
1299 1298 commonrevs = outgoing.common
1300 1299 csets = outgoing.missing
1301 1300 heads = outgoing.missingheads
1302 1301 # We go through the fast path if we get told to, or if all (unfiltered)
1303 1302 # heads have been requested (since we then know all linkrevs will
1304 1303 # be pulled by the client).
1305 1304 heads.sort()
1306 1305 fastpathlinkrev = fastpath or (
1307 1306 repo.filtername is None and heads == sorted(repo.heads()))
1308 1307
1309 1308 repo.hook('preoutgoing', throw=True, source=source)
1310 1309 _changegroupinfo(repo, csets, source)
1311 1310 return bundler.generate(commonrevs, csets, fastpathlinkrev, source)
1312 1311
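
The fastpathlinkrev condition in makestream() is a small predicate: the fast path is safe when explicitly requested, or when the repo is unfiltered and every head is being pulled. A sketch:

    def usefastpath(fastpath, filtername, heads, repoheads):
        return fastpath or (filtername is None and
                            sorted(heads) == sorted(repoheads))

    assert usefastpath(False, None, [b'h2', b'h1'], [b'h1', b'h2'])
    assert not usefastpath(False, 'visible', [b'h1'], [b'h1', b'h2'])
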
1313 1312 def _addchangegroupfiles(repo, source, revmap, trp, expectedfiles, needfiles):
1314 1313 revisions = 0
1315 1314 files = 0
1316 1315 progress = repo.ui.makeprogress(_('files'), unit=_('files'),
1317 1316 total=expectedfiles)
1318 1317 for chunkdata in iter(source.filelogheader, {}):
1319 1318 files += 1
1320 1319 f = chunkdata["filename"]
1321 1320 repo.ui.debug("adding %s revisions\n" % f)
1322 1321 progress.increment()
1323 1322 fl = repo.file(f)
1324 1323 o = len(fl)
1325 1324 try:
1326 1325 deltas = source.deltaiter()
1327 1326 if not fl.addgroup(deltas, revmap, trp):
1328 1327 raise error.Abort(_("received file revlog group is empty"))
1329 1328 except error.CensoredBaseError as e:
1330 1329 raise error.Abort(_("received delta base is censored: %s") % e)
1331 1330 revisions += len(fl) - o
1332 1331 if f in needfiles:
1333 1332 needs = needfiles[f]
1334 1333 for new in pycompat.xrange(o, len(fl)):
1335 1334 n = fl.node(new)
1336 1335 if n in needs:
1337 1336 needs.remove(n)
1338 1337 else:
1339 1338 raise error.Abort(
1340 1339 _("received spurious file revlog entry"))
1341 1340 if not needs:
1342 1341 del needfiles[f]
1343 1342 progress.complete()
1344 1343
1345 1344 for f, needs in needfiles.iteritems():
1346 1345 fl = repo.file(f)
1347 1346 for n in needs:
1348 1347 try:
1349 1348 fl.rev(n)
1350 1349 except error.LookupError:
1351 1350 raise error.Abort(
1352 1351 _('missing file data for %s:%s - run hg verify') %
1353 1352 (f, hex(n)))
1354 1353
1355 1354 return revisions, files
1356 1355
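
The needfiles bookkeeping enforces that exactly the file nodes promised by the incoming manifests arrive, nothing more and nothing less. A sketch of the happy path (hypothetical file name and nodes):

    needfiles = {b'a.txt': {b'n1', b'n2'}}
    for n in (b'n1', b'n2'):   # nodes seen in the group
        needs = needfiles[b'a.txt']
        needs.remove(n)        # a spurious node would fail here (Abort above)
        if not needs:
            del needfiles[b'a.txt']
    assert not needfiles       # leftovers would mean missing file data
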
1357 1356 def _packellipsischangegroup(repo, common, match, relevant_nodes,
1358 1357 ellipsisroots, visitnodes, depth, source, version):
1359 1358 if version in ('01', '02'):
1360 1359 raise error.Abort(
1361 1360 'ellipsis nodes require at least cg3 on client and server, '
1362 1361 'but negotiated version %s' % version)
1363 1362 # We wrap cg1packer.revchunk, using a side channel to pass
1364 1363 # relevant_nodes into that area. Then if linknode isn't in the
1365 1364 # set, we know we have an ellipsis node and we should defer
1366 1365 # sending that node's data. We override close() to detect
1367 1366 # pending ellipsis nodes and flush them.
1368 1367 packer = getbundler(version, repo, filematcher=match)
1369 1368 # Give the packer the list of nodes which should not be
1370 1369 # ellipsis nodes. We store this rather than the set of nodes
1371 1370 # that should be an ellipsis because for very large histories
1372 1371 # we expect this to be significantly smaller.
1373 1372 packer.full_nodes = relevant_nodes
1374 1373 # Maps ellipsis revs to their roots at the changelog level.
1375 1374 packer.precomputed_ellipsis = ellipsisroots
1376 1375 # Maps CL revs to per-revlog revisions. Cleared in close() at
1377 1376 # the end of each group.
1378 1377 packer.clrev_to_localrev = {}
1379 1378 packer.next_clrev_to_localrev = {}
1380 1379 # Maps changelog nodes to changelog revs. Filled in once
1381 1380 # during changelog stage and then left unmodified.
1382 1381 packer.clnode_to_rev = {}
1383 1382 packer.changelog_done = False
1384 1383 # If true, informs the packer that it is serving shallow content and might
1385 1384 # need to pack file contents not introduced by the changes being packed.
1386 1385 packer.is_shallow = depth is not None
1387 1386
1388 1387 return packer.generate(common, visitnodes, False, source)