changegroup: declare shallow flag in constructor...
Gregory Szorc
r38940:cdb9bc21 default
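In brief, the diff below threads a shallow flag through changegroup packer construction: getbundler() gains a shallow keyword argument, forwards it to the version-specific factories (_makecg1packer, _makecg2packer, _makecg3packer), and cgpacker.__init__() stores it as self._isshallow. Previously, _packellipsischangegroup() injected a _is_shallow attribute onto the packer after construction. A minimal sketch of the updated call site (mirroring _packellipsischangegroup in the diff; depth is whatever value the narrow code already had in scope):

    packer = getbundler(version, repo, filematcher=match,
                        shallow=depth is not None)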
@@ -1,1402 +1,1408
1 1 # changegroup.py - Mercurial changegroup manipulation functions
2 2 #
3 3 # Copyright 2006 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import os
11 11 import struct
12 12 import weakref
13 13
14 14 from .i18n import _
15 15 from .node import (
16 16 hex,
17 17 nullid,
18 18 nullrev,
19 19 short,
20 20 )
21 21
22 22 from .thirdparty import (
23 23 attr,
24 24 )
25 25
26 26 from . import (
27 27 dagutil,
28 28 error,
29 29 manifest,
30 30 match as matchmod,
31 31 mdiff,
32 32 phases,
33 33 pycompat,
34 34 repository,
35 35 revlog,
36 36 util,
37 37 )
38 38
39 39 from .utils import (
40 40 stringutil,
41 41 )
42 42
43 43 _CHANGEGROUPV1_DELTA_HEADER = struct.Struct("20s20s20s20s")
44 44 _CHANGEGROUPV2_DELTA_HEADER = struct.Struct("20s20s20s20s20s")
45 45 _CHANGEGROUPV3_DELTA_HEADER = struct.Struct(">20s20s20s20s20sH")
46 46
47 47 LFS_REQUIREMENT = 'lfs'
48 48
49 49 readexactly = util.readexactly
50 50
51 51 def getchunk(stream):
52 52 """return the next chunk from stream as a string"""
53 53 d = readexactly(stream, 4)
54 54 l = struct.unpack(">l", d)[0]
55 55 if l <= 4:
56 56 if l:
57 57 raise error.Abort(_("invalid chunk length %d") % l)
58 58 return ""
59 59 return readexactly(stream, l - 4)
60 60
61 61 def chunkheader(length):
62 62 """return a changegroup chunk header (string)"""
63 63 return struct.pack(">l", length + 4)
64 64
65 65 def closechunk():
66 66 """return a changegroup chunk header (string) for a zero-length chunk"""
67 67 return struct.pack(">l", 0)
68 68
69 69 def writechunks(ui, chunks, filename, vfs=None):
70 70 """Write chunks to a file and return its filename.
71 71
72 72 The stream is assumed to be a bundle file.
73 73 Existing files will not be overwritten.
74 74 If no filename is specified, a temporary file is created.
75 75 """
76 76 fh = None
77 77 cleanup = None
78 78 try:
79 79 if filename:
80 80 if vfs:
81 81 fh = vfs.open(filename, "wb")
82 82 else:
83 83 # Increase default buffer size because default is usually
84 84 # small (4k is common on Linux).
85 85 fh = open(filename, "wb", 131072)
86 86 else:
87 87 fd, filename = pycompat.mkstemp(prefix="hg-bundle-", suffix=".hg")
88 88 fh = os.fdopen(fd, r"wb")
89 89 cleanup = filename
90 90 for c in chunks:
91 91 fh.write(c)
92 92 cleanup = None
93 93 return filename
94 94 finally:
95 95 if fh is not None:
96 96 fh.close()
97 97 if cleanup is not None:
98 98 if filename and vfs:
99 99 vfs.unlink(cleanup)
100 100 else:
101 101 os.unlink(cleanup)
102 102
103 103 class cg1unpacker(object):
104 104 """Unpacker for cg1 changegroup streams.
105 105
106 106 A changegroup unpacker handles the framing of the revision data in
107 107 the wire format. Most consumers will want to use the apply()
108 108 method to add the changes from the changegroup to a repository.
109 109
110 110 If you're forwarding a changegroup unmodified to another consumer,
111 111 use getchunks(), which returns an iterator of changegroup
112 112 chunks. This is mostly useful for cases where you need to know the
113 113 data stream has ended by observing the end of the changegroup.
114 114
115 115 deltachunk() is useful only if you're applying delta data. Most
116 116 consumers should prefer apply() instead.
117 117
118 118 A few other public methods exist. Those are used only for
119 119 bundlerepo and some debug commands - their use is discouraged.
120 120 """
121 121 deltaheader = _CHANGEGROUPV1_DELTA_HEADER
122 122 deltaheadersize = deltaheader.size
123 123 version = '01'
124 124 _grouplistcount = 1 # One list of files after the manifests
125 125
126 126 def __init__(self, fh, alg, extras=None):
127 127 if alg is None:
128 128 alg = 'UN'
129 129 if alg not in util.compengines.supportedbundletypes:
130 130 raise error.Abort(_('unknown stream compression type: %s')
131 131 % alg)
132 132 if alg == 'BZ':
133 133 alg = '_truncatedBZ'
134 134
135 135 compengine = util.compengines.forbundletype(alg)
136 136 self._stream = compengine.decompressorreader(fh)
137 137 self._type = alg
138 138 self.extras = extras or {}
139 139 self.callback = None
140 140
141 141 # These methods (compressed, read, seek, tell) all appear to only
142 142 # be used by bundlerepo, but it's a little hard to tell.
143 143 def compressed(self):
144 144 return self._type is not None and self._type != 'UN'
145 145 def read(self, l):
146 146 return self._stream.read(l)
147 147 def seek(self, pos):
148 148 return self._stream.seek(pos)
149 149 def tell(self):
150 150 return self._stream.tell()
151 151 def close(self):
152 152 return self._stream.close()
153 153
154 154 def _chunklength(self):
155 155 d = readexactly(self._stream, 4)
156 156 l = struct.unpack(">l", d)[0]
157 157 if l <= 4:
158 158 if l:
159 159 raise error.Abort(_("invalid chunk length %d") % l)
160 160 return 0
161 161 if self.callback:
162 162 self.callback()
163 163 return l - 4
164 164
165 165 def changelogheader(self):
166 166 """v10 does not have a changelog header chunk"""
167 167 return {}
168 168
169 169 def manifestheader(self):
170 170 """v10 does not have a manifest header chunk"""
171 171 return {}
172 172
173 173 def filelogheader(self):
174 174 """return the header of the filelogs chunk, v10 only has the filename"""
175 175 l = self._chunklength()
176 176 if not l:
177 177 return {}
178 178 fname = readexactly(self._stream, l)
179 179 return {'filename': fname}
180 180
181 181 def _deltaheader(self, headertuple, prevnode):
182 182 node, p1, p2, cs = headertuple
183 183 if prevnode is None:
184 184 deltabase = p1
185 185 else:
186 186 deltabase = prevnode
187 187 flags = 0
188 188 return node, p1, p2, deltabase, cs, flags
189 189
190 190 def deltachunk(self, prevnode):
191 191 l = self._chunklength()
192 192 if not l:
193 193 return {}
194 194 headerdata = readexactly(self._stream, self.deltaheadersize)
195 195 header = self.deltaheader.unpack(headerdata)
196 196 delta = readexactly(self._stream, l - self.deltaheadersize)
197 197 node, p1, p2, deltabase, cs, flags = self._deltaheader(header, prevnode)
198 198 return (node, p1, p2, cs, deltabase, delta, flags)
199 199
200 200 def getchunks(self):
201 201 """returns all the chunks contains in the bundle
202 202
203 203 Used when you need to forward the binary stream to a file or another
204 204 network API. To do so, it parse the changegroup data, otherwise it will
205 205 block in case of sshrepo because it don't know the end of the stream.
206 206 """
207 207 # For changegroup 1 and 2, we expect 3 parts: changelog, manifestlog,
208 208 # and a list of filelogs. For changegroup 3, we expect 4 parts:
209 209 # changelog, manifestlog, a list of tree manifestlogs, and a list of
210 210 # filelogs.
211 211 #
212 212 # Changelog and manifestlog parts are terminated with empty chunks. The
213 213 # tree and file parts are a list of entry sections. Each entry section
214 214 # is a series of chunks terminating in an empty chunk. The list of these
215 215 # entry sections is terminated in yet another empty chunk, so we know
216 216 # we've reached the end of the tree/file list when we reach an empty
217 217 # chunk that was preceded by no non-empty chunks.
218 218
219 219 parts = 0
220 220 while parts < 2 + self._grouplistcount:
221 221 noentries = True
222 222 while True:
223 223 chunk = getchunk(self)
224 224 if not chunk:
225 225 # The first two empty chunks represent the end of the
226 226 # changelog and the manifestlog portions. The remaining
227 227 # empty chunks represent either A) the end of individual
228 228 # tree or file entries in the file list, or B) the end of
229 229 # the entire list. It's the end of the entire list if there
230 230 # were no entries (i.e. noentries is True).
231 231 if parts < 2:
232 232 parts += 1
233 233 elif noentries:
234 234 parts += 1
235 235 break
236 236 noentries = False
237 237 yield chunkheader(len(chunk))
238 238 pos = 0
239 239 while pos < len(chunk):
240 240 next = pos + 2**20
241 241 yield chunk[pos:next]
242 242 pos = next
243 243 yield closechunk()
244 244
245 245 def _unpackmanifests(self, repo, revmap, trp, prog):
246 246 self.callback = prog.increment
247 247 # no need to check for empty manifest group here:
248 248 # if the result of the merge of 1 and 2 is the same in 3 and 4,
249 249 # no new manifest will be created and the manifest group will
250 250 # be empty during the pull
251 251 self.manifestheader()
252 252 deltas = self.deltaiter()
253 253 repo.manifestlog.addgroup(deltas, revmap, trp)
254 254 prog.complete()
255 255 self.callback = None
256 256
257 257 def apply(self, repo, tr, srctype, url, targetphase=phases.draft,
258 258 expectedtotal=None):
259 259 """Add the changegroup returned by source.read() to this repo.
260 260 srctype is a string like 'push', 'pull', or 'unbundle'. url is
261 261 the URL of the repo where this changegroup is coming from.
262 262
263 263 Return an integer summarizing the change to this repo:
264 264 - nothing changed or no source: 0
265 265 - more heads than before: 1+added heads (2..n)
266 266 - fewer heads than before: -1-removed heads (-2..-n)
267 267 - number of heads stays the same: 1
268 268 """
269 269 repo = repo.unfiltered()
270 270 def csmap(x):
271 271 repo.ui.debug("add changeset %s\n" % short(x))
272 272 return len(cl)
273 273
274 274 def revmap(x):
275 275 return cl.rev(x)
276 276
277 277 changesets = files = revisions = 0
278 278
279 279 try:
280 280 # The transaction may already carry source information. In this
281 281 # case we use the top level data. We overwrite the argument
282 282 # because we need to use the top level values (if they exist)
283 283 # in this function.
284 284 srctype = tr.hookargs.setdefault('source', srctype)
285 285 url = tr.hookargs.setdefault('url', url)
286 286 repo.hook('prechangegroup',
287 287 throw=True, **pycompat.strkwargs(tr.hookargs))
288 288
289 289 # write changelog data to temp files so concurrent readers
290 290 # will not see an inconsistent view
291 291 cl = repo.changelog
292 292 cl.delayupdate(tr)
293 293 oldheads = set(cl.heads())
294 294
295 295 trp = weakref.proxy(tr)
296 296 # pull off the changeset group
297 297 repo.ui.status(_("adding changesets\n"))
298 298 clstart = len(cl)
299 299 progress = repo.ui.makeprogress(_('changesets'), unit=_('chunks'),
300 300 total=expectedtotal)
301 301 self.callback = progress.increment
302 302
303 303 efiles = set()
304 304 def onchangelog(cl, node):
305 305 efiles.update(cl.readfiles(node))
306 306
307 307 self.changelogheader()
308 308 deltas = self.deltaiter()
309 309 cgnodes = cl.addgroup(deltas, csmap, trp, addrevisioncb=onchangelog)
310 310 efiles = len(efiles)
311 311
312 312 if not cgnodes:
313 313 repo.ui.develwarn('applied empty changegroup',
314 314 config='warn-empty-changegroup')
315 315 clend = len(cl)
316 316 changesets = clend - clstart
317 317 progress.complete()
318 318 self.callback = None
319 319
320 320 # pull off the manifest group
321 321 repo.ui.status(_("adding manifests\n"))
322 322 # We know that we'll never have more manifests than we had
323 323 # changesets.
324 324 progress = repo.ui.makeprogress(_('manifests'), unit=_('chunks'),
325 325 total=changesets)
326 326 self._unpackmanifests(repo, revmap, trp, progress)
327 327
328 328 needfiles = {}
329 329 if repo.ui.configbool('server', 'validate'):
330 330 cl = repo.changelog
331 331 ml = repo.manifestlog
332 332 # validate incoming csets have their manifests
333 333 for cset in pycompat.xrange(clstart, clend):
334 334 mfnode = cl.changelogrevision(cset).manifest
335 335 mfest = ml[mfnode].readdelta()
336 336 # store file cgnodes we must see
337 337 for f, n in mfest.iteritems():
338 338 needfiles.setdefault(f, set()).add(n)
339 339
340 340 # process the files
341 341 repo.ui.status(_("adding file changes\n"))
342 342 newrevs, newfiles = _addchangegroupfiles(
343 343 repo, self, revmap, trp, efiles, needfiles)
344 344 revisions += newrevs
345 345 files += newfiles
346 346
347 347 deltaheads = 0
348 348 if oldheads:
349 349 heads = cl.heads()
350 350 deltaheads = len(heads) - len(oldheads)
351 351 for h in heads:
352 352 if h not in oldheads and repo[h].closesbranch():
353 353 deltaheads -= 1
354 354 htext = ""
355 355 if deltaheads:
356 356 htext = _(" (%+d heads)") % deltaheads
357 357
358 358 repo.ui.status(_("added %d changesets"
359 359 " with %d changes to %d files%s\n")
360 360 % (changesets, revisions, files, htext))
361 361 repo.invalidatevolatilesets()
362 362
363 363 if changesets > 0:
364 364 if 'node' not in tr.hookargs:
365 365 tr.hookargs['node'] = hex(cl.node(clstart))
366 366 tr.hookargs['node_last'] = hex(cl.node(clend - 1))
367 367 hookargs = dict(tr.hookargs)
368 368 else:
369 369 hookargs = dict(tr.hookargs)
370 370 hookargs['node'] = hex(cl.node(clstart))
371 371 hookargs['node_last'] = hex(cl.node(clend - 1))
372 372 repo.hook('pretxnchangegroup',
373 373 throw=True, **pycompat.strkwargs(hookargs))
374 374
375 375 added = [cl.node(r) for r in pycompat.xrange(clstart, clend)]
376 376 phaseall = None
377 377 if srctype in ('push', 'serve'):
378 378 # Old servers can not push the boundary themselves.
379 379 # New servers won't push the boundary if changeset already
380 380 # exists locally as secret
381 381 #
382 382 # We should not use added here but the list of all changes in
383 383 # the bundle
384 384 if repo.publishing():
385 385 targetphase = phaseall = phases.public
386 386 else:
387 387 # closer target phase computation
388 388
389 389 # Those changesets have been pushed from the
390 390 # outside, their phases are going to be pushed
391 391 # alongside. Therefore `targetphase` is
392 392 # ignored.
393 393 targetphase = phaseall = phases.draft
394 394 if added:
395 395 phases.registernew(repo, tr, targetphase, added)
396 396 if phaseall is not None:
397 397 phases.advanceboundary(repo, tr, phaseall, cgnodes)
398 398
399 399 if changesets > 0:
400 400
401 401 def runhooks():
402 402 # These hooks run when the lock releases, not when the
403 403 # transaction closes. So it's possible for the changelog
404 404 # to have changed since we last saw it.
405 405 if clstart >= len(repo):
406 406 return
407 407
408 408 repo.hook("changegroup", **pycompat.strkwargs(hookargs))
409 409
410 410 for n in added:
411 411 args = hookargs.copy()
412 412 args['node'] = hex(n)
413 413 del args['node_last']
414 414 repo.hook("incoming", **pycompat.strkwargs(args))
415 415
416 416 newheads = [h for h in repo.heads()
417 417 if h not in oldheads]
418 418 repo.ui.log("incoming",
419 419 "%d incoming changes - new heads: %s\n",
420 420 len(added),
421 421 ', '.join([hex(c[:6]) for c in newheads]))
422 422
423 423 tr.addpostclose('changegroup-runhooks-%020i' % clstart,
424 424 lambda tr: repo._afterlock(runhooks))
425 425 finally:
426 426 repo.ui.flush()
427 427 # never return 0 here:
428 428 if deltaheads < 0:
429 429 ret = deltaheads - 1
430 430 else:
431 431 ret = deltaheads + 1
432 432 return ret
433 433
434 434 def deltaiter(self):
435 435 """
436 436 returns an iterator of the deltas in this changegroup
437 437
438 438 Useful for passing to the underlying storage system to be stored.
439 439 """
440 440 chain = None
441 441 for chunkdata in iter(lambda: self.deltachunk(chain), {}):
442 442 # Chunkdata: (node, p1, p2, cs, deltabase, delta, flags)
443 443 yield chunkdata
444 444 chain = chunkdata[0]
445 445
446 446 class cg2unpacker(cg1unpacker):
447 447 """Unpacker for cg2 streams.
448 448
449 449 cg2 streams add support for generaldelta, so the delta header
450 450 format is slightly different. All other features about the data
451 451 remain the same.
452 452 """
453 453 deltaheader = _CHANGEGROUPV2_DELTA_HEADER
454 454 deltaheadersize = deltaheader.size
455 455 version = '02'
456 456
457 457 def _deltaheader(self, headertuple, prevnode):
458 458 node, p1, p2, deltabase, cs = headertuple
459 459 flags = 0
460 460 return node, p1, p2, deltabase, cs, flags
461 461
462 462 class cg3unpacker(cg2unpacker):
463 463 """Unpacker for cg3 streams.
464 464
465 465 cg3 streams add support for exchanging treemanifests and revlog
466 466 flags. It adds the revlog flags to the delta header and an empty chunk
467 467 separating manifests and files.
468 468 """
469 469 deltaheader = _CHANGEGROUPV3_DELTA_HEADER
470 470 deltaheadersize = deltaheader.size
471 471 version = '03'
472 472 _grouplistcount = 2 # One list of manifests and one list of files
473 473
474 474 def _deltaheader(self, headertuple, prevnode):
475 475 node, p1, p2, deltabase, cs, flags = headertuple
476 476 return node, p1, p2, deltabase, cs, flags
477 477
478 478 def _unpackmanifests(self, repo, revmap, trp, prog):
479 479 super(cg3unpacker, self)._unpackmanifests(repo, revmap, trp, prog)
480 480 for chunkdata in iter(self.filelogheader, {}):
481 481 # If we get here, there are directory manifests in the changegroup
482 482 d = chunkdata["filename"]
483 483 repo.ui.debug("adding %s revisions\n" % d)
484 484 dirlog = repo.manifestlog._revlog.dirlog(d)
485 485 deltas = self.deltaiter()
486 486 if not dirlog.addgroup(deltas, revmap, trp):
487 487 raise error.Abort(_("received dir revlog group is empty"))
488 488
489 489 class headerlessfixup(object):
490 490 def __init__(self, fh, h):
491 491 self._h = h
492 492 self._fh = fh
493 493 def read(self, n):
494 494 if self._h:
495 495 d, self._h = self._h[:n], self._h[n:]
496 496 if len(d) < n:
497 497 d += readexactly(self._fh, n - len(d))
498 498 return d
499 499 return readexactly(self._fh, n)
500 500
501 501 @attr.s(slots=True, frozen=True)
502 502 class revisiondelta(object):
503 503 """Describes a delta entry in a changegroup.
504 504
505 505 Captured data is sufficient to serialize the delta into multiple
506 506 formats.
507 507 """
508 508 # 20 byte node of this revision.
509 509 node = attr.ib()
510 510 # 20 byte nodes of parent revisions.
511 511 p1node = attr.ib()
512 512 p2node = attr.ib()
513 513 # 20 byte node of node this delta is against.
514 514 basenode = attr.ib()
515 515 # 20 byte node of changeset revision this delta is associated with.
516 516 linknode = attr.ib()
517 517 # 2 bytes of flags to apply to revision data.
518 518 flags = attr.ib()
519 519 # Iterable of chunks holding raw delta data.
520 520 deltachunks = attr.ib()
521 521
522 522 class cgpacker(object):
523 523 def __init__(self, repo, filematcher, version, allowreorder,
524 524 useprevdelta, builddeltaheader, manifestsend,
525 sendtreemanifests, bundlecaps=None):
525 sendtreemanifests, bundlecaps=None, shallow=False):
526 526 """Given a source repo, construct a bundler.
527 527
528 528 filematcher is a matcher that matches on files to include in the
529 529 changegroup. Used to facilitate sparse changegroups.
530 530
531 531 allowreorder controls whether reordering of revisions is allowed.
532 532 This value is used when ``bundle.reorder`` is ``auto`` or isn't
533 533 set.
534 534
535 535 useprevdelta controls whether revisions should always delta against
536 536 the previous revision in the changegroup.
537 537
538 538 builddeltaheader is a callable that constructs the header for a group
539 539 delta.
540 540
541 541 manifestsend is a chunk to send after manifests have been fully emitted.
542 542
543 543 sendtreemanifests indicates whether tree manifests should be emitted.
544 544
545 545 bundlecaps is optional and can be used to specify the set of
546 546 capabilities which can be used to build the bundle. While bundlecaps is
547 547 unused in core Mercurial, extensions rely on this feature to communicate
548 548 capabilities to customize the changegroup packer.
549
550 shallow indicates whether shallow data might be sent. The packer may
551 need to pack file contents not introduced by the changes being packed.
549 552 """
550 553 assert filematcher
551 554 self._filematcher = filematcher
552 555
553 556 self.version = version
554 557 self._useprevdelta = useprevdelta
555 558 self._builddeltaheader = builddeltaheader
556 559 self._manifestsend = manifestsend
557 560 self._sendtreemanifests = sendtreemanifests
558 561
559 562 # Set of capabilities we can use to build the bundle.
560 563 if bundlecaps is None:
561 564 bundlecaps = set()
562 565 self._bundlecaps = bundlecaps
566 self._isshallow = shallow
563 567
564 568 # experimental config: bundle.reorder
565 569 reorder = repo.ui.config('bundle', 'reorder')
566 570 if reorder == 'auto':
567 571 self._reorder = allowreorder
568 572 else:
569 573 self._reorder = stringutil.parsebool(reorder)
570 574
571 575 self._repo = repo
572 576
573 577 if self._repo.ui.verbose and not self._repo.ui.debugflag:
574 578 self._verbosenote = self._repo.ui.note
575 579 else:
576 580 self._verbosenote = lambda s: None
577 581
578 582 def _close(self):
579 583 # Ellipses serving mode.
580 584 getattr(self, '_clrev_to_localrev', {}).clear()
581 585 if getattr(self, '_next_clrev_to_localrev', {}):
582 586 self._clrev_to_localrev = self._next_clrev_to_localrev
583 587 del self._next_clrev_to_localrev
584 588 self._changelog_done = True
585 589
586 590 return closechunk()
587 591
588 592 def _fileheader(self, fname):
589 593 return chunkheader(len(fname)) + fname
590 594
591 595 # Extracted both for clarity and for overriding in extensions.
592 596 def _sortgroup(self, store, nodelist, lookup):
593 597 """Sort nodes for change group and turn them into revnums."""
594 598 # Ellipses serving mode.
595 599 #
596 600 # In a perfect world, we'd generate better ellipsis-ified graphs
597 601 # for non-changelog revlogs. In practice, we haven't started doing
598 602 # that yet, so the resulting DAGs for the manifestlog and filelogs
599 603 # are actually full of bogus parentage on all the ellipsis
600 604 # nodes. This has the side effect that, while the contents are
601 605 # correct, the individual DAGs might be completely out of whack in
602 606 # a case like 882681bc3166 and its ancestors (back about 10
603 607 # revisions or so) in the main hg repo.
604 608 #
605 609 # The one invariant we *know* holds is that the new (potentially
606 610 # bogus) DAG shape will be valid if we order the nodes in the
607 611 # order that they're introduced in dramatis personae by the
608 612 # changelog, so what we do is we sort the non-changelog histories
609 613 # by the order in which they are used by the changelog.
610 614 if util.safehasattr(self, '_full_nodes') and self._clnode_to_rev:
611 615 key = lambda n: self._clnode_to_rev[lookup(n)]
612 616 return [store.rev(n) for n in sorted(nodelist, key=key)]
613 617
614 618 # for generaldelta revlogs, we linearize the revs; this will both be
615 619 # much quicker and generate a much smaller bundle
616 620 if (store._generaldelta and self._reorder is None) or self._reorder:
617 621 dag = dagutil.revlogdag(store)
618 622 return dag.linearize(set(store.rev(n) for n in nodelist))
619 623 else:
620 624 return sorted([store.rev(n) for n in nodelist])
621 625
622 626 def group(self, nodelist, store, lookup, units=None):
623 627 """Calculate a delta group, yielding a sequence of changegroup chunks
624 628 (strings).
625 629
626 630 Given a list of changeset revs, return a set of deltas and
627 631 metadata corresponding to nodes. The first delta is
628 632 first parent(nodelist[0]) -> nodelist[0], the receiver is
629 633 guaranteed to have this parent as it has all history before
630 634 these changesets. In the case firstparent is nullrev the
631 635 changegroup starts with a full revision.
632 636
633 637 If units is not None, progress detail will be generated, units specifies
634 638 the type of revlog that is touched (changelog, manifest, etc.).
635 639 """
636 640 # if we don't have any revisions touched by these changesets, bail
637 641 if len(nodelist) == 0:
638 642 yield self._close()
639 643 return
640 644
641 645 revs = self._sortgroup(store, nodelist, lookup)
642 646
643 647 # add the parent of the first rev
644 648 p = store.parentrevs(revs[0])[0]
645 649 revs.insert(0, p)
646 650
647 651 # build deltas
648 652 progress = None
649 653 if units is not None:
650 654 progress = self._repo.ui.makeprogress(_('bundling'), unit=units,
651 655 total=(len(revs) - 1))
652 656 for r in pycompat.xrange(len(revs) - 1):
653 657 if progress:
654 658 progress.update(r + 1)
655 659 prev, curr = revs[r], revs[r + 1]
656 660 linknode = lookup(store.node(curr))
657 661 for c in self._revchunk(store, curr, prev, linknode):
658 662 yield c
659 663
660 664 if progress:
661 665 progress.complete()
662 666 yield self._close()
663 667
664 668 # filter any nodes that claim to be part of the known set
665 669 def _prune(self, store, missing, commonrevs):
666 670 # TODO this violates storage abstraction for manifests.
667 671 if isinstance(store, manifest.manifestrevlog):
668 672 if not self._filematcher.visitdir(store._dir[:-1] or '.'):
669 673 return []
670 674
671 675 rr, rl = store.rev, store.linkrev
672 676 return [n for n in missing if rl(rr(n)) not in commonrevs]
673 677
674 678 def _packmanifests(self, dir, mfnodes, lookuplinknode):
675 679 """Pack flat manifests into a changegroup stream."""
676 680 assert not dir
677 681 for chunk in self.group(mfnodes, self._repo.manifestlog._revlog,
678 682 lookuplinknode, units=_('manifests')):
679 683 yield chunk
680 684
681 685 def _packtreemanifests(self, dir, mfnodes, lookuplinknode):
682 686 """Version of _packmanifests that operates on directory manifests.
683 687
684 688 Encodes the directory name in the output so multiple manifests
685 689 can be sent.
686 690 """
687 691 assert self.version == b'03'
688 692
689 693 if dir:
690 694 yield self._fileheader(dir)
691 695
692 696 # TODO violates storage abstractions by assuming revlogs.
693 697 dirlog = self._repo.manifestlog._revlog.dirlog(dir)
694 698 for chunk in self.group(mfnodes, dirlog, lookuplinknode,
695 699 units=_('manifests')):
696 700 yield chunk
697 701
698 702 def generate(self, commonrevs, clnodes, fastpathlinkrev, source):
699 703 '''yield a sequence of changegroup chunks (strings)'''
700 704 repo = self._repo
701 705 cl = repo.changelog
702 706
703 707 clrevorder = {}
704 708 mfs = {} # needed manifests
705 709 fnodes = {} # needed file nodes
706 710 mfl = repo.manifestlog
707 711 # TODO violates storage abstraction.
708 712 mfrevlog = mfl._revlog
709 713 changedfiles = set()
710 714
711 715 ellipsesmode = util.safehasattr(self, '_full_nodes')
712 716
713 717 # Callback for the changelog, used to collect changed files and
714 718 # manifest nodes.
715 719 # Returns the linkrev node (identity in the changelog case).
716 720 def lookupcl(x):
717 721 c = cl.read(x)
718 722 clrevorder[x] = len(clrevorder)
719 723
720 724 if ellipsesmode:
721 725 # Only update mfs if x is going to be sent. Otherwise we
722 726 # end up with bogus linkrevs specified for manifests and
723 727 # we skip some manifest nodes that we should otherwise
724 728 # have sent.
725 729 if (x in self._full_nodes
726 730 or cl.rev(x) in self._precomputed_ellipsis):
727 731 n = c[0]
728 732 # Record the first changeset introducing this manifest
729 733 # version.
730 734 mfs.setdefault(n, x)
731 735 # Set this narrow-specific dict so we have the lowest
732 736 # manifest revnum to look up for this cl revnum. (Part of
733 737 # mapping changelog ellipsis parents to manifest ellipsis
734 738 # parents)
735 739 self._next_clrev_to_localrev.setdefault(cl.rev(x),
736 740 mfrevlog.rev(n))
737 741 # We can't trust the changed files list in the changeset if the
738 742 # client requested a shallow clone.
739 if self._is_shallow:
743 if self._isshallow:
740 744 changedfiles.update(mfl[c[0]].read().keys())
741 745 else:
742 746 changedfiles.update(c[3])
743 747 else:
744 748
745 749 n = c[0]
746 750 # record the first changeset introducing this manifest version
747 751 mfs.setdefault(n, x)
748 752 # Record a complete list of potentially-changed files in
749 753 # this manifest.
750 754 changedfiles.update(c[3])
751 755
752 756 return x
753 757
754 758 self._verbosenote(_('uncompressed size of bundle content:\n'))
755 759 size = 0
756 760 for chunk in self.group(clnodes, cl, lookupcl, units=_('changesets')):
757 761 size += len(chunk)
758 762 yield chunk
759 763 self._verbosenote(_('%8.i (changelog)\n') % size)
760 764
761 765 # We need to make sure that the linkrev in the changegroup refers to
762 766 # the first changeset that introduced the manifest or file revision.
763 767 # The fastpath is usually safer than the slowpath, because the filelogs
764 768 # are walked in revlog order.
765 769 #
766 770 # When taking the slowpath with reorder=None and the manifest revlog
767 771 # uses generaldelta, the manifest may be walked in the "wrong" order.
768 772 # Without 'clrevorder', we would get an incorrect linkrev (see fix in
769 773 # cc0ff93d0c0c).
770 774 #
771 775 # When taking the fastpath, we are only vulnerable to reordering
772 776 # of the changelog itself. The changelog never uses generaldelta, so
773 777 # it is only reordered when reorder=True. To handle this case, we
774 778 # simply take the slowpath, which already has the 'clrevorder' logic.
775 779 # This was also fixed in cc0ff93d0c0c.
776 780 fastpathlinkrev = fastpathlinkrev and not self._reorder
777 781 # Treemanifests don't work correctly with fastpathlinkrev
778 782 # either, because we don't discover which directory nodes to
779 783 # send along with files. This could probably be fixed.
780 784 fastpathlinkrev = fastpathlinkrev and (
781 785 'treemanifest' not in repo.requirements)
782 786
783 787 for chunk in self.generatemanifests(commonrevs, clrevorder,
784 788 fastpathlinkrev, mfs, fnodes, source):
785 789 yield chunk
786 790
787 791 if ellipsesmode:
788 792 mfdicts = None
789 if self._is_shallow:
793 if self._isshallow:
790 794 mfdicts = [(self._repo.manifestlog[n].read(), lr)
791 795 for (n, lr) in mfs.iteritems()]
792 796
793 797 mfs.clear()
794 798 clrevs = set(cl.rev(x) for x in clnodes)
795 799
796 800 if not fastpathlinkrev:
797 801 def linknodes(unused, fname):
798 802 return fnodes.get(fname, {})
799 803 else:
800 804 cln = cl.node
801 805 def linknodes(filerevlog, fname):
802 806 llr = filerevlog.linkrev
803 807 fln = filerevlog.node
804 808 revs = ((r, llr(r)) for r in filerevlog)
805 809 return dict((fln(r), cln(lr)) for r, lr in revs if lr in clrevs)
806 810
807 811 if ellipsesmode:
808 812 # We need to pass the mfdicts variable down into
809 813 # generatefiles(), but more than one command might have
810 814 # wrapped generatefiles so we can't modify the function
811 815 # signature. Instead, we pass the data to ourselves using an
812 816 # instance attribute. I'm sorry.
813 817 self._mfdicts = mfdicts
814 818
815 819 for chunk in self.generatefiles(changedfiles, linknodes, commonrevs,
816 820 source):
817 821 yield chunk
818 822
819 823 yield self._close()
820 824
821 825 if clnodes:
822 826 repo.hook('outgoing', node=hex(clnodes[0]), source=source)
823 827
824 828 def generatemanifests(self, commonrevs, clrevorder, fastpathlinkrev, mfs,
825 829 fnodes, source):
826 830 """Returns an iterator of changegroup chunks containing manifests.
827 831
828 832 `source` is unused here, but is used by extensions like remotefilelog to
829 833 change what is sent based on pulls vs pushes, etc.
830 834 """
831 835 repo = self._repo
832 836 mfl = repo.manifestlog
833 837 dirlog = mfl._revlog.dirlog
834 838 tmfnodes = {'': mfs}
835 839
836 840 # Callback for the manifest, used to collect linkrevs for filelog
837 841 # revisions.
838 842 # Returns the linkrev node (collected in lookupcl).
839 843 def makelookupmflinknode(dir, nodes):
840 844 if fastpathlinkrev:
841 845 assert not dir
842 846 return mfs.__getitem__
843 847
844 848 def lookupmflinknode(x):
845 849 """Callback for looking up the linknode for manifests.
846 850
847 851 Returns the linkrev node for the specified manifest.
848 852
849 853 SIDE EFFECT:
850 854
851 855 1) fclnodes gets populated with the list of relevant
852 856 file nodes if we're not using fastpathlinkrev
853 857 2) When treemanifests are in use, collects treemanifest nodes
854 858 to send
855 859
856 860 Note that this means manifests must be completely sent to
857 861 the client before you can trust the list of files and
858 862 treemanifests to send.
859 863 """
860 864 clnode = nodes[x]
861 865 mdata = mfl.get(dir, x).readfast(shallow=True)
862 866 for p, n, fl in mdata.iterentries():
863 867 if fl == 't': # subdirectory manifest
864 868 subdir = dir + p + '/'
865 869 tmfclnodes = tmfnodes.setdefault(subdir, {})
866 870 tmfclnode = tmfclnodes.setdefault(n, clnode)
867 871 if clrevorder[clnode] < clrevorder[tmfclnode]:
868 872 tmfclnodes[n] = clnode
869 873 else:
870 874 f = dir + p
871 875 fclnodes = fnodes.setdefault(f, {})
872 876 fclnode = fclnodes.setdefault(n, clnode)
873 877 if clrevorder[clnode] < clrevorder[fclnode]:
874 878 fclnodes[n] = clnode
875 879 return clnode
876 880 return lookupmflinknode
877 881
878 882 fn = (self._packtreemanifests if self._sendtreemanifests
879 883 else self._packmanifests)
880 884 size = 0
881 885 while tmfnodes:
882 886 dir, nodes = tmfnodes.popitem()
883 887 prunednodes = self._prune(dirlog(dir), nodes, commonrevs)
884 888 if not dir or prunednodes:
885 889 for x in fn(dir, prunednodes, makelookupmflinknode(dir, nodes)):
886 890 size += len(x)
887 891 yield x
888 892 self._verbosenote(_('%8.i (manifests)\n') % size)
889 893 yield self._manifestsend
890 894
891 895 # The 'source' parameter is useful for extensions
892 896 def generatefiles(self, changedfiles, linknodes, commonrevs, source):
893 897 changedfiles = list(filter(self._filematcher, changedfiles))
894 898
895 if getattr(self, '_is_shallow', False):
899 if self._isshallow:
896 900 # See comment in generate() for why this sadness is a thing.
897 901 mfdicts = self._mfdicts
898 902 del self._mfdicts
899 903 # In a shallow clone, the linknodes callback needs to also include
900 904 # those file nodes that are in the manifests we sent but weren't
901 905 # introduced by those manifests.
902 906 commonctxs = [self._repo[c] for c in commonrevs]
903 907 oldlinknodes = linknodes
904 908 clrev = self._repo.changelog.rev
905 909
906 910 # Defining this function has a side-effect of overriding the
907 911 # function of the same name that was passed in as an argument.
908 912 # TODO have caller pass in appropriate function.
909 913 def linknodes(flog, fname):
910 914 for c in commonctxs:
911 915 try:
912 916 fnode = c.filenode(fname)
913 917 self._clrev_to_localrev[c.rev()] = flog.rev(fnode)
914 918 except error.ManifestLookupError:
915 919 pass
916 920 links = oldlinknodes(flog, fname)
917 921 if len(links) != len(mfdicts):
918 922 for mf, lr in mfdicts:
919 923 fnode = mf.get(fname, None)
920 924 if fnode in links:
921 925 links[fnode] = min(links[fnode], lr, key=clrev)
922 926 elif fnode:
923 927 links[fnode] = lr
924 928 return links
925 929
926 930 return self._generatefiles(changedfiles, linknodes, commonrevs, source)
927 931
928 932 def _generatefiles(self, changedfiles, linknodes, commonrevs, source):
929 933 repo = self._repo
930 934 progress = repo.ui.makeprogress(_('bundling'), unit=_('files'),
931 935 total=len(changedfiles))
932 936 for i, fname in enumerate(sorted(changedfiles)):
933 937 filerevlog = repo.file(fname)
934 938 if not filerevlog:
935 939 raise error.Abort(_("empty or missing file data for %s") %
936 940 fname)
937 941
938 942 linkrevnodes = linknodes(filerevlog, fname)
939 943 # Lookup for filenodes, we collected the linkrev nodes above in the
940 944 # fastpath case and with lookupmf in the slowpath case.
941 945 def lookupfilelog(x):
942 946 return linkrevnodes[x]
943 947
944 948 filenodes = self._prune(filerevlog, linkrevnodes, commonrevs)
945 949 if filenodes:
946 950 progress.update(i + 1, item=fname)
947 951 h = self._fileheader(fname)
948 952 size = len(h)
949 953 yield h
950 954 for chunk in self.group(filenodes, filerevlog, lookupfilelog):
951 955 size += len(chunk)
952 956 yield chunk
953 957 self._verbosenote(_('%8.i %s\n') % (size, fname))
954 958 progress.complete()
955 959
956 960 def _deltaparent(self, store, rev, p1, p2, prev):
957 961 if self._useprevdelta:
958 962 if not store.candelta(prev, rev):
959 963 raise error.ProgrammingError(
960 964 'cg1 should not be used in this case')
961 965 return prev
962 966
963 967 # Narrow ellipses mode.
964 968 if util.safehasattr(self, '_full_nodes'):
965 969 # TODO: send better deltas when in narrow mode.
966 970 #
967 971 # changegroup.group() loops over revisions to send,
968 972 # including revisions we'll skip. What this means is that
969 973 # `prev` will be a potentially useless delta base for all
970 974 # ellipsis nodes, as the client likely won't have it. In
971 975 # the future we should do bookkeeping about which nodes
972 976 # have been sent to the client, and try to be
973 977 # significantly smarter about delta bases. This is
974 978 # slightly tricky because this same code has to work for
975 979 # all revlogs, and we don't have the linkrev/linknode here.
976 980 return p1
977 981
978 982 dp = store.deltaparent(rev)
979 983 if dp == nullrev and store.storedeltachains:
980 984 # Avoid sending full revisions when delta parent is null. Pick prev
981 985 # in that case. It's tempting to pick p1 in this case, as p1 will
982 986 # be smaller in the common case. However, computing a delta against
983 987 # p1 may require resolving the raw text of p1, which could be
984 988 # expensive. The revlog caches should have prev cached, meaning
985 989 # less CPU for changegroup generation. There is likely room to add
986 990 # a flag and/or config option to control this behavior.
987 991 base = prev
988 992 elif dp == nullrev:
989 993 # revlog is configured to use full snapshot for a reason,
990 994 # stick to full snapshot.
991 995 base = nullrev
992 996 elif dp not in (p1, p2, prev):
993 997 # Pick prev when we can't be sure remote has the base revision.
994 998 return prev
995 999 else:
996 1000 base = dp
997 1001
998 1002 if base != nullrev and not store.candelta(base, rev):
999 1003 base = nullrev
1000 1004
1001 1005 return base
1002 1006
1003 1007 def _revchunk(self, store, rev, prev, linknode):
1004 1008 if util.safehasattr(self, '_full_nodes'):
1005 1009 fn = self._revisiondeltanarrow
1006 1010 else:
1007 1011 fn = self._revisiondeltanormal
1008 1012
1009 1013 delta = fn(store, rev, prev, linknode)
1010 1014 if not delta:
1011 1015 return
1012 1016
1013 1017 meta = self._builddeltaheader(delta)
1014 1018 l = len(meta) + sum(len(x) for x in delta.deltachunks)
1015 1019
1016 1020 yield chunkheader(l)
1017 1021 yield meta
1018 1022 for x in delta.deltachunks:
1019 1023 yield x
1020 1024
1021 1025 def _revisiondeltanormal(self, store, rev, prev, linknode):
1022 1026 node = store.node(rev)
1023 1027 p1, p2 = store.parentrevs(rev)
1024 1028 base = self._deltaparent(store, rev, p1, p2, prev)
1025 1029
1026 1030 prefix = ''
1027 1031 if store.iscensored(base) or store.iscensored(rev):
1028 1032 try:
1029 1033 delta = store.revision(node, raw=True)
1030 1034 except error.CensoredNodeError as e:
1031 1035 delta = e.tombstone
1032 1036 if base == nullrev:
1033 1037 prefix = mdiff.trivialdiffheader(len(delta))
1034 1038 else:
1035 1039 baselen = store.rawsize(base)
1036 1040 prefix = mdiff.replacediffheader(baselen, len(delta))
1037 1041 elif base == nullrev:
1038 1042 delta = store.revision(node, raw=True)
1039 1043 prefix = mdiff.trivialdiffheader(len(delta))
1040 1044 else:
1041 1045 delta = store.revdiff(base, rev)
1042 1046 p1n, p2n = store.parents(node)
1043 1047
1044 1048 return revisiondelta(
1045 1049 node=node,
1046 1050 p1node=p1n,
1047 1051 p2node=p2n,
1048 1052 basenode=store.node(base),
1049 1053 linknode=linknode,
1050 1054 flags=store.flags(rev),
1051 1055 deltachunks=(prefix, delta),
1052 1056 )
1053 1057
1054 1058 def _revisiondeltanarrow(self, store, rev, prev, linknode):
1055 1059 # build up some mapping information that's useful later. See
1056 1060 # the local() nested function below.
1057 1061 if not self._changelog_done:
1058 1062 self._clnode_to_rev[linknode] = rev
1059 1063 linkrev = rev
1060 1064 self._clrev_to_localrev[linkrev] = rev
1061 1065 else:
1062 1066 linkrev = self._clnode_to_rev[linknode]
1063 1067 self._clrev_to_localrev[linkrev] = rev
1064 1068
1065 1069 # This is a node to send in full, because the changeset it
1066 1070 # corresponds to was a full changeset.
1067 1071 if linknode in self._full_nodes:
1068 1072 return self._revisiondeltanormal(store, rev, prev, linknode)
1069 1073
1070 1074 # At this point, a node can either be one we should skip or an
1071 1075 # ellipsis. If it's not an ellipsis, bail immediately.
1072 1076 if linkrev not in self._precomputed_ellipsis:
1073 1077 return
1074 1078
1075 1079 linkparents = self._precomputed_ellipsis[linkrev]
1076 1080 def local(clrev):
1077 1081 """Turn a changelog revnum into a local revnum.
1078 1082
1079 1083 The ellipsis dag is stored as revnums on the changelog,
1080 1084 but when we're producing ellipsis entries for
1081 1085 non-changelog revlogs, we need to turn those numbers into
1082 1086 something local. This does that for us, and during the
1083 1087 changelog sending phase will also expand the stored
1084 1088 mappings as needed.
1085 1089 """
1086 1090 if clrev == nullrev:
1087 1091 return nullrev
1088 1092
1089 1093 if not self._changelog_done:
1090 1094 # If we're doing the changelog, it's possible that we
1091 1095 # have a parent that is already on the client, and we
1092 1096 # need to store some extra mapping information so that
1093 1097 # our contained ellipsis nodes will be able to resolve
1094 1098 # their parents.
1095 1099 if clrev not in self._clrev_to_localrev:
1096 1100 clnode = store.node(clrev)
1097 1101 self._clnode_to_rev[clnode] = clrev
1098 1102 return clrev
1099 1103
1100 1104 # Walk the ellipsis-ized changelog breadth-first looking for a
1101 1105 # change that has been linked from the current revlog.
1102 1106 #
1103 1107 # For a flat manifest revlog only a single step should be necessary
1104 1108 # as all relevant changelog entries are relevant to the flat
1105 1109 # manifest.
1106 1110 #
1107 1111 # For a filelog or tree manifest dirlog however not every changelog
1108 1112 # entry will have been relevant, so we need to skip some changelog
1109 1113 # nodes even after ellipsis-izing.
1110 1114 walk = [clrev]
1111 1115 while walk:
1112 1116 p = walk[0]
1113 1117 walk = walk[1:]
1114 1118 if p in self._clrev_to_localrev:
1115 1119 return self._clrev_to_localrev[p]
1116 1120 elif p in self._full_nodes:
1117 1121 walk.extend([pp for pp in self._repo.changelog.parentrevs(p)
1118 1122 if pp != nullrev])
1119 1123 elif p in self._precomputed_ellipsis:
1120 1124 walk.extend([pp for pp in self._precomputed_ellipsis[p]
1121 1125 if pp != nullrev])
1122 1126 else:
1123 1127 # In this case, we've got an ellipsis with parents
1124 1128 # outside the current bundle (likely an
1125 1129 # incremental pull). We "know" that we can use the
1126 1130 # value of this same revlog at whatever revision
1127 1131 # is pointed to by linknode. "Know" is in scare
1128 1132 # quotes because I haven't done enough examination
1129 1133 # of edge cases to convince myself this is really
1130 1134 # a fact - it works for all the (admittedly
1131 1135 # thorough) cases in our testsuite, but I would be
1132 1136 # somewhat unsurprised to find a case in the wild
1133 1137 # where this breaks down a bit. That said, I don't
1134 1138 # know if it would hurt anything.
1135 1139 for i in pycompat.xrange(rev, 0, -1):
1136 1140 if store.linkrev(i) == clrev:
1137 1141 return i
1138 1142 # We failed to resolve a parent for this node, so
1139 1143 # we crash the changegroup construction.
1140 1144 raise error.Abort(
1141 1145 'unable to resolve parent while packing %r %r'
1142 1146 ' for changeset %r' % (store.indexfile, rev, clrev))
1143 1147
1144 1148 return nullrev
1145 1149
1146 1150 if not linkparents or (
1147 1151 store.parentrevs(rev) == (nullrev, nullrev)):
1148 1152 p1, p2 = nullrev, nullrev
1149 1153 elif len(linkparents) == 1:
1150 1154 p1, = sorted(local(p) for p in linkparents)
1151 1155 p2 = nullrev
1152 1156 else:
1153 1157 p1, p2 = sorted(local(p) for p in linkparents)
1154 1158
1155 1159 n = store.node(rev)
1156 1160 p1n, p2n = store.node(p1), store.node(p2)
1157 1161 flags = store.flags(rev)
1158 1162 flags |= revlog.REVIDX_ELLIPSIS
1159 1163
1160 1164 # TODO: try and actually send deltas for ellipsis data blocks
1161 1165 data = store.revision(n)
1162 1166 diffheader = mdiff.trivialdiffheader(len(data))
1163 1167
1164 1168 return revisiondelta(
1165 1169 node=n,
1166 1170 p1node=p1n,
1167 1171 p2node=p2n,
1168 1172 basenode=nullid,
1169 1173 linknode=linknode,
1170 1174 flags=flags,
1171 1175 deltachunks=(diffheader, data),
1172 1176 )
1173 1177
1174 def _makecg1packer(repo, filematcher, bundlecaps):
1178 def _makecg1packer(repo, filematcher, bundlecaps, shallow=False):
1175 1179 builddeltaheader = lambda d: _CHANGEGROUPV1_DELTA_HEADER.pack(
1176 1180 d.node, d.p1node, d.p2node, d.linknode)
1177 1181
1178 1182 return cgpacker(repo, filematcher, b'01',
1179 1183 useprevdelta=True,
1180 1184 allowreorder=None,
1181 1185 builddeltaheader=builddeltaheader,
1182 1186 manifestsend=b'',
1183 1187 sendtreemanifests=False,
1184 bundlecaps=bundlecaps)
1188 bundlecaps=bundlecaps,
1189 shallow=shallow)
1185 1190
1186 def _makecg2packer(repo, filematcher, bundlecaps):
1191 def _makecg2packer(repo, filematcher, bundlecaps, shallow=False):
1187 1192 builddeltaheader = lambda d: _CHANGEGROUPV2_DELTA_HEADER.pack(
1188 1193 d.node, d.p1node, d.p2node, d.basenode, d.linknode)
1189 1194
1190 1195 # Since generaldelta is directly supported by cg2, reordering
1191 1196 # generally doesn't help, so we disable it by default (treating
1192 1197 # bundle.reorder=auto just like bundle.reorder=False).
1193 1198 return cgpacker(repo, filematcher, b'02',
1194 1199 useprevdelta=False,
1195 1200 allowreorder=False,
1196 1201 builddeltaheader=builddeltaheader,
1197 1202 manifestsend=b'',
1198 1203 sendtreemanifests=False,
1199 bundlecaps=bundlecaps)
1204 bundlecaps=bundlecaps,
1205 shallow=shallow)
1200 1206
1201 def _makecg3packer(repo, filematcher, bundlecaps):
1207 def _makecg3packer(repo, filematcher, bundlecaps, shallow=False):
1202 1208 builddeltaheader = lambda d: _CHANGEGROUPV3_DELTA_HEADER.pack(
1203 1209 d.node, d.p1node, d.p2node, d.basenode, d.linknode, d.flags)
1204 1210
1205 1211 return cgpacker(repo, filematcher, b'03',
1206 1212 useprevdelta=False,
1207 1213 allowreorder=False,
1208 1214 builddeltaheader=builddeltaheader,
1209 1215 manifestsend=closechunk(),
1210 1216 sendtreemanifests=True,
1211 bundlecaps=bundlecaps)
1217 bundlecaps=bundlecaps,
1218 shallow=shallow)
1212 1219
1213 1220 _packermap = {'01': (_makecg1packer, cg1unpacker),
1214 1221 # cg2 adds support for exchanging generaldelta
1215 1222 '02': (_makecg2packer, cg2unpacker),
1216 1223 # cg3 adds support for exchanging revlog flags and treemanifests
1217 1224 '03': (_makecg3packer, cg3unpacker),
1218 1225 }
1219 1226
1220 1227 def allsupportedversions(repo):
1221 1228 versions = set(_packermap.keys())
1222 1229 if not (repo.ui.configbool('experimental', 'changegroup3') or
1223 1230 repo.ui.configbool('experimental', 'treemanifest') or
1224 1231 'treemanifest' in repo.requirements):
1225 1232 versions.discard('03')
1226 1233 return versions
1227 1234
1228 1235 # Changegroup versions that can be applied to the repo
1229 1236 def supportedincomingversions(repo):
1230 1237 return allsupportedversions(repo)
1231 1238
1232 1239 # Changegroup versions that can be created from the repo
1233 1240 def supportedoutgoingversions(repo):
1234 1241 versions = allsupportedversions(repo)
1235 1242 if 'treemanifest' in repo.requirements:
1236 1243 # Versions 01 and 02 support only flat manifests and it's just too
1237 1244 # expensive to convert between the flat manifest and tree manifest on
1238 1245 # the fly. Since tree manifests are hashed differently, all of history
1239 1246 # would have to be converted. Instead, we simply don't even pretend to
1240 1247 # support versions 01 and 02.
1241 1248 versions.discard('01')
1242 1249 versions.discard('02')
1243 1250 if repository.NARROW_REQUIREMENT in repo.requirements:
1244 1251 # Versions 01 and 02 don't support revlog flags, and we need to
1245 1252 # support that for stripping and unbundling to work.
1246 1253 versions.discard('01')
1247 1254 versions.discard('02')
1248 1255 if LFS_REQUIREMENT in repo.requirements:
1249 1256 # Versions 01 and 02 don't support revlog flags, and we need to
1250 1257 # mark LFS entries with REVIDX_EXTSTORED.
1251 1258 versions.discard('01')
1252 1259 versions.discard('02')
1253 1260
1254 1261 return versions
1255 1262
1256 1263 def localversion(repo):
1257 1264 # Finds the best version to use for bundles that are meant to be used
1258 1265 # locally, such as those from strip and shelve, and temporary bundles.
1259 1266 return max(supportedoutgoingversions(repo))
1260 1267
1261 1268 def safeversion(repo):
1262 1269 # Finds the smallest version that it's safe to assume clients of the repo
1263 1270 # will support. For example, all hg versions that support generaldelta also
1264 1271 # support changegroup 02.
1265 1272 versions = supportedoutgoingversions(repo)
1266 1273 if 'generaldelta' in repo.requirements:
1267 1274 versions.discard('01')
1268 1275 assert versions
1269 1276 return min(versions)
1270 1277
1271 def getbundler(version, repo, bundlecaps=None, filematcher=None):
1278 def getbundler(version, repo, bundlecaps=None, filematcher=None,
1279 shallow=False):
1272 1280 assert version in supportedoutgoingversions(repo)
1273 1281
1274 1282 if filematcher is None:
1275 1283 filematcher = matchmod.alwaysmatcher(repo.root, '')
1276 1284
1277 1285 if version == '01' and not filematcher.always():
1278 1286 raise error.ProgrammingError('version 01 changegroups do not support '
1279 1287 'sparse file matchers')
1280 1288
1281 1289 # Requested files could include files not in the local store. So
1282 1290 # filter those out.
1283 1291 filematcher = matchmod.intersectmatchers(repo.narrowmatch(),
1284 1292 filematcher)
1285 1293
1286 1294 fn = _packermap[version][0]
1287 return fn(repo, filematcher, bundlecaps)
1295 return fn(repo, filematcher, bundlecaps, shallow=shallow)
1288 1296
1289 1297 def getunbundler(version, fh, alg, extras=None):
1290 1298 return _packermap[version][1](fh, alg, extras=extras)
1291 1299
1292 1300 def _changegroupinfo(repo, nodes, source):
1293 1301 if repo.ui.verbose or source == 'bundle':
1294 1302 repo.ui.status(_("%d changesets found\n") % len(nodes))
1295 1303 if repo.ui.debugflag:
1296 1304 repo.ui.debug("list of changesets:\n")
1297 1305 for node in nodes:
1298 1306 repo.ui.debug("%s\n" % hex(node))
1299 1307
1300 1308 def makechangegroup(repo, outgoing, version, source, fastpath=False,
1301 1309 bundlecaps=None):
1302 1310 cgstream = makestream(repo, outgoing, version, source,
1303 1311 fastpath=fastpath, bundlecaps=bundlecaps)
1304 1312 return getunbundler(version, util.chunkbuffer(cgstream), None,
1305 1313 {'clcount': len(outgoing.missing) })
1306 1314
1307 1315 def makestream(repo, outgoing, version, source, fastpath=False,
1308 1316 bundlecaps=None, filematcher=None):
1309 1317 bundler = getbundler(version, repo, bundlecaps=bundlecaps,
1310 1318 filematcher=filematcher)
1311 1319
1312 1320 repo = repo.unfiltered()
1313 1321 commonrevs = outgoing.common
1314 1322 csets = outgoing.missing
1315 1323 heads = outgoing.missingheads
1316 1324 # We go through the fast path if we get told to, or if all (unfiltered)
1317 1325 # heads have been requested (since we then know that all linkrevs will
1318 1326 # be pulled by the client).
1319 1327 heads.sort()
1320 1328 fastpathlinkrev = fastpath or (
1321 1329 repo.filtername is None and heads == sorted(repo.heads()))
1322 1330
1323 1331 repo.hook('preoutgoing', throw=True, source=source)
1324 1332 _changegroupinfo(repo, csets, source)
1325 1333 return bundler.generate(commonrevs, csets, fastpathlinkrev, source)
1326 1334
1327 1335 def _addchangegroupfiles(repo, source, revmap, trp, expectedfiles, needfiles):
1328 1336 revisions = 0
1329 1337 files = 0
1330 1338 progress = repo.ui.makeprogress(_('files'), unit=_('files'),
1331 1339 total=expectedfiles)
1332 1340 for chunkdata in iter(source.filelogheader, {}):
1333 1341 files += 1
1334 1342 f = chunkdata["filename"]
1335 1343 repo.ui.debug("adding %s revisions\n" % f)
1336 1344 progress.increment()
1337 1345 fl = repo.file(f)
1338 1346 o = len(fl)
1339 1347 try:
1340 1348 deltas = source.deltaiter()
1341 1349 if not fl.addgroup(deltas, revmap, trp):
1342 1350 raise error.Abort(_("received file revlog group is empty"))
1343 1351 except error.CensoredBaseError as e:
1344 1352 raise error.Abort(_("received delta base is censored: %s") % e)
1345 1353 revisions += len(fl) - o
1346 1354 if f in needfiles:
1347 1355 needs = needfiles[f]
1348 1356 for new in pycompat.xrange(o, len(fl)):
1349 1357 n = fl.node(new)
1350 1358 if n in needs:
1351 1359 needs.remove(n)
1352 1360 else:
1353 1361 raise error.Abort(
1354 1362 _("received spurious file revlog entry"))
1355 1363 if not needs:
1356 1364 del needfiles[f]
1357 1365 progress.complete()
1358 1366
1359 1367 for f, needs in needfiles.iteritems():
1360 1368 fl = repo.file(f)
1361 1369 for n in needs:
1362 1370 try:
1363 1371 fl.rev(n)
1364 1372 except error.LookupError:
1365 1373 raise error.Abort(
1366 1374 _('missing file data for %s:%s - run hg verify') %
1367 1375 (f, hex(n)))
1368 1376
1369 1377 return revisions, files
1370 1378
1371 1379 def _packellipsischangegroup(repo, common, match, relevant_nodes,
1372 1380 ellipsisroots, visitnodes, depth, source, version):
1373 1381 if version in ('01', '02'):
1374 1382 raise error.Abort(
1375 1383 'ellipsis nodes require at least cg3 on client and server, '
1376 1384 'but negotiated version %s' % version)
1377 1385 # We wrap cg1packer.revchunk, using a side channel to pass
1378 1386 # relevant_nodes into that area. Then if linknode isn't in the
1379 1387 # set, we know we have an ellipsis node and we should defer
1380 1388 # sending that node's data. We override close() to detect
1381 1389 # pending ellipsis nodes and flush them.
1382 packer = getbundler(version, repo, filematcher=match)
1390 packer = getbundler(version, repo, filematcher=match,
1391 shallow=depth is not None)
1383 1392 # Give the packer the list of nodes which should not be
1384 1393 # ellipsis nodes. We store this rather than the set of nodes
1385 1394 # that should be an ellipsis because for very large histories
1386 1395 # we expect this to be significantly smaller.
1387 1396 packer._full_nodes = relevant_nodes
1388 1397 # Maps ellipsis revs to their roots at the changelog level.
1389 1398 packer._precomputed_ellipsis = ellipsisroots
1390 1399 # Maps CL revs to per-revlog revisions. Cleared in close() at
1391 1400 # the end of each group.
1392 1401 packer._clrev_to_localrev = {}
1393 1402 packer._next_clrev_to_localrev = {}
1394 1403 # Maps changelog nodes to changelog revs. Filled in once
1395 1404 # during changelog stage and then left unmodified.
1396 1405 packer._clnode_to_rev = {}
1397 1406 packer._changelog_done = False
1398 # If true, informs the packer that it is serving shallow content and might
1399 # need to pack file contents not introduced by the changes being packed.
1400 packer._is_shallow = depth is not None
1401 1407
1402 1408 return packer.generate(common, visitnodes, False, source)
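For reference, a toy illustration of the refactoring pattern this patch applies; this is not Mercurial code, and Packer/depth are hypothetical stand-ins. It shows why declaring the flag in the constructor beats attaching it afterwards: every instance carries a well-defined value, and readers of __init__ can see that the attribute exists.

    class Packer(object):
        def __init__(self, shallow=False):
            # Declared once, up front, as cgpacker now does with
            # self._isshallow.
            self._isshallow = shallow

        def generatefiles(self):
            if self._isshallow:
                return 'widened file set for shallow clients'
            return 'files introduced by the packed changesets only'

    # Before this change, callers did the moral equivalent of:
    #     packer = Packer()
    #     packer._is_shallow = depth is not None  # attribute injected later
    # After it, the flag travels with construction:
    packer = Packer(shallow=True)
    print(packer.generatefiles())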