changegroup: pass end of manifests marker into constructor...
Gregory Szorc
r38934:67f37e8a default
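In short, as the diff below shows: the cg1/cg2/cg3 packer classes previously each overrode a _manifestsdone() method to produce the chunk that terminates the manifest section of a changegroup. This change deletes that method and instead passes the terminating chunk ("manifestsend") into the packer constructor: the cg1 and cg2 factories pass b'' (filelogs follow immediately), while the cg3 factory passes closechunk() (an explicit empty chunk separates tree manifests from filelogs). A condensed, illustrative view of the signature change:

    # before
    cg1packer(repo, filematcher, version, builddeltaheader, bundlecaps=None)
    # after
    cg1packer(repo, filematcher, version, builddeltaheader, manifestsend,
              bundlecaps=None)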
@@ -1,1375 +1,1376 b''
1 1 # changegroup.py - Mercurial changegroup manipulation functions
2 2 #
3 3 # Copyright 2006 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import os
11 11 import struct
12 12 import weakref
13 13
14 14 from .i18n import _
15 15 from .node import (
16 16 hex,
17 17 nullid,
18 18 nullrev,
19 19 short,
20 20 )
21 21
22 22 from .thirdparty import (
23 23 attr,
24 24 )
25 25
26 26 from . import (
27 27 dagutil,
28 28 error,
29 29 manifest,
30 30 match as matchmod,
31 31 mdiff,
32 32 phases,
33 33 pycompat,
34 34 repository,
35 35 revlog,
36 36 util,
37 37 )
38 38
39 39 from .utils import (
40 40 stringutil,
41 41 )
42 42
43 43 _CHANGEGROUPV1_DELTA_HEADER = struct.Struct("20s20s20s20s")
44 44 _CHANGEGROUPV2_DELTA_HEADER = struct.Struct("20s20s20s20s20s")
45 45 _CHANGEGROUPV3_DELTA_HEADER = struct.Struct(">20s20s20s20s20sH")
46 46
47 47 LFS_REQUIREMENT = 'lfs'
48 48
49 49 readexactly = util.readexactly
50 50
51 51 def getchunk(stream):
52 52 """return the next chunk from stream as a string"""
53 53 d = readexactly(stream, 4)
54 54 l = struct.unpack(">l", d)[0]
55 55 if l <= 4:
56 56 if l:
57 57 raise error.Abort(_("invalid chunk length %d") % l)
58 58 return ""
59 59 return readexactly(stream, l - 4)
60 60
61 61 def chunkheader(length):
62 62 """return a changegroup chunk header (string)"""
63 63 return struct.pack(">l", length + 4)
64 64
65 65 def closechunk():
66 66 """return a changegroup chunk header (string) for a zero-length chunk"""
67 67 return struct.pack(">l", 0)
68 68
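The three helpers above define the changegroup chunk framing: a 4-byte big-endian signed length that counts its own four bytes, followed by the payload; a zero length marks a terminating empty chunk. A minimal standalone sketch of the same framing (the _readexactly helper stands in for util.readexactly and is not part of this module):

    import io
    import struct

    def _readexactly(stream, n):
        # stand-in for util.readexactly: read exactly n bytes or fail
        d = stream.read(n)
        assert len(d) == n
        return d

    def frame(payload):
        # mirrors chunkheader(): the length prefix includes its own 4 bytes
        return struct.pack(">l", len(payload) + 4) + payload

    stream = io.BytesIO(frame(b'example') + struct.pack(">l", 0))
    length = struct.unpack(">l", _readexactly(stream, 4))[0]
    assert _readexactly(stream, length - 4) == b'example'
    assert struct.unpack(">l", _readexactly(stream, 4))[0] == 0  # closechunk()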
69 69 def writechunks(ui, chunks, filename, vfs=None):
70 70 """Write chunks to a file and return its filename.
71 71
72 72 The stream is assumed to be a bundle file.
73 73 Existing files will not be overwritten.
74 74 If no filename is specified, a temporary file is created.
75 75 """
76 76 fh = None
77 77 cleanup = None
78 78 try:
79 79 if filename:
80 80 if vfs:
81 81 fh = vfs.open(filename, "wb")
82 82 else:
83 83 # Increase default buffer size because default is usually
84 84 # small (4k is common on Linux).
85 85 fh = open(filename, "wb", 131072)
86 86 else:
87 87 fd, filename = pycompat.mkstemp(prefix="hg-bundle-", suffix=".hg")
88 88 fh = os.fdopen(fd, r"wb")
89 89 cleanup = filename
90 90 for c in chunks:
91 91 fh.write(c)
92 92 cleanup = None
93 93 return filename
94 94 finally:
95 95 if fh is not None:
96 96 fh.close()
97 97 if cleanup is not None:
98 98 if filename and vfs:
99 99 vfs.unlink(cleanup)
100 100 else:
101 101 os.unlink(cleanup)
102 102
103 103 class cg1unpacker(object):
104 104 """Unpacker for cg1 changegroup streams.
105 105
106 106 A changegroup unpacker handles the framing of the revision data in
107 107 the wire format. Most consumers will want to use the apply()
108 108 method to add the changes from the changegroup to a repository.
109 109
110 110 If you're forwarding a changegroup unmodified to another consumer,
111 111 use getchunks(), which returns an iterator of changegroup
112 112 chunks. This is mostly useful for cases where you need to know the
113 113 data stream has ended by observing the end of the changegroup.
114 114
115 115 deltachunk() is useful only if you're applying delta data. Most
116 116 consumers should prefer apply() instead.
117 117
118 118 A few other public methods exist. Those are used only for
119 119 bundlerepo and some debug commands - their use is discouraged.
120 120 """
121 121 deltaheader = _CHANGEGROUPV1_DELTA_HEADER
122 122 deltaheadersize = deltaheader.size
123 123 version = '01'
124 124 _grouplistcount = 1 # One list of files after the manifests
125 125
126 126 def __init__(self, fh, alg, extras=None):
127 127 if alg is None:
128 128 alg = 'UN'
129 129 if alg not in util.compengines.supportedbundletypes:
130 130 raise error.Abort(_('unknown stream compression type: %s')
131 131 % alg)
132 132 if alg == 'BZ':
133 133 alg = '_truncatedBZ'
134 134
135 135 compengine = util.compengines.forbundletype(alg)
136 136 self._stream = compengine.decompressorreader(fh)
137 137 self._type = alg
138 138 self.extras = extras or {}
139 139 self.callback = None
140 140
141 141 # These methods (compressed, read, seek, tell) all appear to only
142 142 # be used by bundlerepo, but it's a little hard to tell.
143 143 def compressed(self):
144 144 return self._type is not None and self._type != 'UN'
145 145 def read(self, l):
146 146 return self._stream.read(l)
147 147 def seek(self, pos):
148 148 return self._stream.seek(pos)
149 149 def tell(self):
150 150 return self._stream.tell()
151 151 def close(self):
152 152 return self._stream.close()
153 153
154 154 def _chunklength(self):
155 155 d = readexactly(self._stream, 4)
156 156 l = struct.unpack(">l", d)[0]
157 157 if l <= 4:
158 158 if l:
159 159 raise error.Abort(_("invalid chunk length %d") % l)
160 160 return 0
161 161 if self.callback:
162 162 self.callback()
163 163 return l - 4
164 164
165 165 def changelogheader(self):
166 166 """v10 does not have a changelog header chunk"""
167 167 return {}
168 168
169 169 def manifestheader(self):
170 170 """v10 does not have a manifest header chunk"""
171 171 return {}
172 172
173 173 def filelogheader(self):
174 174 """return the header of the filelogs chunk, v10 only has the filename"""
175 175 l = self._chunklength()
176 176 if not l:
177 177 return {}
178 178 fname = readexactly(self._stream, l)
179 179 return {'filename': fname}
180 180
181 181 def _deltaheader(self, headertuple, prevnode):
182 182 node, p1, p2, cs = headertuple
183 183 if prevnode is None:
184 184 deltabase = p1
185 185 else:
186 186 deltabase = prevnode
187 187 flags = 0
188 188 return node, p1, p2, deltabase, cs, flags
189 189
190 190 def deltachunk(self, prevnode):
191 191 l = self._chunklength()
192 192 if not l:
193 193 return {}
194 194 headerdata = readexactly(self._stream, self.deltaheadersize)
195 195 header = self.deltaheader.unpack(headerdata)
196 196 delta = readexactly(self._stream, l - self.deltaheadersize)
197 197 node, p1, p2, deltabase, cs, flags = self._deltaheader(header, prevnode)
198 198 return (node, p1, p2, cs, deltabase, delta, flags)
199 199
200 200 def getchunks(self):
201 201 """returns all the chunks contains in the bundle
202 202
203 203 Used when you need to forward the binary stream to a file or another
204 204 network API. To do so, it parses the changegroup data; otherwise it would
205 205 block in the sshrepo case because it doesn't know the end of the stream.
206 206 """
207 207 # For changegroup 1 and 2, we expect 3 parts: changelog, manifestlog,
208 208 # and a list of filelogs. For changegroup 3, we expect 4 parts:
209 209 # changelog, manifestlog, a list of tree manifestlogs, and a list of
210 210 # filelogs.
211 211 #
212 212 # Changelog and manifestlog parts are terminated with empty chunks. The
213 213 # tree and file parts are a list of entry sections. Each entry section
214 214 # is a series of chunks terminating in an empty chunk. The list of these
215 215 # entry sections is terminated in yet another empty chunk, so we know
216 216 # we've reached the end of the tree/file list when we reach an empty
217 217 # chunk that was preceded by no non-empty chunks.
218 218
219 219 parts = 0
220 220 while parts < 2 + self._grouplistcount:
221 221 noentries = True
222 222 while True:
223 223 chunk = getchunk(self)
224 224 if not chunk:
225 225 # The first two empty chunks represent the end of the
226 226 # changelog and the manifestlog portions. The remaining
227 227 # empty chunks represent either A) the end of individual
228 228 # tree or file entries in the file list, or B) the end of
229 229 # the entire list. It's the end of the entire list if there
230 230 # were no entries (i.e. noentries is True).
231 231 if parts < 2:
232 232 parts += 1
233 233 elif noentries:
234 234 parts += 1
235 235 break
236 236 noentries = False
237 237 yield chunkheader(len(chunk))
238 238 pos = 0
239 239 while pos < len(chunk):
240 240 next = pos + 2**20
241 241 yield chunk[pos:next]
242 242 pos = next
243 243 yield closechunk()
244 244
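Concretely, the part structure that getchunks() walks looks like this for a version 01/02 stream, where each "empty" is a zero-length chunk:

    changelog:  chunk ... chunk empty
    manifests:  chunk ... chunk empty
    filelogs:   (filename-chunk chunk ... chunk empty)* empty

Version 03 inserts one more section with the filelog shape (tree manifests) between the manifests and the filelogs, which is why cg3unpacker sets _grouplistcount to 2.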
245 245 def _unpackmanifests(self, repo, revmap, trp, prog):
246 246 self.callback = prog.increment
247 247 # no need to check for empty manifest group here:
248 248 # if the result of the merge of 1 and 2 is the same in 3 and 4,
249 249 # no new manifest will be created and the manifest group will
250 250 # be empty during the pull
251 251 self.manifestheader()
252 252 deltas = self.deltaiter()
253 253 repo.manifestlog.addgroup(deltas, revmap, trp)
254 254 prog.complete()
255 255 self.callback = None
256 256
257 257 def apply(self, repo, tr, srctype, url, targetphase=phases.draft,
258 258 expectedtotal=None):
259 259 """Add the changegroup returned by source.read() to this repo.
260 260 srctype is a string like 'push', 'pull', or 'unbundle'. url is
261 261 the URL of the repo where this changegroup is coming from.
262 262
263 263 Return an integer summarizing the change to this repo:
264 264 - nothing changed or no source: 0
265 265 - more heads than before: 1+added heads (2..n)
266 266 - fewer heads than before: -1-removed heads (-2..-n)
267 267 - number of heads stays the same: 1
268 268 """
269 269 repo = repo.unfiltered()
270 270 def csmap(x):
271 271 repo.ui.debug("add changeset %s\n" % short(x))
272 272 return len(cl)
273 273
274 274 def revmap(x):
275 275 return cl.rev(x)
276 276
277 277 changesets = files = revisions = 0
278 278
279 279 try:
280 280 # The transaction may already carry source information. In this
281 281 # case we use the top level data. We overwrite the argument
282 282 # because we need to use the top level value (if it exists)
283 283 # in this function.
284 284 srctype = tr.hookargs.setdefault('source', srctype)
285 285 url = tr.hookargs.setdefault('url', url)
286 286 repo.hook('prechangegroup',
287 287 throw=True, **pycompat.strkwargs(tr.hookargs))
288 288
289 289 # write changelog data to temp files so concurrent readers
290 290 # will not see an inconsistent view
291 291 cl = repo.changelog
292 292 cl.delayupdate(tr)
293 293 oldheads = set(cl.heads())
294 294
295 295 trp = weakref.proxy(tr)
296 296 # pull off the changeset group
297 297 repo.ui.status(_("adding changesets\n"))
298 298 clstart = len(cl)
299 299 progress = repo.ui.makeprogress(_('changesets'), unit=_('chunks'),
300 300 total=expectedtotal)
301 301 self.callback = progress.increment
302 302
303 303 efiles = set()
304 304 def onchangelog(cl, node):
305 305 efiles.update(cl.readfiles(node))
306 306
307 307 self.changelogheader()
308 308 deltas = self.deltaiter()
309 309 cgnodes = cl.addgroup(deltas, csmap, trp, addrevisioncb=onchangelog)
310 310 efiles = len(efiles)
311 311
312 312 if not cgnodes:
313 313 repo.ui.develwarn('applied empty changegroup',
314 314 config='warn-empty-changegroup')
315 315 clend = len(cl)
316 316 changesets = clend - clstart
317 317 progress.complete()
318 318 self.callback = None
319 319
320 320 # pull off the manifest group
321 321 repo.ui.status(_("adding manifests\n"))
322 322 # We know that we'll never have more manifests than we had
323 323 # changesets.
324 324 progress = repo.ui.makeprogress(_('manifests'), unit=_('chunks'),
325 325 total=changesets)
326 326 self._unpackmanifests(repo, revmap, trp, progress)
327 327
328 328 needfiles = {}
329 329 if repo.ui.configbool('server', 'validate'):
330 330 cl = repo.changelog
331 331 ml = repo.manifestlog
332 332 # validate incoming csets have their manifests
333 333 for cset in pycompat.xrange(clstart, clend):
334 334 mfnode = cl.changelogrevision(cset).manifest
335 335 mfest = ml[mfnode].readdelta()
336 336 # store file cgnodes we must see
337 337 for f, n in mfest.iteritems():
338 338 needfiles.setdefault(f, set()).add(n)
339 339
340 340 # process the files
341 341 repo.ui.status(_("adding file changes\n"))
342 342 newrevs, newfiles = _addchangegroupfiles(
343 343 repo, self, revmap, trp, efiles, needfiles)
344 344 revisions += newrevs
345 345 files += newfiles
346 346
347 347 deltaheads = 0
348 348 if oldheads:
349 349 heads = cl.heads()
350 350 deltaheads = len(heads) - len(oldheads)
351 351 for h in heads:
352 352 if h not in oldheads and repo[h].closesbranch():
353 353 deltaheads -= 1
354 354 htext = ""
355 355 if deltaheads:
356 356 htext = _(" (%+d heads)") % deltaheads
357 357
358 358 repo.ui.status(_("added %d changesets"
359 359 " with %d changes to %d files%s\n")
360 360 % (changesets, revisions, files, htext))
361 361 repo.invalidatevolatilesets()
362 362
363 363 if changesets > 0:
364 364 if 'node' not in tr.hookargs:
365 365 tr.hookargs['node'] = hex(cl.node(clstart))
366 366 tr.hookargs['node_last'] = hex(cl.node(clend - 1))
367 367 hookargs = dict(tr.hookargs)
368 368 else:
369 369 hookargs = dict(tr.hookargs)
370 370 hookargs['node'] = hex(cl.node(clstart))
371 371 hookargs['node_last'] = hex(cl.node(clend - 1))
372 372 repo.hook('pretxnchangegroup',
373 373 throw=True, **pycompat.strkwargs(hookargs))
374 374
375 375 added = [cl.node(r) for r in pycompat.xrange(clstart, clend)]
376 376 phaseall = None
377 377 if srctype in ('push', 'serve'):
378 378 # Old servers can not push the boundary themselves.
379 379 # New servers won't push the boundary if changeset already
380 380 # exists locally as secret
381 381 #
382 382 # We should not use added here but the list of all changes in
383 383 # the bundle
384 384 if repo.publishing():
385 385 targetphase = phaseall = phases.public
386 386 else:
387 387 # closer target phase computation
388 388
389 389 # Those changesets have been pushed from the
390 390 # outside, their phases are going to be pushed
391 391 # alongside. Therefore `targetphase` is
392 392 # ignored.
393 393 targetphase = phaseall = phases.draft
394 394 if added:
395 395 phases.registernew(repo, tr, targetphase, added)
396 396 if phaseall is not None:
397 397 phases.advanceboundary(repo, tr, phaseall, cgnodes)
398 398
399 399 if changesets > 0:
400 400
401 401 def runhooks():
402 402 # These hooks run when the lock releases, not when the
403 403 # transaction closes. So it's possible for the changelog
404 404 # to have changed since we last saw it.
405 405 if clstart >= len(repo):
406 406 return
407 407
408 408 repo.hook("changegroup", **pycompat.strkwargs(hookargs))
409 409
410 410 for n in added:
411 411 args = hookargs.copy()
412 412 args['node'] = hex(n)
413 413 del args['node_last']
414 414 repo.hook("incoming", **pycompat.strkwargs(args))
415 415
416 416 newheads = [h for h in repo.heads()
417 417 if h not in oldheads]
418 418 repo.ui.log("incoming",
419 419 "%d incoming changes - new heads: %s\n",
420 420 len(added),
421 421 ', '.join([hex(c[:6]) for c in newheads]))
422 422
423 423 tr.addpostclose('changegroup-runhooks-%020i' % clstart,
424 424 lambda tr: repo._afterlock(runhooks))
425 425 finally:
426 426 repo.ui.flush()
427 427 # never return 0 here:
428 428 if deltaheads < 0:
429 429 ret = deltaheads - 1
430 430 else:
431 431 ret = deltaheads + 1
432 432 return ret
433 433
434 434 def deltaiter(self):
435 435 """
436 436 returns an iterator of the deltas in this changegroup
437 437
438 438 Useful for passing to the underlying storage system to be stored.
439 439 """
440 440 chain = None
441 441 for chunkdata in iter(lambda: self.deltachunk(chain), {}):
442 442 # Chunkdata: (node, p1, p2, cs, deltabase, delta, flags)
443 443 yield chunkdata
444 444 chain = chunkdata[0]
445 445
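The delta headers parsed by deltachunk() differ across versions only in their trailing fields, per the structs defined at the top of the file: cg1 carries node, p1, p2 and the linking changeset node (the delta base is implied: p1, or the previous node in the stream); cg2 adds an explicit deltabase; cg3 adds 16-bit revlog flags. A small sketch of packing and unpacking a cg3 header with made-up node values:

    import struct

    cg3 = struct.Struct(">20s20s20s20s20sH")
    node, p1, p2, deltabase, cs = (b'\x01' * 20, b'\x02' * 20, b'\x03' * 20,
                                   b'\x04' * 20, b'\x05' * 20)
    hdr = cg3.pack(node, p1, p2, deltabase, cs, 0)  # flags=0
    assert len(hdr) == cg3.size == 102
    assert cg3.unpack(hdr) == (node, p1, p2, deltabase, cs, 0)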
446 446 class cg2unpacker(cg1unpacker):
447 447 """Unpacker for cg2 streams.
448 448
449 449 cg2 streams add support for generaldelta, so the delta header
450 450 format is slightly different. All other features about the data
451 451 remain the same.
452 452 """
453 453 deltaheader = _CHANGEGROUPV2_DELTA_HEADER
454 454 deltaheadersize = deltaheader.size
455 455 version = '02'
456 456
457 457 def _deltaheader(self, headertuple, prevnode):
458 458 node, p1, p2, deltabase, cs = headertuple
459 459 flags = 0
460 460 return node, p1, p2, deltabase, cs, flags
461 461
462 462 class cg3unpacker(cg2unpacker):
463 463 """Unpacker for cg3 streams.
464 464
465 465 cg3 streams add support for exchanging treemanifests and revlog
466 466 flags. It adds the revlog flags to the delta header and an empty chunk
467 467 separating manifests and files.
468 468 """
469 469 deltaheader = _CHANGEGROUPV3_DELTA_HEADER
470 470 deltaheadersize = deltaheader.size
471 471 version = '03'
472 472 _grouplistcount = 2 # One list of manifests and one list of files
473 473
474 474 def _deltaheader(self, headertuple, prevnode):
475 475 node, p1, p2, deltabase, cs, flags = headertuple
476 476 return node, p1, p2, deltabase, cs, flags
477 477
478 478 def _unpackmanifests(self, repo, revmap, trp, prog):
479 479 super(cg3unpacker, self)._unpackmanifests(repo, revmap, trp, prog)
480 480 for chunkdata in iter(self.filelogheader, {}):
481 481 # If we get here, there are directory manifests in the changegroup
482 482 d = chunkdata["filename"]
483 483 repo.ui.debug("adding %s revisions\n" % d)
484 484 dirlog = repo.manifestlog._revlog.dirlog(d)
485 485 deltas = self.deltaiter()
486 486 if not dirlog.addgroup(deltas, revmap, trp):
487 487 raise error.Abort(_("received dir revlog group is empty"))
488 488
489 489 class headerlessfixup(object):
490 490 def __init__(self, fh, h):
491 491 self._h = h
492 492 self._fh = fh
493 493 def read(self, n):
494 494 if self._h:
495 495 d, self._h = self._h[:n], self._h[n:]
496 496 if len(d) < n:
497 497 d += readexactly(self._fh, n - len(d))
498 498 return d
499 499 return readexactly(self._fh, n)
500 500
501 501 @attr.s(slots=True, frozen=True)
502 502 class revisiondelta(object):
503 503 """Describes a delta entry in a changegroup.
504 504
505 505 Captured data is sufficient to serialize the delta into multiple
506 506 formats.
507 507 """
508 508 # 20 byte node of this revision.
509 509 node = attr.ib()
510 510 # 20 byte nodes of parent revisions.
511 511 p1node = attr.ib()
512 512 p2node = attr.ib()
513 513 # 20 byte node of node this delta is against.
514 514 basenode = attr.ib()
515 515 # 20 byte node of changeset revision this delta is associated with.
516 516 linknode = attr.ib()
517 517 # 2 bytes of flags to apply to revision data.
518 518 flags = attr.ib()
519 519 # Iterable of chunks holding raw delta data.
520 520 deltachunks = attr.ib()
521 521
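As _revisiondeltanormal() below shows, a full text sent against the null revision is represented as a trivial diff header followed by the raw data. An illustrative instance (nullid and mdiff as imported at the top of the module; node values made up):

    data = b'full revision text'
    delta = revisiondelta(
        node=b'\x01' * 20,
        p1node=b'\x02' * 20,
        p2node=nullid,
        basenode=nullid,
        linknode=b'\x03' * 20,
        flags=0,
        deltachunks=(mdiff.trivialdiffheader(len(data)), data),
    )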
522 522 class cg1packer(object):
523 523 def __init__(self, repo, filematcher, version, builddeltaheader,
524 manifestsend,
524 525 bundlecaps=None):
525 526 """Given a source repo, construct a bundler.
526 527
527 528 filematcher is a matcher that matches on files to include in the
528 529 changegroup. Used to facilitate sparse changegroups.
529 530
530 531 builddeltaheader is a callable that constructs the header for a group
531 532 delta.
532 533
534 manifestsend is a chunk to send after manifests have been fully emitted.
535
533 536 bundlecaps is optional and can be used to specify the set of
534 537 capabilities which can be used to build the bundle. While bundlecaps is
535 538 unused in core Mercurial, extensions rely on this feature to communicate
536 539 capabilities to customize the changegroup packer.
537 540 """
538 541 assert filematcher
539 542 self._filematcher = filematcher
540 543
541 544 self.version = version
542 545 self._builddeltaheader = builddeltaheader
546 self._manifestsend = manifestsend
543 547
544 548 # Set of capabilities we can use to build the bundle.
545 549 if bundlecaps is None:
546 550 bundlecaps = set()
547 551 self._bundlecaps = bundlecaps
548 552 # experimental config: bundle.reorder
549 553 reorder = repo.ui.config('bundle', 'reorder')
550 554 if reorder == 'auto':
551 555 reorder = None
552 556 else:
553 557 reorder = stringutil.parsebool(reorder)
554 558 self._repo = repo
555 559 self._reorder = reorder
556 560 if self._repo.ui.verbose and not self._repo.ui.debugflag:
557 561 self._verbosenote = self._repo.ui.note
558 562 else:
559 563 self._verbosenote = lambda s: None
560 564
561 565 def close(self):
562 566 # Ellipses serving mode.
563 567 getattr(self, 'clrev_to_localrev', {}).clear()
564 568 if getattr(self, 'next_clrev_to_localrev', {}):
565 569 self.clrev_to_localrev = self.next_clrev_to_localrev
566 570 del self.next_clrev_to_localrev
567 571 self.changelog_done = True
568 572
569 573 return closechunk()
570 574
571 575 def fileheader(self, fname):
572 576 return chunkheader(len(fname)) + fname
573 577
574 578 # Extracted both for clarity and for overriding in extensions.
575 579 def _sortgroup(self, store, nodelist, lookup):
576 580 """Sort nodes for change group and turn them into revnums."""
577 581 # Ellipses serving mode.
578 582 #
579 583 # In a perfect world, we'd generate better ellipsis-ified graphs
580 584 # for non-changelog revlogs. In practice, we haven't started doing
581 585 # that yet, so the resulting DAGs for the manifestlog and filelogs
582 586 # are actually full of bogus parentage on all the ellipsis
583 587 # nodes. This has the side effect that, while the contents are
584 588 # correct, the individual DAGs might be completely out of whack in
585 589 # a case like 882681bc3166 and its ancestors (back about 10
586 590 # revisions or so) in the main hg repo.
587 591 #
588 592 # The one invariant we *know* holds is that the new (potentially
589 593 # bogus) DAG shape will be valid if we order the nodes in the
590 594 # order that they're introduced in dramatis personae by the
591 595 # changelog, so what we do is we sort the non-changelog histories
592 596 # by the order in which they are used by the changelog.
593 597 if util.safehasattr(self, 'full_nodes') and self.clnode_to_rev:
594 598 key = lambda n: self.clnode_to_rev[lookup(n)]
595 599 return [store.rev(n) for n in sorted(nodelist, key=key)]
596 600
597 601 # for generaldelta revlogs, we linearize the revs; this will both be
598 602 # much quicker and generate a much smaller bundle
599 603 if (store._generaldelta and self._reorder is None) or self._reorder:
600 604 dag = dagutil.revlogdag(store)
601 605 return dag.linearize(set(store.rev(n) for n in nodelist))
602 606 else:
603 607 return sorted([store.rev(n) for n in nodelist])
604 608
605 609 def group(self, nodelist, store, lookup, units=None):
606 610 """Calculate a delta group, yielding a sequence of changegroup chunks
607 611 (strings).
608 612
609 613 Given a list of changeset revs, return a set of deltas and
610 614 metadata corresponding to nodes. The first delta is
611 615 first parent(nodelist[0]) -> nodelist[0], the receiver is
612 616 guaranteed to have this parent as it has all history before
613 617 these changesets. In the case firstparent is nullrev the
614 618 changegroup starts with a full revision.
615 619
616 620 If units is not None, progress detail will be generated, units specifies
617 621 the type of revlog that is touched (changelog, manifest, etc.).
618 622 """
619 623 # if we don't have any revisions touched by these changesets, bail
620 624 if len(nodelist) == 0:
621 625 yield self.close()
622 626 return
623 627
624 628 revs = self._sortgroup(store, nodelist, lookup)
625 629
626 630 # add the parent of the first rev
627 631 p = store.parentrevs(revs[0])[0]
628 632 revs.insert(0, p)
629 633
630 634 # build deltas
631 635 progress = None
632 636 if units is not None:
633 637 progress = self._repo.ui.makeprogress(_('bundling'), unit=units,
634 638 total=(len(revs) - 1))
635 639 for r in pycompat.xrange(len(revs) - 1):
636 640 if progress:
637 641 progress.update(r + 1)
638 642 prev, curr = revs[r], revs[r + 1]
639 643 linknode = lookup(store.node(curr))
640 644 for c in self.revchunk(store, curr, prev, linknode):
641 645 yield c
642 646
643 647 if progress:
644 648 progress.complete()
645 649 yield self.close()
646 650
647 651 # filter any nodes that claim to be part of the known set
648 652 def prune(self, store, missing, commonrevs):
649 653 # TODO this violates storage abstraction for manifests.
650 654 if isinstance(store, manifest.manifestrevlog):
651 655 if not self._filematcher.visitdir(store._dir[:-1] or '.'):
652 656 return []
653 657
654 658 rr, rl = store.rev, store.linkrev
655 659 return [n for n in missing if rl(rr(n)) not in commonrevs]
656 660
657 661 def _packmanifests(self, dir, mfnodes, lookuplinknode):
658 662 """Pack flat manifests into a changegroup stream."""
659 663 assert not dir
660 664 for chunk in self.group(mfnodes, self._repo.manifestlog._revlog,
661 665 lookuplinknode, units=_('manifests')):
662 666 yield chunk
663 667
664 def _manifestsdone(self):
665 return ''
666
667 668 def generate(self, commonrevs, clnodes, fastpathlinkrev, source):
668 669 '''yield a sequence of changegroup chunks (strings)'''
669 670 repo = self._repo
670 671 cl = repo.changelog
671 672
672 673 clrevorder = {}
673 674 mfs = {} # needed manifests
674 675 fnodes = {} # needed file nodes
675 676 mfl = repo.manifestlog
676 677 # TODO violates storage abstraction.
677 678 mfrevlog = mfl._revlog
678 679 changedfiles = set()
679 680
680 681 ellipsesmode = util.safehasattr(self, 'full_nodes')
681 682
682 683 # Callback for the changelog, used to collect changed files and
683 684 # manifest nodes.
684 685 # Returns the linkrev node (identity in the changelog case).
685 686 def lookupcl(x):
686 687 c = cl.read(x)
687 688 clrevorder[x] = len(clrevorder)
688 689
689 690 if ellipsesmode:
690 691 # Only update mfs if x is going to be sent. Otherwise we
691 692 # end up with bogus linkrevs specified for manifests and
692 693 # we skip some manifest nodes that we should otherwise
693 694 # have sent.
694 695 if (x in self.full_nodes
695 696 or cl.rev(x) in self.precomputed_ellipsis):
696 697 n = c[0]
697 698 # Record the first changeset introducing this manifest
698 699 # version.
699 700 mfs.setdefault(n, x)
700 701 # Set this narrow-specific dict so we have the lowest
701 702 # manifest revnum to look up for this cl revnum. (Part of
702 703 # mapping changelog ellipsis parents to manifest ellipsis
703 704 # parents)
704 705 self.next_clrev_to_localrev.setdefault(cl.rev(x),
705 706 mfrevlog.rev(n))
706 707 # We can't trust the changed files list in the changeset if the
707 708 # client requested a shallow clone.
708 709 if self.is_shallow:
709 710 changedfiles.update(mfl[c[0]].read().keys())
710 711 else:
711 712 changedfiles.update(c[3])
712 713 else:
713 714
714 715 n = c[0]
715 716 # record the first changeset introducing this manifest version
716 717 mfs.setdefault(n, x)
717 718 # Record a complete list of potentially-changed files in
718 719 # this manifest.
719 720 changedfiles.update(c[3])
720 721
721 722 return x
722 723
723 724 self._verbosenote(_('uncompressed size of bundle content:\n'))
724 725 size = 0
725 726 for chunk in self.group(clnodes, cl, lookupcl, units=_('changesets')):
726 727 size += len(chunk)
727 728 yield chunk
728 729 self._verbosenote(_('%8.i (changelog)\n') % size)
729 730
730 731 # We need to make sure that the linkrev in the changegroup refers to
731 732 # the first changeset that introduced the manifest or file revision.
732 733 # The fastpath is usually safer than the slowpath, because the filelogs
733 734 # are walked in revlog order.
734 735 #
735 736 # When taking the slowpath with reorder=None and the manifest revlog
736 737 # uses generaldelta, the manifest may be walked in the "wrong" order.
737 738 # Without 'clrevorder', we would get an incorrect linkrev (see fix in
738 739 # cc0ff93d0c0c).
739 740 #
740 741 # When taking the fastpath, we are only vulnerable to reordering
741 742 # of the changelog itself. The changelog never uses generaldelta, so
742 743 # it is only reordered when reorder=True. To handle this case, we
743 744 # simply take the slowpath, which already has the 'clrevorder' logic.
744 745 # This was also fixed in cc0ff93d0c0c.
745 746 fastpathlinkrev = fastpathlinkrev and not self._reorder
746 747 # Treemanifests don't work correctly with fastpathlinkrev
747 748 # either, because we don't discover which directory nodes to
748 749 # send along with files. This could probably be fixed.
749 750 fastpathlinkrev = fastpathlinkrev and (
750 751 'treemanifest' not in repo.requirements)
751 752
752 753 for chunk in self.generatemanifests(commonrevs, clrevorder,
753 754 fastpathlinkrev, mfs, fnodes, source):
754 755 yield chunk
755 756
756 757 if ellipsesmode:
757 758 mfdicts = None
758 759 if self.is_shallow:
759 760 mfdicts = [(self._repo.manifestlog[n].read(), lr)
760 761 for (n, lr) in mfs.iteritems()]
761 762
762 763 mfs.clear()
763 764 clrevs = set(cl.rev(x) for x in clnodes)
764 765
765 766 if not fastpathlinkrev:
766 767 def linknodes(unused, fname):
767 768 return fnodes.get(fname, {})
768 769 else:
769 770 cln = cl.node
770 771 def linknodes(filerevlog, fname):
771 772 llr = filerevlog.linkrev
772 773 fln = filerevlog.node
773 774 revs = ((r, llr(r)) for r in filerevlog)
774 775 return dict((fln(r), cln(lr)) for r, lr in revs if lr in clrevs)
775 776
776 777 if ellipsesmode:
777 778 # We need to pass the mfdicts variable down into
778 779 # generatefiles(), but more than one command might have
779 780 # wrapped generatefiles so we can't modify the function
780 781 # signature. Instead, we pass the data to ourselves using an
781 782 # instance attribute. I'm sorry.
782 783 self._mfdicts = mfdicts
783 784
784 785 for chunk in self.generatefiles(changedfiles, linknodes, commonrevs,
785 786 source):
786 787 yield chunk
787 788
788 789 yield self.close()
789 790
790 791 if clnodes:
791 792 repo.hook('outgoing', node=hex(clnodes[0]), source=source)
792 793
793 794 def generatemanifests(self, commonrevs, clrevorder, fastpathlinkrev, mfs,
794 795 fnodes, source):
795 796 """Returns an iterator of changegroup chunks containing manifests.
796 797
797 798 `source` is unused here, but is used by extensions like remotefilelog to
798 799 change what is sent based in pulls vs pushes, etc.
799 800 """
800 801 repo = self._repo
801 802 mfl = repo.manifestlog
802 803 dirlog = mfl._revlog.dirlog
803 804 tmfnodes = {'': mfs}
804 805
805 806 # Callback for the manifest, used to collect linkrevs for filelog
806 807 # revisions.
807 808 # Returns the linkrev node (collected in lookupcl).
808 809 def makelookupmflinknode(dir, nodes):
809 810 if fastpathlinkrev:
810 811 assert not dir
811 812 return mfs.__getitem__
812 813
813 814 def lookupmflinknode(x):
814 815 """Callback for looking up the linknode for manifests.
815 816
816 817 Returns the linkrev node for the specified manifest.
817 818
818 819 SIDE EFFECT:
819 820
820 821 1) fclnodes gets populated with the list of relevant
821 822 file nodes if we're not using fastpathlinkrev
822 823 2) When treemanifests are in use, collects treemanifest nodes
823 824 to send
824 825
825 826 Note that this means manifests must be completely sent to
826 827 the client before you can trust the list of files and
827 828 treemanifests to send.
828 829 """
829 830 clnode = nodes[x]
830 831 mdata = mfl.get(dir, x).readfast(shallow=True)
831 832 for p, n, fl in mdata.iterentries():
832 833 if fl == 't': # subdirectory manifest
833 834 subdir = dir + p + '/'
834 835 tmfclnodes = tmfnodes.setdefault(subdir, {})
835 836 tmfclnode = tmfclnodes.setdefault(n, clnode)
836 837 if clrevorder[clnode] < clrevorder[tmfclnode]:
837 838 tmfclnodes[n] = clnode
838 839 else:
839 840 f = dir + p
840 841 fclnodes = fnodes.setdefault(f, {})
841 842 fclnode = fclnodes.setdefault(n, clnode)
842 843 if clrevorder[clnode] < clrevorder[fclnode]:
843 844 fclnodes[n] = clnode
844 845 return clnode
845 846 return lookupmflinknode
846 847
847 848 size = 0
848 849 while tmfnodes:
849 850 dir, nodes = tmfnodes.popitem()
850 851 prunednodes = self.prune(dirlog(dir), nodes, commonrevs)
851 852 if not dir or prunednodes:
852 853 for x in self._packmanifests(dir, prunednodes,
853 854 makelookupmflinknode(dir, nodes)):
854 855 size += len(x)
855 856 yield x
856 857 self._verbosenote(_('%8.i (manifests)\n') % size)
857 yield self._manifestsdone()
858 yield self._manifestsend
858 859
859 860 # The 'source' parameter is useful for extensions
860 861 def generatefiles(self, changedfiles, linknodes, commonrevs, source):
861 862 changedfiles = list(filter(self._filematcher, changedfiles))
862 863
863 864 if getattr(self, 'is_shallow', False):
864 865 # See comment in generate() for why this sadness is a thing.
865 866 mfdicts = self._mfdicts
866 867 del self._mfdicts
867 868 # In a shallow clone, the linknodes callback needs to also include
868 869 # those file nodes that are in the manifests we sent but weren't
869 870 # introduced by those manifests.
870 871 commonctxs = [self._repo[c] for c in commonrevs]
871 872 oldlinknodes = linknodes
872 873 clrev = self._repo.changelog.rev
873 874
874 875 # Defining this function has a side-effect of overriding the
875 876 # function of the same name that was passed in as an argument.
876 877 # TODO have caller pass in appropriate function.
877 878 def linknodes(flog, fname):
878 879 for c in commonctxs:
879 880 try:
880 881 fnode = c.filenode(fname)
881 882 self.clrev_to_localrev[c.rev()] = flog.rev(fnode)
882 883 except error.ManifestLookupError:
883 884 pass
884 885 links = oldlinknodes(flog, fname)
885 886 if len(links) != len(mfdicts):
886 887 for mf, lr in mfdicts:
887 888 fnode = mf.get(fname, None)
888 889 if fnode in links:
889 890 links[fnode] = min(links[fnode], lr, key=clrev)
890 891 elif fnode:
891 892 links[fnode] = lr
892 893 return links
893 894
894 895 return self._generatefiles(changedfiles, linknodes, commonrevs, source)
895 896
896 897 def _generatefiles(self, changedfiles, linknodes, commonrevs, source):
897 898 repo = self._repo
898 899 progress = repo.ui.makeprogress(_('bundling'), unit=_('files'),
899 900 total=len(changedfiles))
900 901 for i, fname in enumerate(sorted(changedfiles)):
901 902 filerevlog = repo.file(fname)
902 903 if not filerevlog:
903 904 raise error.Abort(_("empty or missing file data for %s") %
904 905 fname)
905 906
906 907 linkrevnodes = linknodes(filerevlog, fname)
907 908 # Lookup for filenodes, we collected the linkrev nodes above in the
908 909 # fastpath case and with lookupmf in the slowpath case.
909 910 def lookupfilelog(x):
910 911 return linkrevnodes[x]
911 912
912 913 filenodes = self.prune(filerevlog, linkrevnodes, commonrevs)
913 914 if filenodes:
914 915 progress.update(i + 1, item=fname)
915 916 h = self.fileheader(fname)
916 917 size = len(h)
917 918 yield h
918 919 for chunk in self.group(filenodes, filerevlog, lookupfilelog):
919 920 size += len(chunk)
920 921 yield chunk
921 922 self._verbosenote(_('%8.i %s\n') % (size, fname))
922 923 progress.complete()
923 924
924 925 def deltaparent(self, store, rev, p1, p2, prev):
925 926 if not store.candelta(prev, rev):
926 927 raise error.ProgrammingError('cg1 should not be used in this case')
927 928 return prev
928 929
929 930 def revchunk(self, store, rev, prev, linknode):
930 931 if util.safehasattr(self, 'full_nodes'):
931 932 fn = self._revisiondeltanarrow
932 933 else:
933 934 fn = self._revisiondeltanormal
934 935
935 936 delta = fn(store, rev, prev, linknode)
936 937 if not delta:
937 938 return
938 939
939 940 meta = self._builddeltaheader(delta)
940 941 l = len(meta) + sum(len(x) for x in delta.deltachunks)
941 942
942 943 yield chunkheader(l)
943 944 yield meta
944 945 for x in delta.deltachunks:
945 946 yield x
946 947
947 948 def _revisiondeltanormal(self, store, rev, prev, linknode):
948 949 node = store.node(rev)
949 950 p1, p2 = store.parentrevs(rev)
950 951 base = self.deltaparent(store, rev, p1, p2, prev)
951 952
952 953 prefix = ''
953 954 if store.iscensored(base) or store.iscensored(rev):
954 955 try:
955 956 delta = store.revision(node, raw=True)
956 957 except error.CensoredNodeError as e:
957 958 delta = e.tombstone
958 959 if base == nullrev:
959 960 prefix = mdiff.trivialdiffheader(len(delta))
960 961 else:
961 962 baselen = store.rawsize(base)
962 963 prefix = mdiff.replacediffheader(baselen, len(delta))
963 964 elif base == nullrev:
964 965 delta = store.revision(node, raw=True)
965 966 prefix = mdiff.trivialdiffheader(len(delta))
966 967 else:
967 968 delta = store.revdiff(base, rev)
968 969 p1n, p2n = store.parents(node)
969 970
970 971 return revisiondelta(
971 972 node=node,
972 973 p1node=p1n,
973 974 p2node=p2n,
974 975 basenode=store.node(base),
975 976 linknode=linknode,
976 977 flags=store.flags(rev),
977 978 deltachunks=(prefix, delta),
978 979 )
979 980
980 981 def _revisiondeltanarrow(self, store, rev, prev, linknode):
981 982 # build up some mapping information that's useful later. See
982 983 # the local() nested function below.
983 984 if not self.changelog_done:
984 985 self.clnode_to_rev[linknode] = rev
985 986 linkrev = rev
986 987 self.clrev_to_localrev[linkrev] = rev
987 988 else:
988 989 linkrev = self.clnode_to_rev[linknode]
989 990 self.clrev_to_localrev[linkrev] = rev
990 991
991 992 # This is a node to send in full, because the changeset it
992 993 # corresponds to was a full changeset.
993 994 if linknode in self.full_nodes:
994 995 return self._revisiondeltanormal(store, rev, prev, linknode)
995 996
996 997 # At this point, a node can either be one we should skip or an
997 998 # ellipsis. If it's not an ellipsis, bail immediately.
998 999 if linkrev not in self.precomputed_ellipsis:
999 1000 return
1000 1001
1001 1002 linkparents = self.precomputed_ellipsis[linkrev]
1002 1003 def local(clrev):
1003 1004 """Turn a changelog revnum into a local revnum.
1004 1005
1005 1006 The ellipsis dag is stored as revnums on the changelog,
1006 1007 but when we're producing ellipsis entries for
1007 1008 non-changelog revlogs, we need to turn those numbers into
1008 1009 something local. This does that for us, and during the
1009 1010 changelog sending phase will also expand the stored
1010 1011 mappings as needed.
1011 1012 """
1012 1013 if clrev == nullrev:
1013 1014 return nullrev
1014 1015
1015 1016 if not self.changelog_done:
1016 1017 # If we're doing the changelog, it's possible that we
1017 1018 # have a parent that is already on the client, and we
1018 1019 # need to store some extra mapping information so that
1019 1020 # our contained ellipsis nodes will be able to resolve
1020 1021 # their parents.
1021 1022 if clrev not in self.clrev_to_localrev:
1022 1023 clnode = store.node(clrev)
1023 1024 self.clnode_to_rev[clnode] = clrev
1024 1025 return clrev
1025 1026
1026 1027 # Walk the ellipsis-ized changelog breadth-first looking for a
1027 1028 # change that has been linked from the current revlog.
1028 1029 #
1029 1030 # For a flat manifest revlog only a single step should be necessary
1030 1031 # as all relevant changelog entries are relevant to the flat
1031 1032 # manifest.
1032 1033 #
1033 1034 # For a filelog or tree manifest dirlog however not every changelog
1034 1035 # entry will have been relevant, so we need to skip some changelog
1035 1036 # nodes even after ellipsis-izing.
1036 1037 walk = [clrev]
1037 1038 while walk:
1038 1039 p = walk[0]
1039 1040 walk = walk[1:]
1040 1041 if p in self.clrev_to_localrev:
1041 1042 return self.clrev_to_localrev[p]
1042 1043 elif p in self.full_nodes:
1043 1044 walk.extend([pp for pp in self._repo.changelog.parentrevs(p)
1044 1045 if pp != nullrev])
1045 1046 elif p in self.precomputed_ellipsis:
1046 1047 walk.extend([pp for pp in self.precomputed_ellipsis[p]
1047 1048 if pp != nullrev])
1048 1049 else:
1049 1050 # In this case, we've got an ellipsis with parents
1050 1051 # outside the current bundle (likely an
1051 1052 # incremental pull). We "know" that we can use the
1052 1053 # value of this same revlog at whatever revision
1053 1054 # is pointed to by linknode. "Know" is in scare
1054 1055 # quotes because I haven't done enough examination
1055 1056 # of edge cases to convince myself this is really
1056 1057 # a fact - it works for all the (admittedly
1057 1058 # thorough) cases in our testsuite, but I would be
1058 1059 # somewhat unsurprised to find a case in the wild
1059 1060 # where this breaks down a bit. That said, I don't
1060 1061 # know if it would hurt anything.
1061 1062 for i in pycompat.xrange(rev, 0, -1):
1062 1063 if store.linkrev(i) == clrev:
1063 1064 return i
1064 1065 # We failed to resolve a parent for this node, so
1065 1066 # we crash the changegroup construction.
1066 1067 raise error.Abort(
1067 1068 'unable to resolve parent while packing %r %r'
1068 1069 ' for changeset %r' % (store.indexfile, rev, clrev))
1069 1070
1070 1071 return nullrev
1071 1072
1072 1073 if not linkparents or (
1073 1074 store.parentrevs(rev) == (nullrev, nullrev)):
1074 1075 p1, p2 = nullrev, nullrev
1075 1076 elif len(linkparents) == 1:
1076 1077 p1, = sorted(local(p) for p in linkparents)
1077 1078 p2 = nullrev
1078 1079 else:
1079 1080 p1, p2 = sorted(local(p) for p in linkparents)
1080 1081
1081 1082 n = store.node(rev)
1082 1083 p1n, p2n = store.node(p1), store.node(p2)
1083 1084 flags = store.flags(rev)
1084 1085 flags |= revlog.REVIDX_ELLIPSIS
1085 1086
1086 1087 # TODO: try and actually send deltas for ellipsis data blocks
1087 1088 data = store.revision(n)
1088 1089 diffheader = mdiff.trivialdiffheader(len(data))
1089 1090
1090 1091 return revisiondelta(
1091 1092 node=n,
1092 1093 p1node=p1n,
1093 1094 p2node=p2n,
1094 1095 basenode=nullid,
1095 1096 linknode=linknode,
1096 1097 flags=flags,
1097 1098 deltachunks=(diffheader, data),
1098 1099 )
1099 1100
1100 1101 class cg2packer(cg1packer):
1101 1102 def __init__(self, repo, filematcher, version, builddeltaheader,
1102 bundlecaps=None):
1103 manifestsend, bundlecaps=None):
1103 1104 super(cg2packer, self).__init__(repo, filematcher, version,
1104 builddeltaheader,
1105 builddeltaheader, manifestsend,
1105 1106 bundlecaps=bundlecaps)
1106 1107
1107 1108 if self._reorder is None:
1108 1109 # Since generaldelta is directly supported by cg2, reordering
1109 1110 # generally doesn't help, so we disable it by default (treating
1110 1111 # bundle.reorder=auto just like bundle.reorder=False).
1111 1112 self._reorder = False
1112 1113
1113 1114 def deltaparent(self, store, rev, p1, p2, prev):
1114 1115 # Narrow ellipses mode.
1115 1116 if util.safehasattr(self, 'full_nodes'):
1116 1117 # TODO: send better deltas when in narrow mode.
1117 1118 #
1118 1119 # changegroup.group() loops over revisions to send,
1119 1120 # including revisions we'll skip. What this means is that
1120 1121 # `prev` will be a potentially useless delta base for all
1121 1122 # ellipsis nodes, as the client likely won't have it. In
1122 1123 # the future we should do bookkeeping about which nodes
1123 1124 # have been sent to the client, and try to be
1124 1125 # significantly smarter about delta bases. This is
1125 1126 # slightly tricky because this same code has to work for
1126 1127 # all revlogs, and we don't have the linkrev/linknode here.
1127 1128 return p1
1128 1129
1129 1130 dp = store.deltaparent(rev)
1130 1131 if dp == nullrev and store.storedeltachains:
1131 1132 # Avoid sending full revisions when delta parent is null. Pick prev
1132 1133 # in that case. It's tempting to pick p1 in this case, as p1 will
1133 1134 # be smaller in the common case. However, computing a delta against
1134 1135 # p1 may require resolving the raw text of p1, which could be
1135 1136 # expensive. The revlog caches should have prev cached, meaning
1136 1137 # less CPU for changegroup generation. There is likely room to add
1137 1138 # a flag and/or config option to control this behavior.
1138 1139 base = prev
1139 1140 elif dp == nullrev:
1140 1141 # revlog is configured to use full snapshot for a reason,
1141 1142 # stick to full snapshot.
1142 1143 base = nullrev
1143 1144 elif dp not in (p1, p2, prev):
1144 1145 # Pick prev when we can't be sure remote has the base revision.
1145 1146 return prev
1146 1147 else:
1147 1148 base = dp
1148 1149 if base != nullrev and not store.candelta(base, rev):
1149 1150 base = nullrev
1150 1151 return base
1151 1152
1152 1153 class cg3packer(cg2packer):
1153 1154 def _packmanifests(self, dir, mfnodes, lookuplinknode):
1154 1155 if dir:
1155 1156 yield self.fileheader(dir)
1156 1157
1157 1158 dirlog = self._repo.manifestlog._revlog.dirlog(dir)
1158 1159 for chunk in self.group(mfnodes, dirlog, lookuplinknode,
1159 1160 units=_('manifests')):
1160 1161 yield chunk
1161 1162
1162 def _manifestsdone(self):
1163 return self.close()
1164
1165 1163 def _makecg1packer(repo, filematcher, bundlecaps):
1166 1164 builddeltaheader = lambda d: _CHANGEGROUPV1_DELTA_HEADER.pack(
1167 1165 d.node, d.p1node, d.p2node, d.linknode)
1168 1166
1169 1167 return cg1packer(repo, filematcher, b'01', builddeltaheader,
1168 manifestsend=b'',
1170 1169 bundlecaps=bundlecaps)
1171 1170
1172 1171 def _makecg2packer(repo, filematcher, bundlecaps):
1173 1172 builddeltaheader = lambda d: _CHANGEGROUPV2_DELTA_HEADER.pack(
1174 1173 d.node, d.p1node, d.p2node, d.basenode, d.linknode)
1175 1174
1176 1175 return cg2packer(repo, filematcher, b'02', builddeltaheader,
1176 manifestsend=b'',
1177 1177 bundlecaps=bundlecaps)
1178 1178
1179 1179 def _makecg3packer(repo, filematcher, bundlecaps):
1180 1180 builddeltaheader = lambda d: _CHANGEGROUPV3_DELTA_HEADER.pack(
1181 1181 d.node, d.p1node, d.p2node, d.basenode, d.linknode, d.flags)
1182 1182
1183 1183 return cg3packer(repo, filematcher, b'03', builddeltaheader,
1184 manifestsend=closechunk(),
1184 1185 bundlecaps=bundlecaps)
1185 1186
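The effect of the new constructor argument is visible in the three factories above: cg1 and cg2 pass manifestsend=b'' (the filelog section begins immediately after the manifests close), while cg3 passes manifestsend=closechunk() so that generatemanifests() ends with an explicit empty chunk separating tree manifest entries from filelogs. This replaces the _manifestsdone() overrides the packer classes carried before this change.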
1186 1187 _packermap = {'01': (_makecg1packer, cg1unpacker),
1187 1188 # cg2 adds support for exchanging generaldelta
1188 1189 '02': (_makecg2packer, cg2unpacker),
1189 1190 # cg3 adds support for exchanging revlog flags and treemanifests
1190 1191 '03': (_makecg3packer, cg3unpacker),
1191 1192 }
1192 1193
1193 1194 def allsupportedversions(repo):
1194 1195 versions = set(_packermap.keys())
1195 1196 if not (repo.ui.configbool('experimental', 'changegroup3') or
1196 1197 repo.ui.configbool('experimental', 'treemanifest') or
1197 1198 'treemanifest' in repo.requirements):
1198 1199 versions.discard('03')
1199 1200 return versions
1200 1201
1201 1202 # Changegroup versions that can be applied to the repo
1202 1203 def supportedincomingversions(repo):
1203 1204 return allsupportedversions(repo)
1204 1205
1205 1206 # Changegroup versions that can be created from the repo
1206 1207 def supportedoutgoingversions(repo):
1207 1208 versions = allsupportedversions(repo)
1208 1209 if 'treemanifest' in repo.requirements:
1209 1210 # Versions 01 and 02 support only flat manifests and it's just too
1210 1211 # expensive to convert between the flat manifest and tree manifest on
1211 1212 # the fly. Since tree manifests are hashed differently, all of history
1212 1213 # would have to be converted. Instead, we simply don't even pretend to
1213 1214 # support versions 01 and 02.
1214 1215 versions.discard('01')
1215 1216 versions.discard('02')
1216 1217 if repository.NARROW_REQUIREMENT in repo.requirements:
1217 1218 # Versions 01 and 02 don't support revlog flags, and we need to
1218 1219 # support that for stripping and unbundling to work.
1219 1220 versions.discard('01')
1220 1221 versions.discard('02')
1221 1222 if LFS_REQUIREMENT in repo.requirements:
1222 1223 # Versions 01 and 02 don't support revlog flags, and we need to
1223 1224 # mark LFS entries with REVIDX_EXTSTORED.
1224 1225 versions.discard('01')
1225 1226 versions.discard('02')
1226 1227
1227 1228 return versions
1228 1229
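Putting the two filters together: a stock repository offers versions 01 and 02 (plus 03 when the experimental changegroup3 or treemanifest setting is enabled), while the treemanifest, narrow, and LFS requirements each strip 01 and 02 from the outgoing set, since those versions cannot carry revlog flags or tree manifests.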
1229 1230 def localversion(repo):
1230 1231 # Finds the best version to use for bundles that are meant to be used
1231 1232 # locally, such as those from strip and shelve, and temporary bundles.
1232 1233 return max(supportedoutgoingversions(repo))
1233 1234
1234 1235 def safeversion(repo):
1235 1236 # Finds the smallest version that it's safe to assume clients of the repo
1236 1237 # will support. For example, all hg versions that support generaldelta also
1237 1238 # support changegroup 02.
1238 1239 versions = supportedoutgoingversions(repo)
1239 1240 if 'generaldelta' in repo.requirements:
1240 1241 versions.discard('01')
1241 1242 assert versions
1242 1243 return min(versions)
1243 1244
1244 1245 def getbundler(version, repo, bundlecaps=None, filematcher=None):
1245 1246 assert version in supportedoutgoingversions(repo)
1246 1247
1247 1248 if filematcher is None:
1248 1249 filematcher = matchmod.alwaysmatcher(repo.root, '')
1249 1250
1250 1251 if version == '01' and not filematcher.always():
1251 1252 raise error.ProgrammingError('version 01 changegroups do not support '
1252 1253 'sparse file matchers')
1253 1254
1254 1255 # Requested files could include files not in the local store. So
1255 1256 # filter those out.
1256 1257 filematcher = matchmod.intersectmatchers(repo.narrowmatch(),
1257 1258 filematcher)
1258 1259
1259 1260 fn = _packermap[version][0]
1260 1261 return fn(repo, filematcher, bundlecaps)
1261 1262
1262 1263 def getunbundler(version, fh, alg, extras=None):
1263 1264 return _packermap[version][1](fh, alg, extras=extras)
1264 1265
1265 1266 def _changegroupinfo(repo, nodes, source):
1266 1267 if repo.ui.verbose or source == 'bundle':
1267 1268 repo.ui.status(_("%d changesets found\n") % len(nodes))
1268 1269 if repo.ui.debugflag:
1269 1270 repo.ui.debug("list of changesets:\n")
1270 1271 for node in nodes:
1271 1272 repo.ui.debug("%s\n" % hex(node))
1272 1273
1273 1274 def makechangegroup(repo, outgoing, version, source, fastpath=False,
1274 1275 bundlecaps=None):
1275 1276 cgstream = makestream(repo, outgoing, version, source,
1276 1277 fastpath=fastpath, bundlecaps=bundlecaps)
1277 1278 return getunbundler(version, util.chunkbuffer(cgstream), None,
1278 1279 {'clcount': len(outgoing.missing) })
1279 1280
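A hedged sketch of the top-level flow, assuming repo is an existing localrepository and outgoing a discovery.outgoing instance (not runnable standalone):

    version = safeversion(repo)   # smallest version all clients should support
    cg = makechangegroup(repo, outgoing, version, 'push')
    # cg is an unbundler wrapping the generated stream; a receiving repo
    # would add its contents inside a transaction via
    # cg.apply(repo, tr, 'push', url)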
1280 1281 def makestream(repo, outgoing, version, source, fastpath=False,
1281 1282 bundlecaps=None, filematcher=None):
1282 1283 bundler = getbundler(version, repo, bundlecaps=bundlecaps,
1283 1284 filematcher=filematcher)
1284 1285
1285 1286 repo = repo.unfiltered()
1286 1287 commonrevs = outgoing.common
1287 1288 csets = outgoing.missing
1288 1289 heads = outgoing.missingheads
1289 1290 # We go through the fast path if we get told to, or if all (unfiltered)
1290 1291 # heads have been requested (since we then know that all linkrevs will
1291 1292 # be pulled by the client).
1292 1293 heads.sort()
1293 1294 fastpathlinkrev = fastpath or (
1294 1295 repo.filtername is None and heads == sorted(repo.heads()))
1295 1296
1296 1297 repo.hook('preoutgoing', throw=True, source=source)
1297 1298 _changegroupinfo(repo, csets, source)
1298 1299 return bundler.generate(commonrevs, csets, fastpathlinkrev, source)
1299 1300
1300 1301 def _addchangegroupfiles(repo, source, revmap, trp, expectedfiles, needfiles):
1301 1302 revisions = 0
1302 1303 files = 0
1303 1304 progress = repo.ui.makeprogress(_('files'), unit=_('files'),
1304 1305 total=expectedfiles)
1305 1306 for chunkdata in iter(source.filelogheader, {}):
1306 1307 files += 1
1307 1308 f = chunkdata["filename"]
1308 1309 repo.ui.debug("adding %s revisions\n" % f)
1309 1310 progress.increment()
1310 1311 fl = repo.file(f)
1311 1312 o = len(fl)
1312 1313 try:
1313 1314 deltas = source.deltaiter()
1314 1315 if not fl.addgroup(deltas, revmap, trp):
1315 1316 raise error.Abort(_("received file revlog group is empty"))
1316 1317 except error.CensoredBaseError as e:
1317 1318 raise error.Abort(_("received delta base is censored: %s") % e)
1318 1319 revisions += len(fl) - o
1319 1320 if f in needfiles:
1320 1321 needs = needfiles[f]
1321 1322 for new in pycompat.xrange(o, len(fl)):
1322 1323 n = fl.node(new)
1323 1324 if n in needs:
1324 1325 needs.remove(n)
1325 1326 else:
1326 1327 raise error.Abort(
1327 1328 _("received spurious file revlog entry"))
1328 1329 if not needs:
1329 1330 del needfiles[f]
1330 1331 progress.complete()
1331 1332
1332 1333 for f, needs in needfiles.iteritems():
1333 1334 fl = repo.file(f)
1334 1335 for n in needs:
1335 1336 try:
1336 1337 fl.rev(n)
1337 1338 except error.LookupError:
1338 1339 raise error.Abort(
1339 1340 _('missing file data for %s:%s - run hg verify') %
1340 1341 (f, hex(n)))
1341 1342
1342 1343 return revisions, files
1343 1344
1344 1345 def _packellipsischangegroup(repo, common, match, relevant_nodes,
1345 1346 ellipsisroots, visitnodes, depth, source, version):
1346 1347 if version in ('01', '02'):
1347 1348 raise error.Abort(
1348 1349 'ellipsis nodes require at least cg3 on client and server, '
1349 1350 'but negotiated version %s' % version)
1350 1351 # We wrap cg1packer.revchunk, using a side channel to pass
1351 1352 # relevant_nodes into that area. Then if linknode isn't in the
1352 1353 # set, we know we have an ellipsis node and we should defer
1353 1354 # sending that node's data. We override close() to detect
1354 1355 # pending ellipsis nodes and flush them.
1355 1356 packer = getbundler(version, repo, filematcher=match)
1356 1357 # Give the packer the list of nodes which should not be
1357 1358 # ellipsis nodes. We store this rather than the set of nodes
1358 1359 # that should be an ellipsis because for very large histories
1359 1360 # we expect this to be significantly smaller.
1360 1361 packer.full_nodes = relevant_nodes
1361 1362 # Maps ellipsis revs to their roots at the changelog level.
1362 1363 packer.precomputed_ellipsis = ellipsisroots
1363 1364 # Maps CL revs to per-revlog revisions. Cleared in close() at
1364 1365 # the end of each group.
1365 1366 packer.clrev_to_localrev = {}
1366 1367 packer.next_clrev_to_localrev = {}
1367 1368 # Maps changelog nodes to changelog revs. Filled in once
1368 1369 # during changelog stage and then left unmodified.
1369 1370 packer.clnode_to_rev = {}
1370 1371 packer.changelog_done = False
1371 1372 # If true, informs the packer that it is serving shallow content and might
1372 1373 # need to pack file contents not introduced by the changes being packed.
1373 1374 packer.is_shallow = depth is not None
1374 1375
1375 1376 return packer.generate(common, visitnodes, False, source)