changegroup: define functions for creating changegroup packers...
Gregory Szorc
r38930:19344024 default
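In short, the patch stops storing packer classes directly in _packermap: each changegroup version now maps to a small factory function with a fixed (repo, filematcher, bundlecaps) signature, and getbundler() calls through that factory. A condensed before/after sketch of the mechanism (paraphrased from the hunk below, not a verbatim excerpt):

# Before: the map held the classes and getbundler() instantiated them inline.
_packermap = {'01': (cg1packer, cg1unpacker), ...}
packer = _packermap[version][0](repo, filematcher=filematcher,
                                bundlecaps=bundlecaps)

# After: one trivial factory per version gives a single indirection point
# for constructing packers.
def _makecg1packer(repo, filematcher, bundlecaps):
    return cg1packer(repo, filematcher, bundlecaps=bundlecaps)

_packermap = {'01': (_makecg1packer, cg1unpacker), ...}
fn = _packermap[version][0]
packer = fn(repo, filematcher, bundlecaps)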
@@ -1,1368 +1,1377 @@
1 1 # changegroup.py - Mercurial changegroup manipulation functions
2 2 #
3 3 # Copyright 2006 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import os
11 11 import struct
12 12 import weakref
13 13
14 14 from .i18n import _
15 15 from .node import (
16 16 hex,
17 17 nullid,
18 18 nullrev,
19 19 short,
20 20 )
21 21
22 22 from .thirdparty import (
23 23 attr,
24 24 )
25 25
26 26 from . import (
27 27 dagutil,
28 28 error,
29 29 manifest,
30 30 match as matchmod,
31 31 mdiff,
32 32 phases,
33 33 pycompat,
34 34 repository,
35 35 revlog,
36 36 util,
37 37 )
38 38
39 39 from .utils import (
40 40 stringutil,
41 41 )
42 42
43 43 _CHANGEGROUPV1_DELTA_HEADER = "20s20s20s20s"
44 44 _CHANGEGROUPV2_DELTA_HEADER = "20s20s20s20s20s"
45 45 _CHANGEGROUPV3_DELTA_HEADER = ">20s20s20s20s20sH"
46 46
47 47 LFS_REQUIREMENT = 'lfs'
48 48
49 49 readexactly = util.readexactly
50 50
51 51 def getchunk(stream):
52 52 """return the next chunk from stream as a string"""
53 53 d = readexactly(stream, 4)
54 54 l = struct.unpack(">l", d)[0]
55 55 if l <= 4:
56 56 if l:
57 57 raise error.Abort(_("invalid chunk length %d") % l)
58 58 return ""
59 59 return readexactly(stream, l - 4)
60 60
61 61 def chunkheader(length):
62 62 """return a changegroup chunk header (string)"""
63 63 return struct.pack(">l", length + 4)
64 64
65 65 def closechunk():
66 66 """return a changegroup chunk header (string) for a zero-length chunk"""
67 67 return struct.pack(">l", 0)
68 68
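An aside on the framing implemented by the three helpers above: every chunk is prefixed with a 4-byte big-endian length that counts the prefix itself, and a zero-length header terminates a group. A minimal sketch of what that looks like on the wire (illustrative only; io.BytesIO stands in for the stream, and getchunk()/closechunk() are the functions defined above):

import io
import struct

payload = b'hello'
framed = struct.pack(">l", len(payload) + 4) + payload  # same as chunkheader(5) + payload
stream = io.BytesIO(framed + closechunk())              # one chunk, then a terminator
# getchunk(stream) -> b'hello'; the next call returns "" at the zero-length chunk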
69 69 def writechunks(ui, chunks, filename, vfs=None):
70 70 """Write chunks to a file and return its filename.
71 71
72 72 The stream is assumed to be a bundle file.
73 73 Existing files will not be overwritten.
74 74 If no filename is specified, a temporary file is created.
75 75 """
76 76 fh = None
77 77 cleanup = None
78 78 try:
79 79 if filename:
80 80 if vfs:
81 81 fh = vfs.open(filename, "wb")
82 82 else:
83 83 # Increase default buffer size because default is usually
84 84 # small (4k is common on Linux).
85 85 fh = open(filename, "wb", 131072)
86 86 else:
87 87 fd, filename = pycompat.mkstemp(prefix="hg-bundle-", suffix=".hg")
88 88 fh = os.fdopen(fd, r"wb")
89 89 cleanup = filename
90 90 for c in chunks:
91 91 fh.write(c)
92 92 cleanup = None
93 93 return filename
94 94 finally:
95 95 if fh is not None:
96 96 fh.close()
97 97 if cleanup is not None:
98 98 if filename and vfs:
99 99 vfs.unlink(cleanup)
100 100 else:
101 101 os.unlink(cleanup)
102 102
103 103 class cg1unpacker(object):
104 104 """Unpacker for cg1 changegroup streams.
105 105
106 106 A changegroup unpacker handles the framing of the revision data in
107 107 the wire format. Most consumers will want to use the apply()
108 108 method to add the changes from the changegroup to a repository.
109 109
110 110 If you're forwarding a changegroup unmodified to another consumer,
111 111 use getchunks(), which returns an iterator of changegroup
112 112 chunks. This is mostly useful for cases where you need to know the
113 113 data stream has ended by observing the end of the changegroup.
114 114
115 115 deltachunk() is useful only if you're applying delta data. Most
116 116 consumers should prefer apply() instead.
117 117
118 118 A few other public methods exist. Those are used only for
119 119 bundlerepo and some debug commands - their use is discouraged.
120 120 """
121 121 deltaheader = _CHANGEGROUPV1_DELTA_HEADER
122 122 deltaheadersize = struct.calcsize(deltaheader)
123 123 version = '01'
124 124 _grouplistcount = 1 # One list of files after the manifests
125 125
126 126 def __init__(self, fh, alg, extras=None):
127 127 if alg is None:
128 128 alg = 'UN'
129 129 if alg not in util.compengines.supportedbundletypes:
130 130 raise error.Abort(_('unknown stream compression type: %s')
131 131 % alg)
132 132 if alg == 'BZ':
133 133 alg = '_truncatedBZ'
134 134
135 135 compengine = util.compengines.forbundletype(alg)
136 136 self._stream = compengine.decompressorreader(fh)
137 137 self._type = alg
138 138 self.extras = extras or {}
139 139 self.callback = None
140 140
141 141 # These methods (compressed, read, seek, tell) all appear to only
142 142 # be used by bundlerepo, but it's a little hard to tell.
143 143 def compressed(self):
144 144 return self._type is not None and self._type != 'UN'
145 145 def read(self, l):
146 146 return self._stream.read(l)
147 147 def seek(self, pos):
148 148 return self._stream.seek(pos)
149 149 def tell(self):
150 150 return self._stream.tell()
151 151 def close(self):
152 152 return self._stream.close()
153 153
154 154 def _chunklength(self):
155 155 d = readexactly(self._stream, 4)
156 156 l = struct.unpack(">l", d)[0]
157 157 if l <= 4:
158 158 if l:
159 159 raise error.Abort(_("invalid chunk length %d") % l)
160 160 return 0
161 161 if self.callback:
162 162 self.callback()
163 163 return l - 4
164 164
165 165 def changelogheader(self):
166 166 """v10 does not have a changelog header chunk"""
167 167 return {}
168 168
169 169 def manifestheader(self):
170 170 """v10 does not have a manifest header chunk"""
171 171 return {}
172 172
173 173 def filelogheader(self):
174 174 """return the header of the filelogs chunk, v10 only has the filename"""
175 175 l = self._chunklength()
176 176 if not l:
177 177 return {}
178 178 fname = readexactly(self._stream, l)
179 179 return {'filename': fname}
180 180
181 181 def _deltaheader(self, headertuple, prevnode):
182 182 node, p1, p2, cs = headertuple
183 183 if prevnode is None:
184 184 deltabase = p1
185 185 else:
186 186 deltabase = prevnode
187 187 flags = 0
188 188 return node, p1, p2, deltabase, cs, flags
189 189
190 190 def deltachunk(self, prevnode):
191 191 l = self._chunklength()
192 192 if not l:
193 193 return {}
194 194 headerdata = readexactly(self._stream, self.deltaheadersize)
195 195 header = struct.unpack(self.deltaheader, headerdata)
196 196 delta = readexactly(self._stream, l - self.deltaheadersize)
197 197 node, p1, p2, deltabase, cs, flags = self._deltaheader(header, prevnode)
198 198 return (node, p1, p2, cs, deltabase, delta, flags)
199 199
200 200 def getchunks(self):
201 201 """returns all the chunks contains in the bundle
202 202
203 203 Used when you need to forward the binary stream to a file or another
204 204 network API. To do so, it parses the changegroup data; otherwise it would
205 205 block in the case of sshrepo because it doesn't know the end of the stream.
206 206 """
207 207 # For changegroup 1 and 2, we expect 3 parts: changelog, manifestlog,
208 208 # and a list of filelogs. For changegroup 3, we expect 4 parts:
209 209 # changelog, manifestlog, a list of tree manifestlogs, and a list of
210 210 # filelogs.
211 211 #
212 212 # Changelog and manifestlog parts are terminated with empty chunks. The
213 213 # tree and file parts are a list of entry sections. Each entry section
214 214 # is a series of chunks terminating in an empty chunk. The list of these
215 215 # entry sections is terminated in yet another empty chunk, so we know
216 216 # we've reached the end of the tree/file list when we reach an empty
217 217 # chunk that was preceded by no non-empty chunks.
218 218
219 219 parts = 0
220 220 while parts < 2 + self._grouplistcount:
221 221 noentries = True
222 222 while True:
223 223 chunk = getchunk(self)
224 224 if not chunk:
225 225 # The first two empty chunks represent the end of the
226 226 # changelog and the manifestlog portions. The remaining
227 227 # empty chunks represent either A) the end of individual
228 228 # tree or file entries in the file list, or B) the end of
229 229 # the entire list. It's the end of the entire list if there
230 230 # were no entries (i.e. noentries is True).
231 231 if parts < 2:
232 232 parts += 1
233 233 elif noentries:
234 234 parts += 1
235 235 break
236 236 noentries = False
237 237 yield chunkheader(len(chunk))
238 238 pos = 0
239 239 while pos < len(chunk):
240 240 next = pos + 2**20
241 241 yield chunk[pos:next]
242 242 pos = next
243 243 yield closechunk()
244 244
245 245 def _unpackmanifests(self, repo, revmap, trp, prog):
246 246 self.callback = prog.increment
247 247 # no need to check for empty manifest group here:
248 248 # if the result of the merge of 1 and 2 is the same in 3 and 4,
249 249 # no new manifest will be created and the manifest group will
250 250 # be empty during the pull
251 251 self.manifestheader()
252 252 deltas = self.deltaiter()
253 253 repo.manifestlog.addgroup(deltas, revmap, trp)
254 254 prog.complete()
255 255 self.callback = None
256 256
257 257 def apply(self, repo, tr, srctype, url, targetphase=phases.draft,
258 258 expectedtotal=None):
259 259 """Add the changegroup returned by source.read() to this repo.
260 260 srctype is a string like 'push', 'pull', or 'unbundle'. url is
261 261 the URL of the repo where this changegroup is coming from.
262 262
263 263 Return an integer summarizing the change to this repo:
264 264 - nothing changed or no source: 0
265 265 - more heads than before: 1+added heads (2..n)
266 266 - fewer heads than before: -1-removed heads (-2..-n)
267 267 - number of heads stays the same: 1
268 268 """
269 269 repo = repo.unfiltered()
270 270 def csmap(x):
271 271 repo.ui.debug("add changeset %s\n" % short(x))
272 272 return len(cl)
273 273
274 274 def revmap(x):
275 275 return cl.rev(x)
276 276
277 277 changesets = files = revisions = 0
278 278
279 279 try:
280 280 # The transaction may already carry source information. In this
281 281 # case we use the top level data. We overwrite the argument
282 282 # because we need to use the top level value (if they exist)
283 283 # in this function.
284 284 srctype = tr.hookargs.setdefault('source', srctype)
285 285 url = tr.hookargs.setdefault('url', url)
286 286 repo.hook('prechangegroup',
287 287 throw=True, **pycompat.strkwargs(tr.hookargs))
288 288
289 289 # write changelog data to temp files so concurrent readers
290 290 # will not see an inconsistent view
291 291 cl = repo.changelog
292 292 cl.delayupdate(tr)
293 293 oldheads = set(cl.heads())
294 294
295 295 trp = weakref.proxy(tr)
296 296 # pull off the changeset group
297 297 repo.ui.status(_("adding changesets\n"))
298 298 clstart = len(cl)
299 299 progress = repo.ui.makeprogress(_('changesets'), unit=_('chunks'),
300 300 total=expectedtotal)
301 301 self.callback = progress.increment
302 302
303 303 efiles = set()
304 304 def onchangelog(cl, node):
305 305 efiles.update(cl.readfiles(node))
306 306
307 307 self.changelogheader()
308 308 deltas = self.deltaiter()
309 309 cgnodes = cl.addgroup(deltas, csmap, trp, addrevisioncb=onchangelog)
310 310 efiles = len(efiles)
311 311
312 312 if not cgnodes:
313 313 repo.ui.develwarn('applied empty changegroup',
314 314 config='warn-empty-changegroup')
315 315 clend = len(cl)
316 316 changesets = clend - clstart
317 317 progress.complete()
318 318 self.callback = None
319 319
320 320 # pull off the manifest group
321 321 repo.ui.status(_("adding manifests\n"))
322 322 # We know that we'll never have more manifests than we had
323 323 # changesets.
324 324 progress = repo.ui.makeprogress(_('manifests'), unit=_('chunks'),
325 325 total=changesets)
326 326 self._unpackmanifests(repo, revmap, trp, progress)
327 327
328 328 needfiles = {}
329 329 if repo.ui.configbool('server', 'validate'):
330 330 cl = repo.changelog
331 331 ml = repo.manifestlog
332 332 # validate incoming csets have their manifests
333 333 for cset in pycompat.xrange(clstart, clend):
334 334 mfnode = cl.changelogrevision(cset).manifest
335 335 mfest = ml[mfnode].readdelta()
336 336 # store file cgnodes we must see
337 337 for f, n in mfest.iteritems():
338 338 needfiles.setdefault(f, set()).add(n)
339 339
340 340 # process the files
341 341 repo.ui.status(_("adding file changes\n"))
342 342 newrevs, newfiles = _addchangegroupfiles(
343 343 repo, self, revmap, trp, efiles, needfiles)
344 344 revisions += newrevs
345 345 files += newfiles
346 346
347 347 deltaheads = 0
348 348 if oldheads:
349 349 heads = cl.heads()
350 350 deltaheads = len(heads) - len(oldheads)
351 351 for h in heads:
352 352 if h not in oldheads and repo[h].closesbranch():
353 353 deltaheads -= 1
354 354 htext = ""
355 355 if deltaheads:
356 356 htext = _(" (%+d heads)") % deltaheads
357 357
358 358 repo.ui.status(_("added %d changesets"
359 359 " with %d changes to %d files%s\n")
360 360 % (changesets, revisions, files, htext))
361 361 repo.invalidatevolatilesets()
362 362
363 363 if changesets > 0:
364 364 if 'node' not in tr.hookargs:
365 365 tr.hookargs['node'] = hex(cl.node(clstart))
366 366 tr.hookargs['node_last'] = hex(cl.node(clend - 1))
367 367 hookargs = dict(tr.hookargs)
368 368 else:
369 369 hookargs = dict(tr.hookargs)
370 370 hookargs['node'] = hex(cl.node(clstart))
371 371 hookargs['node_last'] = hex(cl.node(clend - 1))
372 372 repo.hook('pretxnchangegroup',
373 373 throw=True, **pycompat.strkwargs(hookargs))
374 374
375 375 added = [cl.node(r) for r in pycompat.xrange(clstart, clend)]
376 376 phaseall = None
377 377 if srctype in ('push', 'serve'):
378 378 # Old servers can not push the boundary themselves.
379 379 # New servers won't push the boundary if changeset already
380 380 # exists locally as secret
381 381 #
382 382 # We should not use added here but the list of all changes in
383 383 # the bundle
384 384 if repo.publishing():
385 385 targetphase = phaseall = phases.public
386 386 else:
387 387 # closer target phase computation
388 388
389 389 # Those changesets have been pushed from the
390 390 # outside, their phases are going to be pushed
391 391 # alongside. Therefore `targetphase` is
392 392 # ignored.
393 393 targetphase = phaseall = phases.draft
394 394 if added:
395 395 phases.registernew(repo, tr, targetphase, added)
396 396 if phaseall is not None:
397 397 phases.advanceboundary(repo, tr, phaseall, cgnodes)
398 398
399 399 if changesets > 0:
400 400
401 401 def runhooks():
402 402 # These hooks run when the lock releases, not when the
403 403 # transaction closes. So it's possible for the changelog
404 404 # to have changed since we last saw it.
405 405 if clstart >= len(repo):
406 406 return
407 407
408 408 repo.hook("changegroup", **pycompat.strkwargs(hookargs))
409 409
410 410 for n in added:
411 411 args = hookargs.copy()
412 412 args['node'] = hex(n)
413 413 del args['node_last']
414 414 repo.hook("incoming", **pycompat.strkwargs(args))
415 415
416 416 newheads = [h for h in repo.heads()
417 417 if h not in oldheads]
418 418 repo.ui.log("incoming",
419 419 "%d incoming changes - new heads: %s\n",
420 420 len(added),
421 421 ', '.join([hex(c[:6]) for c in newheads]))
422 422
423 423 tr.addpostclose('changegroup-runhooks-%020i' % clstart,
424 424 lambda tr: repo._afterlock(runhooks))
425 425 finally:
426 426 repo.ui.flush()
427 427 # never return 0 here:
428 428 if deltaheads < 0:
429 429 ret = deltaheads - 1
430 430 else:
431 431 ret = deltaheads + 1
432 432 return ret
433 433
434 434 def deltaiter(self):
435 435 """
436 436 returns an iterator of the deltas in this changegroup
437 437
438 438 Useful for passing to the underlying storage system to be stored.
439 439 """
440 440 chain = None
441 441 for chunkdata in iter(lambda: self.deltachunk(chain), {}):
442 442 # Chunkdata: (node, p1, p2, cs, deltabase, delta, flags)
443 443 yield chunkdata
444 444 chain = chunkdata[0]
445 445
446 446 class cg2unpacker(cg1unpacker):
447 447 """Unpacker for cg2 streams.
448 448
449 449 cg2 streams add support for generaldelta, so the delta header
450 450 format is slightly different. All other features about the data
451 451 remain the same.
452 452 """
453 453 deltaheader = _CHANGEGROUPV2_DELTA_HEADER
454 454 deltaheadersize = struct.calcsize(deltaheader)
455 455 version = '02'
456 456
457 457 def _deltaheader(self, headertuple, prevnode):
458 458 node, p1, p2, deltabase, cs = headertuple
459 459 flags = 0
460 460 return node, p1, p2, deltabase, cs, flags
461 461
462 462 class cg3unpacker(cg2unpacker):
463 463 """Unpacker for cg3 streams.
464 464
465 465 cg3 streams add support for exchanging treemanifests and revlog
466 466 flags. It adds the revlog flags to the delta header and an empty chunk
467 467 separating manifests and files.
468 468 """
469 469 deltaheader = _CHANGEGROUPV3_DELTA_HEADER
470 470 deltaheadersize = struct.calcsize(deltaheader)
471 471 version = '03'
472 472 _grouplistcount = 2 # One list of manifests and one list of files
473 473
474 474 def _deltaheader(self, headertuple, prevnode):
475 475 node, p1, p2, deltabase, cs, flags = headertuple
476 476 return node, p1, p2, deltabase, cs, flags
477 477
478 478 def _unpackmanifests(self, repo, revmap, trp, prog):
479 479 super(cg3unpacker, self)._unpackmanifests(repo, revmap, trp, prog)
480 480 for chunkdata in iter(self.filelogheader, {}):
481 481 # If we get here, there are directory manifests in the changegroup
482 482 d = chunkdata["filename"]
483 483 repo.ui.debug("adding %s revisions\n" % d)
484 484 dirlog = repo.manifestlog._revlog.dirlog(d)
485 485 deltas = self.deltaiter()
486 486 if not dirlog.addgroup(deltas, revmap, trp):
487 487 raise error.Abort(_("received dir revlog group is empty"))
488 488
489 489 class headerlessfixup(object):
490 490 def __init__(self, fh, h):
491 491 self._h = h
492 492 self._fh = fh
493 493 def read(self, n):
494 494 if self._h:
495 495 d, self._h = self._h[:n], self._h[n:]
496 496 if len(d) < n:
497 497 d += readexactly(self._fh, n - len(d))
498 498 return d
499 499 return readexactly(self._fh, n)
500 500
501 501 @attr.s(slots=True, frozen=True)
502 502 class revisiondelta(object):
503 503 """Describes a delta entry in a changegroup.
504 504
505 505 Captured data is sufficient to serialize the delta into multiple
506 506 formats.
507 507 """
508 508 # 20 byte node of this revision.
509 509 node = attr.ib()
510 510 # 20 byte nodes of parent revisions.
511 511 p1node = attr.ib()
512 512 p2node = attr.ib()
513 513 # 20 byte node of the revision this delta is against.
514 514 basenode = attr.ib()
515 515 # 20 byte node of changeset revision this delta is associated with.
516 516 linknode = attr.ib()
517 517 # 2 bytes of flags to apply to revision data.
518 518 flags = attr.ib()
519 519 # Iterable of chunks holding raw delta data.
520 520 deltachunks = attr.ib()
521 521
522 522 class cg1packer(object):
523 523 deltaheader = _CHANGEGROUPV1_DELTA_HEADER
524 524 version = '01'
525 525 def __init__(self, repo, filematcher, bundlecaps=None):
526 526 """Given a source repo, construct a bundler.
527 527
528 528 filematcher is a matcher that matches on files to include in the
529 529 changegroup. Used to facilitate sparse changegroups.
530 530
531 531 bundlecaps is optional and can be used to specify the set of
532 532 capabilities which can be used to build the bundle. While bundlecaps is
533 533 unused in core Mercurial, extensions rely on this feature to communicate
534 534 capabilities to customize the changegroup packer.
535 535 """
536 536 assert filematcher
537 537 self._filematcher = filematcher
538 538
539 539 # Set of capabilities we can use to build the bundle.
540 540 if bundlecaps is None:
541 541 bundlecaps = set()
542 542 self._bundlecaps = bundlecaps
543 543 # experimental config: bundle.reorder
544 544 reorder = repo.ui.config('bundle', 'reorder')
545 545 if reorder == 'auto':
546 546 reorder = None
547 547 else:
548 548 reorder = stringutil.parsebool(reorder)
549 549 self._repo = repo
550 550 self._reorder = reorder
551 551 if self._repo.ui.verbose and not self._repo.ui.debugflag:
552 552 self._verbosenote = self._repo.ui.note
553 553 else:
554 554 self._verbosenote = lambda s: None
555 555
556 556 def close(self):
557 557 # Ellipses serving mode.
558 558 getattr(self, 'clrev_to_localrev', {}).clear()
559 559 if getattr(self, 'next_clrev_to_localrev', {}):
560 560 self.clrev_to_localrev = self.next_clrev_to_localrev
561 561 del self.next_clrev_to_localrev
562 562 self.changelog_done = True
563 563
564 564 return closechunk()
565 565
566 566 def fileheader(self, fname):
567 567 return chunkheader(len(fname)) + fname
568 568
569 569 # Extracted both for clarity and for overriding in extensions.
570 570 def _sortgroup(self, store, nodelist, lookup):
571 571 """Sort nodes for change group and turn them into revnums."""
572 572 # Ellipses serving mode.
573 573 #
574 574 # In a perfect world, we'd generate better ellipsis-ified graphs
575 575 # for non-changelog revlogs. In practice, we haven't started doing
576 576 # that yet, so the resulting DAGs for the manifestlog and filelogs
577 577 # are actually full of bogus parentage on all the ellipsis
578 578 # nodes. This has the side effect that, while the contents are
579 579 # correct, the individual DAGs might be completely out of whack in
580 580 # a case like 882681bc3166 and its ancestors (back about 10
581 581 # revisions or so) in the main hg repo.
582 582 #
583 583 # The one invariant we *know* holds is that the new (potentially
584 584 # bogus) DAG shape will be valid if we order the nodes in the
585 585 # order that they're introduced in dramatis personae by the
586 586 # changelog, so what we do is we sort the non-changelog histories
587 587 # by the order in which they are used by the changelog.
588 588 if util.safehasattr(self, 'full_nodes') and self.clnode_to_rev:
589 589 key = lambda n: self.clnode_to_rev[lookup(n)]
590 590 return [store.rev(n) for n in sorted(nodelist, key=key)]
591 591
592 592 # for generaldelta revlogs, we linearize the revs; this will both be
593 593 # much quicker and generate a much smaller bundle
594 594 if (store._generaldelta and self._reorder is None) or self._reorder:
595 595 dag = dagutil.revlogdag(store)
596 596 return dag.linearize(set(store.rev(n) for n in nodelist))
597 597 else:
598 598 return sorted([store.rev(n) for n in nodelist])
599 599
600 600 def group(self, nodelist, store, lookup, units=None):
601 601 """Calculate a delta group, yielding a sequence of changegroup chunks
602 602 (strings).
603 603
604 604 Given a list of changeset revs, return a set of deltas and
605 605 metadata corresponding to nodes. The first delta is
606 606 first parent(nodelist[0]) -> nodelist[0], the receiver is
607 607 guaranteed to have this parent as it has all history before
608 608 these changesets. In the case firstparent is nullrev the
609 609 changegroup starts with a full revision.
610 610
611 611 If units is not None, progress detail will be generated; units specifies
612 612 the type of revlog that is touched (changelog, manifest, etc.).
613 613 """
614 614 # if we don't have any revisions touched by these changesets, bail
615 615 if len(nodelist) == 0:
616 616 yield self.close()
617 617 return
618 618
619 619 revs = self._sortgroup(store, nodelist, lookup)
620 620
621 621 # add the parent of the first rev
622 622 p = store.parentrevs(revs[0])[0]
623 623 revs.insert(0, p)
624 624
625 625 # build deltas
626 626 progress = None
627 627 if units is not None:
628 628 progress = self._repo.ui.makeprogress(_('bundling'), unit=units,
629 629 total=(len(revs) - 1))
630 630 for r in pycompat.xrange(len(revs) - 1):
631 631 if progress:
632 632 progress.update(r + 1)
633 633 prev, curr = revs[r], revs[r + 1]
634 634 linknode = lookup(store.node(curr))
635 635 for c in self.revchunk(store, curr, prev, linknode):
636 636 yield c
637 637
638 638 if progress:
639 639 progress.complete()
640 640 yield self.close()
641 641
642 642 # filter any nodes that claim to be part of the known set
643 643 def prune(self, store, missing, commonrevs):
644 644 # TODO this violates storage abstraction for manifests.
645 645 if isinstance(store, manifest.manifestrevlog):
646 646 if not self._filematcher.visitdir(store._dir[:-1] or '.'):
647 647 return []
648 648
649 649 rr, rl = store.rev, store.linkrev
650 650 return [n for n in missing if rl(rr(n)) not in commonrevs]
651 651
652 652 def _packmanifests(self, dir, mfnodes, lookuplinknode):
653 653 """Pack flat manifests into a changegroup stream."""
654 654 assert not dir
655 655 for chunk in self.group(mfnodes, self._repo.manifestlog._revlog,
656 656 lookuplinknode, units=_('manifests')):
657 657 yield chunk
658 658
659 659 def _manifestsdone(self):
660 660 return ''
661 661
662 662 def generate(self, commonrevs, clnodes, fastpathlinkrev, source):
663 663 '''yield a sequence of changegroup chunks (strings)'''
664 664 repo = self._repo
665 665 cl = repo.changelog
666 666
667 667 clrevorder = {}
668 668 mfs = {} # needed manifests
669 669 fnodes = {} # needed file nodes
670 670 mfl = repo.manifestlog
671 671 # TODO violates storage abstraction.
672 672 mfrevlog = mfl._revlog
673 673 changedfiles = set()
674 674
675 675 ellipsesmode = util.safehasattr(self, 'full_nodes')
676 676
677 677 # Callback for the changelog, used to collect changed files and
678 678 # manifest nodes.
679 679 # Returns the linkrev node (identity in the changelog case).
680 680 def lookupcl(x):
681 681 c = cl.read(x)
682 682 clrevorder[x] = len(clrevorder)
683 683
684 684 if ellipsesmode:
685 685 # Only update mfs if x is going to be sent. Otherwise we
686 686 # end up with bogus linkrevs specified for manifests and
687 687 # we skip some manifest nodes that we should otherwise
688 688 # have sent.
689 689 if (x in self.full_nodes
690 690 or cl.rev(x) in self.precomputed_ellipsis):
691 691 n = c[0]
692 692 # Record the first changeset introducing this manifest
693 693 # version.
694 694 mfs.setdefault(n, x)
695 695 # Set this narrow-specific dict so we have the lowest
696 696 # manifest revnum to look up for this cl revnum. (Part of
697 697 # mapping changelog ellipsis parents to manifest ellipsis
698 698 # parents)
699 699 self.next_clrev_to_localrev.setdefault(cl.rev(x),
700 700 mfrevlog.rev(n))
701 701 # We can't trust the changed files list in the changeset if the
702 702 # client requested a shallow clone.
703 703 if self.is_shallow:
704 704 changedfiles.update(mfl[c[0]].read().keys())
705 705 else:
706 706 changedfiles.update(c[3])
707 707 else:
708 708
709 709 n = c[0]
710 710 # record the first changeset introducing this manifest version
711 711 mfs.setdefault(n, x)
712 712 # Record a complete list of potentially-changed files in
713 713 # this manifest.
714 714 changedfiles.update(c[3])
715 715
716 716 return x
717 717
718 718 self._verbosenote(_('uncompressed size of bundle content:\n'))
719 719 size = 0
720 720 for chunk in self.group(clnodes, cl, lookupcl, units=_('changesets')):
721 721 size += len(chunk)
722 722 yield chunk
723 723 self._verbosenote(_('%8.i (changelog)\n') % size)
724 724
725 725 # We need to make sure that the linkrev in the changegroup refers to
726 726 # the first changeset that introduced the manifest or file revision.
727 727 # The fastpath is usually safer than the slowpath, because the filelogs
728 728 # are walked in revlog order.
729 729 #
730 730 # When taking the slowpath with reorder=None and the manifest revlog
731 731 # uses generaldelta, the manifest may be walked in the "wrong" order.
732 732 # Without 'clrevorder', we would get an incorrect linkrev (see fix in
733 733 # cc0ff93d0c0c).
734 734 #
735 735 # When taking the fastpath, we are only vulnerable to reordering
736 736 # of the changelog itself. The changelog never uses generaldelta, so
737 737 # it is only reordered when reorder=True. To handle this case, we
738 738 # simply take the slowpath, which already has the 'clrevorder' logic.
739 739 # This was also fixed in cc0ff93d0c0c.
740 740 fastpathlinkrev = fastpathlinkrev and not self._reorder
741 741 # Treemanifests don't work correctly with fastpathlinkrev
742 742 # either, because we don't discover which directory nodes to
743 743 # send along with files. This could probably be fixed.
744 744 fastpathlinkrev = fastpathlinkrev and (
745 745 'treemanifest' not in repo.requirements)
746 746
747 747 for chunk in self.generatemanifests(commonrevs, clrevorder,
748 748 fastpathlinkrev, mfs, fnodes, source):
749 749 yield chunk
750 750
751 751 if ellipsesmode:
752 752 mfdicts = None
753 753 if self.is_shallow:
754 754 mfdicts = [(self._repo.manifestlog[n].read(), lr)
755 755 for (n, lr) in mfs.iteritems()]
756 756
757 757 mfs.clear()
758 758 clrevs = set(cl.rev(x) for x in clnodes)
759 759
760 760 if not fastpathlinkrev:
761 761 def linknodes(unused, fname):
762 762 return fnodes.get(fname, {})
763 763 else:
764 764 cln = cl.node
765 765 def linknodes(filerevlog, fname):
766 766 llr = filerevlog.linkrev
767 767 fln = filerevlog.node
768 768 revs = ((r, llr(r)) for r in filerevlog)
769 769 return dict((fln(r), cln(lr)) for r, lr in revs if lr in clrevs)
770 770
771 771 if ellipsesmode:
772 772 # We need to pass the mfdicts variable down into
773 773 # generatefiles(), but more than one command might have
774 774 # wrapped generatefiles so we can't modify the function
775 775 # signature. Instead, we pass the data to ourselves using an
776 776 # instance attribute. I'm sorry.
777 777 self._mfdicts = mfdicts
778 778
779 779 for chunk in self.generatefiles(changedfiles, linknodes, commonrevs,
780 780 source):
781 781 yield chunk
782 782
783 783 yield self.close()
784 784
785 785 if clnodes:
786 786 repo.hook('outgoing', node=hex(clnodes[0]), source=source)
787 787
788 788 def generatemanifests(self, commonrevs, clrevorder, fastpathlinkrev, mfs,
789 789 fnodes, source):
790 790 """Returns an iterator of changegroup chunks containing manifests.
791 791
792 792 `source` is unused here, but is used by extensions like remotefilelog to
793 793 change what is sent based on pulls vs pushes, etc.
794 794 """
795 795 repo = self._repo
796 796 mfl = repo.manifestlog
797 797 dirlog = mfl._revlog.dirlog
798 798 tmfnodes = {'': mfs}
799 799
800 800 # Callback for the manifest, used to collect linkrevs for filelog
801 801 # revisions.
802 802 # Returns the linkrev node (collected in lookupcl).
803 803 def makelookupmflinknode(dir, nodes):
804 804 if fastpathlinkrev:
805 805 assert not dir
806 806 return mfs.__getitem__
807 807
808 808 def lookupmflinknode(x):
809 809 """Callback for looking up the linknode for manifests.
810 810
811 811 Returns the linkrev node for the specified manifest.
812 812
813 813 SIDE EFFECT:
814 814
815 815 1) fclnodes gets populated with the list of relevant
816 816 file nodes if we're not using fastpathlinkrev
817 817 2) When treemanifests are in use, collects treemanifest nodes
818 818 to send
819 819
820 820 Note that this means manifests must be completely sent to
821 821 the client before you can trust the list of files and
822 822 treemanifests to send.
823 823 """
824 824 clnode = nodes[x]
825 825 mdata = mfl.get(dir, x).readfast(shallow=True)
826 826 for p, n, fl in mdata.iterentries():
827 827 if fl == 't': # subdirectory manifest
828 828 subdir = dir + p + '/'
829 829 tmfclnodes = tmfnodes.setdefault(subdir, {})
830 830 tmfclnode = tmfclnodes.setdefault(n, clnode)
831 831 if clrevorder[clnode] < clrevorder[tmfclnode]:
832 832 tmfclnodes[n] = clnode
833 833 else:
834 834 f = dir + p
835 835 fclnodes = fnodes.setdefault(f, {})
836 836 fclnode = fclnodes.setdefault(n, clnode)
837 837 if clrevorder[clnode] < clrevorder[fclnode]:
838 838 fclnodes[n] = clnode
839 839 return clnode
840 840 return lookupmflinknode
841 841
842 842 size = 0
843 843 while tmfnodes:
844 844 dir, nodes = tmfnodes.popitem()
845 845 prunednodes = self.prune(dirlog(dir), nodes, commonrevs)
846 846 if not dir or prunednodes:
847 847 for x in self._packmanifests(dir, prunednodes,
848 848 makelookupmflinknode(dir, nodes)):
849 849 size += len(x)
850 850 yield x
851 851 self._verbosenote(_('%8.i (manifests)\n') % size)
852 852 yield self._manifestsdone()
853 853
854 854 # The 'source' parameter is useful for extensions
855 855 def generatefiles(self, changedfiles, linknodes, commonrevs, source):
856 856 changedfiles = list(filter(self._filematcher, changedfiles))
857 857
858 858 if getattr(self, 'is_shallow', False):
859 859 # See comment in generate() for why this sadness is a thing.
860 860 mfdicts = self._mfdicts
861 861 del self._mfdicts
862 862 # In a shallow clone, the linknodes callback needs to also include
863 863 # those file nodes that are in the manifests we sent but weren't
864 864 # introduced by those manifests.
865 865 commonctxs = [self._repo[c] for c in commonrevs]
866 866 oldlinknodes = linknodes
867 867 clrev = self._repo.changelog.rev
868 868
869 869 # Defining this function has a side-effect of overriding the
870 870 # function of the same name that was passed in as an argument.
871 871 # TODO have caller pass in appropriate function.
872 872 def linknodes(flog, fname):
873 873 for c in commonctxs:
874 874 try:
875 875 fnode = c.filenode(fname)
876 876 self.clrev_to_localrev[c.rev()] = flog.rev(fnode)
877 877 except error.ManifestLookupError:
878 878 pass
879 879 links = oldlinknodes(flog, fname)
880 880 if len(links) != len(mfdicts):
881 881 for mf, lr in mfdicts:
882 882 fnode = mf.get(fname, None)
883 883 if fnode in links:
884 884 links[fnode] = min(links[fnode], lr, key=clrev)
885 885 elif fnode:
886 886 links[fnode] = lr
887 887 return links
888 888
889 889 return self._generatefiles(changedfiles, linknodes, commonrevs, source)
890 890
891 891 def _generatefiles(self, changedfiles, linknodes, commonrevs, source):
892 892 repo = self._repo
893 893 progress = repo.ui.makeprogress(_('bundling'), unit=_('files'),
894 894 total=len(changedfiles))
895 895 for i, fname in enumerate(sorted(changedfiles)):
896 896 filerevlog = repo.file(fname)
897 897 if not filerevlog:
898 898 raise error.Abort(_("empty or missing file data for %s") %
899 899 fname)
900 900
901 901 linkrevnodes = linknodes(filerevlog, fname)
902 902 # Lookup for filenodes, we collected the linkrev nodes above in the
903 903 # fastpath case and with lookupmf in the slowpath case.
904 904 def lookupfilelog(x):
905 905 return linkrevnodes[x]
906 906
907 907 filenodes = self.prune(filerevlog, linkrevnodes, commonrevs)
908 908 if filenodes:
909 909 progress.update(i + 1, item=fname)
910 910 h = self.fileheader(fname)
911 911 size = len(h)
912 912 yield h
913 913 for chunk in self.group(filenodes, filerevlog, lookupfilelog):
914 914 size += len(chunk)
915 915 yield chunk
916 916 self._verbosenote(_('%8.i %s\n') % (size, fname))
917 917 progress.complete()
918 918
919 919 def deltaparent(self, store, rev, p1, p2, prev):
920 920 if not store.candelta(prev, rev):
921 921 raise error.ProgrammingError('cg1 should not be used in this case')
922 922 return prev
923 923
924 924 def revchunk(self, store, rev, prev, linknode):
925 925 if util.safehasattr(self, 'full_nodes'):
926 926 fn = self._revisiondeltanarrow
927 927 else:
928 928 fn = self._revisiondeltanormal
929 929
930 930 delta = fn(store, rev, prev, linknode)
931 931 if not delta:
932 932 return
933 933
934 934 meta = self.builddeltaheader(delta.node, delta.p1node, delta.p2node,
935 935 delta.basenode, delta.linknode,
936 936 delta.flags)
937 937 l = len(meta) + sum(len(x) for x in delta.deltachunks)
938 938
939 939 yield chunkheader(l)
940 940 yield meta
941 941 for x in delta.deltachunks:
942 942 yield x
943 943
944 944 def _revisiondeltanormal(self, store, rev, prev, linknode):
945 945 node = store.node(rev)
946 946 p1, p2 = store.parentrevs(rev)
947 947 base = self.deltaparent(store, rev, p1, p2, prev)
948 948
949 949 prefix = ''
950 950 if store.iscensored(base) or store.iscensored(rev):
951 951 try:
952 952 delta = store.revision(node, raw=True)
953 953 except error.CensoredNodeError as e:
954 954 delta = e.tombstone
955 955 if base == nullrev:
956 956 prefix = mdiff.trivialdiffheader(len(delta))
957 957 else:
958 958 baselen = store.rawsize(base)
959 959 prefix = mdiff.replacediffheader(baselen, len(delta))
960 960 elif base == nullrev:
961 961 delta = store.revision(node, raw=True)
962 962 prefix = mdiff.trivialdiffheader(len(delta))
963 963 else:
964 964 delta = store.revdiff(base, rev)
965 965 p1n, p2n = store.parents(node)
966 966
967 967 return revisiondelta(
968 968 node=node,
969 969 p1node=p1n,
970 970 p2node=p2n,
971 971 basenode=store.node(base),
972 972 linknode=linknode,
973 973 flags=store.flags(rev),
974 974 deltachunks=(prefix, delta),
975 975 )
976 976
977 977 def _revisiondeltanarrow(self, store, rev, prev, linknode):
978 978 # build up some mapping information that's useful later. See
979 979 # the local() nested function below.
980 980 if not self.changelog_done:
981 981 self.clnode_to_rev[linknode] = rev
982 982 linkrev = rev
983 983 self.clrev_to_localrev[linkrev] = rev
984 984 else:
985 985 linkrev = self.clnode_to_rev[linknode]
986 986 self.clrev_to_localrev[linkrev] = rev
987 987
988 988 # This is a node to send in full, because the changeset it
989 989 # corresponds to was a full changeset.
990 990 if linknode in self.full_nodes:
991 991 return self._revisiondeltanormal(store, rev, prev, linknode)
992 992
993 993 # At this point, a node can either be one we should skip or an
994 994 # ellipsis. If it's not an ellipsis, bail immediately.
995 995 if linkrev not in self.precomputed_ellipsis:
996 996 return
997 997
998 998 linkparents = self.precomputed_ellipsis[linkrev]
999 999 def local(clrev):
1000 1000 """Turn a changelog revnum into a local revnum.
1001 1001
1002 1002 The ellipsis dag is stored as revnums on the changelog,
1003 1003 but when we're producing ellipsis entries for
1004 1004 non-changelog revlogs, we need to turn those numbers into
1005 1005 something local. This does that for us, and during the
1006 1006 changelog sending phase will also expand the stored
1007 1007 mappings as needed.
1008 1008 """
1009 1009 if clrev == nullrev:
1010 1010 return nullrev
1011 1011
1012 1012 if not self.changelog_done:
1013 1013 # If we're doing the changelog, it's possible that we
1014 1014 # have a parent that is already on the client, and we
1015 1015 # need to store some extra mapping information so that
1016 1016 # our contained ellipsis nodes will be able to resolve
1017 1017 # their parents.
1018 1018 if clrev not in self.clrev_to_localrev:
1019 1019 clnode = store.node(clrev)
1020 1020 self.clnode_to_rev[clnode] = clrev
1021 1021 return clrev
1022 1022
1023 1023 # Walk the ellipsis-ized changelog breadth-first looking for a
1024 1024 # change that has been linked from the current revlog.
1025 1025 #
1026 1026 # For a flat manifest revlog only a single step should be necessary
1027 1027 # as all relevant changelog entries are relevant to the flat
1028 1028 # manifest.
1029 1029 #
1030 1030 # For a filelog or tree manifest dirlog however not every changelog
1031 1031 # entry will have been relevant, so we need to skip some changelog
1032 1032 # nodes even after ellipsis-izing.
1033 1033 walk = [clrev]
1034 1034 while walk:
1035 1035 p = walk[0]
1036 1036 walk = walk[1:]
1037 1037 if p in self.clrev_to_localrev:
1038 1038 return self.clrev_to_localrev[p]
1039 1039 elif p in self.full_nodes:
1040 1040 walk.extend([pp for pp in self._repo.changelog.parentrevs(p)
1041 1041 if pp != nullrev])
1042 1042 elif p in self.precomputed_ellipsis:
1043 1043 walk.extend([pp for pp in self.precomputed_ellipsis[p]
1044 1044 if pp != nullrev])
1045 1045 else:
1046 1046 # In this case, we've got an ellipsis with parents
1047 1047 # outside the current bundle (likely an
1048 1048 # incremental pull). We "know" that we can use the
1049 1049 # value of this same revlog at whatever revision
1050 1050 # is pointed to by linknode. "Know" is in scare
1051 1051 # quotes because I haven't done enough examination
1052 1052 # of edge cases to convince myself this is really
1053 1053 # a fact - it works for all the (admittedly
1054 1054 # thorough) cases in our testsuite, but I would be
1055 1055 # somewhat unsurprised to find a case in the wild
1056 1056 # where this breaks down a bit. That said, I don't
1057 1057 # know if it would hurt anything.
1058 1058 for i in pycompat.xrange(rev, 0, -1):
1059 1059 if store.linkrev(i) == clrev:
1060 1060 return i
1061 1061 # We failed to resolve a parent for this node, so
1062 1062 # we crash the changegroup construction.
1063 1063 raise error.Abort(
1064 1064 'unable to resolve parent while packing %r %r'
1065 1065 ' for changeset %r' % (store.indexfile, rev, clrev))
1066 1066
1067 1067 return nullrev
1068 1068
1069 1069 if not linkparents or (
1070 1070 store.parentrevs(rev) == (nullrev, nullrev)):
1071 1071 p1, p2 = nullrev, nullrev
1072 1072 elif len(linkparents) == 1:
1073 1073 p1, = sorted(local(p) for p in linkparents)
1074 1074 p2 = nullrev
1075 1075 else:
1076 1076 p1, p2 = sorted(local(p) for p in linkparents)
1077 1077
1078 1078 n = store.node(rev)
1079 1079 p1n, p2n = store.node(p1), store.node(p2)
1080 1080 flags = store.flags(rev)
1081 1081 flags |= revlog.REVIDX_ELLIPSIS
1082 1082
1083 1083 # TODO: try and actually send deltas for ellipsis data blocks
1084 1084 data = store.revision(n)
1085 1085 diffheader = mdiff.trivialdiffheader(len(data))
1086 1086
1087 1087 return revisiondelta(
1088 1088 node=n,
1089 1089 p1node=p1n,
1090 1090 p2node=p2n,
1091 1091 basenode=nullid,
1092 1092 linknode=linknode,
1093 1093 flags=flags,
1094 1094 deltachunks=(diffheader, data),
1095 1095 )
1096 1096
1097 1097 def builddeltaheader(self, node, p1n, p2n, basenode, linknode, flags):
1098 1098 # do nothing with basenode, it is implicitly the previous one in HG10
1099 1099 # do nothing with flags, it is implicitly 0 for cg1 and cg2
1100 1100 return struct.pack(self.deltaheader, node, p1n, p2n, linknode)
1101 1101
1102 1102 class cg2packer(cg1packer):
1103 1103 version = '02'
1104 1104 deltaheader = _CHANGEGROUPV2_DELTA_HEADER
1105 1105
1106 1106 def __init__(self, repo, filematcher, bundlecaps=None):
1107 1107 super(cg2packer, self).__init__(repo, filematcher,
1108 1108 bundlecaps=bundlecaps)
1109 1109
1110 1110 if self._reorder is None:
1111 1111 # Since generaldelta is directly supported by cg2, reordering
1112 1112 # generally doesn't help, so we disable it by default (treating
1113 1113 # bundle.reorder=auto just like bundle.reorder=False).
1114 1114 self._reorder = False
1115 1115
1116 1116 def deltaparent(self, store, rev, p1, p2, prev):
1117 1117 # Narrow ellipses mode.
1118 1118 if util.safehasattr(self, 'full_nodes'):
1119 1119 # TODO: send better deltas when in narrow mode.
1120 1120 #
1121 1121 # changegroup.group() loops over revisions to send,
1122 1122 # including revisions we'll skip. What this means is that
1123 1123 # `prev` will be a potentially useless delta base for all
1124 1124 # ellipsis nodes, as the client likely won't have it. In
1125 1125 # the future we should do bookkeeping about which nodes
1126 1126 # have been sent to the client, and try to be
1127 1127 # significantly smarter about delta bases. This is
1128 1128 # slightly tricky because this same code has to work for
1129 1129 # all revlogs, and we don't have the linkrev/linknode here.
1130 1130 return p1
1131 1131
1132 1132 dp = store.deltaparent(rev)
1133 1133 if dp == nullrev and store.storedeltachains:
1134 1134 # Avoid sending full revisions when delta parent is null. Pick prev
1135 1135 # in that case. It's tempting to pick p1 in this case, as p1 will
1136 1136 # be smaller in the common case. However, computing a delta against
1137 1137 # p1 may require resolving the raw text of p1, which could be
1138 1138 # expensive. The revlog caches should have prev cached, meaning
1139 1139 # less CPU for changegroup generation. There is likely room to add
1140 1140 # a flag and/or config option to control this behavior.
1141 1141 base = prev
1142 1142 elif dp == nullrev:
1143 1143 # revlog is configured to use full snapshot for a reason,
1144 1144 # stick to full snapshot.
1145 1145 base = nullrev
1146 1146 elif dp not in (p1, p2, prev):
1147 1147 # Pick prev when we can't be sure remote has the base revision.
1148 1148 return prev
1149 1149 else:
1150 1150 base = dp
1151 1151 if base != nullrev and not store.candelta(base, rev):
1152 1152 base = nullrev
1153 1153 return base
1154 1154
1155 1155 def builddeltaheader(self, node, p1n, p2n, basenode, linknode, flags):
1156 1156 # Do nothing with flags, it is implicitly 0 in cg1 and cg2
1157 1157 return struct.pack(self.deltaheader, node, p1n, p2n, basenode, linknode)
1158 1158
1159 1159 class cg3packer(cg2packer):
1160 1160 version = '03'
1161 1161 deltaheader = _CHANGEGROUPV3_DELTA_HEADER
1162 1162
1163 1163 def _packmanifests(self, dir, mfnodes, lookuplinknode):
1164 1164 if dir:
1165 1165 yield self.fileheader(dir)
1166 1166
1167 1167 dirlog = self._repo.manifestlog._revlog.dirlog(dir)
1168 1168 for chunk in self.group(mfnodes, dirlog, lookuplinknode,
1169 1169 units=_('manifests')):
1170 1170 yield chunk
1171 1171
1172 1172 def _manifestsdone(self):
1173 1173 return self.close()
1174 1174
1175 1175 def builddeltaheader(self, node, p1n, p2n, basenode, linknode, flags):
1176 1176 return struct.pack(
1177 1177 self.deltaheader, node, p1n, p2n, basenode, linknode, flags)
1178 1178
1179 _packermap = {'01': (cg1packer, cg1unpacker),
1179 def _makecg1packer(repo, filematcher, bundlecaps):
1180 return cg1packer(repo, filematcher, bundlecaps=bundlecaps)
1181
1182 def _makecg2packer(repo, filematcher, bundlecaps):
1183 return cg2packer(repo, filematcher, bundlecaps=bundlecaps)
1184
1185 def _makecg3packer(repo, filematcher, bundlecaps):
1186 return cg3packer(repo, filematcher, bundlecaps=bundlecaps)
1187
1188 _packermap = {'01': (_makecg1packer, cg1unpacker),
1180 1189 # cg2 adds support for exchanging generaldelta
1181 '02': (cg2packer, cg2unpacker),
1190 '02': (_makecg2packer, cg2unpacker),
1182 1191 # cg3 adds support for exchanging revlog flags and treemanifests
1183 '03': (cg3packer, cg3unpacker),
1192 '03': (_makecg3packer, cg3unpacker),
1184 1193 }
1185 1194
1186 1195 def allsupportedversions(repo):
1187 1196 versions = set(_packermap.keys())
1188 1197 if not (repo.ui.configbool('experimental', 'changegroup3') or
1189 1198 repo.ui.configbool('experimental', 'treemanifest') or
1190 1199 'treemanifest' in repo.requirements):
1191 1200 versions.discard('03')
1192 1201 return versions
1193 1202
1194 1203 # Changegroup versions that can be applied to the repo
1195 1204 def supportedincomingversions(repo):
1196 1205 return allsupportedversions(repo)
1197 1206
1198 1207 # Changegroup versions that can be created from the repo
1199 1208 def supportedoutgoingversions(repo):
1200 1209 versions = allsupportedversions(repo)
1201 1210 if 'treemanifest' in repo.requirements:
1202 1211 # Versions 01 and 02 support only flat manifests and it's just too
1203 1212 # expensive to convert between the flat manifest and tree manifest on
1204 1213 # the fly. Since tree manifests are hashed differently, all of history
1205 1214 # would have to be converted. Instead, we simply don't even pretend to
1206 1215 # support versions 01 and 02.
1207 1216 versions.discard('01')
1208 1217 versions.discard('02')
1209 1218 if repository.NARROW_REQUIREMENT in repo.requirements:
1210 1219 # Versions 01 and 02 don't support revlog flags, and we need to
1211 1220 # support that for stripping and unbundling to work.
1212 1221 versions.discard('01')
1213 1222 versions.discard('02')
1214 1223 if LFS_REQUIREMENT in repo.requirements:
1215 1224 # Versions 01 and 02 don't support revlog flags, and we need to
1216 1225 # mark LFS entries with REVIDX_EXTSTORED.
1217 1226 versions.discard('01')
1218 1227 versions.discard('02')
1219 1228
1220 1229 return versions
1221 1230
1222 1231 def localversion(repo):
1223 1232 # Finds the best version to use for bundles that are meant to be used
1224 1233 # locally, such as those from strip and shelve, and temporary bundles.
1225 1234 return max(supportedoutgoingversions(repo))
1226 1235
1227 1236 def safeversion(repo):
1228 1237 # Finds the smallest version that it's safe to assume clients of the repo
1229 1238 # will support. For example, all hg versions that support generaldelta also
1230 1239 # support changegroup 02.
1231 1240 versions = supportedoutgoingversions(repo)
1232 1241 if 'generaldelta' in repo.requirements:
1233 1242 versions.discard('01')
1234 1243 assert versions
1235 1244 return min(versions)
1236 1245
1237 1246 def getbundler(version, repo, bundlecaps=None, filematcher=None):
1238 1247 assert version in supportedoutgoingversions(repo)
1239 1248
1240 1249 if filematcher is None:
1241 1250 filematcher = matchmod.alwaysmatcher(repo.root, '')
1242 1251
1243 1252 if version == '01' and not filematcher.always():
1244 1253 raise error.ProgrammingError('version 01 changegroups do not support '
1245 1254 'sparse file matchers')
1246 1255
1247 1256 # Requested files could include files not in the local store. So
1248 1257 # filter those out.
1249 1258 filematcher = matchmod.intersectmatchers(repo.narrowmatch(),
1250 1259 filematcher)
1251 1260
1252 return _packermap[version][0](repo, filematcher=filematcher,
1253 bundlecaps=bundlecaps)
1261 fn = _packermap[version][0]
1262 return fn(repo, filematcher, bundlecaps)
1254 1263
1255 1264 def getunbundler(version, fh, alg, extras=None):
1256 1265 return _packermap[version][1](fh, alg, extras=extras)
1257 1266
1258 1267 def _changegroupinfo(repo, nodes, source):
1259 1268 if repo.ui.verbose or source == 'bundle':
1260 1269 repo.ui.status(_("%d changesets found\n") % len(nodes))
1261 1270 if repo.ui.debugflag:
1262 1271 repo.ui.debug("list of changesets:\n")
1263 1272 for node in nodes:
1264 1273 repo.ui.debug("%s\n" % hex(node))
1265 1274
1266 1275 def makechangegroup(repo, outgoing, version, source, fastpath=False,
1267 1276 bundlecaps=None):
1268 1277 cgstream = makestream(repo, outgoing, version, source,
1269 1278 fastpath=fastpath, bundlecaps=bundlecaps)
1270 1279 return getunbundler(version, util.chunkbuffer(cgstream), None,
1271 1280 {'clcount': len(outgoing.missing) })
1272 1281
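For orientation, a hedged sketch of how these entry points compose with writechunks() from earlier in this file; `repo` and the `outgoing` discovery object are assumed to already exist, and 'push' is just a source label:

# Sketch only: produce a version '02' changegroup stream for the outgoing
# changesets and write the raw chunks to a file. (A complete bundle file
# would need additional bundle framing, which is handled elsewhere.)
cgstream = makestream(repo, outgoing, '02', 'push')
bundlename = writechunks(repo.ui, cgstream, 'example.hg')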
1273 1282 def makestream(repo, outgoing, version, source, fastpath=False,
1274 1283 bundlecaps=None, filematcher=None):
1275 1284 bundler = getbundler(version, repo, bundlecaps=bundlecaps,
1276 1285 filematcher=filematcher)
1277 1286
1278 1287 repo = repo.unfiltered()
1279 1288 commonrevs = outgoing.common
1280 1289 csets = outgoing.missing
1281 1290 heads = outgoing.missingheads
1282 1291 # We go through the fast path if we get told to, or if all (unfiltered)
1283 1292 # heads have been requested (since we then know that all linkrevs will
1284 1293 # be pulled by the client).
1285 1294 heads.sort()
1286 1295 fastpathlinkrev = fastpath or (
1287 1296 repo.filtername is None and heads == sorted(repo.heads()))
1288 1297
1289 1298 repo.hook('preoutgoing', throw=True, source=source)
1290 1299 _changegroupinfo(repo, csets, source)
1291 1300 return bundler.generate(commonrevs, csets, fastpathlinkrev, source)
1292 1301
1293 1302 def _addchangegroupfiles(repo, source, revmap, trp, expectedfiles, needfiles):
1294 1303 revisions = 0
1295 1304 files = 0
1296 1305 progress = repo.ui.makeprogress(_('files'), unit=_('files'),
1297 1306 total=expectedfiles)
1298 1307 for chunkdata in iter(source.filelogheader, {}):
1299 1308 files += 1
1300 1309 f = chunkdata["filename"]
1301 1310 repo.ui.debug("adding %s revisions\n" % f)
1302 1311 progress.increment()
1303 1312 fl = repo.file(f)
1304 1313 o = len(fl)
1305 1314 try:
1306 1315 deltas = source.deltaiter()
1307 1316 if not fl.addgroup(deltas, revmap, trp):
1308 1317 raise error.Abort(_("received file revlog group is empty"))
1309 1318 except error.CensoredBaseError as e:
1310 1319 raise error.Abort(_("received delta base is censored: %s") % e)
1311 1320 revisions += len(fl) - o
1312 1321 if f in needfiles:
1313 1322 needs = needfiles[f]
1314 1323 for new in pycompat.xrange(o, len(fl)):
1315 1324 n = fl.node(new)
1316 1325 if n in needs:
1317 1326 needs.remove(n)
1318 1327 else:
1319 1328 raise error.Abort(
1320 1329 _("received spurious file revlog entry"))
1321 1330 if not needs:
1322 1331 del needfiles[f]
1323 1332 progress.complete()
1324 1333
1325 1334 for f, needs in needfiles.iteritems():
1326 1335 fl = repo.file(f)
1327 1336 for n in needs:
1328 1337 try:
1329 1338 fl.rev(n)
1330 1339 except error.LookupError:
1331 1340 raise error.Abort(
1332 1341 _('missing file data for %s:%s - run hg verify') %
1333 1342 (f, hex(n)))
1334 1343
1335 1344 return revisions, files
1336 1345
1337 1346 def _packellipsischangegroup(repo, common, match, relevant_nodes,
1338 1347 ellipsisroots, visitnodes, depth, source, version):
1339 1348 if version in ('01', '02'):
1340 1349 raise error.Abort(
1341 1350 'ellipsis nodes require at least cg3 on client and server, '
1342 1351 'but negotiated version %s' % version)
1343 1352 # We wrap cg1packer.revchunk, using a side channel to pass
1344 1353 # relevant_nodes into that area. Then if linknode isn't in the
1345 1354 # set, we know we have an ellipsis node and we should defer
1346 1355 # sending that node's data. We override close() to detect
1347 1356 # pending ellipsis nodes and flush them.
1348 1357 packer = getbundler(version, repo, filematcher=match)
1349 1358 # Give the packer the list of nodes which should not be
1350 1359 # ellipsis nodes. We store this rather than the set of nodes
1351 1360 # that should be an ellipsis because for very large histories
1352 1361 # we expect this to be significantly smaller.
1353 1362 packer.full_nodes = relevant_nodes
1354 1363 # Maps ellipsis revs to their roots at the changelog level.
1355 1364 packer.precomputed_ellipsis = ellipsisroots
1356 1365 # Maps CL revs to per-revlog revisions. Cleared in close() at
1357 1366 # the end of each group.
1358 1367 packer.clrev_to_localrev = {}
1359 1368 packer.next_clrev_to_localrev = {}
1360 1369 # Maps changelog nodes to changelog revs. Filled in once
1361 1370 # during changelog stage and then left unmodified.
1362 1371 packer.clnode_to_rev = {}
1363 1372 packer.changelog_done = False
1364 1373 # If true, informs the packer that it is serving shallow content and might
1365 1374 # need to pack file contents not introduced by the changes being packed.
1366 1375 packer.is_shallow = depth is not None
1367 1376
1368 1377 return packer.generate(common, visitnodes, False, source)
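Since getbundler() now resolves packers through the _packermap factories, an extension can substitute its own packer by replacing a factory entry. A minimal hypothetical sketch (the mypacker class and _makemypacker helper are illustrative, not part of this change):

from mercurial import changegroup

class mypacker(changegroup.cg2packer):
    """Example subclass; override methods such as fileheader() here."""

def _makemypacker(repo, filematcher, bundlecaps):
    return mypacker(repo, filematcher, bundlecaps=bundlecaps)

# Swap in the custom version '02' packer factory, keeping the stock unpacker.
changegroup._packermap['02'] = (_makemypacker,
                                changegroup._packermap['02'][1])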