##// END OF EJS Templates
changegroup: capture revision delta in a data structure...
Gregory Szorc -
r38929:23d582ca default
parent child Browse files
Show More
@@ -1,1328 +1,1368 b''
1 1 # changegroup.py - Mercurial changegroup manipulation functions
2 2 #
3 3 # Copyright 2006 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import os
11 11 import struct
12 12 import weakref
13 13
14 14 from .i18n import _
15 15 from .node import (
16 16 hex,
17 17 nullid,
18 18 nullrev,
19 19 short,
20 20 )
21 21
22 from .thirdparty import (
23 attr,
24 )
25
22 26 from . import (
23 27 dagutil,
24 28 error,
25 29 manifest,
26 30 match as matchmod,
27 31 mdiff,
28 32 phases,
29 33 pycompat,
30 34 repository,
31 35 revlog,
32 36 util,
33 37 )
34 38
35 39 from .utils import (
36 40 stringutil,
37 41 )
38 42
# struct format strings for one delta header per changegroup wire version.
# v1: node, p1, p2, linknode (see cg1unpacker._deltaheader).
# v2 adds an explicit delta-base node (see cg2unpacker._deltaheader).
# v3 additionally carries a 16-bit flags field; the explicit ">" makes the
# byte order unambiguous once a non-string field is present.
_CHANGEGROUPV1_DELTA_HEADER = "20s20s20s20s"
_CHANGEGROUPV2_DELTA_HEADER = "20s20s20s20s20s"
_CHANGEGROUPV3_DELTA_HEADER = ">20s20s20s20s20sH"

# repository requirement string advertised when LFS is in use
LFS_REQUIREMENT = 'lfs'

# module-local alias; avoids repeated attribute lookups in the read loops
readexactly = util.readexactly
46 50
def getchunk(stream):
    """return the next chunk from stream as a string"""
    header = readexactly(stream, 4)
    length = struct.unpack(">l", header)[0]
    # Lengths above 4 carry a payload; the 4-byte header is included in
    # the declared length, so the payload is length - 4 bytes.
    if length > 4:
        return readexactly(stream, length - 4)
    # A length of exactly 0 is the end-of-group marker; any other value
    # at or below 4 (including negatives) cannot frame a valid chunk.
    if length:
        raise error.Abort(_("invalid chunk length %d") % length)
    return ""
56 60
def chunkheader(length):
    """return a changegroup chunk header (string)"""
    # The framing length is big-endian and counts the 4 header bytes too.
    return struct.pack(">l", 4 + length)
60 64
def closechunk():
    """return a changegroup chunk header (string) for a zero-length chunk"""
    # A zero framing length is the sentinel that terminates a chunk group.
    return struct.pack(">l", 0)
64 68
def writechunks(ui, chunks, filename, vfs=None):
    """Write chunks to a file and return its filename.

    The stream is assumed to be a bundle file.
    Existing files will not be overwritten.
    If no filename is specified, a temporary file is created.
    """
    out = None
    # Path to remove on failure; cleared once all chunks are written so a
    # partially-written file never survives an exception.
    pending = None
    try:
        if not filename:
            fd, filename = pycompat.mkstemp(prefix="hg-bundle-", suffix=".hg")
            out = os.fdopen(fd, r"wb")
        elif vfs:
            out = vfs.open(filename, "wb")
        else:
            # Increase default buffer size because default is usually
            # small (4k is common on Linux).
            out = open(filename, "wb", 131072)
        pending = filename
        for chunk in chunks:
            out.write(chunk)
        pending = None
        return filename
    finally:
        if out is not None:
            out.close()
        if pending is not None:
            if filename and vfs:
                vfs.unlink(pending)
            else:
                os.unlink(pending)
98 102
class cg1unpacker(object):
    """Unpacker for cg1 changegroup streams.

    A changegroup unpacker handles the framing of the revision data in
    the wire format. Most consumers will want to use the apply()
    method to add the changes from the changegroup to a repository.

    If you're forwarding a changegroup unmodified to another consumer,
    use getchunks(), which returns an iterator of changegroup
    chunks. This is mostly useful for cases where you need to know the
    data stream has ended by observing the end of the changegroup.

    deltachunk() is useful only if you're applying delta data. Most
    consumers should prefer apply() instead.

    A few other public methods exist. Those are used only for
    bundlerepo and some debug commands - their use is discouraged.
    """
    # Wire format of one delta header and its precomputed byte size.
    deltaheader = _CHANGEGROUPV1_DELTA_HEADER
    deltaheadersize = struct.calcsize(deltaheader)
    version = '01'
    _grouplistcount = 1 # One list of files after the manifests

    def __init__(self, fh, alg, extras=None):
        # Normalize "no compression" to the explicit 'UN' engine name.
        if alg is None:
            alg = 'UN'
        if alg not in util.compengines.supportedbundletypes:
            raise error.Abort(_('unknown stream compression type: %s')
                              % alg)
        if alg == 'BZ':
            alg = '_truncatedBZ'

        compengine = util.compengines.forbundletype(alg)
        self._stream = compengine.decompressorreader(fh)
        self._type = alg
        self.extras = extras or {}
        # Optional progress callback invoked once per parsed chunk.
        self.callback = None

    # These methods (compressed, read, seek, tell) all appear to only
    # be used by bundlerepo, but it's a little hard to tell.
    def compressed(self):
        """True if the underlying stream is compressed."""
        return self._type is not None and self._type != 'UN'
    def read(self, l):
        return self._stream.read(l)
    def seek(self, pos):
        return self._stream.seek(pos)
    def tell(self):
        return self._stream.tell()
    def close(self):
        return self._stream.close()

    def _chunklength(self):
        """Read a chunk framing header and return the payload length.

        Returns 0 at an end-of-group marker; fires the progress callback
        for every non-empty chunk.
        """
        d = readexactly(self._stream, 4)
        l = struct.unpack(">l", d)[0]
        if l <= 4:
            if l:
                raise error.Abort(_("invalid chunk length %d") % l)
            return 0
        if self.callback:
            self.callback()
        # The declared length includes the 4 header bytes themselves.
        return l - 4

    def changelogheader(self):
        """v10 does not have a changelog header chunk"""
        return {}

    def manifestheader(self):
        """v10 does not have a manifest header chunk"""
        return {}

    def filelogheader(self):
        """return the header of the filelogs chunk, v10 only has the filename"""
        l = self._chunklength()
        if not l:
            return {}
        fname = readexactly(self._stream, l)
        return {'filename': fname}

    def _deltaheader(self, headertuple, prevnode):
        """Interpret an unpacked v1 delta header.

        v1 has no explicit delta base: deltas chain against the previous
        node in the stream, or against p1 for the first entry.
        """
        node, p1, p2, cs = headertuple
        if prevnode is None:
            deltabase = p1
        else:
            deltabase = prevnode
        flags = 0
        return node, p1, p2, deltabase, cs, flags

    def deltachunk(self, prevnode):
        """Read one delta entry; returns {} at the end of the group."""
        l = self._chunklength()
        if not l:
            return {}
        headerdata = readexactly(self._stream, self.deltaheadersize)
        header = struct.unpack(self.deltaheader, headerdata)
        delta = readexactly(self._stream, l - self.deltaheadersize)
        node, p1, p2, deltabase, cs, flags = self._deltaheader(header, prevnode)
        return (node, p1, p2, cs, deltabase, delta, flags)

    def getchunks(self):
        """returns all the chunks contained in the bundle

        Used when you need to forward the binary stream to a file or another
        network API. To do so, it parses the changegroup data, otherwise it
        would block in case of sshrepo because it doesn't know the end of the
        stream.
        """
        # For changegroup 1 and 2, we expect 3 parts: changelog, manifestlog,
        # and a list of filelogs. For changegroup 3, we expect 4 parts:
        # changelog, manifestlog, a list of tree manifestlogs, and a list of
        # filelogs.
        #
        # Changelog and manifestlog parts are terminated with empty chunks. The
        # tree and file parts are a list of entry sections. Each entry section
        # is a series of chunks terminating in an empty chunk. The list of these
        # entry sections is terminated in yet another empty chunk, so we know
        # we've reached the end of the tree/file list when we reach an empty
        # chunk that was proceeded by no non-empty chunks.

        parts = 0
        while parts < 2 + self._grouplistcount:
            noentries = True
            while True:
                chunk = getchunk(self)
                if not chunk:
                    # The first two empty chunks represent the end of the
                    # changelog and the manifestlog portions. The remaining
                    # empty chunks represent either A) the end of individual
                    # tree or file entries in the file list, or B) the end of
                    # the entire list. It's the end of the entire list if there
                    # were no entries (i.e. noentries is True).
                    if parts < 2:
                        parts += 1
                    elif noentries:
                        parts += 1
                    break
                noentries = False
                yield chunkheader(len(chunk))
                pos = 0
                # Re-emit the payload in at most 1MB slices.
                while pos < len(chunk):
                    next = pos + 2**20
                    yield chunk[pos:next]
                    pos = next
        yield closechunk()

    def _unpackmanifests(self, repo, revmap, trp, prog):
        """Apply the manifest portion of the stream to the repository."""
        self.callback = prog.increment
        # no need to check for empty manifest group here:
        # if the result of the merge of 1 and 2 is the same in 3 and 4,
        # no new manifest will be created and the manifest group will
        # be empty during the pull
        self.manifestheader()
        deltas = self.deltaiter()
        repo.manifestlog.addgroup(deltas, revmap, trp)
        prog.complete()
        self.callback = None

    def apply(self, repo, tr, srctype, url, targetphase=phases.draft,
              expectedtotal=None):
        """Add the changegroup returned by source.read() to this repo.
        srctype is a string like 'push', 'pull', or 'unbundle'. url is
        the URL of the repo where this changegroup is coming from.

        Return an integer summarizing the change to this repo:
        - nothing changed or no source: 0
        - more heads than before: 1+added heads (2..n)
        - fewer heads than before: -1-removed heads (-2..-n)
        - number of heads stays the same: 1
        """
        repo = repo.unfiltered()
        def csmap(x):
            repo.ui.debug("add changeset %s\n" % short(x))
            return len(cl)

        def revmap(x):
            return cl.rev(x)

        changesets = files = revisions = 0

        try:
            # The transaction may already carry source information. In this
            # case we use the top level data. We overwrite the argument
            # because we need to use the top level value (if they exist)
            # in this function.
            srctype = tr.hookargs.setdefault('source', srctype)
            url = tr.hookargs.setdefault('url', url)
            repo.hook('prechangegroup',
                      throw=True, **pycompat.strkwargs(tr.hookargs))

            # write changelog data to temp files so concurrent readers
            # will not see an inconsistent view
            cl = repo.changelog
            cl.delayupdate(tr)
            oldheads = set(cl.heads())

            trp = weakref.proxy(tr)
            # pull off the changeset group
            repo.ui.status(_("adding changesets\n"))
            clstart = len(cl)
            progress = repo.ui.makeprogress(_('changesets'), unit=_('chunks'),
                                            total=expectedtotal)
            self.callback = progress.increment

            efiles = set()
            def onchangelog(cl, node):
                efiles.update(cl.readfiles(node))

            self.changelogheader()
            deltas = self.deltaiter()
            cgnodes = cl.addgroup(deltas, csmap, trp, addrevisioncb=onchangelog)
            # efiles is repurposed from a set to its cardinality: the number
            # of distinct files touched by the incoming changesets.
            efiles = len(efiles)

            if not cgnodes:
                repo.ui.develwarn('applied empty changegroup',
                                  config='warn-empty-changegroup')
            clend = len(cl)
            changesets = clend - clstart
            progress.complete()
            self.callback = None

            # pull off the manifest group
            repo.ui.status(_("adding manifests\n"))
            # We know that we'll never have more manifests than we had
            # changesets.
            progress = repo.ui.makeprogress(_('manifests'), unit=_('chunks'),
                                            total=changesets)
            self._unpackmanifests(repo, revmap, trp, progress)

            needfiles = {}
            if repo.ui.configbool('server', 'validate'):
                cl = repo.changelog
                ml = repo.manifestlog
                # validate incoming csets have their manifests
                for cset in pycompat.xrange(clstart, clend):
                    mfnode = cl.changelogrevision(cset).manifest
                    mfest = ml[mfnode].readdelta()
                    # store file cgnodes we must see
                    for f, n in mfest.iteritems():
                        needfiles.setdefault(f, set()).add(n)

            # process the files
            repo.ui.status(_("adding file changes\n"))
            newrevs, newfiles = _addchangegroupfiles(
                repo, self, revmap, trp, efiles, needfiles)
            revisions += newrevs
            files += newfiles

            deltaheads = 0
            if oldheads:
                heads = cl.heads()
                deltaheads = len(heads) - len(oldheads)
                for h in heads:
                    # Heads that close a branch don't count toward the delta.
                    if h not in oldheads and repo[h].closesbranch():
                        deltaheads -= 1
            htext = ""
            if deltaheads:
                htext = _(" (%+d heads)") % deltaheads

            repo.ui.status(_("added %d changesets"
                             " with %d changes to %d files%s\n")
                           % (changesets, revisions, files, htext))
            repo.invalidatevolatilesets()

            if changesets > 0:
                if 'node' not in tr.hookargs:
                    tr.hookargs['node'] = hex(cl.node(clstart))
                    tr.hookargs['node_last'] = hex(cl.node(clend - 1))
                    hookargs = dict(tr.hookargs)
                else:
                    hookargs = dict(tr.hookargs)
                    hookargs['node'] = hex(cl.node(clstart))
                    hookargs['node_last'] = hex(cl.node(clend - 1))
                repo.hook('pretxnchangegroup',
                          throw=True, **pycompat.strkwargs(hookargs))

            added = [cl.node(r) for r in pycompat.xrange(clstart, clend)]
            phaseall = None
            if srctype in ('push', 'serve'):
                # Old servers can not push the boundary themselves.
                # New servers won't push the boundary if changeset already
                # exists locally as secret
                #
                # We should not use added here but the list of all change in
                # the bundle
                if repo.publishing():
                    targetphase = phaseall = phases.public
                else:
                    # closer target phase computation

                    # Those changesets have been pushed from the
                    # outside, their phases are going to be pushed
                    # alongside. Therefor `targetphase` is
                    # ignored.
                    targetphase = phaseall = phases.draft
            if added:
                phases.registernew(repo, tr, targetphase, added)
            if phaseall is not None:
                phases.advanceboundary(repo, tr, phaseall, cgnodes)

            if changesets > 0:

                def runhooks():
                    # These hooks run when the lock releases, not when the
                    # transaction closes. So it's possible for the changelog
                    # to have changed since we last saw it.
                    if clstart >= len(repo):
                        return

                    repo.hook("changegroup", **pycompat.strkwargs(hookargs))

                    for n in added:
                        args = hookargs.copy()
                        args['node'] = hex(n)
                        del args['node_last']
                        repo.hook("incoming", **pycompat.strkwargs(args))

                    newheads = [h for h in repo.heads()
                                if h not in oldheads]
                    repo.ui.log("incoming",
                                "%d incoming changes - new heads: %s\n",
                                len(added),
                                ', '.join([hex(c[:6]) for c in newheads]))

                tr.addpostclose('changegroup-runhooks-%020i' % clstart,
                                lambda tr: repo._afterlock(runhooks))
        finally:
            repo.ui.flush()
        # never return 0 here:
        if deltaheads < 0:
            ret = deltaheads - 1
        else:
            ret = deltaheads + 1
        return ret

    def deltaiter(self):
        """
        returns an iterator of the deltas in this changegroup

        Useful for passing to the underlying storage system to be stored.
        """
        chain = None
        for chunkdata in iter(lambda: self.deltachunk(chain), {}):
            # Chunkdata: (node, p1, p2, cs, deltabase, delta, flags)
            yield chunkdata
            # The next chunk may delta against the node just yielded.
            chain = chunkdata[0]
441 445
class cg2unpacker(cg1unpacker):
    """Unpacker for cg2 streams.

    cg2 streams add support for generaldelta, so the delta header
    format is slightly different. All other features about the data
    remain the same.
    """
    deltaheader = _CHANGEGROUPV2_DELTA_HEADER
    deltaheadersize = struct.calcsize(deltaheader)
    version = '02'

    def _deltaheader(self, headertuple, prevnode):
        """Interpret a v2 delta header: the base node is explicit in the
        stream, so prevnode is unused."""
        node, p1, p2, deltabase, cs = headertuple
        flags = 0
        return node, p1, p2, deltabase, cs, flags
457 461
class cg3unpacker(cg2unpacker):
    """Unpacker for cg3 streams.

    cg3 streams add support for exchanging treemanifests and revlog
    flags. It adds the revlog flags to the delta header and an empty chunk
    separating manifests and files.
    """
    deltaheader = _CHANGEGROUPV3_DELTA_HEADER
    deltaheadersize = struct.calcsize(deltaheader)
    version = '03'
    _grouplistcount = 2 # One list of manifests and one list of files

    def _deltaheader(self, headertuple, prevnode):
        """Interpret a v3 delta header: base node and revlog flags are both
        carried explicitly in the stream."""
        node, p1, p2, deltabase, cs, flags = headertuple
        return node, p1, p2, deltabase, cs, flags

    def _unpackmanifests(self, repo, revmap, trp, prog):
        """Apply root manifests, then any tree (directory) manifest groups."""
        super(cg3unpacker, self)._unpackmanifests(repo, revmap, trp, prog)
        for chunkdata in iter(self.filelogheader, {}):
            # If we get here, there are directory manifests in the changegroup
            d = chunkdata["filename"]
            repo.ui.debug("adding %s revisions\n" % d)
            dirlog = repo.manifestlog._revlog.dirlog(d)
            deltas = self.deltaiter()
            if not dirlog.addgroup(deltas, revmap, trp):
                raise error.Abort(_("received dir revlog group is empty"))
484 488
class headerlessfixup(object):
    """File-like wrapper that re-serves an already-consumed prefix.

    ``h`` is a chunk of bytes that was read from ``fh`` before this object
    was constructed; reads drain that prefix first and then fall through
    to the underlying stream.
    """
    def __init__(self, fh, h):
        self._h = h
        self._fh = fh
    def read(self, n):
        # Fast path: the buffered prefix is exhausted.
        if not self._h:
            return readexactly(self._fh, n)
        d, self._h = self._h[:n], self._h[n:]
        # Prefix shorter than the request: top up from the real stream.
        if len(d) < n:
            d += readexactly(self._fh, n - len(d))
        return d
496 500
# NOTE: attr here is the bundled mercurial.thirdparty attrs package.
@attr.s(slots=True, frozen=True)
class revisiondelta(object):
    """Describes a delta entry in a changegroup.

    Captured data is sufficient to serialize the delta into multiple
    formats.
    """
    # 20 byte node of this revision.
    node = attr.ib()
    # 20 byte nodes of parent revisions.
    p1node = attr.ib()
    p2node = attr.ib()
    # 20 byte node of node this delta is against.
    basenode = attr.ib()
    # 20 byte node of changeset revision this delta is associated with.
    linknode = attr.ib()
    # 2 bytes of flags to apply to revision data.
    flags = attr.ib()
    # Iterable of chunks holding raw delta data.
    deltachunks = attr.ib()
497 521
498 522 class cg1packer(object):
499 523 deltaheader = _CHANGEGROUPV1_DELTA_HEADER
500 524 version = '01'
    def __init__(self, repo, filematcher, bundlecaps=None):
        """Given a source repo, construct a bundler.

        filematcher is a matcher that matches on files to include in the
        changegroup. Used to facilitate sparse changegroups.

        bundlecaps is optional and can be used to specify the set of
        capabilities which can be used to build the bundle. While bundlecaps is
        unused in core Mercurial, extensions rely on this feature to communicate
        capabilities to customize the changegroup packer.
        """
        assert filematcher
        self._filematcher = filematcher

        # Set of capabilities we can use to build the bundle.
        if bundlecaps is None:
            bundlecaps = set()
        self._bundlecaps = bundlecaps
        # experimental config: bundle.reorder
        # 'auto' (stored as None) defers the decision to _sortgroup();
        # any other value is parsed as a boolean forcing reorder on/off.
        reorder = repo.ui.config('bundle', 'reorder')
        if reorder == 'auto':
            reorder = None
        else:
            reorder = stringutil.parsebool(reorder)
        self._repo = repo
        self._reorder = reorder
        # Per-part size notes are only wanted in verbose (non-debug) mode.
        if self._repo.ui.verbose and not self._repo.ui.debugflag:
            self._verbosenote = self._repo.ui.note
        else:
            self._verbosenote = lambda s: None
531 555
    def close(self):
        """Emit the end-of-group marker chunk.

        In ellipsis serving mode (attributes set by narrow-clone code),
        this also rotates the clrev->localrev maps so the next group uses
        the mapping built while this one was being generated.
        """
        # Ellipses serving mode.
        getattr(self, 'clrev_to_localrev', {}).clear()
        if getattr(self, 'next_clrev_to_localrev', {}):
            self.clrev_to_localrev = self.next_clrev_to_localrev
            del self.next_clrev_to_localrev
        self.changelog_done = True

        return closechunk()
541 565
    def fileheader(self, fname):
        """Return a framed chunk carrying a filename (starts a filelog part)."""
        return chunkheader(len(fname)) + fname
544 568
    # Extracted both for clarity and for overriding in extensions.
    def _sortgroup(self, store, nodelist, lookup):
        """Sort nodes for change group and turn them into revnums."""
        # Ellipses serving mode.
        #
        # In a perfect world, we'd generate better ellipsis-ified graphs
        # for non-changelog revlogs. In practice, we haven't started doing
        # that yet, so the resulting DAGs for the manifestlog and filelogs
        # are actually full of bogus parentage on all the ellipsis
        # nodes. This has the side effect that, while the contents are
        # correct, the individual DAGs might be completely out of whack in
        # a case like 882681bc3166 and its ancestors (back about 10
        # revisions or so) in the main hg repo.
        #
        # The one invariant we *know* holds is that the new (potentially
        # bogus) DAG shape will be valid if we order the nodes in the
        # order that they're introduced in dramatis personae by the
        # changelog, so what we do is we sort the non-changelog histories
        # by the order in which they are used by the changelog.
        if util.safehasattr(self, 'full_nodes') and self.clnode_to_rev:
            key = lambda n: self.clnode_to_rev[lookup(n)]
            return [store.rev(n) for n in sorted(nodelist, key=key)]

        # for generaldelta revlogs, we linearize the revs; this will both be
        # much quicker and generate a much smaller bundle
        if (store._generaldelta and self._reorder is None) or self._reorder:
            dag = dagutil.revlogdag(store)
            return dag.linearize(set(store.rev(n) for n in nodelist))
        else:
            return sorted([store.rev(n) for n in nodelist])
575 599
    def group(self, nodelist, store, lookup, units=None):
        """Calculate a delta group, yielding a sequence of changegroup chunks
        (strings).

        Given a list of changeset revs, return a set of deltas and
        metadata corresponding to nodes. The first delta is
        first parent(nodelist[0]) -> nodelist[0], the receiver is
        guaranteed to have this parent as it has all history before
        these changesets. In the case firstparent is nullrev the
        changegroup starts with a full revision.

        If units is not None, progress detail will be generated, units specifies
        the type of revlog that is touched (changelog, manifest, etc.).
        """
        # if we don't have any revisions touched by these changesets, bail
        if len(nodelist) == 0:
            yield self.close()
            return

        revs = self._sortgroup(store, nodelist, lookup)

        # add the parent of the first rev
        p = store.parentrevs(revs[0])[0]
        revs.insert(0, p)

        # build deltas
        progress = None
        if units is not None:
            progress = self._repo.ui.makeprogress(_('bundling'), unit=units,
                                                  total=(len(revs) - 1))
        # Each revision is delta'd against its predecessor in the sorted
        # order; per-revision serialization is delegated to revchunk().
        for r in pycompat.xrange(len(revs) - 1):
            if progress:
                progress.update(r + 1)
            prev, curr = revs[r], revs[r + 1]
            linknode = lookup(store.node(curr))
            for c in self.revchunk(store, curr, prev, linknode):
                yield c

        if progress:
            progress.complete()
        yield self.close()
617 641
    # filter any nodes that claim to be part of the known set
    def prune(self, store, missing, commonrevs):
        """Return the subset of missing nodes whose linkrev is not common."""
        # TODO this violates storage abstraction for manifests.
        if isinstance(store, manifest.manifestrevlog):
            # Skip directory manifests the file matcher would never visit.
            if not self._filematcher.visitdir(store._dir[:-1] or '.'):
                return []

        rr, rl = store.rev, store.linkrev
        return [n for n in missing if rl(rr(n)) not in commonrevs]
627 651
    def _packmanifests(self, dir, mfnodes, lookuplinknode):
        """Pack flat manifests into a changegroup stream."""
        # cg1 has no tree manifests, so only the root directory is legal here.
        assert not dir
        for chunk in self.group(mfnodes, self._repo.manifestlog._revlog,
                                lookuplinknode, units=_('manifests')):
            yield chunk
634 658
    def _manifestsdone(self):
        """Return data terminating the manifest section; empty for this
        version (subclasses may override)."""
        return ''
637 661
    def generate(self, commonrevs, clnodes, fastpathlinkrev, source):
        '''yield a sequence of changegroup chunks (strings)'''
        repo = self._repo
        cl = repo.changelog

        clrevorder = {}
        mfs = {} # needed manifests
        fnodes = {} # needed file nodes
        mfl = repo.manifestlog
        # TODO violates storage abstraction.
        mfrevlog = mfl._revlog
        changedfiles = set()

        # 'full_nodes' is only set by the narrow/ellipsis serving code.
        ellipsesmode = util.safehasattr(self, 'full_nodes')

        # Callback for the changelog, used to collect changed files and
        # manifest nodes.
        # Returns the linkrev node (identity in the changelog case).
        def lookupcl(x):
            c = cl.read(x)
            clrevorder[x] = len(clrevorder)

            if ellipsesmode:
                # Only update mfs if x is going to be sent. Otherwise we
                # end up with bogus linkrevs specified for manifests and
                # we skip some manifest nodes that we should otherwise
                # have sent.
                if (x in self.full_nodes
                        or cl.rev(x) in self.precomputed_ellipsis):
                    n = c[0]
                    # Record the first changeset introducing this manifest
                    # version.
                    mfs.setdefault(n, x)
                    # Set this narrow-specific dict so we have the lowest
                    # manifest revnum to look up for this cl revnum. (Part of
                    # mapping changelog ellipsis parents to manifest ellipsis
                    # parents)
                    self.next_clrev_to_localrev.setdefault(cl.rev(x),
                                                           mfrevlog.rev(n))
                # We can't trust the changed files list in the changeset if the
                # client requested a shallow clone.
                if self.is_shallow:
                    changedfiles.update(mfl[c[0]].read().keys())
                else:
                    changedfiles.update(c[3])
            else:

                n = c[0]
                # record the first changeset introducing this manifest version
                mfs.setdefault(n, x)
                # Record a complete list of potentially-changed files in
                # this manifest.
                changedfiles.update(c[3])

            return x

        self._verbosenote(_('uncompressed size of bundle content:\n'))
        size = 0
        for chunk in self.group(clnodes, cl, lookupcl, units=_('changesets')):
            size += len(chunk)
            yield chunk
        self._verbosenote(_('%8.i (changelog)\n') % size)

        # We need to make sure that the linkrev in the changegroup refers to
        # the first changeset that introduced the manifest or file revision.
        # The fastpath is usually safer than the slowpath, because the filelogs
        # are walked in revlog order.
        #
        # When taking the slowpath with reorder=None and the manifest revlog
        # uses generaldelta, the manifest may be walked in the "wrong" order.
        # Without 'clrevorder', we would get an incorrect linkrev (see fix in
        # cc0ff93d0c0c).
        #
        # When taking the fastpath, we are only vulnerable to reordering
        # of the changelog itself. The changelog never uses generaldelta, so
        # it is only reordered when reorder=True. To handle this case, we
        # simply take the slowpath, which already has the 'clrevorder' logic.
        # This was also fixed in cc0ff93d0c0c.
        fastpathlinkrev = fastpathlinkrev and not self._reorder
        # Treemanifests don't work correctly with fastpathlinkrev
        # either, because we don't discover which directory nodes to
        # send along with files. This could probably be fixed.
        fastpathlinkrev = fastpathlinkrev and (
            'treemanifest' not in repo.requirements)

        for chunk in self.generatemanifests(commonrevs, clrevorder,
                                            fastpathlinkrev, mfs, fnodes,
                                            source):
            yield chunk

        if ellipsesmode:
            mfdicts = None
            if self.is_shallow:
                mfdicts = [(self._repo.manifestlog[n].read(), lr)
                           for (n, lr) in mfs.iteritems()]

        mfs.clear()
        clrevs = set(cl.rev(x) for x in clnodes)

        if not fastpathlinkrev:
            def linknodes(unused, fname):
                return fnodes.get(fname, {})
        else:
            cln = cl.node
            def linknodes(filerevlog, fname):
                llr = filerevlog.linkrev
                fln = filerevlog.node
                revs = ((r, llr(r)) for r in filerevlog)
                return dict((fln(r), cln(lr)) for r, lr in revs if lr in clrevs)

        if ellipsesmode:
            # We need to pass the mfdicts variable down into
            # generatefiles(), but more than one command might have
            # wrapped generatefiles so we can't modify the function
            # signature. Instead, we pass the data to ourselves using an
            # instance attribute. I'm sorry.
            self._mfdicts = mfdicts

        for chunk in self.generatefiles(changedfiles, linknodes, commonrevs,
                                        source):
            yield chunk

        yield self.close()

        if clnodes:
            repo.hook('outgoing', node=hex(clnodes[0]), source=source)
763 787
    def generatemanifests(self, commonrevs, clrevorder, fastpathlinkrev, mfs,
                          fnodes, source):
        """Returns an iterator of changegroup chunks containing manifests.

        `source` is unused here, but is used by extensions like remotefilelog to
        change what is sent based in pulls vs pushes, etc.
        """
        repo = self._repo
        mfl = repo.manifestlog
        dirlog = mfl._revlog.dirlog
        # Worklist of directory -> {manifest node -> changelog node};
        # '' is the root manifest, subdirectories are added as discovered.
        tmfnodes = {'': mfs}

        # Callback for the manifest, used to collect linkrevs for filelog
        # revisions.
        # Returns the linkrev node (collected in lookupcl).
        def makelookupmflinknode(dir, nodes):
            if fastpathlinkrev:
                assert not dir
                return mfs.__getitem__

            def lookupmflinknode(x):
                """Callback for looking up the linknode for manifests.

                Returns the linkrev node for the specified manifest.

                SIDE EFFECT:

                1) fclnodes gets populated with the list of relevant
                   file nodes if we're not using fastpathlinkrev
                2) When treemanifests are in use, collects treemanifest nodes
                   to send

                Note that this means manifests must be completely sent to
                the client before you can trust the list of files and
                treemanifests to send.
                """
                clnode = nodes[x]
                mdata = mfl.get(dir, x).readfast(shallow=True)
                for p, n, fl in mdata.iterentries():
                    if fl == 't': # subdirectory manifest
                        subdir = dir + p + '/'
                        tmfclnodes = tmfnodes.setdefault(subdir, {})
                        tmfclnode = tmfclnodes.setdefault(n, clnode)
                        # Keep the earliest introducing changeset as linknode.
                        if clrevorder[clnode] < clrevorder[tmfclnode]:
                            tmfclnodes[n] = clnode
                    else:
                        f = dir + p
                        fclnodes = fnodes.setdefault(f, {})
                        fclnode = fclnodes.setdefault(n, clnode)
                        if clrevorder[clnode] < clrevorder[fclnode]:
                            fclnodes[n] = clnode
                return clnode
            return lookupmflinknode

        size = 0
        while tmfnodes:
            dir, nodes = tmfnodes.popitem()
            prunednodes = self.prune(dirlog(dir), nodes, commonrevs)
            # The root manifest group is always emitted, even when empty.
            if not dir or prunednodes:
                for x in self._packmanifests(dir, prunednodes,
                                             makelookupmflinknode(dir, nodes)):
                    size += len(x)
                    yield x
        self._verbosenote(_('%8.i (manifests)\n') % size)
        yield self._manifestsdone()
829 853
    # The 'source' parameter is useful for extensions
    def generatefiles(self, changedfiles, linknodes, commonrevs, source):
        """Filter changedfiles through the matcher, fix up the linknodes
        callback for shallow clones, and delegate to _generatefiles()."""
        changedfiles = list(filter(self._filematcher, changedfiles))

        if getattr(self, 'is_shallow', False):
            # See comment in generate() for why this sadness is a thing.
            mfdicts = self._mfdicts
            del self._mfdicts
            # In a shallow clone, the linknodes callback needs to also include
            # those file nodes that are in the manifests we sent but weren't
            # introduced by those manifests.
            commonctxs = [self._repo[c] for c in commonrevs]
            oldlinknodes = linknodes
            clrev = self._repo.changelog.rev

            # Defining this function has a side-effect of overriding the
            # function of the same name that was passed in as an argument.
            # TODO have caller pass in appropriate function.
            def linknodes(flog, fname):
                for c in commonctxs:
                    try:
                        fnode = c.filenode(fname)
                        self.clrev_to_localrev[c.rev()] = flog.rev(fnode)
                    except error.ManifestLookupError:
                        pass
                links = oldlinknodes(flog, fname)
                if len(links) != len(mfdicts):
                    for mf, lr in mfdicts:
                        fnode = mf.get(fname, None)
                        if fnode in links:
                            # Prefer the earliest linkrev for this file node.
                            links[fnode] = min(links[fnode], lr, key=clrev)
                        elif fnode:
                            links[fnode] = lr
                return links

        return self._generatefiles(changedfiles, linknodes, commonrevs, source)
866 890
867 891 def _generatefiles(self, changedfiles, linknodes, commonrevs, source):
868 892 repo = self._repo
869 893 progress = repo.ui.makeprogress(_('bundling'), unit=_('files'),
870 894 total=len(changedfiles))
871 895 for i, fname in enumerate(sorted(changedfiles)):
872 896 filerevlog = repo.file(fname)
873 897 if not filerevlog:
874 898 raise error.Abort(_("empty or missing file data for %s") %
875 899 fname)
876 900
877 901 linkrevnodes = linknodes(filerevlog, fname)
878 902 # Lookup for filenodes, we collected the linkrev nodes above in the
879 903 # fastpath case and with lookupmf in the slowpath case.
880 904 def lookupfilelog(x):
881 905 return linkrevnodes[x]
882 906
883 907 filenodes = self.prune(filerevlog, linkrevnodes, commonrevs)
884 908 if filenodes:
885 909 progress.update(i + 1, item=fname)
886 910 h = self.fileheader(fname)
887 911 size = len(h)
888 912 yield h
889 913 for chunk in self.group(filenodes, filerevlog, lookupfilelog):
890 914 size += len(chunk)
891 915 yield chunk
892 916 self._verbosenote(_('%8.i %s\n') % (size, fname))
893 917 progress.complete()
894 918
895 919 def deltaparent(self, store, rev, p1, p2, prev):
896 920 if not store.candelta(prev, rev):
897 921 raise error.ProgrammingError('cg1 should not be used in this case')
898 922 return prev
899 923
    def revchunk(self, store, rev, prev, linknode):
        """Yield the serialized wire chunks for one revision of ``store``.

        Obtains a ``revisiondelta`` via the normal or narrow strategy
        and emits its encoding: a length header, the delta header, then
        the delta data chunks. Yields nothing when the strategy returns
        no delta (a skipped revision in narrow mode).
        """
        # Narrow-mode packers are given a ``full_nodes`` attribute by
        # _packellipsischangegroup; its presence selects the strategy.
        if util.safehasattr(self, 'full_nodes'):
            fn = self._revisiondeltanarrow
        else:
            fn = self._revisiondeltanormal

        delta = fn(store, rev, prev, linknode)
        if not delta:
            return

        meta = self.builddeltaheader(delta.node, delta.p1node, delta.p2node,
                                     delta.basenode, delta.linknode,
                                     delta.flags)
        # Chunk length covers the header plus all delta payload bytes.
        l = len(meta) + sum(len(x) for x in delta.deltachunks)

        yield chunkheader(l)
        yield meta
        for x in delta.deltachunks:
            yield x
    def _revisiondeltanormal(self, store, rev, prev, linknode):
        """Build a ``revisiondelta`` for ``rev`` against its delta parent.

        Censored revisions (of either the base or the revision itself)
        are sent as full texts with a synthesized diff header; otherwise
        a delta against the base chosen by ``deltaparent`` is produced,
        or a full text when the base is the null revision.
        """
        node = store.node(rev)
        p1, p2 = store.parentrevs(rev)
        base = self.deltaparent(store, rev, p1, p2, prev)

        prefix = ''
        if store.iscensored(base) or store.iscensored(rev):
            try:
                delta = store.revision(node, raw=True)
            except error.CensoredNodeError as e:
                # A censored revision is represented by its tombstone text.
                delta = e.tombstone
            if base == nullrev:
                prefix = mdiff.trivialdiffheader(len(delta))
            else:
                baselen = store.rawsize(base)
                prefix = mdiff.replacediffheader(baselen, len(delta))
        elif base == nullrev:
            # No usable base: ship the full raw revision text.
            delta = store.revision(node, raw=True)
            prefix = mdiff.trivialdiffheader(len(delta))
        else:
            delta = store.revdiff(base, rev)
        p1n, p2n = store.parents(node)

        return revisiondelta(
            node=node,
            p1node=p1n,
            p2node=p2n,
            basenode=store.node(base),
            linknode=linknode,
            flags=store.flags(rev),
            deltachunks=(prefix, delta),
        )
    def _revisiondeltanarrow(self, store, rev, prev, linknode):
        """Build a ``revisiondelta`` for ``rev`` in narrow/ellipsis mode.

        Returns a normal delta for full changesets, ``None`` for
        revisions that should be skipped, and a full-text ellipsis
        entry (flagged ``REVIDX_ELLIPSIS``) with remapped parents for
        ellipsis revisions.
        """
        # build up some mapping information that's useful later. See
        # the local() nested function below.
        if not self.changelog_done:
            self.clnode_to_rev[linknode] = rev
            linkrev = rev
            self.clrev_to_localrev[linkrev] = rev
        else:
            linkrev = self.clnode_to_rev[linknode]
            self.clrev_to_localrev[linkrev] = rev

        # This is a node to send in full, because the changeset it
        # corresponds to was a full changeset.
        if linknode in self.full_nodes:
            return self._revisiondeltanormal(store, rev, prev, linknode)

        # At this point, a node can either be one we should skip or an
        # ellipsis. If it's not an ellipsis, bail immediately.
        if linkrev not in self.precomputed_ellipsis:
            return

        linkparents = self.precomputed_ellipsis[linkrev]
        def local(clrev):
            """Turn a changelog revnum into a local revnum.

            The ellipsis dag is stored as revnums on the changelog,
            but when we're producing ellipsis entries for
            non-changelog revlogs, we need to turn those numbers into
            something local. This does that for us, and during the
            changelog sending phase will also expand the stored
            mappings as needed.
            """
            if clrev == nullrev:
                return nullrev

            if not self.changelog_done:
                # If we're doing the changelog, it's possible that we
                # have a parent that is already on the client, and we
                # need to store some extra mapping information so that
                # our contained ellipsis nodes will be able to resolve
                # their parents.
                if clrev not in self.clrev_to_localrev:
                    clnode = store.node(clrev)
                    self.clnode_to_rev[clnode] = clrev
                return clrev

            # Walk the ellipsis-ized changelog breadth-first looking for a
            # change that has been linked from the current revlog.
            #
            # For a flat manifest revlog only a single step should be necessary
            # as all relevant changelog entries are relevant to the flat
            # manifest.
            #
            # For a filelog or tree manifest dirlog however not every changelog
            # entry will have been relevant, so we need to skip some changelog
            # nodes even after ellipsis-izing.
            walk = [clrev]
            while walk:
                p = walk[0]
                walk = walk[1:]
                if p in self.clrev_to_localrev:
                    return self.clrev_to_localrev[p]
                elif p in self.full_nodes:
                    walk.extend([pp for pp in self._repo.changelog.parentrevs(p)
                                 if pp != nullrev])
                elif p in self.precomputed_ellipsis:
                    walk.extend([pp for pp in self.precomputed_ellipsis[p]
                                 if pp != nullrev])
                else:
                    # In this case, we've got an ellipsis with parents
                    # outside the current bundle (likely an
                    # incremental pull). We "know" that we can use the
                    # value of this same revlog at whatever revision
                    # is pointed to by linknode. "Know" is in scare
                    # quotes because I haven't done enough examination
                    # of edge cases to convince myself this is really
                    # a fact - it works for all the (admittedly
                    # thorough) cases in our testsuite, but I would be
                    # somewhat unsurprised to find a case in the wild
                    # where this breaks down a bit. That said, I don't
                    # know if it would hurt anything.
                    for i in pycompat.xrange(rev, 0, -1):
                        if store.linkrev(i) == clrev:
                            return i
                    # We failed to resolve a parent for this node, so
                    # we crash the changegroup construction.
                    raise error.Abort(
                        'unable to resolve parent while packing %r %r'
                        ' for changeset %r' % (store.indexfile, rev, clrev))

            return nullrev

        if not linkparents or (
            store.parentrevs(rev) == (nullrev, nullrev)):
            p1, p2 = nullrev, nullrev
        elif len(linkparents) == 1:
            p1, = sorted(local(p) for p in linkparents)
            p2 = nullrev
        else:
            p1, p2 = sorted(local(p) for p in linkparents)

        n = store.node(rev)
        p1n, p2n = store.node(p1), store.node(p2)
        flags = store.flags(rev)
        flags |= revlog.REVIDX_ELLIPSIS

        # TODO: try and actually send deltas for ellipsis data blocks
        data = store.revision(n)
        diffheader = mdiff.trivialdiffheader(len(data))

        return revisiondelta(
            node=n,
            p1node=p1n,
            p2node=p2n,
            basenode=nullid,
            linknode=linknode,
            flags=flags,
            deltachunks=(diffheader, data),
        )
1056 1096
1057 1097 def builddeltaheader(self, node, p1n, p2n, basenode, linknode, flags):
1058 1098 # do nothing with basenode, it is implicitly the previous one in HG10
1059 1099 # do nothing with flags, it is implicitly 0 for cg1 and cg2
1060 1100 return struct.pack(self.deltaheader, node, p1n, p2n, linknode)
1061 1101
class cg2packer(cg1packer):
    """Changegroup format 02 packer.

    cg2 adds an explicit delta base node to the wire header (enabling
    generaldelta exchange), so reordering revisions is disabled by
    default.
    """
    version = '02'
    deltaheader = _CHANGEGROUPV2_DELTA_HEADER

    def __init__(self, repo, filematcher, bundlecaps=None):
        super(cg2packer, self).__init__(repo, filematcher,
                                        bundlecaps=bundlecaps)

        if self._reorder is None:
            # Since generaldelta is directly supported by cg2, reordering
            # generally doesn't help, so we disable it by default (treating
            # bundle.reorder=auto just like bundle.reorder=False).
            self._reorder = False

    def deltaparent(self, store, rev, p1, p2, prev):
        """Choose the delta base for ``rev``.

        Prefers the storage delta parent when the receiver can be
        assumed to have it; otherwise falls back to ``prev`` or a full
        snapshot (null base).
        """
        # Narrow ellipses mode.
        if util.safehasattr(self, 'full_nodes'):
            # TODO: send better deltas when in narrow mode.
            #
            # changegroup.group() loops over revisions to send,
            # including revisions we'll skip. What this means is that
            # `prev` will be a potentially useless delta base for all
            # ellipsis nodes, as the client likely won't have it. In
            # the future we should do bookkeeping about which nodes
            # have been sent to the client, and try to be
            # significantly smarter about delta bases. This is
            # slightly tricky because this same code has to work for
            # all revlogs, and we don't have the linkrev/linknode here.
            return p1

        dp = store.deltaparent(rev)
        if dp == nullrev and store.storedeltachains:
            # Avoid sending full revisions when delta parent is null. Pick prev
            # in that case. It's tempting to pick p1 in this case, as p1 will
            # be smaller in the common case. However, computing a delta against
            # p1 may require resolving the raw text of p1, which could be
            # expensive. The revlog caches should have prev cached, meaning
            # less CPU for changegroup generation. There is likely room to add
            # a flag and/or config option to control this behavior.
            base = prev
        elif dp == nullrev:
            # revlog is configured to use full snapshot for a reason,
            # stick to full snapshot.
            base = nullrev
        elif dp not in (p1, p2, prev):
            # Pick prev when we can't be sure remote has the base revision.
            return prev
        else:
            base = dp
        if base != nullrev and not store.candelta(base, rev):
            base = nullrev
        return base

    def builddeltaheader(self, node, p1n, p2n, basenode, linknode, flags):
        # Do nothing with flags, it is implicitly 0 in cg1 and cg2
        return struct.pack(self.deltaheader, node, p1n, p2n, basenode, linknode)
1118 1158
class cg3packer(cg2packer):
    """Changegroup format 03 packer.

    cg3 extends cg2 with revlog flags in the delta header and with
    per-directory groups for tree manifests.
    """
    version = '03'
    deltaheader = _CHANGEGROUPV3_DELTA_HEADER

    def _packmanifests(self, dir, mfnodes, lookuplinknode):
        """Emit the manifest group for directory ``dir`` (''=root)."""
        # Sub-directory manifests are introduced by a file-style header
        # naming the directory; the root manifest gets no header.
        if dir:
            yield self.fileheader(dir)

        dlog = self._repo.manifestlog._revlog.dirlog(dir)
        for chunk in self.group(mfnodes, dlog, lookuplinknode,
                                units=_('manifests')):
            yield chunk

    def _manifestsdone(self):
        # cg3 terminates the manifest section with an explicit close chunk.
        return self.close()

    def builddeltaheader(self, node, p1n, p2n, basenode, linknode, flags):
        """Serialize the cg3 header; unlike cg1/cg2 the flags go on the wire."""
        fields = (node, p1n, p2n, basenode, linknode, flags)
        return struct.pack(self.deltaheader, *fields)
1138 1178
# Maps changegroup version identifier -> (packer class, unpacker class).
_packermap = {'01': (cg1packer, cg1unpacker),
              # cg2 adds support for exchanging generaldelta
              '02': (cg2packer, cg2unpacker),
              # cg3 adds support for exchanging revlog flags and treemanifests
              '03': (cg3packer, cg3unpacker),
              }
1145 1185
def allsupportedversions(repo):
    """Return the set of changegroup versions this code understands.

    Version '03' is only included when the configuration or repository
    requirements opt into changegroup3/treemanifest support.
    """
    versions = set(_packermap)
    wantv03 = (repo.ui.configbool('experimental', 'changegroup3')
               or repo.ui.configbool('experimental', 'treemanifest')
               or 'treemanifest' in repo.requirements)
    if not wantv03:
        versions.discard('03')
    return versions
1153 1193
# Changegroup versions that can be applied to the repo
def supportedincomingversions(repo):
    """Return changegroup versions acceptable when unbundling into ``repo``."""
    # Incoming support is currently identical to the full supported set.
    return allsupportedversions(repo)
1157 1197
# Changegroup versions that can be created from the repo
def supportedoutgoingversions(repo):
    """Return changegroup versions ``repo`` is able to produce."""
    versions = allsupportedversions(repo)

    # Several requirements rule out the flag-less formats 01 and 02:
    # - treemanifest: 01/02 only carry flat manifests, and converting
    #   between flat and tree manifests on the fly is too expensive
    #   (tree manifests hash differently, so all history would need
    #   converting);
    # - narrow: revlog flags are needed for stripping and unbundling;
    # - lfs: LFS entries must be marked with REVIDX_EXTSTORED.
    needsflags = (
        'treemanifest' in repo.requirements or
        repository.NARROW_REQUIREMENT in repo.requirements or
        LFS_REQUIREMENT in repo.requirements)
    if needsflags:
        versions.discard('01')
        versions.discard('02')

    return versions
1181 1221
def localversion(repo):
    """Return the best changegroup version for local-only bundles.

    Used for bundles that never leave the machine, such as those from
    strip and shelve, and temporary bundles.
    """
    candidates = supportedoutgoingversions(repo)
    return max(candidates)
1186 1226
def safeversion(repo):
    """Return the smallest version clients of ``repo`` can be assumed to read.

    For example, all hg versions that support generaldelta also support
    changegroup 02, so '01' is dropped for generaldelta repos.
    """
    candidates = supportedoutgoingversions(repo)
    if 'generaldelta' in repo.requirements:
        candidates.discard('01')
    assert candidates
    return min(candidates)
1196 1236
def getbundler(version, repo, bundlecaps=None, filematcher=None):
    """Construct the packer instance for changegroup ``version``.

    ``filematcher`` defaults to matching everything; it is always
    intersected with the repo's narrow matcher so files absent from the
    local store are never requested.
    """
    assert version in supportedoutgoingversions(repo)

    if filematcher is None:
        filematcher = matchmod.alwaysmatcher(repo.root, '')

    if version == '01' and not filematcher.always():
        raise error.ProgrammingError('version 01 changegroups do not support '
                                     'sparse file matchers')

    # Requested files could include files not in the local store. So
    # filter those out.
    filematcher = matchmod.intersectmatchers(repo.narrowmatch(),
                                             filematcher)

    packercls = _packermap[version][0]
    return packercls(repo, filematcher=filematcher, bundlecaps=bundlecaps)
1214 1254
def getunbundler(version, fh, alg, extras=None):
    """Construct the unpacker instance for changegroup ``version``."""
    unpackercls = _packermap[version][1]
    return unpackercls(fh, alg, extras=extras)
1217 1257
def _changegroupinfo(repo, nodes, source):
    """Report the changesets about to be bundled on the ui.

    The count is shown in verbose mode (or always for 'bundle'); the
    individual hashes only in debug mode.
    """
    ui = repo.ui
    if ui.verbose or source == 'bundle':
        ui.status(_("%d changesets found\n") % len(nodes))
    if ui.debugflag:
        ui.debug("list of changesets:\n")
        for node in nodes:
            ui.debug("%s\n" % hex(node))
1225 1265
def makechangegroup(repo, outgoing, version, source, fastpath=False,
                    bundlecaps=None):
    """Build a changegroup and wrap it in an unbundler for local application."""
    stream = makestream(repo, outgoing, version, source,
                        fastpath=fastpath, bundlecaps=bundlecaps)
    buffered = util.chunkbuffer(stream)
    return getunbundler(version, buffered, None,
                        {'clcount': len(outgoing.missing) })
1232 1272
def makestream(repo, outgoing, version, source, fastpath=False,
               bundlecaps=None, filematcher=None):
    """Return a generator of raw changegroup bytes for ``outgoing``."""
    bundler = getbundler(version, repo, bundlecaps=bundlecaps,
                         filematcher=filematcher)

    repo = repo.unfiltered()
    commonrevs = outgoing.common
    csets = outgoing.missing
    heads = outgoing.missingheads
    # We go through the fast path if we get told to, or if all (unfiltered
    # heads have been requested (since we then know there all linkrevs will
    # be pulled by the client).
    heads.sort()
    allheads = repo.filtername is None and heads == sorted(repo.heads())
    fastpathlinkrev = fastpath or allheads

    repo.hook('preoutgoing', throw=True, source=source)
    _changegroupinfo(repo, csets, source)
    return bundler.generate(commonrevs, csets, fastpathlinkrev, source)
1252 1292
def _addchangegroupfiles(repo, source, revmap, trp, expectedfiles, needfiles):
    """Apply the file sections of an incoming changegroup.

    Reads file delta groups from ``source`` until the terminating empty
    header, adds them to the corresponding filelogs under transaction
    ``trp``, and cross-checks that exactly the nodes listed in
    ``needfiles`` arrived. Returns ``(revisions, files)`` counts.
    Raises ``error.Abort`` on empty groups, censored bases, spurious or
    missing file revisions.
    """
    revisions = 0
    files = 0
    progress = repo.ui.makeprogress(_('files'), unit=_('files'),
                                    total=expectedfiles)
    # filelogheader() returns {} at the end-of-section marker.
    for chunkdata in iter(source.filelogheader, {}):
        files += 1
        f = chunkdata["filename"]
        repo.ui.debug("adding %s revisions\n" % f)
        progress.increment()
        fl = repo.file(f)
        o = len(fl)
        try:
            deltas = source.deltaiter()
            if not fl.addgroup(deltas, revmap, trp):
                raise error.Abort(_("received file revlog group is empty"))
        except error.CensoredBaseError as e:
            raise error.Abort(_("received delta base is censored: %s") % e)
        revisions += len(fl) - o
        if f in needfiles:
            needs = needfiles[f]
            # Tick off every newly-added node against the expected set.
            for new in pycompat.xrange(o, len(fl)):
                n = fl.node(new)
                if n in needs:
                    needs.remove(n)
                else:
                    raise error.Abort(
                        _("received spurious file revlog entry"))
            if not needs:
                del needfiles[f]
    progress.complete()

    # Anything still listed in needfiles must already exist locally,
    # otherwise the changegroup was incomplete.
    for f, needs in needfiles.iteritems():
        fl = repo.file(f)
        for n in needs:
            try:
                fl.rev(n)
            except error.LookupError:
                raise error.Abort(
                    _('missing file data for %s:%s - run hg verify') %
                    (f, hex(n)))

    return revisions, files
1296 1336
def _packellipsischangegroup(repo, common, match, relevant_nodes,
                             ellipsisroots, visitnodes, depth, source, version):
    """Generate a changegroup stream with ellipsis (narrow) nodes.

    Configures a cg3+ packer with the side-channel state consumed by
    the narrow-aware revchunk code and returns its generated stream.
    """
    if version in ('01', '02'):
        raise error.Abort(
            'ellipsis nodes require at least cg3 on client and server, '
            'but negotiated version %s' % version)

    # We wrap cg1packer.revchunk, using a side channel to pass
    # relevant_nodes into that area. Then if linknode isn't in the
    # set, we know we have an ellipsis node and we should defer
    # sending that node's data. We override close() to detect
    # pending ellipsis nodes and flush them.
    packer = getbundler(version, repo, filematcher=match)

    # Nodes that must NOT become ellipsis nodes. Stored this way (rather
    # than the ellipsis set) because for very large histories this set is
    # expected to be significantly smaller.
    packer.full_nodes = relevant_nodes
    # Maps ellipsis revs to their roots at the changelog level.
    packer.precomputed_ellipsis = ellipsisroots
    # Maps CL revs to per-revlog revisions. Cleared in close() at
    # the end of each group.
    packer.clrev_to_localrev = {}
    packer.next_clrev_to_localrev = {}
    # Maps changelog nodes to changelog revs. Filled in once
    # during changelog stage and then left unmodified.
    packer.clnode_to_rev = {}
    packer.changelog_done = False
    # If true, informs the packer that it is serving shallow content and might
    # need to pack file contents not introduced by the changes being packed.
    packer.is_shallow = depth is not None

    return packer.generate(common, visitnodes, False, source)
General Comments 0
You need to be logged in to leave comments. Login now