##// END OF EJS Templates
changegroup: invert conditional and dedent...
Gregory Szorc -
r39041:d56a6b78 default
parent child Browse files
Show More
@@ -1,1423 +1,1427 b''
1 1 # changegroup.py - Mercurial changegroup manipulation functions
2 2 #
3 3 # Copyright 2006 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import os
11 11 import struct
12 12 import weakref
13 13
14 14 from .i18n import _
15 15 from .node import (
16 16 hex,
17 17 nullid,
18 18 nullrev,
19 19 short,
20 20 )
21 21
22 22 from .thirdparty import (
23 23 attr,
24 24 )
25 25
26 26 from . import (
27 27 dagutil,
28 28 error,
29 29 manifest,
30 30 match as matchmod,
31 31 mdiff,
32 32 phases,
33 33 pycompat,
34 34 repository,
35 35 revlog,
36 36 util,
37 37 )
38 38
39 39 from .utils import (
40 40 stringutil,
41 41 )
42 42
# Delta header formats for each changegroup wire version. cg1 carries four
# 20-byte nodes, cg2 adds an explicit delta-base node, and cg3 further
# appends a 16-bit flags field (hence the explicit '>' big-endian marker,
# which the fixed-width string-only formats do not need).
_CHANGEGROUPV1_DELTA_HEADER = struct.Struct("20s20s20s20s")
_CHANGEGROUPV2_DELTA_HEADER = struct.Struct("20s20s20s20s20s")
_CHANGEGROUPV3_DELTA_HEADER = struct.Struct(">20s20s20s20s20sH")

# Repository requirement string for LFS (large file storage).
LFS_REQUIREMENT = 'lfs'

# Local alias used throughout this module for reading exact byte counts.
readexactly = util.readexactly
50 50
def getchunk(stream):
    """Return the next length-prefixed chunk from stream as a string.

    A chunk is framed by a big-endian 32-bit length that counts its own
    four bytes. A zero length marks an end-of-group sentinel and yields
    an empty string; lengths 1-4 (or negative) are malformed.
    """
    header = readexactly(stream, 4)
    (framelen,) = struct.unpack(">l", header)
    if framelen > 4:
        # Payload size excludes the four header bytes already consumed.
        return readexactly(stream, framelen - 4)
    if framelen == 0:
        return ""
    raise error.Abort(_("invalid chunk length %d") % framelen)
60 60
def chunkheader(length):
    """Return a changegroup chunk header (string) for a payload of *length*.

    The on-the-wire length field counts itself, so four bytes are added.
    """
    framelen = 4 + length
    return struct.pack(">l", framelen)
64 64
def closechunk():
    """Return a changegroup chunk header (string) for a zero-length chunk.

    A zero length field terminates a group on the wire.
    """
    # struct.pack(">l", 0) is simply four NUL bytes.
    return b'\x00\x00\x00\x00'
68 68
def _fileheader(path):
    """Obtain a changegroup chunk header for a named path."""
    header = chunkheader(len(path))
    return header + path
72 72
def writechunks(ui, chunks, filename, vfs=None):
    """Write chunks to a file and return its filename.

    The stream is assumed to be a bundle file.
    Existing files will not be overwritten.
    If no filename is specified, a temporary file is created.
    """
    fh = None
    cleanup = None
    try:
        if filename:
            if vfs:
                fh = vfs.open(filename, "wb")
            else:
                # Increase default buffer size because default is usually
                # small (4k is common on Linux).
                fh = open(filename, "wb", 131072)
        else:
            fd, filename = pycompat.mkstemp(prefix="hg-bundle-", suffix=".hg")
            fh = os.fdopen(fd, r"wb")
        cleanup = filename
        for c in chunks:
            fh.write(c)
        # All chunks written successfully: disarm the cleanup so the
        # finished file is kept.
        cleanup = None
        return filename
    finally:
        if fh is not None:
            fh.close()
        if cleanup is not None:
            # A write failed part way through; remove the partial file.
            if filename and vfs:
                vfs.unlink(cleanup)
            else:
                os.unlink(cleanup)
106 106
class cg1unpacker(object):
    """Unpacker for cg1 changegroup streams.

    A changegroup unpacker handles the framing of the revision data in
    the wire format. Most consumers will want to use the apply()
    method to add the changes from the changegroup to a repository.

    If you're forwarding a changegroup unmodified to another consumer,
    use getchunks(), which returns an iterator of changegroup
    chunks. This is mostly useful for cases where you need to know the
    data stream has ended by observing the end of the changegroup.

    deltachunk() is useful only if you're applying delta data. Most
    consumers should prefer apply() instead.

    A few other public methods exist. Those are used only for
    bundlerepo and some debug commands - their use is discouraged.
    """
    deltaheader = _CHANGEGROUPV1_DELTA_HEADER
    deltaheadersize = deltaheader.size
    version = '01'
    _grouplistcount = 1 # One list of files after the manifests

    def __init__(self, fh, alg, extras=None):
        """Wrap stream ``fh`` with a decompressor for bundle type ``alg``.

        ``alg`` is a two-letter bundle compression type ('UN', 'BZ', ...);
        None is treated as uncompressed. ``extras`` is an optional dict of
        additional metadata exposed on the instance.
        """
        if alg is None:
            alg = 'UN'
        if alg not in util.compengines.supportedbundletypes:
            raise error.Abort(_('unknown stream compression type: %s')
                              % alg)
        if alg == 'BZ':
            # NOTE(review): 'BZ' is remapped to a special engine variant;
            # presumably it handles bzip2 streams with the header already
            # consumed - confirm against util.compengines.
            alg = '_truncatedBZ'

        compengine = util.compengines.forbundletype(alg)
        self._stream = compengine.decompressorreader(fh)
        self._type = alg
        self.extras = extras or {}
        # Optional no-argument callable invoked once per chunk read; see
        # _chunklength() and _unpackmanifests()/apply() for usage.
        self.callback = None

    # These methods (compressed, read, seek, tell) all appear to only
    # be used by bundlerepo, but it's a little hard to tell.
    def compressed(self):
        return self._type is not None and self._type != 'UN'
    def read(self, l):
        return self._stream.read(l)
    def seek(self, pos):
        return self._stream.seek(pos)
    def tell(self):
        return self._stream.tell()
    def close(self):
        return self._stream.close()

    def _chunklength(self):
        """Read a chunk length prefix and return the payload size.

        Returns 0 at an end-of-group sentinel; aborts on a malformed
        length. Fires self.callback (if set) for progress reporting.
        """
        d = readexactly(self._stream, 4)
        l = struct.unpack(">l", d)[0]
        if l <= 4:
            if l:
                raise error.Abort(_("invalid chunk length %d") % l)
            return 0
        if self.callback:
            self.callback()
        # The length field counts its own four bytes.
        return l - 4

    def changelogheader(self):
        """v10 does not have a changelog header chunk"""
        return {}

    def manifestheader(self):
        """v10 does not have a manifest header chunk"""
        return {}

    def filelogheader(self):
        """return the header of the filelogs chunk, v10 only has the filename"""
        l = self._chunklength()
        if not l:
            return {}
        fname = readexactly(self._stream, l)
        return {'filename': fname}

    def _deltaheader(self, headertuple, prevnode):
        """Decode an unpacked delta header into its canonical fields.

        cg1 has no explicit delta base: deltas chain against the previous
        node in the stream, or against p1 for the first delta of a group.
        cg1 has no flags either, so flags is always 0.
        """
        node, p1, p2, cs = headertuple
        if prevnode is None:
            deltabase = p1
        else:
            deltabase = prevnode
        flags = 0
        return node, p1, p2, deltabase, cs, flags

    def deltachunk(self, prevnode):
        """Read one delta chunk from the stream.

        Returns {} at an end-of-group sentinel, otherwise the tuple
        (node, p1, p2, cs, deltabase, delta, flags).
        """
        l = self._chunklength()
        if not l:
            return {}
        headerdata = readexactly(self._stream, self.deltaheadersize)
        header = self.deltaheader.unpack(headerdata)
        delta = readexactly(self._stream, l - self.deltaheadersize)
        node, p1, p2, deltabase, cs, flags = self._deltaheader(header, prevnode)
        return (node, p1, p2, cs, deltabase, delta, flags)

    def getchunks(self):
        """returns all the chunks contains in the bundle

        Used when you need to forward the binary stream to a file or another
        network API. To do so, it parse the changegroup data, otherwise it will
        block in case of sshrepo because it don't know the end of the stream.
        """
        # For changegroup 1 and 2, we expect 3 parts: changelog, manifestlog,
        # and a list of filelogs. For changegroup 3, we expect 4 parts:
        # changelog, manifestlog, a list of tree manifestlogs, and a list of
        # filelogs.
        #
        # Changelog and manifestlog parts are terminated with empty chunks. The
        # tree and file parts are a list of entry sections. Each entry section
        # is a series of chunks terminating in an empty chunk. The list of these
        # entry sections is terminated in yet another empty chunk, so we know
        # we've reached the end of the tree/file list when we reach an empty
        # chunk that was proceeded by no non-empty chunks.

        parts = 0
        while parts < 2 + self._grouplistcount:
            noentries = True
            while True:
                chunk = getchunk(self)
                if not chunk:
                    # The first two empty chunks represent the end of the
                    # changelog and the manifestlog portions. The remaining
                    # empty chunks represent either A) the end of individual
                    # tree or file entries in the file list, or B) the end of
                    # the entire list. It's the end of the entire list if there
                    # were no entries (i.e. noentries is True).
                    if parts < 2:
                        parts += 1
                    elif noentries:
                        parts += 1
                    break
                noentries = False
                yield chunkheader(len(chunk))
                pos = 0
                # Re-emit the payload in at most 1MB slices.
                while pos < len(chunk):
                    next = pos + 2**20
                    yield chunk[pos:next]
                    pos = next
            yield closechunk()

    def _unpackmanifests(self, repo, revmap, trp, prog):
        """Read the manifest group from the stream and add it to the repo.

        ``prog`` is a progress object; its increment method is installed as
        the per-chunk callback for the duration of the group.
        """
        self.callback = prog.increment
        # no need to check for empty manifest group here:
        # if the result of the merge of 1 and 2 is the same in 3 and 4,
        # no new manifest will be created and the manifest group will
        # be empty during the pull
        self.manifestheader()
        deltas = self.deltaiter()
        repo.manifestlog.addgroup(deltas, revmap, trp)
        prog.complete()
        self.callback = None

    def apply(self, repo, tr, srctype, url, targetphase=phases.draft,
              expectedtotal=None):
        """Add the changegroup returned by source.read() to this repo.
        srctype is a string like 'push', 'pull', or 'unbundle'. url is
        the URL of the repo where this changegroup is coming from.

        ``targetphase`` is the phase for newly added changesets (may be
        overridden for push/serve sources below); ``expectedtotal`` is
        only used as the total for the changesets progress bar.

        Return an integer summarizing the change to this repo:
        - nothing changed or no source: 0
        - more heads than before: 1+added heads (2..n)
        - fewer heads than before: -1-removed heads (-2..-n)
        - number of heads stays the same: 1
        """
        repo = repo.unfiltered()
        def csmap(x):
            repo.ui.debug("add changeset %s\n" % short(x))
            return len(cl)

        def revmap(x):
            return cl.rev(x)

        changesets = files = revisions = 0

        try:
            # The transaction may already carry source information. In this
            # case we use the top level data. We overwrite the argument
            # because we need to use the top level value (if they exist)
            # in this function.
            srctype = tr.hookargs.setdefault('source', srctype)
            url = tr.hookargs.setdefault('url', url)
            repo.hook('prechangegroup',
                      throw=True, **pycompat.strkwargs(tr.hookargs))

            # write changelog data to temp files so concurrent readers
            # will not see an inconsistent view
            cl = repo.changelog
            cl.delayupdate(tr)
            oldheads = set(cl.heads())

            trp = weakref.proxy(tr)
            # pull off the changeset group
            repo.ui.status(_("adding changesets\n"))
            clstart = len(cl)
            progress = repo.ui.makeprogress(_('changesets'), unit=_('chunks'),
                                            total=expectedtotal)
            self.callback = progress.increment

            # Accumulate the set of files named by incoming changesets so
            # the file-count can seed the filelog progress bar below.
            efiles = set()
            def onchangelog(cl, node):
                efiles.update(cl.readfiles(node))

            self.changelogheader()
            deltas = self.deltaiter()
            cgnodes = cl.addgroup(deltas, csmap, trp, addrevisioncb=onchangelog)
            efiles = len(efiles)

            if not cgnodes:
                repo.ui.develwarn('applied empty changegroup',
                                  config='warn-empty-changegroup')
            clend = len(cl)
            changesets = clend - clstart
            progress.complete()
            self.callback = None

            # pull off the manifest group
            repo.ui.status(_("adding manifests\n"))
            # We know that we'll never have more manifests than we had
            # changesets.
            progress = repo.ui.makeprogress(_('manifests'), unit=_('chunks'),
                                            total=changesets)
            self._unpackmanifests(repo, revmap, trp, progress)

            needfiles = {}
            if repo.ui.configbool('server', 'validate'):
                cl = repo.changelog
                ml = repo.manifestlog
                # validate incoming csets have their manifests
                for cset in pycompat.xrange(clstart, clend):
                    mfnode = cl.changelogrevision(cset).manifest
                    mfest = ml[mfnode].readdelta()
                    # store file cgnodes we must see
                    for f, n in mfest.iteritems():
                        needfiles.setdefault(f, set()).add(n)

            # process the files
            repo.ui.status(_("adding file changes\n"))
            newrevs, newfiles = _addchangegroupfiles(
                repo, self, revmap, trp, efiles, needfiles)
            revisions += newrevs
            files += newfiles

            deltaheads = 0
            if oldheads:
                heads = cl.heads()
                deltaheads = len(heads) - len(oldheads)
                for h in heads:
                    # Heads that close a branch do not count as added heads.
                    if h not in oldheads and repo[h].closesbranch():
                        deltaheads -= 1
            htext = ""
            if deltaheads:
                htext = _(" (%+d heads)") % deltaheads

            repo.ui.status(_("added %d changesets"
                             " with %d changes to %d files%s\n")
                           % (changesets, revisions, files, htext))
            repo.invalidatevolatilesets()

            if changesets > 0:
                if 'node' not in tr.hookargs:
                    tr.hookargs['node'] = hex(cl.node(clstart))
                    tr.hookargs['node_last'] = hex(cl.node(clend - 1))
                    hookargs = dict(tr.hookargs)
                else:
                    # 'node' already set by an earlier changegroup in this
                    # transaction: keep tr.hookargs as-is and only override
                    # in our local copy.
                    hookargs = dict(tr.hookargs)
                    hookargs['node'] = hex(cl.node(clstart))
                    hookargs['node_last'] = hex(cl.node(clend - 1))
                repo.hook('pretxnchangegroup',
                          throw=True, **pycompat.strkwargs(hookargs))

            added = [cl.node(r) for r in pycompat.xrange(clstart, clend)]
            phaseall = None
            if srctype in ('push', 'serve'):
                # Old servers can not push the boundary themselves.
                # New servers won't push the boundary if changeset already
                # exists locally as secret
                #
                # We should not use added here but the list of all change in
                # the bundle
                if repo.publishing():
                    targetphase = phaseall = phases.public
                else:
                    # closer target phase computation

                    # Those changesets have been pushed from the
                    # outside, their phases are going to be pushed
                    # alongside. Therefor `targetphase` is
                    # ignored.
                    targetphase = phaseall = phases.draft
            if added:
                phases.registernew(repo, tr, targetphase, added)
            if phaseall is not None:
                phases.advanceboundary(repo, tr, phaseall, cgnodes)

            if changesets > 0:

                def runhooks():
                    # These hooks run when the lock releases, not when the
                    # transaction closes. So it's possible for the changelog
                    # to have changed since we last saw it.
                    if clstart >= len(repo):
                        return

                    repo.hook("changegroup", **pycompat.strkwargs(hookargs))

                    for n in added:
                        args = hookargs.copy()
                        args['node'] = hex(n)
                        del args['node_last']
                        repo.hook("incoming", **pycompat.strkwargs(args))

                    newheads = [h for h in repo.heads()
                                if h not in oldheads]
                    repo.ui.log("incoming",
                                "%d incoming changes - new heads: %s\n",
                                len(added),
                                ', '.join([hex(c[:6]) for c in newheads]))

                tr.addpostclose('changegroup-runhooks-%020i' % clstart,
                                lambda tr: repo._afterlock(runhooks))
        finally:
            repo.ui.flush()
        # never return 0 here:
        if deltaheads < 0:
            ret = deltaheads - 1
        else:
            ret = deltaheads + 1
        return ret

    def deltaiter(self):
        """
        returns an iterator of the deltas in this changegroup

        Useful for passing to the underlying storage system to be stored.
        """
        chain = None
        for chunkdata in iter(lambda: self.deltachunk(chain), {}):
            # Chunkdata: (node, p1, p2, cs, deltabase, delta, flags)
            yield chunkdata
            chain = chunkdata[0]
class cg2unpacker(cg1unpacker):
    """Unpacker for cg2 streams.

    cg2 streams add support for generaldelta, so the delta header
    format is slightly different. All other features about the data
    remain the same.
    """
    deltaheader = _CHANGEGROUPV2_DELTA_HEADER
    deltaheadersize = deltaheader.size
    version = '02'

    def _deltaheader(self, headertuple, prevnode):
        # cg2 encodes the delta base explicitly in the header, so the
        # previous node is irrelevant; revlog flags do not exist yet in
        # this version, hence a constant 0.
        node, p1, p2, deltabase, cs = headertuple
        return node, p1, p2, deltabase, cs, 0
465 465
class cg3unpacker(cg2unpacker):
    """Unpacker for cg3 streams.

    cg3 streams add support for exchanging treemanifests and revlog
    flags. It adds the revlog flags to the delta header and an empty chunk
    separating manifests and files.
    """
    deltaheader = _CHANGEGROUPV3_DELTA_HEADER
    deltaheadersize = deltaheader.size
    version = '03'
    _grouplistcount = 2 # One list of manifests and one list of files

    def _deltaheader(self, headertuple, prevnode):
        # cg3 headers carry both the delta base and the flags directly;
        # prevnode is unused.
        node, p1, p2, deltabase, cs, flags = headertuple
        return node, p1, p2, deltabase, cs, flags

    def _unpackmanifests(self, repo, revmap, trp, prog):
        """Unpack the root manifest group, then any tree manifest groups."""
        super(cg3unpacker, self)._unpackmanifests(repo, revmap, trp, prog)
        for chunkdata in iter(self.filelogheader, {}):
            # If we get here, there are directory manifests in the changegroup
            d = chunkdata["filename"]
            repo.ui.debug("adding %s revisions\n" % d)
            dirlog = repo.manifestlog._revlog.dirlog(d)
            deltas = self.deltaiter()
            if not dirlog.addgroup(deltas, revmap, trp):
                raise error.Abort(_("received dir revlog group is empty"))
492 492
class headerlessfixup(object):
    """File-like wrapper that re-prepends already-consumed header bytes.

    Reads are served from the buffered header ``h`` first; once it is
    exhausted, reads fall through to the underlying stream ``fh``.
    """
    def __init__(self, fh, h):
        self._h = h
        self._fh = fh

    def read(self, n):
        if not self._h:
            return readexactly(self._fh, n)
        buffered, self._h = self._h[:n], self._h[n:]
        if len(buffered) < n:
            # Header buffer ran out mid-read; top up from the stream.
            buffered += readexactly(self._fh, n - len(buffered))
        return buffered
504 504
@attr.s(slots=True, frozen=True)
class revisiondelta(object):
    """Describes a delta entry in a changegroup.

    Captured data is sufficient to serialize the delta into multiple
    formats. Instances are immutable value objects (frozen attrs).
    """
    # 20 byte node of this revision.
    node = attr.ib()
    # 20 byte nodes of parent revisions.
    p1node = attr.ib()
    p2node = attr.ib()
    # 20 byte node of node this delta is against.
    basenode = attr.ib()
    # 20 byte node of changeset revision this delta is associated with.
    linknode = attr.ib()
    # 2 bytes of flags to apply to revision data.
    flags = attr.ib()
    # Iterable of chunks holding raw delta data.
    deltachunks = attr.ib()
525 525
526 526 def _sortnodesnormal(store, nodes, reorder):
527 527 """Sort nodes for changegroup generation and turn into revnums."""
528 528 # for generaldelta revlogs, we linearize the revs; this will both be
529 529 # much quicker and generate a much smaller bundle
530 530 if (store._generaldelta and reorder is None) or reorder:
531 531 dag = dagutil.revlogdag(store)
532 532 return dag.linearize(set(store.rev(n) for n in nodes))
533 533 else:
534 534 return sorted([store.rev(n) for n in nodes])
535 535
536 536 def _sortnodesellipsis(store, nodes, cl, lookup):
537 537 """Sort nodes for changegroup generation and turn into revnums."""
538 538 # Ellipses serving mode.
539 539 #
540 540 # In a perfect world, we'd generate better ellipsis-ified graphs
541 541 # for non-changelog revlogs. In practice, we haven't started doing
542 542 # that yet, so the resulting DAGs for the manifestlog and filelogs
543 543 # are actually full of bogus parentage on all the ellipsis
544 544 # nodes. This has the side effect that, while the contents are
545 545 # correct, the individual DAGs might be completely out of whack in
546 546 # a case like 882681bc3166 and its ancestors (back about 10
547 547 # revisions or so) in the main hg repo.
548 548 #
549 549 # The one invariant we *know* holds is that the new (potentially
550 550 # bogus) DAG shape will be valid if we order the nodes in the
551 551 # order that they're introduced in dramatis personae by the
552 552 # changelog, so what we do is we sort the non-changelog histories
553 553 # by the order in which they are used by the changelog.
554 554 key = lambda n: cl.rev(lookup(n))
555 555 return [store.rev(n) for n in sorted(nodes, key=key)]
556 556
def _revisiondeltanormal(store, rev, prev, linknode, deltaparentfn):
    """Construct a revision delta for non-ellipses changegroup generation.

    ``deltaparentfn`` chooses the delta base revision given the store,
    ``rev``, its parent revisions, and the previously emitted revision
    ``prev``.
    """
    node = store.node(rev)
    p1, p2 = store.parentrevs(rev)
    base = deltaparentfn(store, rev, p1, p2, prev)

    prefix = ''
    if store.iscensored(base) or store.iscensored(rev):
        try:
            delta = store.revision(node, raw=True)
        except error.CensoredNodeError as e:
            # A censored revision is serialized as its tombstone payload.
            delta = e.tombstone
        if base == nullrev:
            prefix = mdiff.trivialdiffheader(len(delta))
        else:
            # Frame the full text as a diff replacing the whole base.
            baselen = store.rawsize(base)
            prefix = mdiff.replacediffheader(baselen, len(delta))
    elif base == nullrev:
        # No base available: emit the full raw text with a trivial header.
        delta = store.revision(node, raw=True)
        prefix = mdiff.trivialdiffheader(len(delta))
    else:
        delta = store.revdiff(base, rev)
    p1n, p2n = store.parents(node)

    return revisiondelta(
        node=node,
        p1node=p1n,
        p2node=p2n,
        basenode=store.node(base),
        linknode=linknode,
        flags=store.flags(rev),
        deltachunks=(prefix, delta),
    )
590 590
def _revisiondeltanarrow(cl, store, ischangelog, rev, linkrev,
                         linknode, clrevtolocalrev, fullclnodes,
                         precomputedellipsis):
    """Construct a revision delta for an ellipsis node.

    The emitted delta is always a full snapshot against nullid, carries
    the REVIDX_ELLIPSIS flag, and has its parents remapped onto the
    ellipsis graph recorded in ``precomputedellipsis``.
    """
    linkparents = precomputedellipsis[linkrev]
    def local(clrev):
        """Turn a changelog revnum into a local revnum.

        The ellipsis dag is stored as revnums on the changelog,
        but when we're producing ellipsis entries for
        non-changelog revlogs, we need to turn those numbers into
        something local. This does that for us, and during the
        changelog sending phase will also expand the stored
        mappings as needed.
        """
        if clrev == nullrev:
            return nullrev

        if ischangelog:
            return clrev

        # Walk the ellipsis-ized changelog breadth-first looking for a
        # change that has been linked from the current revlog.
        #
        # For a flat manifest revlog only a single step should be necessary
        # as all relevant changelog entries are relevant to the flat
        # manifest.
        #
        # For a filelog or tree manifest dirlog however not every changelog
        # entry will have been relevant, so we need to skip some changelog
        # nodes even after ellipsis-izing.
        walk = [clrev]
        while walk:
            p = walk[0]
            walk = walk[1:]
            if p in clrevtolocalrev:
                return clrevtolocalrev[p]
            elif p in fullclnodes:
                walk.extend([pp for pp in cl.parentrevs(p)
                             if pp != nullrev])
            elif p in precomputedellipsis:
                walk.extend([pp for pp in precomputedellipsis[p]
                             if pp != nullrev])
            else:
                # In this case, we've got an ellipsis with parents
                # outside the current bundle (likely an
                # incremental pull). We "know" that we can use the
                # value of this same revlog at whatever revision
                # is pointed to by linknode. "Know" is in scare
                # quotes because I haven't done enough examination
                # of edge cases to convince myself this is really
                # a fact - it works for all the (admittedly
                # thorough) cases in our testsuite, but I would be
                # somewhat unsurprised to find a case in the wild
                # where this breaks down a bit. That said, I don't
                # know if it would hurt anything.
                for i in pycompat.xrange(rev, 0, -1):
                    if store.linkrev(i) == clrev:
                        return i
                # We failed to resolve a parent for this node, so
                # we crash the changegroup construction.
                raise error.Abort(
                    'unable to resolve parent while packing %r %r'
                    ' for changeset %r' % (store.indexfile, rev, clrev))

        return nullrev

    if not linkparents or (
        store.parentrevs(rev) == (nullrev, nullrev)):
        p1, p2 = nullrev, nullrev
    elif len(linkparents) == 1:
        p1, = sorted(local(p) for p in linkparents)
        p2 = nullrev
    else:
        p1, p2 = sorted(local(p) for p in linkparents)

    n = store.node(rev)
    p1n, p2n = store.node(p1), store.node(p2)
    flags = store.flags(rev)
    flags |= revlog.REVIDX_ELLIPSIS

    # TODO: try and actually send deltas for ellipsis data blocks
    data = store.revision(n)
    diffheader = mdiff.trivialdiffheader(len(data))

    return revisiondelta(
        node=n,
        p1node=p1n,
        p2node=p2n,
        basenode=nullid,
        linknode=linknode,
        flags=flags,
        deltachunks=(diffheader, data),
    )
684 684
685 685 class cgpacker(object):
    def __init__(self, repo, filematcher, version, allowreorder,
                 deltaparentfn, builddeltaheader, manifestsend,
                 bundlecaps=None, ellipses=False,
                 shallow=False, ellipsisroots=None, fullnodes=None):
        """Given a source repo, construct a bundler.

        filematcher is a matcher that matches on files to include in the
        changegroup. Used to facilitate sparse changegroups.

        allowreorder controls whether reordering of revisions is allowed.
        This value is used when ``bundle.reorder`` is ``auto`` or isn't
        set.

        deltaparentfn is a callable that resolves the delta parent for
        a specific revision.

        builddeltaheader is a callable that constructs the header for a group
        delta.

        manifestsend is a chunk to send after manifests have been fully emitted.

        ellipses indicates whether ellipsis serving mode is enabled.

        bundlecaps is optional and can be used to specify the set of
        capabilities which can be used to build the bundle. While bundlecaps is
        unused in core Mercurial, extensions rely on this feature to communicate
        capabilities to customize the changegroup packer.

        shallow indicates whether shallow data might be sent. The packer may
        need to pack file contents not introduced by the changes being packed.

        ellipsisroots maps ellipsis revs to their roots at the changelog
        level.

        fullnodes is the set of changelog nodes which should not be ellipsis
        nodes. We store this rather than the set of nodes that should be
        ellipsis because for very large histories we expect this to be
        significantly smaller.
        """
        assert filematcher
        self._filematcher = filematcher

        self.version = version
        self._deltaparentfn = deltaparentfn
        self._builddeltaheader = builddeltaheader
        self._manifestsend = manifestsend
        self._ellipses = ellipses

        # Set of capabilities we can use to build the bundle.
        if bundlecaps is None:
            bundlecaps = set()
        self._bundlecaps = bundlecaps
        self._isshallow = shallow
        self._fullclnodes = fullnodes

        # Maps ellipsis revs to their roots at the changelog level.
        self._precomputedellipsis = ellipsisroots

        # experimental config: bundle.reorder
        reorder = repo.ui.config('bundle', 'reorder')
        if reorder == 'auto':
            self._reorder = allowreorder
        else:
            self._reorder = stringutil.parsebool(reorder)

        self._repo = repo

        # Size notes are emitted only in verbose (non-debug) mode.
        if self._repo.ui.verbose and not self._repo.ui.debugflag:
            self._verbosenote = self._repo.ui.note
        else:
            self._verbosenote = lambda s: None
754 754
    def group(self, revs, store, ischangelog, lookup, units=None,
              clrevtolocalrev=None):
        """Calculate a delta group, yielding a sequence of changegroup chunks
        (strings).

        Given a list of changeset revs, return a set of deltas and
        metadata corresponding to nodes. The first delta is
        first parent(nodelist[0]) -> nodelist[0], the receiver is
        guaranteed to have this parent as it has all history before
        these changesets. In the case firstparent is nullrev the
        changegroup starts with a full revision.

        If units is not None, progress detail will be generated, units specifies
        the type of revlog that is touched (changelog, manifest, etc.).

        ``clrevtolocalrev`` is a mutable mapping from changelog rev to
        this store's rev; it is only read/updated in ellipsis mode.
        """
        # if we don't have any revisions touched by these changesets, bail
        if len(revs) == 0:
            yield closechunk()
            return

        cl = self._repo.changelog

        # add the parent of the first rev
        p = store.parentrevs(revs[0])[0]
        revs.insert(0, p)

        # build deltas
        progress = None
        if units is not None:
            progress = self._repo.ui.makeprogress(_('bundling'), unit=units,
                                                  total=(len(revs) - 1))
        for r in pycompat.xrange(len(revs) - 1):
            if progress:
                progress.update(r + 1)
            prev, curr = revs[r], revs[r + 1]
            linknode = lookup(store.node(curr))

            if self._ellipses:
                linkrev = cl.rev(linknode)
                clrevtolocalrev[linkrev] = curr

                # This is a node to send in full, because the changeset it
                # corresponds to was a full changeset.
                if linknode in self._fullclnodes:
                    delta = _revisiondeltanormal(store, curr, prev, linknode,
                                                 self._deltaparentfn)
                elif linkrev not in self._precomputedellipsis:
                    # Not part of the ellipsis graph: skip this revision.
                    delta = None
                else:
                    delta = _revisiondeltanarrow(
                        cl, store, ischangelog, curr, linkrev, linknode,
                        clrevtolocalrev, self._fullclnodes,
                        self._precomputedellipsis)
            else:
                delta = _revisiondeltanormal(store, curr, prev, linknode,
                                             self._deltaparentfn)

            if not delta:
                continue

            # Emit one framed chunk: header length covers meta + delta data.
            meta = self._builddeltaheader(delta)
            l = len(meta) + sum(len(x) for x in delta.deltachunks)
            yield chunkheader(l)
            yield meta
            for x in delta.deltachunks:
                yield x

        if progress:
            progress.complete()

        yield closechunk()
826 826
827 827 # filter any nodes that claim to be part of the known set
    def _prune(self, store, missing, commonrevs):
        """Filter out of ``missing`` any nodes whose linkrev the receiver
        already has (i.e. falls in ``commonrevs``)."""
        # TODO this violates storage abstraction for manifests.
        if isinstance(store, manifest.manifestrevlog):
            # Skip tree manifest directories the file matcher never visits.
            if not self._filematcher.visitdir(store._dir[:-1] or '.'):
                return []

        rr, rl = store.rev, store.linkrev
        return [n for n in missing if rl(rr(n)) not in commonrevs]
836 836
    def _packmanifests(self, dir, dirlog, revs, lookuplinknode,
                       clrevtolocalrev):
        """Pack manifests into a changegroup stream.

        Encodes the directory name in the output so multiple manifests
        can be sent. Multiple manifests is not supported by cg1 and cg2.
        """
        if dir:
            # Only cg3 supports named (tree) manifest sections.
            assert self.version == b'03'
            yield _fileheader(dir)

        for chunk in self.group(revs, dirlog, False, lookuplinknode,
                                units=_('manifests'),
                                clrevtolocalrev=clrevtolocalrev):
            yield chunk
852 852
853 853 def generate(self, commonrevs, clnodes, fastpathlinkrev, source):
854 854 """Yield a sequence of changegroup byte chunks."""
855 855
856 856 repo = self._repo
857 857 cl = repo.changelog
858 858
859 859 self._verbosenote(_('uncompressed size of bundle content:\n'))
860 860 size = 0
861 861
862 862 clstate, chunks = self._generatechangelog(cl, clnodes)
863 863 for chunk in chunks:
864 864 size += len(chunk)
865 865 yield chunk
866 866
867 867 self._verbosenote(_('%8.i (changelog)\n') % size)
868 868
869 869 clrevorder = clstate['clrevorder']
870 870 mfs = clstate['mfs']
871 871 changedfiles = clstate['changedfiles']
872 872
873 873 # We need to make sure that the linkrev in the changegroup refers to
874 874 # the first changeset that introduced the manifest or file revision.
875 875 # The fastpath is usually safer than the slowpath, because the filelogs
876 876 # are walked in revlog order.
877 877 #
878 878 # When taking the slowpath with reorder=None and the manifest revlog
879 879 # uses generaldelta, the manifest may be walked in the "wrong" order.
880 880 # Without 'clrevorder', we would get an incorrect linkrev (see fix in
881 881 # cc0ff93d0c0c).
882 882 #
883 883 # When taking the fastpath, we are only vulnerable to reordering
884 884 # of the changelog itself. The changelog never uses generaldelta, so
885 885 # it is only reordered when reorder=True. To handle this case, we
886 886 # simply take the slowpath, which already has the 'clrevorder' logic.
887 887 # This was also fixed in cc0ff93d0c0c.
888 888 fastpathlinkrev = fastpathlinkrev and not self._reorder
889 889 # Treemanifests don't work correctly with fastpathlinkrev
890 890 # either, because we don't discover which directory nodes to
891 891 # send along with files. This could probably be fixed.
892 892 fastpathlinkrev = fastpathlinkrev and (
893 893 'treemanifest' not in repo.requirements)
894 894
895 895 fnodes = {} # needed file nodes
896 896
897 897 for chunk in self.generatemanifests(commonrevs, clrevorder,
898 898 fastpathlinkrev, mfs, fnodes, source,
899 899 clstate['clrevtomanifestrev']):
900 900 yield chunk
901 901
902 902 mfdicts = None
903 903 if self._ellipses and self._isshallow:
904 904 mfdicts = [(self._repo.manifestlog[n].read(), lr)
905 905 for (n, lr) in mfs.iteritems()]
906 906
907 907 mfs.clear()
908 908 clrevs = set(cl.rev(x) for x in clnodes)
909 909
910 910 for chunk in self.generatefiles(changedfiles, commonrevs,
911 911 source, mfdicts, fastpathlinkrev,
912 912 fnodes, clrevs):
913 913 yield chunk
914 914
915 915 yield closechunk()
916 916
917 917 if clnodes:
918 918 repo.hook('outgoing', node=hex(clnodes[0]), source=source)
919 919
920 920 def _generatechangelog(self, cl, nodes):
921 921 """Generate data for changelog chunks.
922 922
923 923 Returns a 2-tuple of a dict containing state and an iterable of
924 924 byte chunks. The state will not be fully populated until the
925 925 chunk stream has been fully consumed.
926 926 """
927 927 clrevorder = {}
928 928 mfs = {} # needed manifests
929 929 mfl = self._repo.manifestlog
930 930 # TODO violates storage abstraction.
931 931 mfrevlog = mfl._revlog
932 932 changedfiles = set()
933 933 clrevtomanifestrev = {}
934 934
935 935 # Callback for the changelog, used to collect changed files and
936 936 # manifest nodes.
937 937 # Returns the linkrev node (identity in the changelog case).
938 938 def lookupcl(x):
939 939 c = cl.read(x)
940 940 clrevorder[x] = len(clrevorder)
941 941
942 942 if self._ellipses:
943 943 # Only update mfs if x is going to be sent. Otherwise we
944 944 # end up with bogus linkrevs specified for manifests and
945 945 # we skip some manifest nodes that we should otherwise
946 946 # have sent.
947 947 if (x in self._fullclnodes
948 948 or cl.rev(x) in self._precomputedellipsis):
949 949 n = c[0]
950 950 # Record the first changeset introducing this manifest
951 951 # version.
952 952 mfs.setdefault(n, x)
953 953 # Set this narrow-specific dict so we have the lowest
954 954 # manifest revnum to look up for this cl revnum. (Part of
955 955 # mapping changelog ellipsis parents to manifest ellipsis
956 956 # parents)
957 957 clrevtomanifestrev.setdefault(cl.rev(x), mfrevlog.rev(n))
958 958 # We can't trust the changed files list in the changeset if the
959 959 # client requested a shallow clone.
960 960 if self._isshallow:
961 961 changedfiles.update(mfl[c[0]].read().keys())
962 962 else:
963 963 changedfiles.update(c[3])
964 964 else:
965 965
966 966 n = c[0]
967 967 # record the first changeset introducing this manifest version
968 968 mfs.setdefault(n, x)
969 969 # Record a complete list of potentially-changed files in
970 970 # this manifest.
971 971 changedfiles.update(c[3])
972 972
973 973 return x
974 974
975 975 # Changelog doesn't benefit from reordering revisions. So send out
976 976 # revisions in store order.
977 977 revs = sorted(cl.rev(n) for n in nodes)
978 978
979 979 state = {
980 980 'clrevorder': clrevorder,
981 981 'mfs': mfs,
982 982 'changedfiles': changedfiles,
983 983 'clrevtomanifestrev': clrevtomanifestrev,
984 984 }
985 985
986 986 gen = self.group(revs, cl, True, lookupcl, units=_('changesets'),
987 987 clrevtolocalrev={})
988 988
989 989 return state, gen
990 990
991 991 def generatemanifests(self, commonrevs, clrevorder, fastpathlinkrev, mfs,
992 992 fnodes, source, clrevtolocalrev):
993 993 """Returns an iterator of changegroup chunks containing manifests.
994 994
995 995 `source` is unused here, but is used by extensions like remotefilelog to
996 996 change what is sent based in pulls vs pushes, etc.
997 997 """
998 998 repo = self._repo
999 999 cl = repo.changelog
1000 1000 mfl = repo.manifestlog
1001 1001 dirlog = mfl._revlog.dirlog
1002 1002 tmfnodes = {'': mfs}
1003 1003
1004 1004 # Callback for the manifest, used to collect linkrevs for filelog
1005 1005 # revisions.
1006 1006 # Returns the linkrev node (collected in lookupcl).
1007 1007 def makelookupmflinknode(dir, nodes):
1008 1008 if fastpathlinkrev:
1009 1009 assert not dir
1010 1010 return mfs.__getitem__
1011 1011
1012 1012 def lookupmflinknode(x):
1013 1013 """Callback for looking up the linknode for manifests.
1014 1014
1015 1015 Returns the linkrev node for the specified manifest.
1016 1016
1017 1017 SIDE EFFECT:
1018 1018
1019 1019 1) fclnodes gets populated with the list of relevant
1020 1020 file nodes if we're not using fastpathlinkrev
1021 1021 2) When treemanifests are in use, collects treemanifest nodes
1022 1022 to send
1023 1023
1024 1024 Note that this means manifests must be completely sent to
1025 1025 the client before you can trust the list of files and
1026 1026 treemanifests to send.
1027 1027 """
1028 1028 clnode = nodes[x]
1029 1029 mdata = mfl.get(dir, x).readfast(shallow=True)
1030 1030 for p, n, fl in mdata.iterentries():
1031 1031 if fl == 't': # subdirectory manifest
1032 1032 subdir = dir + p + '/'
1033 1033 tmfclnodes = tmfnodes.setdefault(subdir, {})
1034 1034 tmfclnode = tmfclnodes.setdefault(n, clnode)
1035 1035 if clrevorder[clnode] < clrevorder[tmfclnode]:
1036 1036 tmfclnodes[n] = clnode
1037 1037 else:
1038 1038 f = dir + p
1039 1039 fclnodes = fnodes.setdefault(f, {})
1040 1040 fclnode = fclnodes.setdefault(n, clnode)
1041 1041 if clrevorder[clnode] < clrevorder[fclnode]:
1042 1042 fclnodes[n] = clnode
1043 1043 return clnode
1044 1044 return lookupmflinknode
1045 1045
1046 1046 size = 0
1047 1047 while tmfnodes:
1048 1048 dir, nodes = tmfnodes.popitem()
1049 1049 store = dirlog(dir)
1050 1050 prunednodes = self._prune(store, nodes, commonrevs)
1051 if not dir or prunednodes:
1052 lookupfn = makelookupmflinknode(dir, nodes)
1051
1052 if dir and not prunednodes:
1053 continue
1054
1055 lookupfn = makelookupmflinknode(dir, nodes)
1053 1056
1054 if self._ellipses:
1055 revs = _sortnodesellipsis(store, prunednodes, cl,
1056 lookupfn)
1057 else:
1058 revs = _sortnodesnormal(store, prunednodes,
1059 self._reorder)
1057 if self._ellipses:
1058 revs = _sortnodesellipsis(store, prunednodes, cl,
1059 lookupfn)
1060 else:
1061 revs = _sortnodesnormal(store, prunednodes,
1062 self._reorder)
1060 1063
1061 for x in self._packmanifests(dir, store, revs, lookupfn,
1062 clrevtolocalrev):
1063 size += len(x)
1064 yield x
1064 for x in self._packmanifests(dir, store, revs, lookupfn,
1065 clrevtolocalrev):
1066 size += len(x)
1067 yield x
1068
1065 1069 self._verbosenote(_('%8.i (manifests)\n') % size)
1066 1070 yield self._manifestsend
1067 1071
1068 1072 # The 'source' parameter is useful for extensions
1069 1073 def generatefiles(self, changedfiles, commonrevs, source,
1070 1074 mfdicts, fastpathlinkrev, fnodes, clrevs):
1071 1075 changedfiles = list(filter(self._filematcher, changedfiles))
1072 1076
1073 1077 if not fastpathlinkrev:
1074 1078 def normallinknodes(unused, fname):
1075 1079 return fnodes.get(fname, {})
1076 1080 else:
1077 1081 cln = self._repo.changelog.node
1078 1082
1079 1083 def normallinknodes(store, fname):
1080 1084 flinkrev = store.linkrev
1081 1085 fnode = store.node
1082 1086 revs = ((r, flinkrev(r)) for r in store)
1083 1087 return dict((fnode(r), cln(lr))
1084 1088 for r, lr in revs if lr in clrevs)
1085 1089
1086 1090 clrevtolocalrev = {}
1087 1091
1088 1092 if self._isshallow:
1089 1093 # In a shallow clone, the linknodes callback needs to also include
1090 1094 # those file nodes that are in the manifests we sent but weren't
1091 1095 # introduced by those manifests.
1092 1096 commonctxs = [self._repo[c] for c in commonrevs]
1093 1097 clrev = self._repo.changelog.rev
1094 1098
1095 1099 # Defining this function has a side-effect of overriding the
1096 1100 # function of the same name that was passed in as an argument.
1097 1101 # TODO have caller pass in appropriate function.
1098 1102 def linknodes(flog, fname):
1099 1103 for c in commonctxs:
1100 1104 try:
1101 1105 fnode = c.filenode(fname)
1102 1106 clrevtolocalrev[c.rev()] = flog.rev(fnode)
1103 1107 except error.ManifestLookupError:
1104 1108 pass
1105 1109 links = normallinknodes(flog, fname)
1106 1110 if len(links) != len(mfdicts):
1107 1111 for mf, lr in mfdicts:
1108 1112 fnode = mf.get(fname, None)
1109 1113 if fnode in links:
1110 1114 links[fnode] = min(links[fnode], lr, key=clrev)
1111 1115 elif fnode:
1112 1116 links[fnode] = lr
1113 1117 return links
1114 1118 else:
1115 1119 linknodes = normallinknodes
1116 1120
1117 1121 repo = self._repo
1118 1122 cl = repo.changelog
1119 1123 progress = repo.ui.makeprogress(_('bundling'), unit=_('files'),
1120 1124 total=len(changedfiles))
1121 1125 for i, fname in enumerate(sorted(changedfiles)):
1122 1126 filerevlog = repo.file(fname)
1123 1127 if not filerevlog:
1124 1128 raise error.Abort(_("empty or missing file data for %s") %
1125 1129 fname)
1126 1130
1127 1131 clrevtolocalrev.clear()
1128 1132
1129 1133 linkrevnodes = linknodes(filerevlog, fname)
1130 1134 # Lookup for filenodes, we collected the linkrev nodes above in the
1131 1135 # fastpath case and with lookupmf in the slowpath case.
1132 1136 def lookupfilelog(x):
1133 1137 return linkrevnodes[x]
1134 1138
1135 1139 filenodes = self._prune(filerevlog, linkrevnodes, commonrevs)
1136 1140 if filenodes:
1137 1141 if self._ellipses:
1138 1142 revs = _sortnodesellipsis(filerevlog, filenodes,
1139 1143 cl, lookupfilelog)
1140 1144 else:
1141 1145 revs = _sortnodesnormal(filerevlog, filenodes,
1142 1146 self._reorder)
1143 1147
1144 1148 progress.update(i + 1, item=fname)
1145 1149 h = _fileheader(fname)
1146 1150 size = len(h)
1147 1151 yield h
1148 1152 for chunk in self.group(revs, filerevlog, False, lookupfilelog,
1149 1153 clrevtolocalrev=clrevtolocalrev):
1150 1154 size += len(chunk)
1151 1155 yield chunk
1152 1156 self._verbosenote(_('%8.i %s\n') % (size, fname))
1153 1157 progress.complete()
1154 1158
1155 1159 def _deltaparentprev(store, rev, p1, p2, prev):
1156 1160 """Resolve a delta parent to the previous revision.
1157 1161
1158 1162 Used for version 1 changegroups, which don't support generaldelta.
1159 1163 """
1160 1164 return prev
1161 1165
def _deltaparentgeneraldelta(store, rev, p1, p2, prev):
    """Resolve a delta parent when general deltas are supported."""
    dp = store.deltaparent(rev)

    if dp == nullrev:
        if store.storedeltachains:
            # Avoid sending full revisions when delta parent is null. Pick
            # prev in that case. It's tempting to pick p1 in this case, as p1
            # will be smaller in the common case. However, computing a delta
            # against p1 may require resolving the raw text of p1, which
            # could be expensive. The revlog caches should have prev cached,
            # meaning less CPU for changegroup generation. There is likely
            # room to add a flag and/or config option to control this
            # behavior.
            base = prev
        else:
            # revlog is configured to use full snapshot for a reason,
            # stick to full snapshot.
            base = nullrev
    elif dp in (p1, p2, prev):
        base = dp
    else:
        # Pick prev when we can't be sure remote has the base revision.
        return prev

    if base != nullrev and not store.candelta(base, rev):
        base = nullrev

    return base
1188 1192
1189 1193 def _deltaparentellipses(store, rev, p1, p2, prev):
1190 1194 """Resolve a delta parent when in ellipses mode."""
1191 1195 # TODO: send better deltas when in narrow mode.
1192 1196 #
1193 1197 # changegroup.group() loops over revisions to send,
1194 1198 # including revisions we'll skip. What this means is that
1195 1199 # `prev` will be a potentially useless delta base for all
1196 1200 # ellipsis nodes, as the client likely won't have it. In
1197 1201 # the future we should do bookkeeping about which nodes
1198 1202 # have been sent to the client, and try to be
1199 1203 # significantly smarter about delta bases. This is
1200 1204 # slightly tricky because this same code has to work for
1201 1205 # all revlogs, and we don't have the linkrev/linknode here.
1202 1206 return p1
1203 1207
def _makecg1packer(repo, filematcher, bundlecaps, ellipses=False,
                   shallow=False, ellipsisroots=None, fullnodes=None):
    """Construct a cgpacker that emits version 01 changegroups."""
    # cg1 headers have no base node: deltas are always against prev.
    def builddeltaheader(d):
        return _CHANGEGROUPV1_DELTA_HEADER.pack(d.node, d.p1node, d.p2node,
                                                d.linknode)

    return cgpacker(repo, filematcher, b'01',
                    deltaparentfn=_deltaparentprev,
                    allowreorder=None,
                    builddeltaheader=builddeltaheader,
                    manifestsend=b'',
                    bundlecaps=bundlecaps,
                    ellipses=ellipses,
                    shallow=shallow,
                    ellipsisroots=ellipsisroots,
                    fullnodes=fullnodes)
1219 1223
def _makecg2packer(repo, filematcher, bundlecaps, ellipses=False,
                   shallow=False, ellipsisroots=None, fullnodes=None):
    """Construct a cgpacker that emits version 02 changegroups."""
    # cg2 headers carry an explicit delta base node.
    def builddeltaheader(d):
        return _CHANGEGROUPV2_DELTA_HEADER.pack(d.node, d.p1node, d.p2node,
                                                d.basenode, d.linknode)

    # Since generaldelta is directly supported by cg2, reordering
    # generally doesn't help, so we disable it by default (treating
    # bundle.reorder=auto just like bundle.reorder=False).
    return cgpacker(repo, filematcher, b'02',
                    deltaparentfn=_deltaparentgeneraldelta,
                    allowreorder=False,
                    builddeltaheader=builddeltaheader,
                    manifestsend=b'',
                    bundlecaps=bundlecaps,
                    ellipses=ellipses,
                    shallow=shallow,
                    ellipsisroots=ellipsisroots,
                    fullnodes=fullnodes)
1238 1242
def _makecg3packer(repo, filematcher, bundlecaps, ellipses=False,
                   shallow=False, ellipsisroots=None, fullnodes=None):
    """Construct a cgpacker that emits version 03 changegroups."""
    # cg3 headers additionally carry per-revision flags.
    def builddeltaheader(d):
        return _CHANGEGROUPV3_DELTA_HEADER.pack(d.node, d.p1node, d.p2node,
                                                d.basenode, d.linknode,
                                                d.flags)

    # Ellipsis serving needs its own delta-parent strategy; otherwise use
    # the generaldelta one.
    if ellipses:
        deltaparentfn = _deltaparentellipses
    else:
        deltaparentfn = _deltaparentgeneraldelta

    return cgpacker(repo, filematcher, b'03',
                    deltaparentfn=deltaparentfn,
                    allowreorder=False,
                    builddeltaheader=builddeltaheader,
                    manifestsend=closechunk(),
                    bundlecaps=bundlecaps,
                    ellipses=ellipses,
                    shallow=shallow,
                    ellipsisroots=ellipsisroots,
                    fullnodes=fullnodes)
1257 1261
# Maps changegroup version identifier to a (packer factory, unpacker)
# pair. Keep entries in sync with allsupportedversions() below.
1258 1262 _packermap = {'01': (_makecg1packer, cg1unpacker),
1259 1263 # cg2 adds support for exchanging generaldelta
1260 1264 '02': (_makecg2packer, cg2unpacker),
1261 1265 # cg3 adds support for exchanging revlog flags and treemanifests
1262 1266 '03': (_makecg3packer, cg3unpacker),
1263 1267 }
1264 1268
def allsupportedversions(repo):
    """Return the set of all changegroup versions this repo supports."""
    versions = set(_packermap.keys())
    # cg3 is only offered when explicitly enabled or when tree manifests
    # are in play.
    needv03 = (repo.ui.configbool('experimental', 'changegroup3')
               or repo.ui.configbool('experimental', 'treemanifest')
               or 'treemanifest' in repo.requirements)
    if not needv03:
        versions.discard('03')
    return versions
1272 1276
# Changegroup versions that can be applied to the repo
def supportedincomingversions(repo):
    """Return changegroup versions that can be applied to the repo."""
    # Currently every supported version can be applied.
    versions = allsupportedversions(repo)
    return versions
1276 1280
# Changegroup versions that can be created from the repo
def supportedoutgoingversions(repo):
    """Return changegroup versions that can be created from the repo."""
    versions = allsupportedversions(repo)
    reqs = repo.requirements

    if 'treemanifest' in reqs:
        # Versions 01 and 02 support only flat manifests and it's just too
        # expensive to convert between the flat manifest and tree manifest on
        # the fly. Since tree manifests are hashed differently, all of history
        # would have to be converted. Instead, we simply don't even pretend to
        # support versions 01 and 02.
        versions.difference_update(('01', '02'))

    if repository.NARROW_REQUIREMENT in reqs:
        # Versions 01 and 02 don't support revlog flags, and we need to
        # support that for stripping and unbundling to work.
        versions.difference_update(('01', '02'))

    if LFS_REQUIREMENT in reqs:
        # Versions 01 and 02 don't support revlog flags, and we need to
        # mark LFS entries with REVIDX_EXTSTORED.
        versions.difference_update(('01', '02'))

    return versions
1300 1304
def localversion(repo):
    """Find the best version for bundles that are meant to be used
    locally, such as those from strip and shelve, and temporary bundles.
    """
    # The newest supported outgoing version is always preferred locally.
    return max(supportedoutgoingversions(repo))
1305 1309
def safeversion(repo):
    """Find the smallest version it's safe to assume clients support.

    For example, all hg versions that support generaldelta also support
    changegroup 02.
    """
    versions = supportedoutgoingversions(repo)
    if 'generaldelta' in repo.requirements:
        versions.discard('01')
    # There must always be at least one version left to offer.
    assert versions
    return min(versions)
1315 1319
def getbundler(version, repo, bundlecaps=None, filematcher=None,
               ellipses=False, shallow=False, ellipsisroots=None,
               fullnodes=None):
    """Obtain a changegroup packer for the requested version."""
    assert version in supportedoutgoingversions(repo)

    if filematcher is None:
        filematcher = matchmod.alwaysmatcher(repo.root, '')

    if version == '01' and not filematcher.always():
        raise error.ProgrammingError('version 01 changegroups do not support '
                                     'sparse file matchers')

    if ellipses and version in (b'01', b'02'):
        raise error.Abort(
            _('ellipsis nodes require at least cg3 on client and server, '
              'but negotiated version %s') % version)

    # Requested files could include files not in the local store. So
    # filter those out.
    filematcher = matchmod.intersectmatchers(repo.narrowmatch(),
                                             filematcher)

    makepackerfn = _packermap[version][0]
    return makepackerfn(repo, filematcher, bundlecaps,
                        ellipses=ellipses,
                        shallow=shallow,
                        ellipsisroots=ellipsisroots,
                        fullnodes=fullnodes)
1342 1346
def getunbundler(version, fh, alg, extras=None):
    """Obtain an unpacker for the given changegroup version."""
    unpackerfn = _packermap[version][1]
    return unpackerfn(fh, alg, extras=extras)
1345 1349
def _changegroupinfo(repo, nodes, source):
    """Report how many (and, when debugging, which) changesets are sent."""
    ui = repo.ui
    if ui.verbose or source == 'bundle':
        ui.status(_("%d changesets found\n") % len(nodes))
    if ui.debugflag:
        ui.debug("list of changesets:\n")
        for node in nodes:
            ui.debug("%s\n" % hex(node))
1353 1357
def makechangegroup(repo, outgoing, version, source, fastpath=False,
                    bundlecaps=None):
    """Generate a changegroup and wrap it in an unbundler for local use."""
    cgstream = makestream(repo, outgoing, version, source,
                          fastpath=fastpath, bundlecaps=bundlecaps)
    extras = {'clcount': len(outgoing.missing)}
    return getunbundler(version, util.chunkbuffer(cgstream), None, extras)
1360 1364
def makestream(repo, outgoing, version, source, fastpath=False,
               bundlecaps=None, filematcher=None):
    """Produce a stream of changegroup chunks for ``outgoing`` revisions."""
    bundler = getbundler(version, repo, bundlecaps=bundlecaps,
                         filematcher=filematcher)

    repo = repo.unfiltered()
    commonrevs = outgoing.common
    csets = outgoing.missing
    heads = outgoing.missingheads
    heads.sort()

    # We go through the fast path if we get told to, or if all (unfiltered)
    # heads have been requested (since we then know all linkrevs will
    # be pulled by the client).
    allheadsrequested = (repo.filtername is None
                         and heads == sorted(repo.heads()))
    fastpathlinkrev = fastpath or allheadsrequested

    repo.hook('preoutgoing', throw=True, source=source)
    _changegroupinfo(repo, csets, source)
    return bundler.generate(commonrevs, csets, fastpathlinkrev, source)
1380 1384
# Apply incoming filelog chunks from `source` to the repo under
# transaction `trp`. Returns a (revisions, files) count of what was
# added. `needfiles` maps filename -> set of nodes we expect to receive;
# every received node is checked off, any extra node aborts, and any
# node still missing at the end aborts.
1381 1385 def _addchangegroupfiles(repo, source, revmap, trp, expectedfiles, needfiles):
1382 1386 revisions = 0
1383 1387 files = 0
1384 1388 progress = repo.ui.makeprogress(_('files'), unit=_('files'),
1385 1389 total=expectedfiles)
# filelogheader() returns {} at the end-of-stream marker, terminating
# the iter() loop.
1386 1390 for chunkdata in iter(source.filelogheader, {}):
1387 1391 files += 1
1388 1392 f = chunkdata["filename"]
1389 1393 repo.ui.debug("adding %s revisions\n" % f)
1390 1394 progress.increment()
1391 1395 fl = repo.file(f)
# Remember the pre-add length so we can count and validate only the
# newly appended revisions below.
1392 1396 o = len(fl)
1393 1397 try:
1394 1398 deltas = source.deltaiter()
1395 1399 if not fl.addgroup(deltas, revmap, trp):
1396 1400 raise error.Abort(_("received file revlog group is empty"))
1397 1401 except error.CensoredBaseError as e:
1398 1402 raise error.Abort(_("received delta base is censored: %s") % e)
1399 1403 revisions += len(fl) - o
1400 1404 if f in needfiles:
1401 1405 needs = needfiles[f]
# Check off each newly added node; anything we didn't ask for is an
# error.
1402 1406 for new in pycompat.xrange(o, len(fl)):
1403 1407 n = fl.node(new)
1404 1408 if n in needs:
1405 1409 needs.remove(n)
1406 1410 else:
1407 1411 raise error.Abort(
1408 1412 _("received spurious file revlog entry"))
1409 1413 if not needs:
1410 1414 del needfiles[f]
1411 1415 progress.complete()
1412 1416 
# Anything left in needfiles was promised but never received; verify it
# is not already present before aborting.
1413 1417 for f, needs in needfiles.iteritems():
1414 1418 fl = repo.file(f)
1415 1419 for n in needs:
1416 1420 try:
1417 1421 fl.rev(n)
1418 1422 except error.LookupError:
1419 1423 raise error.Abort(
1420 1424 _('missing file data for %s:%s - run hg verify') %
1421 1425 (f, hex(n)))
1422 1426 
1423 1427 return revisions, files
General Comments 0
You need to be logged in to leave comments. Login now