##// END OF EJS Templates
changegroup: move size tracking and end of manifests to generate()...
Gregory Szorc -
r39047:2ebdd265 default
parent child Browse files
Show More
@@ -1,1449 +1,1447
1 1 # changegroup.py - Mercurial changegroup manipulation functions
2 2 #
3 3 # Copyright 2006 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import os
11 11 import struct
12 12 import weakref
13 13
14 14 from .i18n import _
15 15 from .node import (
16 16 hex,
17 17 nullid,
18 18 nullrev,
19 19 short,
20 20 )
21 21
22 22 from .thirdparty import (
23 23 attr,
24 24 )
25 25
26 26 from . import (
27 27 dagutil,
28 28 error,
29 29 match as matchmod,
30 30 mdiff,
31 31 phases,
32 32 pycompat,
33 33 repository,
34 34 revlog,
35 35 util,
36 36 )
37 37
38 38 from .utils import (
39 39 stringutil,
40 40 )
41 41
42 42 _CHANGEGROUPV1_DELTA_HEADER = struct.Struct("20s20s20s20s")
43 43 _CHANGEGROUPV2_DELTA_HEADER = struct.Struct("20s20s20s20s20s")
44 44 _CHANGEGROUPV3_DELTA_HEADER = struct.Struct(">20s20s20s20s20sH")
45 45
46 46 LFS_REQUIREMENT = 'lfs'
47 47
48 48 readexactly = util.readexactly
49 49
def getchunk(stream):
    """return the next chunk from stream as a string"""
    # Chunks are length-prefixed; the 4-byte big-endian prefix counts
    # itself, so a value of exactly 4 would be an empty payload and is
    # never emitted.
    lengthbytes = readexactly(stream, 4)
    length = struct.unpack(">l", lengthbytes)[0]
    if length > 4:
        return readexactly(stream, length - 4)
    if length:
        # 1..4 (or negative) cannot be a valid framed chunk.
        raise error.Abort(_("invalid chunk length %d") % length)
    # A zero length marks the end of a chunk sequence.
    return ""
59 59
def chunkheader(length):
    """return a changegroup chunk header (string)"""
    # The wire length includes the 4 bytes of the header itself.
    return struct.pack(">l", 4 + length)
63 63
def closechunk():
    """return a changegroup chunk header (string) for a zero-length chunk"""
    # Four zero bytes: a length prefix of 0 terminates a chunk sequence.
    return b"\x00\x00\x00\x00"
67 67
def _fileheader(path):
    """Obtain a changegroup chunk header for a named path."""
    # Equivalent to chunkheader(len(path)) + path: a length prefix
    # (covering its own 4 bytes) followed by the path bytes.
    return struct.pack(">l", len(path) + 4) + path
71 71
def writechunks(ui, chunks, filename, vfs=None):
    """Write chunks to a file and return its filename.

    The stream is assumed to be a bundle file.
    Existing files will not be overwritten.
    If no filename is specified, a temporary file is created.
    """
    fh = None
    cleanup = None
    try:
        if not filename:
            # No destination given: create a temp file and remember to
            # remove it if writing fails partway through.
            fd, filename = pycompat.mkstemp(prefix="hg-bundle-", suffix=".hg")
            fh = os.fdopen(fd, r"wb")
            cleanup = filename
        elif vfs:
            fh = vfs.open(filename, "wb")
        else:
            # Increase default buffer size because default is usually
            # small (4k is common on Linux).
            fh = open(filename, "wb", 131072)
        for chunk in chunks:
            fh.write(chunk)
        # Everything was written; keep the file.
        cleanup = None
        return filename
    finally:
        if fh is not None:
            fh.close()
        if cleanup is not None:
            # Writing failed: discard the partial file.
            if filename and vfs:
                vfs.unlink(cleanup)
            else:
                os.unlink(cleanup)
105 105
class cg1unpacker(object):
    """Unpacker for cg1 changegroup streams.

    A changegroup unpacker handles the framing of the revision data in
    the wire format. Most consumers will want to use the apply()
    method to add the changes from the changegroup to a repository.

    If you're forwarding a changegroup unmodified to another consumer,
    use getchunks(), which returns an iterator of changegroup
    chunks. This is mostly useful for cases where you need to know the
    data stream has ended by observing the end of the changegroup.

    deltachunk() is useful only if you're applying delta data. Most
    consumers should prefer apply() instead.

    A few other public methods exist. Those are used only for
    bundlerepo and some debug commands - their use is discouraged.
    """
    deltaheader = _CHANGEGROUPV1_DELTA_HEADER
    deltaheadersize = deltaheader.size
    version = '01'
    _grouplistcount = 1 # One list of files after the manifests

    def __init__(self, fh, alg, extras=None):
        # 'alg' names the compression applied to the stream; None is
        # treated as uncompressed ('UN').
        if alg is None:
            alg = 'UN'
        if alg not in util.compengines.supportedbundletypes:
            raise error.Abort(_('unknown stream compression type: %s')
                              % alg)
        if alg == 'BZ':
            # The stream's leading "BZ" marker has already been consumed;
            # use the decompressor variant that tolerates that.
            alg = '_truncatedBZ'

        compengine = util.compengines.forbundletype(alg)
        self._stream = compengine.decompressorreader(fh)
        self._type = alg
        self.extras = extras or {}
        # Invoked (when set) once per chunk read, for progress reporting.
        self.callback = None

    # These methods (compressed, read, seek, tell) all appear to only
    # be used by bundlerepo, but it's a little hard to tell.
    def compressed(self):
        return self._type is not None and self._type != 'UN'
    def read(self, l):
        return self._stream.read(l)
    def seek(self, pos):
        return self._stream.seek(pos)
    def tell(self):
        return self._stream.tell()
    def close(self):
        return self._stream.close()

    def _chunklength(self):
        """Read a chunk's length prefix and return the payload length.

        Returns 0 for an empty (terminating) chunk and aborts on an
        impossible length (1..4 or negative).
        """
        d = readexactly(self._stream, 4)
        l = struct.unpack(">l", d)[0]
        if l <= 4:
            if l:
                raise error.Abort(_("invalid chunk length %d") % l)
            return 0
        if self.callback:
            self.callback()
        return l - 4

    def changelogheader(self):
        """v10 does not have a changelog header chunk"""
        return {}

    def manifestheader(self):
        """v10 does not have a manifest header chunk"""
        return {}

    def filelogheader(self):
        """return the header of the filelogs chunk, v10 only has the filename"""
        l = self._chunklength()
        if not l:
            return {}
        fname = readexactly(self._stream, l)
        return {'filename': fname}

    def _deltaheader(self, headertuple, prevnode):
        # cg1 has no explicit delta base: deltas chain against the
        # previous revision in the stream, or p1 for the first one.
        node, p1, p2, cs = headertuple
        if prevnode is None:
            deltabase = p1
        else:
            deltabase = prevnode
        flags = 0
        return node, p1, p2, deltabase, cs, flags

    def deltachunk(self, prevnode):
        """Read one delta chunk; returns {} at the end of the group."""
        l = self._chunklength()
        if not l:
            return {}
        headerdata = readexactly(self._stream, self.deltaheadersize)
        header = self.deltaheader.unpack(headerdata)
        delta = readexactly(self._stream, l - self.deltaheadersize)
        node, p1, p2, deltabase, cs, flags = self._deltaheader(header, prevnode)
        return (node, p1, p2, cs, deltabase, delta, flags)

    def getchunks(self):
        """returns all the chunks contains in the bundle

        Used when you need to forward the binary stream to a file or another
        network API. To do so, it parse the changegroup data, otherwise it will
        block in case of sshrepo because it don't know the end of the stream.
        """
        # For changegroup 1 and 2, we expect 3 parts: changelog, manifestlog,
        # and a list of filelogs. For changegroup 3, we expect 4 parts:
        # changelog, manifestlog, a list of tree manifestlogs, and a list of
        # filelogs.
        #
        # Changelog and manifestlog parts are terminated with empty chunks. The
        # tree and file parts are a list of entry sections. Each entry section
        # is a series of chunks terminating in an empty chunk. The list of these
        # entry sections is terminated in yet another empty chunk, so we know
        # we've reached the end of the tree/file list when we reach an empty
        # chunk that was proceeded by no non-empty chunks.

        parts = 0
        while parts < 2 + self._grouplistcount:
            noentries = True
            while True:
                chunk = getchunk(self)
                if not chunk:
                    # The first two empty chunks represent the end of the
                    # changelog and the manifestlog portions. The remaining
                    # empty chunks represent either A) the end of individual
                    # tree or file entries in the file list, or B) the end of
                    # the entire list. It's the end of the entire list if there
                    # were no entries (i.e. noentries is True).
                    if parts < 2:
                        parts += 1
                    elif noentries:
                        parts += 1
                    break
                noentries = False
                yield chunkheader(len(chunk))
                pos = 0
                # Re-emit payloads in at most 1MB slices.
                while pos < len(chunk):
                    next = pos + 2**20
                    yield chunk[pos:next]
                    pos = next
            yield closechunk()

    def _unpackmanifests(self, repo, revmap, trp, prog):
        self.callback = prog.increment
        # no need to check for empty manifest group here:
        # if the result of the merge of 1 and 2 is the same in 3 and 4,
        # no new manifest will be created and the manifest group will
        # be empty during the pull
        self.manifestheader()
        deltas = self.deltaiter()
        repo.manifestlog.addgroup(deltas, revmap, trp)
        prog.complete()
        self.callback = None

    def apply(self, repo, tr, srctype, url, targetphase=phases.draft,
              expectedtotal=None):
        """Add the changegroup returned by source.read() to this repo.
        srctype is a string like 'push', 'pull', or 'unbundle'. url is
        the URL of the repo where this changegroup is coming from.

        Return an integer summarizing the change to this repo:
        - nothing changed or no source: 0
        - more heads than before: 1+added heads (2..n)
        - fewer heads than before: -1-removed heads (-2..-n)
        - number of heads stays the same: 1
        """
        repo = repo.unfiltered()
        def csmap(x):
            repo.ui.debug("add changeset %s\n" % short(x))
            return len(cl)

        def revmap(x):
            return cl.rev(x)

        changesets = files = revisions = 0

        try:
            # The transaction may already carry source information. In this
            # case we use the top level data. We overwrite the argument
            # because we need to use the top level value (if they exist)
            # in this function.
            srctype = tr.hookargs.setdefault('source', srctype)
            url = tr.hookargs.setdefault('url', url)
            repo.hook('prechangegroup',
                      throw=True, **pycompat.strkwargs(tr.hookargs))

            # write changelog data to temp files so concurrent readers
            # will not see an inconsistent view
            cl = repo.changelog
            cl.delayupdate(tr)
            oldheads = set(cl.heads())

            trp = weakref.proxy(tr)
            # pull off the changeset group
            repo.ui.status(_("adding changesets\n"))
            clstart = len(cl)
            progress = repo.ui.makeprogress(_('changesets'), unit=_('chunks'),
                                            total=expectedtotal)
            self.callback = progress.increment

            # Collect, via the addgroup callback, the set of files touched
            # by the incoming changesets (used for file progress totals).
            efiles = set()
            def onchangelog(cl, node):
                efiles.update(cl.readfiles(node))

            self.changelogheader()
            deltas = self.deltaiter()
            cgnodes = cl.addgroup(deltas, csmap, trp, addrevisioncb=onchangelog)
            efiles = len(efiles)

            if not cgnodes:
                repo.ui.develwarn('applied empty changegroup',
                                  config='warn-empty-changegroup')
            clend = len(cl)
            changesets = clend - clstart
            progress.complete()
            self.callback = None

            # pull off the manifest group
            repo.ui.status(_("adding manifests\n"))
            # We know that we'll never have more manifests than we had
            # changesets.
            progress = repo.ui.makeprogress(_('manifests'), unit=_('chunks'),
                                            total=changesets)
            self._unpackmanifests(repo, revmap, trp, progress)

            needfiles = {}
            if repo.ui.configbool('server', 'validate'):
                cl = repo.changelog
                ml = repo.manifestlog
                # validate incoming csets have their manifests
                for cset in pycompat.xrange(clstart, clend):
                    mfnode = cl.changelogrevision(cset).manifest
                    mfest = ml[mfnode].readdelta()
                    # store file cgnodes we must see
                    for f, n in mfest.iteritems():
                        needfiles.setdefault(f, set()).add(n)

            # process the files
            repo.ui.status(_("adding file changes\n"))
            newrevs, newfiles = _addchangegroupfiles(
                repo, self, revmap, trp, efiles, needfiles)
            revisions += newrevs
            files += newfiles

            # Head-count delta, excluding heads that close a branch.
            deltaheads = 0
            if oldheads:
                heads = cl.heads()
                deltaheads = len(heads) - len(oldheads)
                for h in heads:
                    if h not in oldheads and repo[h].closesbranch():
                        deltaheads -= 1
            htext = ""
            if deltaheads:
                htext = _(" (%+d heads)") % deltaheads

            repo.ui.status(_("added %d changesets"
                             " with %d changes to %d files%s\n")
                           % (changesets, revisions, files, htext))
            repo.invalidatevolatilesets()

            if changesets > 0:
                if 'node' not in tr.hookargs:
                    tr.hookargs['node'] = hex(cl.node(clstart))
                    tr.hookargs['node_last'] = hex(cl.node(clend - 1))
                    hookargs = dict(tr.hookargs)
                else:
                    # Node info already present on the transaction; only
                    # override it in our local copy for the hooks below.
                    hookargs = dict(tr.hookargs)
                    hookargs['node'] = hex(cl.node(clstart))
                    hookargs['node_last'] = hex(cl.node(clend - 1))
                repo.hook('pretxnchangegroup',
                          throw=True, **pycompat.strkwargs(hookargs))

            added = [cl.node(r) for r in pycompat.xrange(clstart, clend)]
            phaseall = None
            if srctype in ('push', 'serve'):
                # Old servers can not push the boundary themselves.
                # New servers won't push the boundary if changeset already
                # exists locally as secret
                #
                # We should not use added here but the list of all change in
                # the bundle
                if repo.publishing():
                    targetphase = phaseall = phases.public
                else:
                    # closer target phase computation

                    # Those changesets have been pushed from the
                    # outside, their phases are going to be pushed
                    # alongside. Therefor `targetphase` is
                    # ignored.
                    targetphase = phaseall = phases.draft
            if added:
                phases.registernew(repo, tr, targetphase, added)
            if phaseall is not None:
                phases.advanceboundary(repo, tr, phaseall, cgnodes)

            if changesets > 0:

                def runhooks():
                    # These hooks run when the lock releases, not when the
                    # transaction closes. So it's possible for the changelog
                    # to have changed since we last saw it.
                    if clstart >= len(repo):
                        return

                    repo.hook("changegroup", **pycompat.strkwargs(hookargs))

                    for n in added:
                        args = hookargs.copy()
                        args['node'] = hex(n)
                        del args['node_last']
                        repo.hook("incoming", **pycompat.strkwargs(args))

                    newheads = [h for h in repo.heads()
                                if h not in oldheads]
                    repo.ui.log("incoming",
                                "%d incoming changes - new heads: %s\n",
                                len(added),
                                ', '.join([hex(c[:6]) for c in newheads]))

                tr.addpostclose('changegroup-runhooks-%020i' % clstart,
                                lambda tr: repo._afterlock(runhooks))
        finally:
            repo.ui.flush()
        # never return 0 here:
        if deltaheads < 0:
            ret = deltaheads - 1
        else:
            ret = deltaheads + 1
        return ret

    def deltaiter(self):
        """
        returns an iterator of the deltas in this changegroup

        Useful for passing to the underlying storage system to be stored.
        """
        chain = None
        for chunkdata in iter(lambda: self.deltachunk(chain), {}):
            # Chunkdata: (node, p1, p2, cs, deltabase, delta, flags)
            yield chunkdata
            chain = chunkdata[0]
448 448
class cg2unpacker(cg1unpacker):
    """Unpacker for cg2 streams.

    cg2 streams add support for generaldelta, so the delta header
    format is slightly different. All other features about the data
    remain the same.
    """
    deltaheader = _CHANGEGROUPV2_DELTA_HEADER
    deltaheadersize = deltaheader.size
    version = '02'

    def _deltaheader(self, headertuple, prevnode):
        # cg2 encodes the delta base explicitly in the header, so
        # prevnode is not consulted; this version carries no flags.
        node, p1, p2, deltabase, cs = headertuple
        return node, p1, p2, deltabase, cs, 0
464 464
class cg3unpacker(cg2unpacker):
    """Unpacker for cg3 streams.

    cg3 streams add support for exchanging treemanifests and revlog
    flags. It adds the revlog flags to the delta header and an empty chunk
    separating manifests and files.
    """
    deltaheader = _CHANGEGROUPV3_DELTA_HEADER
    deltaheadersize = deltaheader.size
    version = '03'
    _grouplistcount = 2 # One list of manifests and one list of files

    def _deltaheader(self, headertuple, prevnode):
        # All six fields, including flags, are carried on the wire.
        return headertuple

    def _unpackmanifests(self, repo, revmap, trp, prog):
        super(cg3unpacker, self)._unpackmanifests(repo, revmap, trp, prog)
        while True:
            chunkdata = self.filelogheader()
            if not chunkdata:
                # Empty header terminates the tree manifest list.
                break
            # A non-empty header here names a directory manifest log.
            dirname = chunkdata["filename"]
            repo.ui.debug("adding %s revisions\n" % dirname)
            dirlog = repo.manifestlog._revlog.dirlog(dirname)
            if not dirlog.addgroup(self.deltaiter(), revmap, trp):
                raise error.Abort(_("received dir revlog group is empty"))
491 491
class headerlessfixup(object):
    """File-like wrapper that replays already-consumed header bytes.

    Serves reads from the buffered header first, then falls through to
    the underlying stream.
    """
    def __init__(self, fh, h):
        self._h = h
        self._fh = fh
    def read(self, n):
        buffered = self._h
        if not buffered:
            return readexactly(self._fh, n)
        d = buffered[:n]
        self._h = buffered[n:]
        if len(d) < n:
            # Header exhausted mid-read; top up from the real stream.
            d += readexactly(self._fh, n - len(d))
        return d
503 503
@attr.s(slots=True, frozen=True)
class revisiondelta(object):
    """Describes a delta entry in a changegroup.

    Captured data is sufficient to serialize the delta into multiple
    formats.

    Instances are immutable (frozen) value objects.
    """
    # 20 byte node of this revision.
    node = attr.ib()
    # 20 byte nodes of parent revisions.
    p1node = attr.ib()
    p2node = attr.ib()
    # 20 byte node of node this delta is against.
    basenode = attr.ib()
    # 20 byte node of changeset revision this delta is associated with.
    linknode = attr.ib()
    # 2 bytes of flags to apply to revision data.
    flags = attr.ib()
    # Iterable of chunks holding raw delta data.
    deltachunks = attr.ib()
524 524
def _sortnodesnormal(store, nodes, reorder):
    """Sort nodes for changegroup generation and turn into revnums."""
    revs = [store.rev(n) for n in nodes]
    # For generaldelta revlogs, we linearize the revs; this will both be
    # much quicker and generate a much smaller bundle.
    if reorder or (reorder is None and store._generaldelta):
        dag = dagutil.revlogdag(store)
        return dag.linearize(set(revs))
    revs.sort()
    return revs
534 534
def _sortnodesellipsis(store, nodes, cl, lookup):
    """Sort nodes for changegroup generation and turn into revnums."""
    # Ellipsis serving mode.
    #
    # In a perfect world we'd generate better ellipsis-ified graphs for
    # non-changelog revlogs, but in practice the resulting DAGs for the
    # manifestlog and filelogs contain bogus parentage on ellipsis
    # nodes, so individual DAGs may be out of whack even though their
    # contents are correct (see e.g. 882681bc3166 and its ancestors in
    # the main hg repo).
    #
    # The one invariant known to hold is that the (potentially bogus)
    # DAG shape stays valid if nodes are ordered by when they are first
    # introduced by the changelog, so sort by the changelog rev of each
    # node's linknode before converting to local revnums.
    def bylinkrev(node):
        return cl.rev(lookup(node))
    return [store.rev(n) for n in sorted(nodes, key=bylinkrev)]
555 555
def _revisiondeltanormal(store, rev, prev, linknode, deltaparentfn):
    """Construct a revision delta for non-ellipses changegroup generation.

    deltaparentfn chooses the delta base given (store, rev, p1, p2, prev).
    Returns a revisiondelta whose deltachunks is a (prefix, delta) pair.
    """
    node = store.node(rev)
    p1, p2 = store.parentrevs(rev)
    base = deltaparentfn(store, rev, p1, p2, prev)

    prefix = ''
    if store.iscensored(base) or store.iscensored(rev):
        # Censored revisions can't be diffed reliably; send the full
        # (possibly tombstone) text with a synthesized diff header.
        try:
            delta = store.revision(node, raw=True)
        except error.CensoredNodeError as e:
            delta = e.tombstone
        if base == nullrev:
            prefix = mdiff.trivialdiffheader(len(delta))
        else:
            baselen = store.rawsize(base)
            prefix = mdiff.replacediffheader(baselen, len(delta))
    elif base == nullrev:
        # No base: ship the full revision as a trivial "diff".
        delta = store.revision(node, raw=True)
        prefix = mdiff.trivialdiffheader(len(delta))
    else:
        delta = store.revdiff(base, rev)
    p1n, p2n = store.parents(node)

    return revisiondelta(
        node=node,
        p1node=p1n,
        p2node=p2n,
        basenode=store.node(base),
        linknode=linknode,
        flags=store.flags(rev),
        deltachunks=(prefix, delta),
    )
589 589
def _revisiondeltanarrow(cl, store, ischangelog, rev, linkrev,
                         linknode, clrevtolocalrev, fullclnodes,
                         precomputedellipsis):
    """Construct a revision delta for an ellipsis node.

    The revision is emitted as a full snapshot (delta against null, with
    REVIDX_ELLIPSIS set) whose parents are remapped through the
    precomputed ellipsis graph.
    """
    linkparents = precomputedellipsis[linkrev]
    def local(clrev):
        """Turn a changelog revnum into a local revnum.

        The ellipsis dag is stored as revnums on the changelog,
        but when we're producing ellipsis entries for
        non-changelog revlogs, we need to turn those numbers into
        something local. This does that for us, and during the
        changelog sending phase will also expand the stored
        mappings as needed.
        """
        if clrev == nullrev:
            return nullrev

        if ischangelog:
            return clrev

        # Walk the ellipsis-ized changelog breadth-first looking for a
        # change that has been linked from the current revlog.
        #
        # For a flat manifest revlog only a single step should be necessary
        # as all relevant changelog entries are relevant to the flat
        # manifest.
        #
        # For a filelog or tree manifest dirlog however not every changelog
        # entry will have been relevant, so we need to skip some changelog
        # nodes even after ellipsis-izing.
        walk = [clrev]
        while walk:
            p = walk[0]
            walk = walk[1:]
            if p in clrevtolocalrev:
                return clrevtolocalrev[p]
            elif p in fullclnodes:
                walk.extend([pp for pp in cl.parentrevs(p)
                             if pp != nullrev])
            elif p in precomputedellipsis:
                walk.extend([pp for pp in precomputedellipsis[p]
                             if pp != nullrev])
            else:
                # In this case, we've got an ellipsis with parents
                # outside the current bundle (likely an
                # incremental pull). We "know" that we can use the
                # value of this same revlog at whatever revision
                # is pointed to by linknode. "Know" is in scare
                # quotes because I haven't done enough examination
                # of edge cases to convince myself this is really
                # a fact - it works for all the (admittedly
                # thorough) cases in our testsuite, but I would be
                # somewhat unsurprised to find a case in the wild
                # where this breaks down a bit. That said, I don't
                # know if it would hurt anything.
                for i in pycompat.xrange(rev, 0, -1):
                    if store.linkrev(i) == clrev:
                        return i
                # We failed to resolve a parent for this node, so
                # we crash the changegroup construction.
                raise error.Abort(
                    'unable to resolve parent while packing %r %r'
                    ' for changeset %r' % (store.indexfile, rev, clrev))

        return nullrev

    if not linkparents or (
        store.parentrevs(rev) == (nullrev, nullrev)):
        p1, p2 = nullrev, nullrev
    elif len(linkparents) == 1:
        p1, = sorted(local(p) for p in linkparents)
        p2 = nullrev
    else:
        p1, p2 = sorted(local(p) for p in linkparents)

    n = store.node(rev)
    p1n, p2n = store.node(p1), store.node(p2)
    flags = store.flags(rev)
    flags |= revlog.REVIDX_ELLIPSIS

    # TODO: try and actually send deltas for ellipsis data blocks
    data = store.revision(n)
    diffheader = mdiff.trivialdiffheader(len(data))

    return revisiondelta(
        node=n,
        p1node=p1n,
        p2node=p2n,
        basenode=nullid,
        linknode=linknode,
        flags=flags,
        deltachunks=(diffheader, data),
    )
683 683
def deltagroup(repo, revs, store, ischangelog, lookup, deltaparentfn,
               deltaheaderfn, units=None,
               ellipses=False, clrevtolocalrev=None, fullclnodes=None,
               precomputedellipsis=None):
    """Calculate a delta group, yielding a sequence of changegroup chunks
    (strings).

    Given a list of changeset revs, return a set of deltas and
    metadata corresponding to nodes. The first delta is
    first parent(nodelist[0]) -> nodelist[0], the receiver is
    guaranteed to have this parent as it has all history before
    these changesets. In the case firstparent is nullrev the
    changegroup starts with a full revision.

    If units is not None, progress detail will be generated, units specifies
    the type of revlog that is touched (changelog, manifest, etc.).
    """
    # if we don't have any revisions touched by these changesets, bail
    if len(revs) == 0:
        return

    cl = repo.changelog

    # add the parent of the first rev
    p = store.parentrevs(revs[0])[0]
    revs.insert(0, p)

    # build deltas
    progress = None
    if units is not None:
        progress = repo.ui.makeprogress(_('bundling'), unit=units,
                                        total=(len(revs) - 1))
    # Walk consecutive (prev, curr) pairs; revs[0] is the extra parent.
    for r in pycompat.xrange(len(revs) - 1):
        if progress:
            progress.update(r + 1)
        prev, curr = revs[r], revs[r + 1]
        linknode = lookup(store.node(curr))

        if ellipses:
            linkrev = cl.rev(linknode)
            clrevtolocalrev[linkrev] = curr

            # This is a node to send in full, because the changeset it
            # corresponds to was a full changeset.
            if linknode in fullclnodes:
                delta = _revisiondeltanormal(store, curr, prev, linknode,
                                             deltaparentfn)
            elif linkrev not in precomputedellipsis:
                # Not part of the ellipsis graph: nothing to send.
                delta = None
            else:
                delta = _revisiondeltanarrow(
                    cl, store, ischangelog, curr, linkrev, linknode,
                    clrevtolocalrev, fullclnodes,
                    precomputedellipsis)
        else:
            delta = _revisiondeltanormal(store, curr, prev, linknode,
                                         deltaparentfn)

        if not delta:
            continue

        # Frame: length header, delta header, then the delta chunks.
        meta = deltaheaderfn(delta)
        l = len(meta) + sum(len(x) for x in delta.deltachunks)
        yield chunkheader(l)
        yield meta
        for x in delta.deltachunks:
            yield x

    if progress:
        progress.complete()
754 754
class cgpacker(object):
    def __init__(self, repo, filematcher, version, allowreorder,
                 deltaparentfn, builddeltaheader, manifestsend,
                 bundlecaps=None, ellipses=False,
                 shallow=False, ellipsisroots=None, fullnodes=None):
        """Given a source repo, construct a bundler.

        filematcher is a matcher that matches on files to include in the
        changegroup. Used to facilitate sparse changegroups.

        allowreorder controls whether reordering of revisions is allowed.
        This value is used when ``bundle.reorder`` is ``auto`` or isn't
        set.

        deltaparentfn is a callable that resolves the delta parent for
        a specific revision.

        builddeltaheader is a callable that constructs the header for a group
        delta.

        manifestsend is a chunk to send after manifests have been fully emitted.

        ellipses indicates whether ellipsis serving mode is enabled.

        bundlecaps is optional and can be used to specify the set of
        capabilities which can be used to build the bundle. While bundlecaps is
        unused in core Mercurial, extensions rely on this feature to communicate
        capabilities to customize the changegroup packer.

        shallow indicates whether shallow data might be sent. The packer may
        need to pack file contents not introduced by the changes being packed.

        fullnodes is the set of changelog nodes which should not be ellipsis
        nodes. We store this rather than the set of nodes that should be
        ellipsis because for very large histories we expect this to be
        significantly smaller.
        """
        assert filematcher
        self._filematcher = filematcher

        self.version = version
        self._deltaparentfn = deltaparentfn
        self._builddeltaheader = builddeltaheader
        self._manifestsend = manifestsend
        self._ellipses = ellipses

        # Set of capabilities we can use to build the bundle.
        if bundlecaps is None:
            bundlecaps = set()
        self._bundlecaps = bundlecaps
        self._isshallow = shallow
        self._fullclnodes = fullnodes

        # Maps ellipsis revs to their roots at the changelog level.
        self._precomputedellipsis = ellipsisroots

        # experimental config: bundle.reorder
        reorder = repo.ui.config('bundle', 'reorder')
        if reorder == 'auto':
            self._reorder = allowreorder
        else:
            self._reorder = stringutil.parsebool(reorder)

        self._repo = repo

        # Per-part size notes are only emitted in verbose (non-debug)
        # mode; otherwise _verbosenote is a no-op.
        if self._repo.ui.verbose and not self._repo.ui.debugflag:
            self._verbosenote = self._repo.ui.note
        else:
            self._verbosenote = lambda s: None
824 824
825 825 def generate(self, commonrevs, clnodes, fastpathlinkrev, source):
826 826 """Yield a sequence of changegroup byte chunks."""
827 827
828 828 repo = self._repo
829 829 cl = repo.changelog
830 830
831 831 self._verbosenote(_('uncompressed size of bundle content:\n'))
832 832 size = 0
833 833
834 834 clstate, chunks = self._generatechangelog(cl, clnodes)
835 835 for chunk in chunks:
836 836 size += len(chunk)
837 837 yield chunk
838 838
839 839 close = closechunk()
840 840 size += len(close)
841 841 yield closechunk()
842 842
843 843 self._verbosenote(_('%8.i (changelog)\n') % size)
844 844
845 845 clrevorder = clstate['clrevorder']
846 846 mfs = clstate['mfs']
847 847 changedfiles = clstate['changedfiles']
848 848
849 849 # We need to make sure that the linkrev in the changegroup refers to
850 850 # the first changeset that introduced the manifest or file revision.
851 851 # The fastpath is usually safer than the slowpath, because the filelogs
852 852 # are walked in revlog order.
853 853 #
854 854 # When taking the slowpath with reorder=None and the manifest revlog
855 855 # uses generaldelta, the manifest may be walked in the "wrong" order.
856 856 # Without 'clrevorder', we would get an incorrect linkrev (see fix in
857 857 # cc0ff93d0c0c).
858 858 #
859 859 # When taking the fastpath, we are only vulnerable to reordering
860 860 # of the changelog itself. The changelog never uses generaldelta, so
861 861 # it is only reordered when reorder=True. To handle this case, we
862 862 # simply take the slowpath, which already has the 'clrevorder' logic.
863 863 # This was also fixed in cc0ff93d0c0c.
864 864 fastpathlinkrev = fastpathlinkrev and not self._reorder
865 865 # Treemanifests don't work correctly with fastpathlinkrev
866 866 # either, because we don't discover which directory nodes to
867 867 # send along with files. This could probably be fixed.
868 868 fastpathlinkrev = fastpathlinkrev and (
869 869 'treemanifest' not in repo.requirements)
870 870
871 871 fnodes = {} # needed file nodes
872 872
873 size = 0
873 874 for chunk in self.generatemanifests(commonrevs, clrevorder,
874 875 fastpathlinkrev, mfs, fnodes, source,
875 876 clstate['clrevtomanifestrev']):
877 size += len(chunk)
876 878 yield chunk
877 879
880 self._verbosenote(_('%8.i (manifests)\n') % size)
881 yield self._manifestsend
882
878 883 mfdicts = None
879 884 if self._ellipses and self._isshallow:
880 885 mfdicts = [(self._repo.manifestlog[n].read(), lr)
881 886 for (n, lr) in mfs.iteritems()]
882 887
883 888 mfs.clear()
884 889 clrevs = set(cl.rev(x) for x in clnodes)
885 890
886 891 for chunk in self.generatefiles(changedfiles, commonrevs,
887 892 source, mfdicts, fastpathlinkrev,
888 893 fnodes, clrevs):
889 894 yield chunk
890 895
891 896 yield closechunk()
892 897
893 898 if clnodes:
894 899 repo.hook('outgoing', node=hex(clnodes[0]), source=source)
895 900
    def _generatechangelog(self, cl, nodes):
        """Generate data for changelog chunks.

        Returns a 2-tuple of a dict containing state and an iterable of
        byte chunks. The state will not be fully populated until the
        chunk stream has been fully consumed.

        State keys: ``clrevorder`` (node -> emission order), ``mfs``
        (manifest node -> first introducing cl node), ``changedfiles``
        (set of touched paths), and ``clrevtomanifestrev`` (cl rev ->
        manifest rev, used by ellipsis serving). All are filled in as a
        side effect of the per-revision ``lookupcl`` callback below.
        """
        clrevorder = {}
        mfs = {} # needed manifests
        mfl = self._repo.manifestlog
        # TODO violates storage abstraction.
        mfrevlog = mfl._revlog
        changedfiles = set()
        clrevtomanifestrev = {}

        # Callback for the changelog, used to collect changed files and
        # manifest nodes.
        # Returns the linkrev node (identity in the changelog case).
        def lookupcl(x):
            c = cl.read(x)
            # Remember the order changesets are emitted in; later phases use
            # this to pick the earliest linkrev for manifests/files.
            clrevorder[x] = len(clrevorder)

            if self._ellipses:
                # Only update mfs if x is going to be sent. Otherwise we
                # end up with bogus linkrevs specified for manifests and
                # we skip some manifest nodes that we should otherwise
                # have sent.
                if (x in self._fullclnodes
                    or cl.rev(x) in self._precomputedellipsis):
                    n = c[0]
                    # Record the first changeset introducing this manifest
                    # version.
                    mfs.setdefault(n, x)
                    # Set this narrow-specific dict so we have the lowest
                    # manifest revnum to look up for this cl revnum. (Part of
                    # mapping changelog ellipsis parents to manifest ellipsis
                    # parents)
                    clrevtomanifestrev.setdefault(cl.rev(x), mfrevlog.rev(n))
                # We can't trust the changed files list in the changeset if the
                # client requested a shallow clone.
                if self._isshallow:
                    changedfiles.update(mfl[c[0]].read().keys())
                else:
                    changedfiles.update(c[3])
            else:

                n = c[0]
                # record the first changeset introducing this manifest version
                mfs.setdefault(n, x)
                # Record a complete list of potentially-changed files in
                # this manifest.
                changedfiles.update(c[3])

            return x

        # Changelog doesn't benefit from reordering revisions. So send out
        # revisions in store order.
        revs = sorted(cl.rev(n) for n in nodes)

        state = {
            'clrevorder': clrevorder,
            'mfs': mfs,
            'changedfiles': changedfiles,
            'clrevtomanifestrev': clrevtomanifestrev,
        }

        gen = deltagroup(
            self._repo, revs, cl, True, lookupcl,
            self._deltaparentfn, self._builddeltaheader,
            ellipses=self._ellipses,
            units=_('changesets'),
            clrevtolocalrev={},
            fullclnodes=self._fullclnodes,
            precomputedellipsis=self._precomputedellipsis)

        return state, gen
972 977
    def generatemanifests(self, commonrevs, clrevorder, fastpathlinkrev, mfs,
                          fnodes, source, clrevtolocalrev):
        """Returns an iterator of changegroup chunks containing manifests.

        `source` is unused here, but is used by extensions like remotefilelog to
        change what is sent based in pulls vs pushes, etc.

        Size accounting and the trailing manifest sentinel are handled by
        the caller (generate()); this method only yields raw chunks.
        Populates ``fnodes`` (path -> {filenode: linknode}) as a side
        effect for the later filelog phase, and discovers subdirectory
        tree manifests to send via ``tmfnodes``.
        """
        repo = self._repo
        cl = repo.changelog
        mfl = repo.manifestlog
        dirlog = mfl._revlog.dirlog
        # Work queue of manifests to emit, keyed by directory ('' = root).
        tmfnodes = {'': mfs}

        # Callback for the manifest, used to collect linkrevs for filelog
        # revisions.
        # Returns the linkrev node (collected in lookupcl).
        def makelookupmflinknode(dir, nodes):
            if fastpathlinkrev:
                assert not dir
                return mfs.__getitem__

            def lookupmflinknode(x):
                """Callback for looking up the linknode for manifests.

                Returns the linkrev node for the specified manifest.

                SIDE EFFECT:

                1) fclnodes gets populated with the list of relevant
                file nodes if we're not using fastpathlinkrev
                2) When treemanifests are in use, collects treemanifest nodes
                to send

                Note that this means manifests must be completely sent to
                the client before you can trust the list of files and
                treemanifests to send.
                """
                clnode = nodes[x]
                mdata = mfl.get(dir, x).readfast(shallow=True)
                for p, n, fl in mdata.iterentries():
                    if fl == 't': # subdirectory manifest
                        subdir = dir + p + '/'
                        tmfclnodes = tmfnodes.setdefault(subdir, {})
                        tmfclnode = tmfclnodes.setdefault(n, clnode)
                        if clrevorder[clnode] < clrevorder[tmfclnode]:
                            tmfclnodes[n] = clnode
                    else:
                        f = dir + p
                        fclnodes = fnodes.setdefault(f, {})
                        fclnode = fclnodes.setdefault(n, clnode)
                        # Keep the earliest introducing changeset as linknode.
                        if clrevorder[clnode] < clrevorder[fclnode]:
                            fclnodes[n] = clnode
                return clnode
            return lookupmflinknode

        # Iterate until no more directories are discovered; lookupmflinknode
        # may add new entries to tmfnodes while we drain it.
        while tmfnodes:
            dir, nodes = tmfnodes.popitem()
            store = dirlog(dir)

            if not self._filematcher.visitdir(store._dir[:-1] or '.'):
                prunednodes = []
            else:
                # Drop nodes the receiver already has (linkrev in commonrevs).
                frev, flr = store.rev, store.linkrev
                prunednodes = [n for n in nodes
                               if flr(frev(n)) not in commonrevs]

            if dir and not prunednodes:
                continue

            lookupfn = makelookupmflinknode(dir, nodes)

            if self._ellipses:
                revs = _sortnodesellipsis(store, prunednodes, cl,
                                          lookupfn)
            else:
                revs = _sortnodesnormal(store, prunednodes,
                                        self._reorder)

            if dir:
                # Only cg3 can carry per-directory (tree) manifests.
                assert self.version == b'03'
                chunk = _fileheader(dir)
                yield chunk

            it = deltagroup(
                self._repo, revs, store, False, lookupfn,
                self._deltaparentfn, self._builddeltaheader,
                ellipses=self._ellipses,
                units=_('manifests'),
                clrevtolocalrev=clrevtolocalrev,
                fullclnodes=self._fullclnodes,
                precomputedellipsis=self._precomputedellipsis)

            for chunk in it:
                yield chunk

            close = closechunk()
            yield close
    # The 'source' parameter is useful for extensions
    def generatefiles(self, changedfiles, commonrevs, source,
                      mfdicts, fastpathlinkrev, fnodes, clrevs):
        """Yield changegroup chunks for filelog revisions.

        ``changedfiles`` is filtered through the file matcher; ``fnodes``
        was populated by generatemanifests() and maps path ->
        {filenode: linknode}. ``mfdicts`` is only set for shallow
        ellipsis serving. Each emitted file is a header chunk, delta
        chunks, then a closing chunk.
        """
        changedfiles = list(filter(self._filematcher, changedfiles))

        if not fastpathlinkrev:
            def normallinknodes(unused, fname):
                return fnodes.get(fname, {})
        else:
            cln = self._repo.changelog.node

            def normallinknodes(store, fname):
                flinkrev = store.linkrev
                fnode = store.node
                revs = ((r, flinkrev(r)) for r in store)
                return dict((fnode(r), cln(lr))
                            for r, lr in revs if lr in clrevs)

        clrevtolocalrev = {}

        if self._isshallow:
            # In a shallow clone, the linknodes callback needs to also include
            # those file nodes that are in the manifests we sent but weren't
            # introduced by those manifests.
            commonctxs = [self._repo[c] for c in commonrevs]
            clrev = self._repo.changelog.rev

            # Defining this function has a side-effect of overriding the
            # function of the same name that was passed in as an argument.
            # TODO have caller pass in appropriate function.
            def linknodes(flog, fname):
                for c in commonctxs:
                    try:
                        fnode = c.filenode(fname)
                        clrevtolocalrev[c.rev()] = flog.rev(fnode)
                    except error.ManifestLookupError:
                        pass
                links = normallinknodes(flog, fname)
                if len(links) != len(mfdicts):
                    for mf, lr in mfdicts:
                        fnode = mf.get(fname, None)
                        if fnode in links:
                            links[fnode] = min(links[fnode], lr, key=clrev)
                        elif fnode:
                            links[fnode] = lr
                return links
        else:
            linknodes = normallinknodes

        repo = self._repo
        cl = repo.changelog
        progress = repo.ui.makeprogress(_('bundling'), unit=_('files'),
                                        total=len(changedfiles))
        for i, fname in enumerate(sorted(changedfiles)):
            filerevlog = repo.file(fname)
            if not filerevlog:
                raise error.Abort(_("empty or missing file data for %s") %
                                  fname)

            # Per-file scratch state for ellipsis delta-base mapping.
            clrevtolocalrev.clear()

            linkrevnodes = linknodes(filerevlog, fname)
            # Lookup for filenodes, we collected the linkrev nodes above in the
            # fastpath case and with lookupmf in the slowpath case.
            def lookupfilelog(x):
                return linkrevnodes[x]

            # Skip file nodes the receiver already has.
            frev, flr = filerevlog.rev, filerevlog.linkrev
            filenodes = [n for n in linkrevnodes
                         if flr(frev(n)) not in commonrevs]

            if filenodes:
                if self._ellipses:
                    revs = _sortnodesellipsis(filerevlog, filenodes,
                                              cl, lookupfilelog)
                else:
                    revs = _sortnodesnormal(filerevlog, filenodes,
                                            self._reorder)

                progress.update(i + 1, item=fname)
                h = _fileheader(fname)
                size = len(h)
                yield h

                it = deltagroup(
                    self._repo, revs, filerevlog, False, lookupfilelog,
                    self._deltaparentfn, self._builddeltaheader,
                    ellipses=self._ellipses,
                    clrevtolocalrev=clrevtolocalrev,
                    fullclnodes=self._fullclnodes,
                    precomputedellipsis=self._precomputedellipsis)

                for chunk in it:
                    size += len(chunk)
                    yield chunk

                close = closechunk()
                size += len(close)
                yield close

                self._verbosenote(_('%8.i %s\n') % (size, fname))
        progress.complete()
1180 1178
1181 1179 def _deltaparentprev(store, rev, p1, p2, prev):
1182 1180 """Resolve a delta parent to the previous revision.
1183 1181
1184 1182 Used for version 1 changegroups, which don't support generaldelta.
1185 1183 """
1186 1184 return prev
1187 1185
def _deltaparentgeneraldelta(store, rev, p1, p2, prev):
    """Resolve a delta parent when general deltas are supported."""
    dp = store.deltaparent(rev)

    if dp == nullrev:
        # The storage holds a full snapshot for this revision. When delta
        # chains are in use, prefer deltaing against ``prev``: p1 would
        # often be smaller, but resolving p1's raw text can be expensive,
        # whereas the revlog caches likely have prev ready. There is room
        # for a flag and/or config option to control this behavior.
        # When the revlog deliberately stores full snapshots, stick to one.
        base = prev if store.storedeltachains else nullrev
    elif dp in (p1, p2, prev):
        base = dp
    else:
        # We can't be sure the remote has the base revision; pick prev.
        return prev

    if base != nullrev and not store.candelta(base, rev):
        base = nullrev

    return base
1214 1212
1215 1213 def _deltaparentellipses(store, rev, p1, p2, prev):
1216 1214 """Resolve a delta parent when in ellipses mode."""
1217 1215 # TODO: send better deltas when in narrow mode.
1218 1216 #
1219 1217 # changegroup.group() loops over revisions to send,
1220 1218 # including revisions we'll skip. What this means is that
1221 1219 # `prev` will be a potentially useless delta base for all
1222 1220 # ellipsis nodes, as the client likely won't have it. In
1223 1221 # the future we should do bookkeeping about which nodes
1224 1222 # have been sent to the client, and try to be
1225 1223 # significantly smarter about delta bases. This is
1226 1224 # slightly tricky because this same code has to work for
1227 1225 # all revlogs, and we don't have the linkrev/linknode here.
1228 1226 return p1
1229 1227
def _makecg1packer(repo, filematcher, bundlecaps, ellipses=False,
                   shallow=False, ellipsisroots=None, fullnodes=None):
    """Construct a cgpacker emitting version 01 changegroups."""
    def builddeltaheader(d):
        # cg1 carries no explicit delta base; the base is implied (prev).
        return _CHANGEGROUPV1_DELTA_HEADER.pack(
            d.node, d.p1node, d.p2node, d.linknode)

    return cgpacker(repo, filematcher, b'01',
                    deltaparentfn=_deltaparentprev,
                    allowreorder=None,
                    builddeltaheader=builddeltaheader,
                    manifestsend=b'',
                    bundlecaps=bundlecaps,
                    ellipses=ellipses,
                    shallow=shallow,
                    ellipsisroots=ellipsisroots,
                    fullnodes=fullnodes)
1245 1243
def _makecg2packer(repo, filematcher, bundlecaps, ellipses=False,
                   shallow=False, ellipsisroots=None, fullnodes=None):
    """Construct a cgpacker emitting version 02 changegroups."""
    def builddeltaheader(d):
        # cg2 extends the cg1 header with an explicit delta base node.
        return _CHANGEGROUPV2_DELTA_HEADER.pack(
            d.node, d.p1node, d.p2node, d.basenode, d.linknode)

    # Since generaldelta is directly supported by cg2, reordering
    # generally doesn't help, so we disable it by default (treating
    # bundle.reorder=auto just like bundle.reorder=False).
    return cgpacker(repo, filematcher, b'02',
                    deltaparentfn=_deltaparentgeneraldelta,
                    allowreorder=False,
                    builddeltaheader=builddeltaheader,
                    manifestsend=b'',
                    bundlecaps=bundlecaps,
                    ellipses=ellipses,
                    shallow=shallow,
                    ellipsisroots=ellipsisroots,
                    fullnodes=fullnodes)
1264 1262
def _makecg3packer(repo, filematcher, bundlecaps, ellipses=False,
                   shallow=False, ellipsisroots=None, fullnodes=None):
    """Construct a cgpacker emitting version 03 changegroups."""
    def builddeltaheader(d):
        # cg3 additionally carries per-revision flags in the header.
        return _CHANGEGROUPV3_DELTA_HEADER.pack(
            d.node, d.p1node, d.p2node, d.basenode, d.linknode, d.flags)

    # Ellipsis serving needs its own delta-base policy; otherwise cg3
    # resolves bases the same way cg2 does.
    deltaparentfn = (_deltaparentellipses if ellipses
                     else _deltaparentgeneraldelta)

    return cgpacker(repo, filematcher, b'03',
                    deltaparentfn=deltaparentfn,
                    allowreorder=False,
                    builddeltaheader=builddeltaheader,
                    manifestsend=closechunk(),
                    bundlecaps=bundlecaps,
                    ellipses=ellipses,
                    shallow=shallow,
                    ellipsisroots=ellipsisroots,
                    fullnodes=fullnodes)
1283 1281
# Maps changegroup wire-protocol version to a 2-tuple of
# (packer factory, unpacker class).
_packermap = {'01': (_makecg1packer, cg1unpacker),
             # cg2 adds support for exchanging generaldelta
             '02': (_makecg2packer, cg2unpacker),
             # cg3 adds support for exchanging revlog flags and treemanifests
             '03': (_makecg3packer, cg3unpacker),
}
1290 1288
def allsupportedversions(repo):
    """Return the set of all changegroup versions this repo understands.

    Version 03 is only included when treemanifests are required or
    experimentally enabled via configuration.
    """
    versions = set(_packermap.keys())
    wantv03 = (repo.ui.configbool('experimental', 'changegroup3')
               or repo.ui.configbool('experimental', 'treemanifest')
               or 'treemanifest' in repo.requirements)
    if not wantv03:
        versions.discard('03')
    return versions
1298 1296
# Changegroup versions that can be applied to the repo
def supportedincomingversions(repo):
    """Return the changegroup versions that can be applied to ``repo``.

    Currently the full supported set: every version we understand can
    also be ingested.
    """
    return allsupportedversions(repo)
1302 1300
# Changegroup versions that can be created from the repo
def supportedoutgoingversions(repo):
    """Return the changegroup versions that can be produced from ``repo``."""
    versions = allsupportedversions(repo)

    if 'treemanifest' in repo.requirements:
        # Versions 01 and 02 support only flat manifests and it's just too
        # expensive to convert between the flat manifest and tree manifest on
        # the fly. Since tree manifests are hashed differently, all of history
        # would have to be converted. Instead, we simply don't even pretend to
        # support versions 01 and 02.
        versions -= {'01', '02'}
    if repository.NARROW_REQUIREMENT in repo.requirements:
        # Versions 01 and 02 don't support revlog flags, and we need to
        # support that for stripping and unbundling to work.
        versions -= {'01', '02'}
    if LFS_REQUIREMENT in repo.requirements:
        # Versions 01 and 02 don't support revlog flags, and we need to
        # mark LFS entries with REVIDX_EXTSTORED.
        versions -= {'01', '02'}

    return versions
1326 1324
def localversion(repo):
    """Return the best changegroup version for locally-consumed bundles.

    Used for bundles meant to stay on this machine (strip, shelve,
    temporary bundles), where we control both producer and consumer and
    can therefore pick the newest supported format.
    """
    return max(supportedoutgoingversions(repo))
1331 1329
def safeversion(repo):
    """Return the smallest version all clients of the repo should support.

    For example, all hg versions that support generaldelta also support
    changegroup 02, so 01 can be dropped for generaldelta repos.
    """
    candidates = supportedoutgoingversions(repo)
    if 'generaldelta' in repo.requirements:
        candidates.discard('01')
    assert candidates
    return min(candidates)
1341 1339
def getbundler(version, repo, bundlecaps=None, filematcher=None,
               ellipses=False, shallow=False, ellipsisroots=None,
               fullnodes=None):
    """Instantiate a changegroup packer for wire ``version``.

    ``filematcher`` defaults to match-all; sparse matchers require at
    least version 02, and ellipsis serving requires version 03.
    """
    assert version in supportedoutgoingversions(repo)

    if filematcher is None:
        filematcher = matchmod.alwaysmatcher(repo.root, '')

    if version == '01' and not filematcher.always():
        raise error.ProgrammingError('version 01 changegroups do not support '
                                     'sparse file matchers')

    if ellipses and version in (b'01', b'02'):
        raise error.Abort(
            _('ellipsis nodes require at least cg3 on client and server, '
              'but negotiated version %s') % version)

    # Requested files could include files not in the local store. So
    # filter those out.
    filematcher = matchmod.intersectmatchers(repo.narrowmatch(),
                                             filematcher)

    makepacker = _packermap[version][0]
    return makepacker(repo, filematcher, bundlecaps, ellipses=ellipses,
                      shallow=shallow, ellipsisroots=ellipsisroots,
                      fullnodes=fullnodes)
1368 1366
def getunbundler(version, fh, alg, extras=None):
    """Instantiate a changegroup unpacker for ``version`` reading ``fh``."""
    unpackercls = _packermap[version][1]
    return unpackercls(fh, alg, extras=extras)
1371 1369
def _changegroupinfo(repo, nodes, source):
    """Report the outgoing changesets to the user.

    Prints a count when verbose (or when bundling), and the full node
    list when debugging.
    """
    if repo.ui.verbose or source == 'bundle':
        repo.ui.status(_("%d changesets found\n") % len(nodes))
    if repo.ui.debugflag:
        repo.ui.debug("list of changesets:\n")
        for n in nodes:
            repo.ui.debug("%s\n" % hex(n))
1379 1377
def makechangegroup(repo, outgoing, version, source, fastpath=False,
                    bundlecaps=None):
    """Build an unbundler wrapping a changegroup stream for ``outgoing``."""
    stream = makestream(repo, outgoing, version, source,
                        fastpath=fastpath, bundlecaps=bundlecaps)
    extras = {'clcount': len(outgoing.missing)}
    return getunbundler(version, util.chunkbuffer(stream), None, extras)
1386 1384
def makestream(repo, outgoing, version, source, fastpath=False,
               bundlecaps=None, filematcher=None):
    """Produce a changegroup byte-chunk stream for ``outgoing`` revisions.

    Fires the ``preoutgoing`` hook before any data is generated.
    NOTE: sorts ``outgoing.missingheads`` in place via ``heads.sort()``.
    """
    bundler = getbundler(version, repo, bundlecaps=bundlecaps,
                         filematcher=filematcher)

    repo = repo.unfiltered()
    commonrevs = outgoing.common
    csets = outgoing.missing
    heads = outgoing.missingheads
    # We go through the fast path if we get told to, or if all (unfiltered
    # heads have been requested (since we then know there all linkrevs will
    # be pulled by the client).
    heads.sort()
    fastpathlinkrev = fastpath or (
        repo.filtername is None and heads == sorted(repo.heads()))

    repo.hook('preoutgoing', throw=True, source=source)
    _changegroupinfo(repo, csets, source)
    return bundler.generate(commonrevs, csets, fastpathlinkrev, source)
1406 1404
def _addchangegroupfiles(repo, source, revmap, trp, expectedfiles, needfiles):
    """Apply the filelog section of an incoming changegroup.

    Reads file chunks from ``source`` until the empty header sentinel,
    adds each group to the matching filelog under transaction ``trp``,
    and checks off every received node against ``needfiles``. Raises
    ``error.Abort`` on empty groups, censored bases, spurious entries,
    or nodes that remain missing afterwards.

    Returns a 2-tuple of (file revisions added, filelogs touched).
    """
    revisions = 0
    files = 0
    progress = repo.ui.makeprogress(_('files'), unit=_('files'),
                                    total=expectedfiles)
    # iter(callable, sentinel): an empty header dict marks end of section.
    for chunkdata in iter(source.filelogheader, {}):
        files += 1
        f = chunkdata["filename"]
        repo.ui.debug("adding %s revisions\n" % f)
        progress.increment()
        fl = repo.file(f)
        o = len(fl)
        try:
            deltas = source.deltaiter()
            if not fl.addgroup(deltas, revmap, trp):
                raise error.Abort(_("received file revlog group is empty"))
        except error.CensoredBaseError as e:
            raise error.Abort(_("received delta base is censored: %s") % e)
        revisions += len(fl) - o
        if f in needfiles:
            needs = needfiles[f]
            # Every newly-added node must have been expected.
            for new in pycompat.xrange(o, len(fl)):
                n = fl.node(new)
                if n in needs:
                    needs.remove(n)
                else:
                    raise error.Abort(
                        _("received spurious file revlog entry"))
            if not needs:
                del needfiles[f]
    progress.complete()

    # Anything still listed in needfiles was promised but never received.
    for f, needs in needfiles.iteritems():
        fl = repo.file(f)
        for n in needs:
            try:
                fl.rev(n)
            except error.LookupError:
                raise error.Abort(
                    _('missing file data for %s:%s - run hg verify') %
                    (f, hex(n)))

    return revisions, files
General Comments 0
You need to be logged in to leave comments. Login now