##// END OF EJS Templates
changegroup: don't try to build changelog chunks if not required...
Pulkit Goyal -
r41491:fa7d61f9 default
parent child Browse files
Show More
@@ -1,1401 +1,1418 b''
1 1 # changegroup.py - Mercurial changegroup manipulation functions
2 2 #
3 3 # Copyright 2006 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import os
11 11 import struct
12 12 import weakref
13 13
14 14 from .i18n import _
15 15 from .node import (
16 16 hex,
17 17 nullid,
18 18 nullrev,
19 19 short,
20 20 )
21 21
22 22 from . import (
23 23 error,
24 24 match as matchmod,
25 25 mdiff,
26 26 phases,
27 27 pycompat,
28 28 repository,
29 29 util,
30 30 )
31 31
# Binary layouts of the per-delta header, one per changegroup version.
# Every "20s" field is a 20-byte value; the unpacker classes in this module
# read them as (node, p1, p2, linked changeset) for version 1, with an
# explicit delta base added in version 2 and a trailing 16-bit flags field
# added in version 3.
_CHANGEGROUPV1_DELTA_HEADER = struct.Struct("20s" * 4)
_CHANGEGROUPV2_DELTA_HEADER = struct.Struct("20s" * 5)
_CHANGEGROUPV3_DELTA_HEADER = struct.Struct(">" + "20s" * 5 + "H")

# Name of the repository requirement associated with LFS.
LFS_REQUIREMENT = 'lfs'

# Module-level alias so the framing code can call it without the prefix.
readexactly = util.readexactly
39 39
def getchunk(stream):
    """Read one framed chunk from ``stream`` and return its payload.

    A chunk starts with a 4-byte big-endian signed length that counts the
    length field itself. A length of 0 is the empty chunk and yields ``""``.
    Any other length that is not greater than 4 is rejected with
    ``error.Abort``.
    """
    (framelen,) = struct.unpack(">l", readexactly(stream, 4))
    if framelen > 4:
        # The frame length includes the 4 bytes we just consumed.
        return readexactly(stream, framelen - 4)
    if framelen != 0:
        raise error.Abort(_("invalid chunk length %d") % framelen)
    return ""
49 49
def chunkheader(length):
    """Build the 4-byte header announcing a chunk of ``length`` payload bytes.

    The encoded value is ``length + 4`` because the length field counts
    itself.
    """
    framelen = length + 4
    return struct.pack(">l", framelen)
53 53
def closechunk():
    """Build the header of the zero-length chunk that terminates a group."""
    emptyframe = 0
    return struct.pack(">l", emptyframe)
57 57
def _fileheader(path):
    """Return ``path`` framed as a changegroup chunk (header plus payload)."""
    header = chunkheader(len(path))
    return header + path
61 61
def writechunks(ui, chunks, filename, vfs=None):
    """Write every item of ``chunks`` to a file and return the file's name.

    ``filename`` selects the destination. When it is empty, a temporary
    file named ``hg-bundle-*.hg`` is created instead. When ``vfs`` is given
    together with a filename, the file is opened (and, on failure, removed)
    through ``vfs``; otherwise the regular filesystem is used.

    If anything fails after the file has been opened, the partially written
    file is deleted before the exception propagates. ``ui`` is accepted but
    not used here.
    """
    out = None
    # Path to delete if we bail out part-way through; None once we are done.
    stale = None
    try:
        if not filename:
            fd, filename = pycompat.mkstemp(prefix="hg-bundle-", suffix=".hg")
            out = os.fdopen(fd, r"wb")
        elif vfs:
            out = vfs.open(filename, "wb")
        else:
            # Use a large buffer: the default is usually small (4k is
            # common on Linux).
            out = open(filename, "wb", 131072)
        stale = filename
        for piece in chunks:
            out.write(piece)
        stale = None
        return filename
    finally:
        if out is not None:
            out.close()
        if stale is not None:
            remove = vfs.unlink if (filename and vfs) else os.unlink
            remove(stale)
95 95
class cg1unpacker(object):
    """Reader for version 01 changegroup streams.

    An unpacker deals with how revision data is framed on the wire. The
    usual entry point is apply(), which adds everything the changegroup
    contains to a repository.

    To pass a changegroup through untouched to another consumer, use
    getchunks(): it yields the raw chunks and stops exactly where the
    changegroup ends, which matters when the end of the data cannot be
    detected any other way.

    deltachunk() is only of interest when applying delta data by hand;
    prefer apply().

    The remaining public methods exist for bundlerepo and a few debug
    commands, and their use is discouraged.
    """
    deltaheader = _CHANGEGROUPV1_DELTA_HEADER
    deltaheadersize = deltaheader.size
    version = '01'
    # A single list of groups (the files) follows the manifests.
    _grouplistcount = 1

    def __init__(self, fh, alg, extras=None):
        """Wrap ``fh`` in a decompressing reader for bundle type ``alg``.

        ``alg`` of None means uncompressed ('UN'). Unknown bundle types are
        rejected with error.Abort. 'BZ' is mapped to the '_truncatedBZ'
        engine name before the engine is looked up.
        """
        bundletype = 'UN' if alg is None else alg
        if bundletype not in util.compengines.supportedbundletypes:
            raise error.Abort(_('unknown stream compression type: %s')
                              % bundletype)
        if bundletype == 'BZ':
            bundletype = '_truncatedBZ'

        engine = util.compengines.forbundletype(bundletype)
        self._stream = engine.decompressorreader(fh)
        self._type = bundletype
        self.extras = extras or {}
        # Optional zero-argument callable invoked once per non-empty chunk.
        self.callback = None

    # compressed, read, seek and tell all appear to be used only by
    # bundlerepo, but it's a little hard to tell.
    def compressed(self):
        """Tell whether the underlying stream uses a real compression."""
        return self._type is not None and self._type != 'UN'

    def read(self, l):
        """Read ``l`` bytes from the decompressed stream."""
        return self._stream.read(l)

    def seek(self, pos):
        """Seek the decompressed stream to ``pos``."""
        return self._stream.seek(pos)

    def tell(self):
        """Return the current position in the decompressed stream."""
        return self._stream.tell()

    def close(self):
        """Close the decompressed stream."""
        return self._stream.close()

    def _chunklength(self):
        """Read a chunk header and return the payload length it announces.

        Returns 0 for the empty chunk. Fires ``self.callback`` (when set)
        for every non-empty chunk. Raises error.Abort on a non-zero length
        that is not greater than 4.
        """
        (framelen,) = struct.unpack(">l", readexactly(self._stream, 4))
        if framelen <= 4:
            if framelen:
                raise error.Abort(_("invalid chunk length %d") % framelen)
            return 0
        if self.callback:
            self.callback()
        return framelen - 4

    def changelogheader(self):
        """v10 does not have a changelog header chunk"""
        return {}

    def manifestheader(self):
        """v10 does not have a manifest header chunk"""
        return {}

    def filelogheader(self):
        """Read a filelog header chunk; in v10 it only holds the filename.

        Returns an empty dict when the chunk is empty.
        """
        size = self._chunklength()
        if not size:
            return {}
        return {'filename': readexactly(self._stream, size)}

    def _deltaheader(self, headertuple, prevnode):
        """Normalize a raw v1 header to (node, p1, p2, deltabase, cs, flags).

        v1 headers carry no explicit delta base: the delta is against the
        previous node of the group, or against p1 for the first entry.
        Flags do not exist in v1 and are reported as 0.
        """
        node, p1, p2, cs = headertuple
        deltabase = p1 if prevnode is None else prevnode
        return node, p1, p2, deltabase, cs, 0

    def deltachunk(self, prevnode):
        """Read the next delta of the current group.

        Returns ``{}`` at the end of the group, otherwise the tuple
        (node, p1, p2, cs, deltabase, delta, flags).
        """
        size = self._chunklength()
        if not size:
            return {}
        rawheader = readexactly(self._stream, self.deltaheadersize)
        fields = self.deltaheader.unpack(rawheader)
        delta = readexactly(self._stream, size - self.deltaheadersize)
        node, p1, p2, deltabase, cs, flags = self._deltaheader(fields,
                                                               prevnode)
        return (node, p1, p2, cs, deltabase, delta, flags)

    def getchunks(self):
        """Yield every raw chunk of the changegroup, then stop.

        This is meant for forwarding the binary stream to a file or to
        another network API. The data has to be parsed to find out where it
        ends; per the original notes, reading blindly would block on an
        sshrepo because the end of the stream is not otherwise known.
        """
        # Changegroup 1 and 2 consist of 3 parts: changelog, manifestlog,
        # and a list of filelogs. Changegroup 3 has 4 parts: changelog,
        # manifestlog, a list of tree manifestlogs, and a list of filelogs.
        #
        # The changelog and manifestlog parts each end with an empty chunk.
        # The tree and file parts are lists of entry sections; each section
        # is a run of chunks ending with an empty chunk, and the list itself
        # ends with one more empty chunk. So an empty chunk that was not
        # preceded by any non-empty chunk marks the end of a list.
        slicesize = 2**20
        parts = 0
        while parts < 2 + self._grouplistcount:
            noentries = True
            while True:
                chunk = getchunk(self)
                if not chunk:
                    # The first two empty chunks close the changelog and
                    # the manifestlog. Later ones close either one entry of
                    # a list or, if nothing was read since the previous
                    # empty chunk, the whole list.
                    if parts < 2 or noentries:
                        parts += 1
                    break
                noentries = False
                yield chunkheader(len(chunk))
                # Hand the payload out in bounded slices.
                for start in pycompat.xrange(0, len(chunk), slicesize):
                    yield chunk[start:start + slicesize]
            yield closechunk()

    def _unpackmanifests(self, repo, revmap, trp, prog):
        """Add the manifest group to the root manifest storage of ``repo``.

        ``prog`` is a progress object: it is advanced once per chunk while
        the group is read and completed afterwards.
        """
        self.callback = prog.increment
        # no need to check for empty manifest group here:
        # if the result of the merge of 1 and 2 is the same in 3 and 4,
        # no new manifest will be created and the manifest group will
        # be empty during the pull
        self.manifestheader()
        manifestdeltas = self.deltaiter()
        rootstorage = repo.manifestlog.getstorage(b'')
        rootstorage.addgroup(manifestdeltas, revmap, trp)
        prog.complete()
        self.callback = None

    def apply(self, repo, tr, srctype, url, targetphase=phases.draft,
              expectedtotal=None):
        """Add the changegroup read from this unpacker to ``repo``.

        srctype is a string like 'push', 'pull', or 'unbundle'. url is
        the URL of the repo where this changegroup is coming from. The work
        happens inside transaction ``tr``.

        Return an integer summarizing the change to this repo:
        - nothing changed or no source: 0
        - more heads than before: 1+added heads (2..n)
        - fewer heads than before: -1-removed heads (-2..-n)
        - number of heads stays the same: 1
        """
        repo = repo.unfiltered()

        # Both helpers close over ``cl``, which is bound further down before
        # either of them can be called.
        def csmap(x):
            repo.ui.debug("add changeset %s\n" % short(x))
            return len(cl)

        def revmap(x):
            return cl.rev(x)

        changesets = files = revisions = 0

        try:
            # The transaction may already carry source information. In this
            # case we use the top level data. We overwrite the argument
            # because we need to use the top level value (if they exist)
            # in this function.
            srctype = tr.hookargs.setdefault('source', srctype)
            tr.hookargs.setdefault('url', url)
            repo.hook('prechangegroup',
                      throw=True, **pycompat.strkwargs(tr.hookargs))

            # write changelog data to temp files so concurrent readers
            # will not see an inconsistent view
            cl = repo.changelog
            cl.delayupdate(tr)
            oldheads = set(cl.heads())

            trp = weakref.proxy(tr)

            # --- changesets ---
            repo.ui.status(_("adding changesets\n"))
            clstart = len(cl)
            progress = repo.ui.makeprogress(_('changesets'), unit=_('chunks'),
                                            total=expectedtotal)
            self.callback = progress.increment

            # Collects the files touched by the incoming changesets; only
            # their number is needed afterwards.
            efiles = set()
            def onchangelog(cl, node):
                efiles.update(cl.readfiles(node))

            self.changelogheader()
            deltas = self.deltaiter()
            cgnodes = cl.addgroup(deltas, csmap, trp,
                                  addrevisioncb=onchangelog)
            efiles = len(efiles)

            if not cgnodes:
                repo.ui.develwarn('applied empty changelog from changegroup',
                                  config='warn-empty-changegroup')
            clend = len(cl)
            changesets = clend - clstart
            progress.complete()
            self.callback = None

            # --- manifests ---
            repo.ui.status(_("adding manifests\n"))
            # We know that we'll never have more manifests than we had
            # changesets.
            progress = repo.ui.makeprogress(_('manifests'), unit=_('chunks'),
                                            total=changesets)
            self._unpackmanifests(repo, revmap, trp, progress)

            needfiles = {}
            if repo.ui.configbool('server', 'validate'):
                cl = repo.changelog
                ml = repo.manifestlog
                # validate incoming csets have their manifests
                for cset in pycompat.xrange(clstart, clend):
                    mfnode = cl.changelogrevision(cset).manifest
                    mfest = ml[mfnode].readdelta()
                    # store file cgnodes we must see
                    for f, n in mfest.iteritems():
                        needfiles.setdefault(f, set()).add(n)

            # --- files ---
            repo.ui.status(_("adding file changes\n"))
            newrevs, newfiles = _addchangegroupfiles(
                repo, self, revmap, trp, efiles, needfiles)
            revisions += newrevs
            files += newfiles

            # Head count change; new heads that close their branch are not
            # counted.
            deltaheads = 0
            if oldheads:
                heads = cl.heads()
                deltaheads = len(heads) - len(oldheads)
                for h in heads:
                    if h not in oldheads and repo[h].closesbranch():
                        deltaheads -= 1
            htext = ""
            if deltaheads:
                htext = _(" (%+d heads)") % deltaheads

            repo.ui.status(_("added %d changesets"
                             " with %d changes to %d files%s\n")
                           % (changesets, revisions, files, htext))
            repo.invalidatevolatilesets()

            if changesets > 0:
                firstnode = hex(cl.node(clstart))
                lastnode = hex(cl.node(clend - 1))
                if 'node' not in tr.hookargs:
                    # First changegroup of the transaction: record the node
                    # range on the transaction itself.
                    tr.hookargs['node'] = firstnode
                    tr.hookargs['node_last'] = lastnode
                    hookargs = dict(tr.hookargs)
                else:
                    # The transaction already has a range: only the local
                    # copy handed to the hooks gets ours.
                    hookargs = dict(tr.hookargs)
                    hookargs['node'] = firstnode
                    hookargs['node_last'] = lastnode
                repo.hook('pretxnchangegroup',
                          throw=True, **pycompat.strkwargs(hookargs))

            added = [cl.node(r) for r in pycompat.xrange(clstart, clend)]
            phaseall = None
            if srctype in ('push', 'serve'):
                # Old servers can not push the boundary themselves.
                # New servers won't push the boundary if changeset already
                # exists locally as secret
                #
                # We should not use added here but the list of all change in
                # the bundle
                if repo.publishing():
                    targetphase = phaseall = phases.public
                else:
                    # closer target phase computation

                    # Those changesets have been pushed from the
                    # outside, their phases are going to be pushed
                    # alongside. Therefore `targetphase` is
                    # ignored.
                    targetphase = phaseall = phases.draft
            if added:
                phases.registernew(repo, tr, targetphase, added)
            if phaseall is not None:
                phases.advanceboundary(repo, tr, phaseall, cgnodes)

            if changesets > 0:

                def runhooks():
                    # These hooks run when the lock releases, not when the
                    # transaction closes. So it's possible for the changelog
                    # to have changed since we last saw it.
                    if clstart >= len(repo):
                        return

                    repo.hook("changegroup", **pycompat.strkwargs(hookargs))

                    for n in added:
                        args = hookargs.copy()
                        args['node'] = hex(n)
                        del args['node_last']
                        repo.hook("incoming", **pycompat.strkwargs(args))

                    newheads = [h for h in repo.heads()
                                if h not in oldheads]
                    repo.ui.log("incoming",
                                "%d incoming changes - new heads: %s\n",
                                len(added),
                                ', '.join([hex(c[:6]) for c in newheads]))

                tr.addpostclose('changegroup-runhooks-%020i' % clstart,
                                lambda tr: repo._afterlock(runhooks))
        finally:
            repo.ui.flush()
        # never return 0 here:
        if deltaheads < 0:
            return deltaheads - 1
        return deltaheads + 1

    def deltaiter(self):
        """Iterate over the deltas of the current group.

        Each item is the tuple produced by deltachunk(); the result is
        suitable for handing to the storage layer's addgroup().
        """
        prevnode = None
        while True:
            chunkdata = self.deltachunk(prevnode)
            if chunkdata == {}:
                # Empty chunk: end of this group.
                break
            # Chunkdata: (node, p1, p2, cs, deltabase, delta, flags)
            yield chunkdata
            prevnode = chunkdata[0]
438 438
class cg2unpacker(cg1unpacker):
    """Reader for version 02 changegroup streams.

    The only difference from cg1 is the delta header: it names the delta
    base explicitly (generaldelta support). Everything else is inherited.
    """
    deltaheader = _CHANGEGROUPV2_DELTA_HEADER
    deltaheadersize = deltaheader.size
    version = '02'

    def _deltaheader(self, headertuple, prevnode):
        """Normalize a raw v2 header to (node, p1, p2, deltabase, cs, flags).

        The delta base comes from the header, so ``prevnode`` is ignored.
        v2 has no flags field; 0 is reported.
        """
        node, p1, p2, deltabase, cs = headertuple
        return node, p1, p2, deltabase, cs, 0
454 454
class cg3unpacker(cg2unpacker):
    """Reader for version 03 changegroup streams.

    cg3 adds treemanifest exchange and revlog flags: the delta header
    gains a flags field, and an extra empty chunk separates manifests from
    files.
    """
    deltaheader = _CHANGEGROUPV3_DELTA_HEADER
    deltaheadersize = deltaheader.size
    version = '03'
    # Two lists of groups follow: the manifests, then the files.
    _grouplistcount = 2

    def _deltaheader(self, headertuple, prevnode):
        """Return the v3 header fields, which already include the flags.

        ``prevnode`` is ignored because the header carries the delta base.
        """
        node, p1, p2, deltabase, cs, flags = headertuple
        return node, p1, p2, deltabase, cs, flags

    def _unpackmanifests(self, repo, revmap, trp, prog):
        """Add the root manifest group, then any directory manifest groups.

        Raises error.Abort when a directory group adds nothing.
        """
        super(cg3unpacker, self)._unpackmanifests(repo, revmap, trp, prog)
        while True:
            header = self.filelogheader()
            if header == {}:
                # An empty header chunk ends the list of directory groups.
                break
            # If we get here, there are directory manifests in the
            # changegroup.
            dirname = header["filename"]
            repo.ui.debug("adding %s revisions\n" % dirname)
            deltas = self.deltaiter()
            storage = repo.manifestlog.getstorage(dirname)
            if not storage.addgroup(deltas, revmap, trp):
                raise error.Abort(_("received dir revlog group is empty"))
480 480
class headerlessfixup(object):
    """File-like reader that serves buffered bytes ``h`` before ``fh``.

    read() first drains the bytes given as ``h`` and then continues with
    data read from ``fh``.
    """

    def __init__(self, fh, h):
        self._h = h
        self._fh = fh

    def read(self, n):
        """Return exactly ``n`` bytes, taking buffered bytes first."""
        if not self._h:
            return readexactly(self._fh, n)
        head, self._h = self._h[:n], self._h[n:]
        missing = n - len(head)
        if missing > 0:
            # The buffer ran out mid-read; top up from the real stream.
            head += readexactly(self._fh, missing)
        return head
492 492
def _revisiondeltatochunks(delta, headerfn):
    """Yield the changegroup chunks that encode one revisiondelta.

    ``headerfn`` is called with ``delta`` and must return the encoded
    delta header. The output is the chunk length header, the delta header,
    an optional diff prefix, and the data.
    """
    # The wire format only knows deltas. A revisiondelta holding a full
    # revision is therefore sent as a delta whose invented header says
    # "replace everything": a trivial header when the base is the null
    # revision, otherwise one that replaces the whole base text.
    if delta.delta is not None:
        prefix, data = b'', delta.delta
    else:
        data = delta.revision
        if delta.basenode == nullid:
            prefix = mdiff.trivialdiffheader(len(data))
        else:
            prefix = mdiff.replacediffheader(delta.baserevisionsize,
                                             len(data))

    meta = headerfn(delta)
    payloadsize = len(meta) + len(prefix) + len(data)

    yield chunkheader(payloadsize)
    yield meta
    if prefix:
        yield prefix
    yield data
519 519
def _sortnodesellipsis(store, nodes, cl, lookup):
    """Return ``nodes`` sorted by the changelog revision that links them.

    Used in ellipsis serving mode. ``lookup`` maps a node to its linked
    changelog node and ``cl.rev`` turns that into a revision number.
    ``store`` is accepted but not used.
    """
    # Ellipses serving mode.
    #
    # Ideally we would build proper ellipsis graphs for the non-changelog
    # revlogs too. We don't yet, so the DAGs sent for the manifestlog and
    # filelogs carry bogus parentage on every ellipsis node. The contents
    # are still right, but the individual DAGs can be badly scrambled, as
    # with 882681bc3166 and its ancestors (back about 10 revisions or so)
    # in the main hg repo.
    #
    # What is known to hold is that the resulting (possibly bogus) DAG is
    # valid as long as nodes are sent in the order the changelog
    # introduces them, so non-changelog histories are ordered by the
    # changelog revision that uses them.
    def linkedclrev(node):
        return cl.rev(lookup(node))

    return sorted(nodes, key=linkedclrev)
540 540
def _resolvenarrowrevisioninfo(cl, store, ischangelog, rev, linkrev,
                               linknode, clrevtolocalrev, fullclnodes,
                               precomputedellipsis):
    """Compute the parents to send for an ellipsis revision.

    ``rev`` is a revision of ``store`` linked to changelog revision
    ``linkrev``. Its parents on the ellipsis DAG are taken from
    ``precomputedellipsis[linkrev]`` (changelog revnums) and translated to
    revisions of ``store``.

    Returns ``(p1node, p2node, linknode)`` where ``linknode`` is passed
    through unchanged. Raises error.Abort when a parent cannot be
    translated.
    """
    # Function-scope import so this block does not depend on a change to
    # the module's import list.
    import collections

    linkparents = precomputedellipsis[linkrev]

    def local(clrev):
        """Turn a changelog revnum into a local revnum.

        The ellipsis dag is stored as revnums on the changelog,
        but when we're producing ellipsis entries for
        non-changelog revlogs, we need to turn those numbers into
        something local. This does that for us, and during the
        changelog sending phase will also expand the stored
        mappings as needed.
        """
        if clrev == nullrev:
            return nullrev

        if ischangelog:
            return clrev

        # Walk the ellipsis-ized changelog breadth-first looking for a
        # change that has been linked from the current revlog.
        #
        # For a flat manifest revlog only a single step should be necessary
        # as all relevant changelog entries are relevant to the flat
        # manifest.
        #
        # For a filelog or tree manifest dirlog however not every changelog
        # entry will have been relevant, so we need to skip some changelog
        # nodes even after ellipsis-izing.
        #
        # A deque gives O(1) pops from the front; re-slicing a list on
        # every step copied the whole queue each time.
        pending = collections.deque([clrev])
        while pending:
            p = pending.popleft()
            if p in clrevtolocalrev:
                return clrevtolocalrev[p]
            # NOTE(review): ``p`` is a changelog revnum, while deltagroup()
            # tests *nodes* against ``fullclnodes``. If the set really holds
            # nodes this branch can never match - confirm with the callers.
            elif p in fullclnodes:
                pending.extend(pp for pp in cl.parentrevs(p)
                               if pp != nullrev)
            elif p in precomputedellipsis:
                pending.extend(pp for pp in precomputedellipsis[p]
                               if pp != nullrev)
            else:
                # In this case, we've got an ellipsis with parents
                # outside the current bundle (likely an
                # incremental pull). We "know" that we can use the
                # value of this same revlog at whatever revision
                # is pointed to by linknode. "Know" is in scare
                # quotes because I haven't done enough examination
                # of edge cases to convince myself this is really
                # a fact - it works for all the (admittedly
                # thorough) cases in our testsuite, but I would be
                # somewhat unsurprised to find a case in the wild
                # where this breaks down a bit. That said, I don't
                # know if it would hurt anything.
                #
                # NOTE(review): the scan stops before revision 0, so a
                # match on revision 0 is never found - confirm whether
                # that is intended.
                for i in pycompat.xrange(rev, 0, -1):
                    if store.linkrev(i) == clrev:
                        return i
                # We failed to resolve a parent for this node, so
                # we crash the changegroup construction.
                raise error.Abort(
                    'unable to resolve parent while packing %r %r'
                    ' for changeset %r' % (store.indexfile, rev, clrev))

        return nullrev

    if not linkparents or (
        store.parentrevs(rev) == (nullrev, nullrev)):
        # No ellipsis parents, or the revision is a root in its own store.
        p1, p2 = nullrev, nullrev
    elif len(linkparents) == 1:
        p1, = sorted(local(p) for p in linkparents)
        p2 = nullrev
    else:
        p1, p2 = sorted(local(p) for p in linkparents)

    p1node, p2node = store.node(p1), store.node(p2)

    return p1node, p2node, linknode
619 619
def deltagroup(repo, store, nodes, ischangelog, lookup, forcedeltaparentprev,
               topic=None,
               ellipses=False, clrevtolocalrev=None, fullclnodes=None,
               precomputedellipsis=None):
    """Calculate deltas for a set of revisions.

    Is a generator of ``revisiondelta`` instances, produced by
    ``store.emitrevisions()`` for ``nodes``. Each yielded revision has its
    ``linknode`` attribute set; ``lookup`` maps a node of ``store`` to that
    linked changelog node. Nothing is yielded when ``nodes`` is empty.

    ``ischangelog`` makes revisions come out in storage order.
    ``forcedeltaparentprev`` forces deltas against the previous revision
    and takes precedence over the ``devel.bundle.delta`` config.

    If topic is not None, progress detail will be generated using this
    topic name (e.g. changesets, manifests, etc).

    With ``ellipses`` set, ``clrevtolocalrev`` (updated in place),
    ``fullclnodes`` and ``precomputedellipsis`` are all used and must be
    provided. Revisions whose changeset is neither full nor in
    ``precomputedellipsis`` are dropped, and the rest may get rewritten
    parents and the ellipsis flag.
    """
    if not nodes:
        return

    cl = repo.changelog

    if ischangelog:
        # `hg log` shows changesets in storage order. To preserve order
        # across clones, send out changesets in storage order.
        nodesorder = 'storage'
    elif ellipses:
        nodes = _sortnodesellipsis(store, nodes, cl, lookup)
        nodesorder = 'nodes'
    else:
        nodesorder = None

    # Perform ellipses filtering and revision massaging. We do this before
    # emitrevisions() because a) filtering out revisions creates less work
    # for emitrevisions() b) dropping revisions would break emitrevisions()'s
    # assumptions about delta choices and we would possibly send a delta
    # referencing a missing base revision.
    #
    # Also, calling lookup() has side-effects with regards to populating
    # data structures. If we don't call lookup() for each node or if we call
    # lookup() after the first pass through each node, things can break -
    # possibly intermittently depending on the python hash seed! For that
    # reason, we store a mapping of all linknodes during the initial node
    # pass rather than use lookup() on the output side.
    if ellipses:
        filtered = []
        adjustedparents = {}
        linknodes = {}

        for node in nodes:
            rev = store.rev(node)
            linknode = lookup(node)
            linkrev = cl.rev(linknode)
            clrevtolocalrev[linkrev] = rev

            # If linknode is in fullclnodes, it means the corresponding
            # changeset was a full changeset and is being sent unaltered.
            if linknode in fullclnodes:
                linknodes[node] = linknode

            # If the corresponding changeset wasn't in the set computed
            # as relevant to us, it should be dropped outright.
            elif linkrev not in precomputedellipsis:
                continue

            else:
                # We could probably do this later and avoid the dict
                # holding state. But it likely doesn't matter.
                p1node, p2node, linknode = _resolvenarrowrevisioninfo(
                    cl, store, ischangelog, rev, linkrev, linknode,
                    clrevtolocalrev, fullclnodes, precomputedellipsis)

                adjustedparents[node] = (p1node, p2node)
                linknodes[node] = linknode

            filtered.append(node)

        nodes = filtered

    # We expect the first pass to be fast, so we only engage the progress
    # meter for constructing the revision deltas.
    progress = None
    if topic is not None:
        progress = repo.ui.makeprogress(topic, unit=_('chunks'),
                                        total=len(nodes))

    configtarget = repo.ui.config('devel', 'bundle.delta')
    if configtarget not in ('', 'p1', 'full'):
        # The message ends with a newline so the warning does not run into
        # whatever is printed next.
        msg = _("""config "devel.bundle.delta" has unknown value: %s\n""")
        repo.ui.warn(msg % configtarget)

    # forcedeltaparentprev wins over the config knob.
    deltamode = repository.CG_DELTAMODE_STD
    if forcedeltaparentprev:
        deltamode = repository.CG_DELTAMODE_PREV
    elif configtarget == 'p1':
        deltamode = repository.CG_DELTAMODE_P1
    elif configtarget == 'full':
        deltamode = repository.CG_DELTAMODE_FULL

    revisions = store.emitrevisions(
        nodes,
        nodesorder=nodesorder,
        revisiondata=True,
        assumehaveparentrevisions=not ellipses,
        deltamode=deltamode)

    for i, revision in enumerate(revisions):
        if progress:
            progress.update(i + 1)

        if ellipses:
            # Use the linknode recorded during the first pass; see the
            # comment above about lookup() side effects.
            linknode = linknodes[revision.node]

            if revision.node in adjustedparents:
                p1node, p2node = adjustedparents[revision.node]
                revision.p1node = p1node
                revision.p2node = p2node
                revision.flags |= repository.REVISION_FLAG_ELLIPSIS

        else:
            linknode = lookup(revision.node)

        revision.linknode = linknode
        yield revision

    if progress:
        progress.complete()
741 741
742 742 class cgpacker(object):
743 743 def __init__(self, repo, oldmatcher, matcher, version,
744 744 builddeltaheader, manifestsend,
745 745 forcedeltaparentprev=False,
746 746 bundlecaps=None, ellipses=False,
747 747 shallow=False, ellipsisroots=None, fullnodes=None):
748 748 """Given a source repo, construct a bundler.
749 749
750 750 oldmatcher is a matcher that matches on files the client already has.
751 751 These will not be included in the changegroup.
752 752
753 753 matcher is a matcher that matches on files to include in the
754 754 changegroup. Used to facilitate sparse changegroups.
755 755
756 756 forcedeltaparentprev indicates whether delta parents must be against
757 757 the previous revision in a delta group. This should only be used for
758 758 compatibility with changegroup version 1.
759 759
760 760 builddeltaheader is a callable that constructs the header for a group
761 761 delta.
762 762
763 763 manifestsend is a chunk to send after manifests have been fully emitted.
764 764
765 765 ellipses indicates whether ellipsis serving mode is enabled.
766 766
767 767 bundlecaps is optional and can be used to specify the set of
768 768 capabilities which can be used to build the bundle. While bundlecaps is
769 769 unused in core Mercurial, extensions rely on this feature to communicate
770 770 capabilities to customize the changegroup packer.
771 771
772 772 shallow indicates whether shallow data might be sent. The packer may
773 773 need to pack file contents not introduced by the changes being packed.
774 774
775 775 fullnodes is the set of changelog nodes which should not be ellipsis
776 776 nodes. We store this rather than the set of nodes that should be
777 777 ellipsis because for very large histories we expect this to be
778 778 significantly smaller.
779 779 """
780 780 assert oldmatcher
781 781 assert matcher
782 782 self._oldmatcher = oldmatcher
783 783 self._matcher = matcher
784 784
785 785 self.version = version
786 786 self._forcedeltaparentprev = forcedeltaparentprev
787 787 self._builddeltaheader = builddeltaheader
788 788 self._manifestsend = manifestsend
789 789 self._ellipses = ellipses
790 790
791 791 # Set of capabilities we can use to build the bundle.
792 792 if bundlecaps is None:
793 793 bundlecaps = set()
794 794 self._bundlecaps = bundlecaps
795 795 self._isshallow = shallow
796 796 self._fullclnodes = fullnodes
797 797
798 798 # Maps ellipsis revs to their roots at the changelog level.
799 799 self._precomputedellipsis = ellipsisroots
800 800
801 801 self._repo = repo
802 802
803 803 if self._repo.ui.verbose and not self._repo.ui.debugflag:
804 804 self._verbosenote = self._repo.ui.note
805 805 else:
806 806 self._verbosenote = lambda s: None
807 807
def generate(self, commonrevs, clnodes, fastpathlinkrev, source,
             changelog=True):
    """Yield a sequence of changegroup byte chunks.

    The stream is made of a changelog section, the manifest section(s)
    and one section per file. Every section is terminated by a close
    chunk, and a final close chunk ends the stream.

    ``commonrevs`` is forwarded to generatemanifests() and
    generatefiles() so they can prune revisions.
    ``clnodes`` are the changelog nodes to bundle.
    ``fastpathlinkrev`` requests the linkrev fast path; it is disabled
    here when the repository has the 'treemanifest' requirement.
    ``source`` is forwarded to the generators and to the 'outgoing' hook.

    If changelog is False, changelog data won't be added to changegroup
    (only the close chunk terminating the changelog section is emitted).
    """

    repo = self._repo
    cl = repo.changelog

    self._verbosenote(_('uncompressed size of bundle content:\n'))
    size = 0

    clstate, deltas = self._generatechangelog(cl, clnodes,
                                              generate=changelog)
    for delta in deltas:
        # _generatechangelog() still returns a live delta generator when
        # ellipses are in use, even if changelog data was not requested,
        # because walking it is what populates 'clstate'. Always drain
        # it, but only emit the chunks when the caller asked for them.
        if not changelog:
            continue
        for chunk in _revisiondeltatochunks(delta,
                                            self._builddeltaheader):
            size += len(chunk)
            yield chunk

    close = closechunk()
    size += len(close)
    yield close

    self._verbosenote(_('%8.i (changelog)\n') % size)

    clrevorder = clstate['clrevorder']
    manifests = clstate['manifests']
    changedfiles = clstate['changedfiles']

    # We need to make sure that the linkrev in the changegroup refers to
    # the first changeset that introduced the manifest or file revision.
    # The fastpath is usually safer than the slowpath, because the filelogs
    # are walked in revlog order.
    #
    # When taking the slowpath when the manifest revlog uses generaldelta,
    # the manifest may be walked in the "wrong" order. Without 'clrevorder',
    # we would get an incorrect linkrev (see fix in cc0ff93d0c0c).
    #
    # When taking the fastpath, we are only vulnerable to reordering
    # of the changelog itself. The changelog never uses generaldelta and is
    # never reordered. To handle this case, we simply take the slowpath,
    # which already has the 'clrevorder' logic. This was also fixed in
    # cc0ff93d0c0c.

    # Treemanifests don't work correctly with fastpathlinkrev
    # either, because we don't discover which directory nodes to
    # send along with files. This could probably be fixed.
    fastpathlinkrev = fastpathlinkrev and (
        'treemanifest' not in repo.requirements)

    fnodes = {} # needed file nodes

    size = 0
    it = self.generatemanifests(
        commonrevs, clrevorder, fastpathlinkrev, manifests, fnodes, source,
        clstate['clrevtomanifestrev'])

    for tree, deltas in it:
        if tree:
            # Only the root manifest has an empty tree name; directory
            # manifests get a header chunk naming the directory.
            assert self.version == b'03'
            chunk = _fileheader(tree)
            size += len(chunk)
            yield chunk

        for delta in deltas:
            chunks = _revisiondeltatochunks(delta, self._builddeltaheader)
            for chunk in chunks:
                size += len(chunk)
                yield chunk

        close = closechunk()
        size += len(close)
        yield close

    self._verbosenote(_('%8.i (manifests)\n') % size)
    yield self._manifestsend

    mfdicts = None
    if self._ellipses and self._isshallow:
        mfdicts = [(self._repo.manifestlog[n].read(), lr)
                   for (n, lr) in manifests.iteritems()]

    manifests.clear()
    clrevs = set(cl.rev(x) for x in clnodes)

    it = self.generatefiles(changedfiles, commonrevs,
                            source, mfdicts, fastpathlinkrev,
                            fnodes, clrevs)

    for path, deltas in it:
        h = _fileheader(path)
        # The size reported in verbose mode is per file, so restart the
        # counter with each file header.
        size = len(h)
        yield h

        for delta in deltas:
            chunks = _revisiondeltatochunks(delta, self._builddeltaheader)
            for chunk in chunks:
                size += len(chunk)
                yield chunk

        close = closechunk()
        size += len(close)
        yield close

        self._verbosenote(_('%8.i %s\n') % (size, path))

    yield closechunk()

    if clnodes:
        repo.hook('outgoing', node=hex(clnodes[0]), source=source)
919 919
def _generatechangelog(self, cl, nodes, generate=True):
    """Generate data for changelog chunks.

    Returns a 2-tuple of a dict containing state and an iterable of
    revision deltas (the result of deltagroup(), which the caller turns
    into byte chunks). The state will not be fully populated until the
    delta stream has been fully consumed.

    The state dict has the keys 'clrevorder', 'manifests',
    'changedfiles' and 'clrevtomanifestrev'.

    If generate is False and ellipses are not in use, the state is fully
    populated before returning and the returned iterable is empty. When
    ellipses are in use the delta stream is returned regardless of
    generate, so the caller has to consume it to populate the state.
    """
    clrevorder = {}
    manifests = {}
    mfl = self._repo.manifestlog
    changedfiles = set()
    clrevtomanifestrev = {}

    state = {
        'clrevorder': clrevorder,
        'manifests': manifests,
        'changedfiles': changedfiles,
        'clrevtomanifestrev': clrevtomanifestrev,
    }

    if not (generate or self._ellipses):
        # No changelog deltas are wanted: fill in the state directly
        # instead of going through deltagroup().
        # sort the nodes in storage order
        nodes = sorted(nodes, key=cl.rev)
        for node in nodes:
            c = cl.changelogrevision(node)
            clrevorder[node] = len(clrevorder)
            # record the first changeset introducing this manifest version
            manifests.setdefault(c.manifest, node)
            # Record a complete list of potentially-changed files in
            # this manifest.
            changedfiles.update(c.files)

        return state, ()

    # Callback for the changelog, used to collect changed files and
    # manifest nodes.
    # Returns the linkrev node (identity in the changelog case).
    def lookupcl(x):
        c = cl.changelogrevision(x)
        clrevorder[x] = len(clrevorder)

        if self._ellipses:
            # Only update manifests if x is going to be sent. Otherwise we
            # end up with bogus linkrevs specified for manifests and
            # we skip some manifest nodes that we should otherwise
            # have sent.
            if (x in self._fullclnodes
                or cl.rev(x) in self._precomputedellipsis):

                manifestnode = c.manifest
                # Record the first changeset introducing this manifest
                # version.
                manifests.setdefault(manifestnode, x)
                # Set this narrow-specific dict so we have the lowest
                # manifest revnum to look up for this cl revnum. (Part of
                # mapping changelog ellipsis parents to manifest ellipsis
                # parents)
                clrevtomanifestrev.setdefault(
                    cl.rev(x), mfl.rev(manifestnode))
            # We can't trust the changed files list in the changeset if the
            # client requested a shallow clone.
            if self._isshallow:
                changedfiles.update(mfl[c.manifest].read().keys())
            else:
                changedfiles.update(c.files)
        else:
            # record the first changeset introducing this manifest version
            manifests.setdefault(c.manifest, x)
            # Record a complete list of potentially-changed files in
            # this manifest.
            changedfiles.update(c.files)

        return x

    gen = deltagroup(
        self._repo, cl, nodes, True, lookupcl,
        self._forcedeltaparentprev,
        ellipses=self._ellipses,
        topic=_('changesets'),
        clrevtolocalrev={},
        fullclnodes=self._fullclnodes,
        precomputedellipsis=self._precomputedellipsis)

    return state, gen
990 1007
def generatemanifests(self, commonrevs, clrevorder, fastpathlinkrev,
                      manifests, fnodes, source, clrevtolocalrev):
    """Returns an iterator of (tree, deltas) pairs for the manifests.

    ``tree`` is the directory the manifest belongs to ('' for the root
    manifest) and ``deltas`` is an iterable of revision deltas for it.

    As a side effect of consuming the deltas, ``fnodes`` is filled with
    the file nodes to send (unless ``fastpathlinkrev`` is set) and
    sub-directory manifests are queued for processing.

    `source` is unused here, but is used by extensions like remotefilelog to
    change what is sent based on pulls vs pushes, etc.
    """
    repo = self._repo
    mfl = repo.manifestlog
    # Work queue mapping a tree name to its {manifest node: linknode}
    # dict. It starts with the root manifest only; lookupmflinknode()
    # adds the sub-directories it discovers.
    tmfnodes = {'': manifests}

    # Callback for the manifest, used to collect linkrevs for filelog
    # revisions.
    # Returns the linkrev node (collected in lookupcl).
    def makelookupmflinknode(tree, nodes):
        if fastpathlinkrev:
            assert not tree
            return manifests.__getitem__

        def lookupmflinknode(x):
            """Callback for looking up the linknode for manifests.

            Returns the linkrev node for the specified manifest.

            SIDE EFFECT:

            1) fclnodes gets populated with the list of relevant
               file nodes if we're not using fastpathlinkrev
            2) When treemanifests are in use, collects treemanifest nodes
               to send

            Note that this means manifests must be completely sent to
            the client before you can trust the list of files and
            treemanifests to send.
            """
            clnode = nodes[x]
            mdata = mfl.get(tree, x).readfast(shallow=True)
            for p, n, fl in mdata.iterentries():
                if fl == 't': # subdirectory manifest
                    subtree = tree + p + '/'
                    tmfclnodes = tmfnodes.setdefault(subtree, {})
                    tmfclnode = tmfclnodes.setdefault(n, clnode)
                    # Keep the linknode that comes first in clrevorder.
                    if clrevorder[clnode] < clrevorder[tmfclnode]:
                        tmfclnodes[n] = clnode
                else:
                    f = tree + p
                    fclnodes = fnodes.setdefault(f, {})
                    fclnode = fclnodes.setdefault(n, clnode)
                    # Keep the linknode that comes first in clrevorder.
                    if clrevorder[clnode] < clrevorder[fclnode]:
                        fclnodes[n] = clnode
            return clnode
        return lookupmflinknode

    while tmfnodes:
        tree, nodes = tmfnodes.popitem()

        should_visit = self._matcher.visitdir(tree[:-1] or '.')
        if tree and not should_visit:
            continue

        store = mfl.getstorage(tree)

        if not should_visit:
            # No nodes to send because this directory is out of
            # the client's view of the repository (probably
            # because of narrow clones). Do this even for the root
            # directory (tree=='')
            prunednodes = []
        else:
            # Avoid sending any manifest nodes we can prove the
            # client already has by checking linkrevs. See the
            # related comment in generatefiles().
            prunednodes = self._prunemanifests(store, nodes, commonrevs)

        if tree and not prunednodes:
            continue

        lookupfn = makelookupmflinknode(tree, nodes)

        deltas = deltagroup(
            self._repo, store, prunednodes, False, lookupfn,
            self._forcedeltaparentprev,
            ellipses=self._ellipses,
            topic=_('manifests'),
            clrevtolocalrev=clrevtolocalrev,
            fullclnodes=self._fullclnodes,
            precomputedellipsis=self._precomputedellipsis)

        if not self._oldmatcher.visitdir(store.tree[:-1] or '.'):
            yield tree, deltas
        else:
            # 'deltas' is a generator and we need to consume it even if
            # we are not going to send it because a side-effect is that
            # it updates tmfnodes (via lookupfn)
            for d in deltas:
                pass
            # The root manifest entry is always yielded, even when empty.
            if not tree:
                yield tree, []
1089 1106
1090 1107 def _prunemanifests(self, store, nodes, commonrevs):
1091 1108 # This is split out as a separate method to allow filtering
1092 1109 # commonrevs in extension code.
1093 1110 #
1094 1111 # TODO(augie): this shouldn't be required, instead we should
1095 1112 # make filtering of revisions to send delegated to the store
1096 1113 # layer.
1097 1114 frev, flr = store.rev, store.linkrev
1098 1115 return [n for n in nodes if flr(frev(n)) not in commonrevs]
1099 1116
1100 1117 # The 'source' parameter is useful for extensions
def generatefiles(self, changedfiles, commonrevs, source,
                  mfdicts, fastpathlinkrev, fnodes, clrevs):
    """Yield (fname, deltas) pairs for the file revisions to send.

    Only files accepted by the matcher and not by the old matcher are
    considered; they are processed in sorted order. Files for which no
    file node remains after filtering against ``commonrevs`` are skipped.

    Each yielded ``deltas`` should be consumed before the next pair is
    requested: ``clrevtolocalrev`` is cleared and ``linkrevnodes`` is
    rebound on every iteration, and both are shared with the delta
    group created for the previous file.

    Raises error.Abort when the storage of a changed file is empty.
    """
    changedfiles = [f for f in changedfiles
                    if self._matcher(f) and not self._oldmatcher(f)]

    if not fastpathlinkrev:
        # Slow path: the linknodes were collected in 'fnodes'.
        def normallinknodes(unused, fname):
            return fnodes.get(fname, {})
    else:
        cln = self._repo.changelog.node

        # Fast path: derive the linknodes from the linkrevs stored in
        # the file storage, keeping only revisions linked to 'clrevs'.
        def normallinknodes(store, fname):
            flinkrev = store.linkrev
            fnode = store.node
            revs = ((r, flinkrev(r)) for r in store)
            return dict((fnode(r), cln(lr))
                        for r, lr in revs if lr in clrevs)

    clrevtolocalrev = {}

    if self._isshallow:
        # In a shallow clone, the linknodes callback needs to also include
        # those file nodes that are in the manifests we sent but weren't
        # introduced by those manifests.
        # NOTE(review): linknodes() calls len(mfdicts), so this branch
        # assumes mfdicts is not None -- confirm callers always pass a
        # list when the packer is shallow.
        commonctxs = [self._repo[c] for c in commonrevs]
        clrev = self._repo.changelog.rev

        def linknodes(flog, fname):
            for c in commonctxs:
                try:
                    fnode = c.filenode(fname)
                    clrevtolocalrev[c.rev()] = flog.rev(fnode)
                except error.ManifestLookupError:
                    pass
            links = normallinknodes(flog, fname)
            if len(links) != len(mfdicts):
                for mf, lr in mfdicts:
                    fnode = mf.get(fname, None)
                    if fnode in links:
                        links[fnode] = min(links[fnode], lr, key=clrev)
                    elif fnode:
                        links[fnode] = lr
            return links
    else:
        linknodes = normallinknodes

    repo = self._repo
    progress = repo.ui.makeprogress(_('files'), unit=_('files'),
                                    total=len(changedfiles))
    for i, fname in enumerate(sorted(changedfiles)):
        filerevlog = repo.file(fname)
        if not filerevlog:
            raise error.Abort(_("empty or missing file data for %s") %
                              fname)

        clrevtolocalrev.clear()

        linkrevnodes = linknodes(filerevlog, fname)
        # Lookup for filenodes, we collected the linkrev nodes above in the
        # fastpath case and with lookupmf in the slowpath case.
        def lookupfilelog(x):
            return linkrevnodes[x]

        frev, flr = filerevlog.rev, filerevlog.linkrev
        # Skip sending any filenode we know the client already
        # has. This avoids over-sending files relatively
        # inexpensively, so it's not a problem if we under-filter
        # here.
        filenodes = [n for n in linkrevnodes
                     if flr(frev(n)) not in commonrevs]

        if not filenodes:
            continue

        progress.update(i + 1, item=fname)

        deltas = deltagroup(
            self._repo, filerevlog, filenodes, False, lookupfilelog,
            self._forcedeltaparentprev,
            ellipses=self._ellipses,
            clrevtolocalrev=clrevtolocalrev,
            fullclnodes=self._fullclnodes,
            precomputedellipsis=self._precomputedellipsis)

        yield fname, deltas

    progress.complete()
1188 1205
def _makecg1packer(repo, oldmatcher, matcher, bundlecaps,
                   ellipses=False, shallow=False, ellipsisroots=None,
                   fullnodes=None):
    """Build a cgpacker producing version '01' changegroups."""
    def builddeltaheader(d):
        # The version 01 delta header has no explicit delta base field.
        return _CHANGEGROUPV1_DELTA_HEADER.pack(
            d.node, d.p1node, d.p2node, d.linknode)

    return cgpacker(repo, oldmatcher, matcher, b'01',
                    builddeltaheader=builddeltaheader,
                    manifestsend=b'',
                    forcedeltaparentprev=True,
                    bundlecaps=bundlecaps,
                    ellipses=ellipses,
                    shallow=shallow,
                    ellipsisroots=ellipsisroots,
                    fullnodes=fullnodes)
1204 1221
def _makecg2packer(repo, oldmatcher, matcher, bundlecaps,
                   ellipses=False, shallow=False, ellipsisroots=None,
                   fullnodes=None):
    """Build a cgpacker producing version '02' changegroups."""
    def builddeltaheader(d):
        # The version 02 delta header adds the delta base node.
        return _CHANGEGROUPV2_DELTA_HEADER.pack(
            d.node, d.p1node, d.p2node, d.basenode, d.linknode)

    return cgpacker(repo, oldmatcher, matcher, b'02',
                    builddeltaheader=builddeltaheader,
                    manifestsend=b'',
                    bundlecaps=bundlecaps,
                    ellipses=ellipses,
                    shallow=shallow,
                    ellipsisroots=ellipsisroots,
                    fullnodes=fullnodes)
1219 1236
def _makecg3packer(repo, oldmatcher, matcher, bundlecaps,
                   ellipses=False, shallow=False, ellipsisroots=None,
                   fullnodes=None):
    """Build a cgpacker producing version '03' changegroups."""
    def builddeltaheader(d):
        # The version 03 delta header adds the revision flags.
        return _CHANGEGROUPV3_DELTA_HEADER.pack(
            d.node, d.p1node, d.p2node, d.basenode, d.linknode, d.flags)

    # Unlike versions 01 and 02, the manifest section is followed by an
    # extra close chunk.
    return cgpacker(repo, oldmatcher, matcher, b'03',
                    builddeltaheader=builddeltaheader,
                    manifestsend=closechunk(),
                    bundlecaps=bundlecaps,
                    ellipses=ellipses,
                    shallow=shallow,
                    ellipsisroots=ellipsisroots,
                    fullnodes=fullnodes)
1234 1251
# Maps a changegroup version to a (packer factory, unpacker) pair; see
# getbundler() and getunbundler() for how each element is used.
_packermap = {'01': (_makecg1packer, cg1unpacker),
             # cg2 adds support for exchanging generaldelta
             '02': (_makecg2packer, cg2unpacker),
             # cg3 adds support for exchanging revlog flags and treemanifests
             '03': (_makecg3packer, cg3unpacker),
}
1241 1258
def allsupportedversions(repo):
    """Return the set of changegroup versions known for ``repo``.

    Version '03' is only included when one of the experimental
    'changegroup3' / 'treemanifest' options is enabled or the repository
    has the 'treemanifest' requirement.
    """
    versions = set(_packermap.keys())
    cg3enabled = (repo.ui.configbool('experimental', 'changegroup3') or
                  repo.ui.configbool('experimental', 'treemanifest') or
                  'treemanifest' in repo.requirements)
    if not cg3enabled:
        versions.discard('03')
    return versions
1249 1266
def supportedincomingversions(repo):
    """Return the changegroup versions that can be applied to the repo."""
    incoming = allsupportedversions(repo)
    return incoming
1253 1270
def supportedoutgoingversions(repo):
    """Return the changegroup versions that can be created from the repo."""
    versions = allsupportedversions(repo)
    requirements = repo.requirements
    # Each of these requirements rules out versions 01 and 02:
    #
    # - treemanifest: versions 01 and 02 support only flat manifests and
    #   it's just too expensive to convert between the flat manifest and
    #   tree manifest on the fly. Since tree manifests are hashed
    #   differently, all of history would have to be converted. Instead,
    #   we simply don't even pretend to support versions 01 and 02.
    # - narrow: versions 01 and 02 don't support revlog flags, and we need
    #   to support that for stripping and unbundling to work.
    # - lfs: versions 01 and 02 don't support revlog flags, and we need to
    #   mark LFS entries with REVIDX_EXTSTORED.
    needscg3 = ('treemanifest' in requirements or
                repository.NARROW_REQUIREMENT in requirements or
                LFS_REQUIREMENT in requirements)
    if needscg3:
        versions.discard('01')
        versions.discard('02')

    return versions
1277 1294
def localversion(repo):
    """Return the highest changegroup version the repo can produce.

    Finds the best version to use for bundles that are meant to be used
    locally, such as those from strip and shelve, and temporary bundles.
    """
    candidates = supportedoutgoingversions(repo)
    return max(candidates)
1282 1299
def safeversion(repo):
    """Return the lowest changegroup version that is safe to send.

    Finds the smallest version that it's safe to assume clients of the repo
    will support. For example, all hg versions that support generaldelta also
    support changegroup 02.
    """
    candidates = supportedoutgoingversions(repo)
    usesgeneraldelta = 'generaldelta' in repo.requirements
    if usesgeneraldelta:
        candidates.discard('01')
    assert candidates
    return min(candidates)
1292 1309
def getbundler(version, repo, bundlecaps=None, oldmatcher=None,
               matcher=None, ellipses=False, shallow=False,
               ellipsisroots=None, fullnodes=None):
    """Return a packer able to generate changegroups of ``version``.

    ``matcher`` defaults to a matcher accepting every file and
    ``oldmatcher`` to a matcher accepting none. The matcher is narrowed
    with repo.narrowmatch() before being handed to the packer.

    Raises error.ProgrammingError for a version 01 changegroup with a
    matcher that does not match everything, and error.Abort when
    ellipses are requested with version 01 or 02.
    """
    assert version in supportedoutgoingversions(repo)

    if matcher is None:
        matcher = matchmod.alwaysmatcher(repo.root, '')
    if oldmatcher is None:
        oldmatcher = matchmod.nevermatcher(repo.root, '')

    sparse = not matcher.always()
    if version == '01' and sparse:
        raise error.ProgrammingError('version 01 changegroups do not support '
                                     'sparse file matchers')

    if ellipses and version in (b'01', b'02'):
        raise error.Abort(
            _('ellipsis nodes require at least cg3 on client and server, '
              'but negotiated version %s') % version)

    # Requested files could include files not in the local store. So
    # filter those out.
    matcher = repo.narrowmatch(matcher)

    makepacker = _packermap[version][0]
    return makepacker(repo, oldmatcher, matcher, bundlecaps,
                      ellipses=ellipses, shallow=shallow,
                      ellipsisroots=ellipsisroots, fullnodes=fullnodes)
1320 1337
def getunbundler(version, fh, alg, extras=None):
    """Return an unpacker for a changegroup of ``version`` read from fh."""
    makeunpacker = _packermap[version][1]
    return makeunpacker(fh, alg, extras=extras)
1323 1340
def _changegroupinfo(repo, nodes, source):
    """Report the changesets about to be bundled through the ui.

    The count is shown in verbose mode or when ``source`` is 'bundle';
    the individual nodes are listed in debug mode.
    """
    ui = repo.ui
    if ui.verbose or source == 'bundle':
        ui.status(_("%d changesets found\n") % len(nodes))
    if not ui.debugflag:
        return
    ui.debug("list of changesets:\n")
    for node in nodes:
        ui.debug("%s\n" % hex(node))
1331 1348
def makechangegroup(repo, outgoing, version, source, fastpath=False,
                    bundlecaps=None):
    """Return an unbundler reading the changegroup built for ``outgoing``.

    The changegroup stream produced by makestream() is wrapped in a
    chunk buffer; the number of missing changesets is passed along as
    the 'clcount' extra.
    """
    stream = makestream(repo, outgoing, version, source,
                        fastpath=fastpath, bundlecaps=bundlecaps)
    buffered = util.chunkbuffer(stream)
    extras = {'clcount': len(outgoing.missing)}
    return getunbundler(version, buffered, None, extras)
1338 1355
def makestream(repo, outgoing, version, source, fastpath=False,
               bundlecaps=None, matcher=None):
    """Return a generator of changegroup byte chunks for ``outgoing``.

    ``version`` selects the changegroup format, ``source`` is passed to
    the 'preoutgoing' hook and to the bundler, and ``matcher``
    optionally restricts the files that are sent.
    """
    bundler = getbundler(version, repo, bundlecaps=bundlecaps,
                         matcher=matcher)

    repo = repo.unfiltered()
    commonrevs = outgoing.common
    csets = outgoing.missing
    # Work on a sorted copy: sorting outgoing.missingheads in place would
    # silently reorder a list that belongs to the caller.
    heads = sorted(outgoing.missingheads)
    # We go through the fast path if we get told to, or if all (unfiltered)
    # heads have been requested (since we then know that all linkrevs will
    # be pulled by the client).
    fastpathlinkrev = fastpath or (
        repo.filtername is None and heads == sorted(repo.heads()))

    repo.hook('preoutgoing', throw=True, source=source)
    _changegroupinfo(repo, csets, source)
    return bundler.generate(commonrevs, csets, fastpathlinkrev, source)
1358 1375
def _addchangegroupfiles(repo, source, revmap, trp, expectedfiles, needfiles):
    """Add the file revisions read from ``source`` to the repository.

    ``needfiles`` maps a file name to the collection of file nodes that
    are expected; entries are removed from it as they are received.

    Returns a (revisions, files) tuple counting the file revisions added
    and the files processed.

    Raises error.Abort when a received group is empty, when a delta base
    is censored, when an unexpected revision is received for a file
    listed in ``needfiles``, or when an expected file node is still
    missing once everything has been read.
    """
    revcount = 0
    filecount = 0
    progress = repo.ui.makeprogress(_('files'), unit=_('files'),
                                    total=expectedfiles)
    # filelogheader() returns an empty dict once all files have been read.
    for header in iter(source.filelogheader, {}):
        filecount += 1
        fname = header["filename"]
        repo.ui.debug("adding %s revisions\n" % fname)
        progress.increment()
        flog = repo.file(fname)
        oldlen = len(flog)
        try:
            deltas = source.deltaiter()
            if not flog.addgroup(deltas, revmap, trp):
                raise error.Abort(_("received file revlog group is empty"))
        except error.CensoredBaseError as e:
            raise error.Abort(_("received delta base is censored: %s") % e)
        revcount += len(flog) - oldlen
        if fname in needfiles:
            expected = needfiles[fname]
            # Every revision that was just added must be one we expected.
            for rev in pycompat.xrange(oldlen, len(flog)):
                node = flog.node(rev)
                if node not in expected:
                    raise error.Abort(
                        _("received spurious file revlog entry"))
                expected.remove(node)
            if not expected:
                del needfiles[fname]
    progress.complete()

    # Whatever is still listed must already be present in the repository.
    for fname, expected in needfiles.iteritems():
        flog = repo.file(fname)
        for node in expected:
            try:
                flog.rev(node)
            except error.LookupError:
                raise error.Abort(
                    _('missing file data for %s:%s - run hg verify') %
                    (fname, hex(node)))

    return revcount, filecount
General Comments 0
You need to be logged in to leave comments. Login now