##// END OF EJS Templates
changegroup: port to emitrevisions() (issue5976)...
Gregory Szorc -
r39901:31b7e8e7 default
parent child Browse files
Show More
@@ -1,1389 +1,1381 b''
1 1 # changegroup.py - Mercurial changegroup manipulation functions
2 2 #
3 3 # Copyright 2006 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import os
11 11 import struct
12 12 import weakref
13 13
14 14 from .i18n import _
15 15 from .node import (
16 16 hex,
17 17 nullid,
18 18 nullrev,
19 19 short,
20 20 )
21 21
22 22 from .thirdparty import (
23 23 attr,
24 24 )
25 25
26 26 from . import (
27 dagop,
28 27 error,
29 28 match as matchmod,
30 29 mdiff,
31 30 phases,
32 31 pycompat,
33 32 repository,
33 revlog,
34 34 util,
35 35 )
36 36
37 37 from .utils import (
38 38 interfaceutil,
39 39 )
40 40
# Per-revision delta header layouts for the three changegroup wire formats.
# cg1: node, p1, p2, linknode (delta base is implicit: the previous node).
# cg2: adds an explicit delta-base node.
# cg3: big-endian, adds a trailing 16-bit revlog flags field.
_CHANGEGROUPV1_DELTA_HEADER = struct.Struct("20s20s20s20s")
_CHANGEGROUPV2_DELTA_HEADER = struct.Struct("20s20s20s20s20s")
_CHANGEGROUPV3_DELTA_HEADER = struct.Struct(">20s20s20s20s20sH")

LFS_REQUIREMENT = 'lfs'

# Local alias; raises on short reads instead of returning partial data.
readexactly = util.readexactly
def getchunk(stream):
    """Return the next chunk from stream as a string.

    The 4-byte length prefix counts itself, so a payload of ``l - 4``
    bytes follows. A length of exactly 0 marks a terminating (empty)
    chunk and yields ``""``; lengths 1-4 are invalid and abort.
    """
    d = readexactly(stream, 4)
    l = struct.unpack(">l", d)[0]
    if l <= 4:
        if l:
            raise error.Abort(_("invalid chunk length %d") % l)
        return ""
    return readexactly(stream, l - 4)
58 58
def chunkheader(length):
    """Return a changegroup chunk header (string).

    The encoded length includes the 4-byte header itself, matching what
    getchunk() expects on the read side.
    """
    return struct.pack(">l", length + 4)
62 62
def closechunk():
    """Return a changegroup chunk header (string) for a zero-length chunk.

    A zero length (not 4) is the sentinel getchunk() recognizes as the
    end of a chunk sequence.
    """
    return struct.pack(">l", 0)
66 66
def _fileheader(path):
    """Obtain a changegroup chunk header for a named path.

    Emits the length-prefixed path name that introduces a filelog (or
    tree manifest) section in the stream.
    """
    return chunkheader(len(path)) + path
70 70
def writechunks(ui, chunks, filename, vfs=None):
    """Write chunks to a file and return its filename.

    The stream is assumed to be a bundle file.
    Existing files will not be overwritten.
    If no filename is specified, a temporary file is created.

    On any failure the partially written file is removed; ``cleanup`` is
    cleared only once all chunks have been written successfully.
    """
    fh = None
    cleanup = None
    try:
        if filename:
            if vfs:
                fh = vfs.open(filename, "wb")
            else:
                # Increase default buffer size because default is usually
                # small (4k is common on Linux).
                fh = open(filename, "wb", 131072)
        else:
            fd, filename = pycompat.mkstemp(prefix="hg-bundle-", suffix=".hg")
            fh = os.fdopen(fd, r"wb")
        cleanup = filename
        for c in chunks:
            fh.write(c)
        # All chunks written: disarm the error-path unlink below.
        cleanup = None
        return filename
    finally:
        if fh is not None:
            fh.close()
        if cleanup is not None:
            if filename and vfs:
                vfs.unlink(cleanup)
            else:
                os.unlink(cleanup)
104 104
105 105 class cg1unpacker(object):
106 106 """Unpacker for cg1 changegroup streams.
107 107
108 108 A changegroup unpacker handles the framing of the revision data in
109 109 the wire format. Most consumers will want to use the apply()
110 110 method to add the changes from the changegroup to a repository.
111 111
112 112 If you're forwarding a changegroup unmodified to another consumer,
113 113 use getchunks(), which returns an iterator of changegroup
114 114 chunks. This is mostly useful for cases where you need to know the
115 115 data stream has ended by observing the end of the changegroup.
116 116
117 117 deltachunk() is useful only if you're applying delta data. Most
118 118 consumers should prefer apply() instead.
119 119
120 120 A few other public methods exist. Those are used only for
121 121 bundlerepo and some debug commands - their use is discouraged.
122 122 """
123 123 deltaheader = _CHANGEGROUPV1_DELTA_HEADER
124 124 deltaheadersize = deltaheader.size
125 125 version = '01'
126 126 _grouplistcount = 1 # One list of files after the manifests
127 127
128 128 def __init__(self, fh, alg, extras=None):
129 129 if alg is None:
130 130 alg = 'UN'
131 131 if alg not in util.compengines.supportedbundletypes:
132 132 raise error.Abort(_('unknown stream compression type: %s')
133 133 % alg)
134 134 if alg == 'BZ':
135 135 alg = '_truncatedBZ'
136 136
137 137 compengine = util.compengines.forbundletype(alg)
138 138 self._stream = compengine.decompressorreader(fh)
139 139 self._type = alg
140 140 self.extras = extras or {}
141 141 self.callback = None
142 142
143 143 # These methods (compressed, read, seek, tell) all appear to only
144 144 # be used by bundlerepo, but it's a little hard to tell.
145 145 def compressed(self):
146 146 return self._type is not None and self._type != 'UN'
147 147 def read(self, l):
148 148 return self._stream.read(l)
149 149 def seek(self, pos):
150 150 return self._stream.seek(pos)
151 151 def tell(self):
152 152 return self._stream.tell()
153 153 def close(self):
154 154 return self._stream.close()
155 155
156 156 def _chunklength(self):
157 157 d = readexactly(self._stream, 4)
158 158 l = struct.unpack(">l", d)[0]
159 159 if l <= 4:
160 160 if l:
161 161 raise error.Abort(_("invalid chunk length %d") % l)
162 162 return 0
163 163 if self.callback:
164 164 self.callback()
165 165 return l - 4
166 166
167 167 def changelogheader(self):
168 168 """v10 does not have a changelog header chunk"""
169 169 return {}
170 170
171 171 def manifestheader(self):
172 172 """v10 does not have a manifest header chunk"""
173 173 return {}
174 174
175 175 def filelogheader(self):
176 176 """return the header of the filelogs chunk, v10 only has the filename"""
177 177 l = self._chunklength()
178 178 if not l:
179 179 return {}
180 180 fname = readexactly(self._stream, l)
181 181 return {'filename': fname}
182 182
183 183 def _deltaheader(self, headertuple, prevnode):
184 184 node, p1, p2, cs = headertuple
185 185 if prevnode is None:
186 186 deltabase = p1
187 187 else:
188 188 deltabase = prevnode
189 189 flags = 0
190 190 return node, p1, p2, deltabase, cs, flags
191 191
192 192 def deltachunk(self, prevnode):
193 193 l = self._chunklength()
194 194 if not l:
195 195 return {}
196 196 headerdata = readexactly(self._stream, self.deltaheadersize)
197 197 header = self.deltaheader.unpack(headerdata)
198 198 delta = readexactly(self._stream, l - self.deltaheadersize)
199 199 node, p1, p2, deltabase, cs, flags = self._deltaheader(header, prevnode)
200 200 return (node, p1, p2, cs, deltabase, delta, flags)
201 201
202 202 def getchunks(self):
203 203 """returns all the chunks contains in the bundle
204 204
205 205 Used when you need to forward the binary stream to a file or another
206 206 network API. To do so, it parse the changegroup data, otherwise it will
207 207 block in case of sshrepo because it don't know the end of the stream.
208 208 """
209 209 # For changegroup 1 and 2, we expect 3 parts: changelog, manifestlog,
210 210 # and a list of filelogs. For changegroup 3, we expect 4 parts:
211 211 # changelog, manifestlog, a list of tree manifestlogs, and a list of
212 212 # filelogs.
213 213 #
214 214 # Changelog and manifestlog parts are terminated with empty chunks. The
215 215 # tree and file parts are a list of entry sections. Each entry section
216 216 # is a series of chunks terminating in an empty chunk. The list of these
217 217 # entry sections is terminated in yet another empty chunk, so we know
218 218 # we've reached the end of the tree/file list when we reach an empty
219 219 # chunk that was proceeded by no non-empty chunks.
220 220
221 221 parts = 0
222 222 while parts < 2 + self._grouplistcount:
223 223 noentries = True
224 224 while True:
225 225 chunk = getchunk(self)
226 226 if not chunk:
227 227 # The first two empty chunks represent the end of the
228 228 # changelog and the manifestlog portions. The remaining
229 229 # empty chunks represent either A) the end of individual
230 230 # tree or file entries in the file list, or B) the end of
231 231 # the entire list. It's the end of the entire list if there
232 232 # were no entries (i.e. noentries is True).
233 233 if parts < 2:
234 234 parts += 1
235 235 elif noentries:
236 236 parts += 1
237 237 break
238 238 noentries = False
239 239 yield chunkheader(len(chunk))
240 240 pos = 0
241 241 while pos < len(chunk):
242 242 next = pos + 2**20
243 243 yield chunk[pos:next]
244 244 pos = next
245 245 yield closechunk()
246 246
247 247 def _unpackmanifests(self, repo, revmap, trp, prog):
248 248 self.callback = prog.increment
249 249 # no need to check for empty manifest group here:
250 250 # if the result of the merge of 1 and 2 is the same in 3 and 4,
251 251 # no new manifest will be created and the manifest group will
252 252 # be empty during the pull
253 253 self.manifestheader()
254 254 deltas = self.deltaiter()
255 255 repo.manifestlog.getstorage(b'').addgroup(deltas, revmap, trp)
256 256 prog.complete()
257 257 self.callback = None
258 258
259 259 def apply(self, repo, tr, srctype, url, targetphase=phases.draft,
260 260 expectedtotal=None):
261 261 """Add the changegroup returned by source.read() to this repo.
262 262 srctype is a string like 'push', 'pull', or 'unbundle'. url is
263 263 the URL of the repo where this changegroup is coming from.
264 264
265 265 Return an integer summarizing the change to this repo:
266 266 - nothing changed or no source: 0
267 267 - more heads than before: 1+added heads (2..n)
268 268 - fewer heads than before: -1-removed heads (-2..-n)
269 269 - number of heads stays the same: 1
270 270 """
271 271 repo = repo.unfiltered()
272 272 def csmap(x):
273 273 repo.ui.debug("add changeset %s\n" % short(x))
274 274 return len(cl)
275 275
276 276 def revmap(x):
277 277 return cl.rev(x)
278 278
279 279 changesets = files = revisions = 0
280 280
281 281 try:
282 282 # The transaction may already carry source information. In this
283 283 # case we use the top level data. We overwrite the argument
284 284 # because we need to use the top level value (if they exist)
285 285 # in this function.
286 286 srctype = tr.hookargs.setdefault('source', srctype)
287 287 url = tr.hookargs.setdefault('url', url)
288 288 repo.hook('prechangegroup',
289 289 throw=True, **pycompat.strkwargs(tr.hookargs))
290 290
291 291 # write changelog data to temp files so concurrent readers
292 292 # will not see an inconsistent view
293 293 cl = repo.changelog
294 294 cl.delayupdate(tr)
295 295 oldheads = set(cl.heads())
296 296
297 297 trp = weakref.proxy(tr)
298 298 # pull off the changeset group
299 299 repo.ui.status(_("adding changesets\n"))
300 300 clstart = len(cl)
301 301 progress = repo.ui.makeprogress(_('changesets'), unit=_('chunks'),
302 302 total=expectedtotal)
303 303 self.callback = progress.increment
304 304
305 305 efiles = set()
306 306 def onchangelog(cl, node):
307 307 efiles.update(cl.readfiles(node))
308 308
309 309 self.changelogheader()
310 310 deltas = self.deltaiter()
311 311 cgnodes = cl.addgroup(deltas, csmap, trp, addrevisioncb=onchangelog)
312 312 efiles = len(efiles)
313 313
314 314 if not cgnodes:
315 315 repo.ui.develwarn('applied empty changelog from changegroup',
316 316 config='warn-empty-changegroup')
317 317 clend = len(cl)
318 318 changesets = clend - clstart
319 319 progress.complete()
320 320 self.callback = None
321 321
322 322 # pull off the manifest group
323 323 repo.ui.status(_("adding manifests\n"))
324 324 # We know that we'll never have more manifests than we had
325 325 # changesets.
326 326 progress = repo.ui.makeprogress(_('manifests'), unit=_('chunks'),
327 327 total=changesets)
328 328 self._unpackmanifests(repo, revmap, trp, progress)
329 329
330 330 needfiles = {}
331 331 if repo.ui.configbool('server', 'validate'):
332 332 cl = repo.changelog
333 333 ml = repo.manifestlog
334 334 # validate incoming csets have their manifests
335 335 for cset in pycompat.xrange(clstart, clend):
336 336 mfnode = cl.changelogrevision(cset).manifest
337 337 mfest = ml[mfnode].readdelta()
338 338 # store file cgnodes we must see
339 339 for f, n in mfest.iteritems():
340 340 needfiles.setdefault(f, set()).add(n)
341 341
342 342 # process the files
343 343 repo.ui.status(_("adding file changes\n"))
344 344 newrevs, newfiles = _addchangegroupfiles(
345 345 repo, self, revmap, trp, efiles, needfiles)
346 346 revisions += newrevs
347 347 files += newfiles
348 348
349 349 deltaheads = 0
350 350 if oldheads:
351 351 heads = cl.heads()
352 352 deltaheads = len(heads) - len(oldheads)
353 353 for h in heads:
354 354 if h not in oldheads and repo[h].closesbranch():
355 355 deltaheads -= 1
356 356 htext = ""
357 357 if deltaheads:
358 358 htext = _(" (%+d heads)") % deltaheads
359 359
360 360 repo.ui.status(_("added %d changesets"
361 361 " with %d changes to %d files%s\n")
362 362 % (changesets, revisions, files, htext))
363 363 repo.invalidatevolatilesets()
364 364
365 365 if changesets > 0:
366 366 if 'node' not in tr.hookargs:
367 367 tr.hookargs['node'] = hex(cl.node(clstart))
368 368 tr.hookargs['node_last'] = hex(cl.node(clend - 1))
369 369 hookargs = dict(tr.hookargs)
370 370 else:
371 371 hookargs = dict(tr.hookargs)
372 372 hookargs['node'] = hex(cl.node(clstart))
373 373 hookargs['node_last'] = hex(cl.node(clend - 1))
374 374 repo.hook('pretxnchangegroup',
375 375 throw=True, **pycompat.strkwargs(hookargs))
376 376
377 377 added = [cl.node(r) for r in pycompat.xrange(clstart, clend)]
378 378 phaseall = None
379 379 if srctype in ('push', 'serve'):
380 380 # Old servers can not push the boundary themselves.
381 381 # New servers won't push the boundary if changeset already
382 382 # exists locally as secret
383 383 #
384 384 # We should not use added here but the list of all change in
385 385 # the bundle
386 386 if repo.publishing():
387 387 targetphase = phaseall = phases.public
388 388 else:
389 389 # closer target phase computation
390 390
391 391 # Those changesets have been pushed from the
392 392 # outside, their phases are going to be pushed
393 393 # alongside. Therefor `targetphase` is
394 394 # ignored.
395 395 targetphase = phaseall = phases.draft
396 396 if added:
397 397 phases.registernew(repo, tr, targetphase, added)
398 398 if phaseall is not None:
399 399 phases.advanceboundary(repo, tr, phaseall, cgnodes)
400 400
401 401 if changesets > 0:
402 402
403 403 def runhooks():
404 404 # These hooks run when the lock releases, not when the
405 405 # transaction closes. So it's possible for the changelog
406 406 # to have changed since we last saw it.
407 407 if clstart >= len(repo):
408 408 return
409 409
410 410 repo.hook("changegroup", **pycompat.strkwargs(hookargs))
411 411
412 412 for n in added:
413 413 args = hookargs.copy()
414 414 args['node'] = hex(n)
415 415 del args['node_last']
416 416 repo.hook("incoming", **pycompat.strkwargs(args))
417 417
418 418 newheads = [h for h in repo.heads()
419 419 if h not in oldheads]
420 420 repo.ui.log("incoming",
421 421 "%d incoming changes - new heads: %s\n",
422 422 len(added),
423 423 ', '.join([hex(c[:6]) for c in newheads]))
424 424
425 425 tr.addpostclose('changegroup-runhooks-%020i' % clstart,
426 426 lambda tr: repo._afterlock(runhooks))
427 427 finally:
428 428 repo.ui.flush()
429 429 # never return 0 here:
430 430 if deltaheads < 0:
431 431 ret = deltaheads - 1
432 432 else:
433 433 ret = deltaheads + 1
434 434 return ret
435 435
436 436 def deltaiter(self):
437 437 """
438 438 returns an iterator of the deltas in this changegroup
439 439
440 440 Useful for passing to the underlying storage system to be stored.
441 441 """
442 442 chain = None
443 443 for chunkdata in iter(lambda: self.deltachunk(chain), {}):
444 444 # Chunkdata: (node, p1, p2, cs, deltabase, delta, flags)
445 445 yield chunkdata
446 446 chain = chunkdata[0]
447 447
class cg2unpacker(cg1unpacker):
    """Unpacker for cg2 streams.

    cg2 streams add support for generaldelta, so the delta header
    format is slightly different. All other features about the data
    remain the same.
    """
    deltaheader = _CHANGEGROUPV2_DELTA_HEADER
    deltaheadersize = deltaheader.size
    version = '02'

    def _deltaheader(self, headertuple, prevnode):
        # cg2 carries the delta base explicitly; prevnode is unused.
        node, p1, p2, deltabase, cs = headertuple
        flags = 0
        return node, p1, p2, deltabase, cs, flags
463 463
class cg3unpacker(cg2unpacker):
    """Unpacker for cg3 streams.

    cg3 streams add support for exchanging treemanifests and revlog
    flags. It adds the revlog flags to the delta header and an empty chunk
    separating manifests and files.
    """
    deltaheader = _CHANGEGROUPV3_DELTA_HEADER
    deltaheadersize = deltaheader.size
    version = '03'
    _grouplistcount = 2 # One list of manifests and one list of files

    def _deltaheader(self, headertuple, prevnode):
        node, p1, p2, deltabase, cs, flags = headertuple
        return node, p1, p2, deltabase, cs, flags

    def _unpackmanifests(self, repo, revmap, trp, prog):
        super(cg3unpacker, self)._unpackmanifests(repo, revmap, trp, prog)
        for chunkdata in iter(self.filelogheader, {}):
            # If we get here, there are directory manifests in the changegroup
            d = chunkdata["filename"]
            repo.ui.debug("adding %s revisions\n" % d)
            deltas = self.deltaiter()
            if not repo.manifestlog.getstorage(d).addgroup(deltas, revmap, trp):
                raise error.Abort(_("received dir revlog group is empty"))
489 489
class headerlessfixup(object):
    """File-like wrapper that re-prepends already-consumed header bytes.

    ``h`` is data that was read off ``fh`` ahead of time (e.g. to sniff
    the stream type); read() serves it first, then falls through to the
    underlying stream.
    """
    def __init__(self, fh, h):
        self._h = h
        self._fh = fh
    def read(self, n):
        if self._h:
            d, self._h = self._h[:n], self._h[n:]
            if len(d) < n:
                # Buffered header exhausted; top up from the real stream.
                d += readexactly(self._fh, n - len(d))
            return d
        return readexactly(self._fh, n)
501 501
@interfaceutil.implementer(repository.irevisiondeltarequest)
@attr.s(slots=True, frozen=True)
class revisiondeltarequest(object):
    """Immutable request describing one revision delta to produce.

    basenode of None lets the store pick a delta base; nullid forces a
    full revision. ellipsis marks narrow-clone ellipsis nodes.
    """
    node = attr.ib()
    linknode = attr.ib()
    p1node = attr.ib()
    p2node = attr.ib()
    basenode = attr.ib()
    ellipsis = attr.ib(default=False)
511 511
def _revisiondeltatochunks(delta, headerfn):
    """Serialize a revisiondelta to changegroup chunks.

    ``headerfn`` builds the version-specific delta header. Yields the
    chunk length header, the delta header, an optional synthesized diff
    prefix, then the payload.
    """

    # The captured revision delta may be encoded as a delta against
    # a base revision or as a full revision. The changegroup format
    # requires that everything on the wire be deltas. So for full
    # revisions, we need to invent a header that says to rewrite
    # data.

    if delta.delta is not None:
        prefix, data = b'', delta.delta
    elif delta.basenode == nullid:
        data = delta.revision
        prefix = mdiff.trivialdiffheader(len(data))
    else:
        data = delta.revision
        prefix = mdiff.replacediffheader(delta.baserevisionsize,
                                         len(data))

    meta = headerfn(delta)

    yield chunkheader(len(meta) + len(prefix) + len(data))
    yield meta
    if prefix:
        yield prefix
    yield data
538 538
def _sortnodesellipsis(store, nodes, cl, lookup):
    """Sort nodes for changegroup generation."""
    # Ellipses serving mode.
    #
    # In a perfect world, we'd generate better ellipsis-ified graphs
    # for non-changelog revlogs. In practice, we haven't started doing
    # that yet, so the resulting DAGs for the manifestlog and filelogs
    # are actually full of bogus parentage on all the ellipsis
    # nodes. This has the side effect that, while the contents are
    # correct, the individual DAGs might be completely out of whack in
    # a case like 882681bc3166 and its ancestors (back about 10
    # revisions or so) in the main hg repo.
    #
    # The one invariant we *know* holds is that the new (potentially
    # bogus) DAG shape will be valid if we order the nodes in the
    # order that they're introduced in dramatis personae by the
    # changelog, so what we do is we sort the non-changelog histories
    # by the order in which they are used by the changelog.
    key = lambda n: cl.rev(lookup(n))
    return sorted(nodes, key=key)
def _resolvenarrowrevisioninfo(cl, store, ischangelog, rev, linkrev,
                               linknode, clrevtolocalrev, fullclnodes,
                               precomputedellipsis):
    """Resolve adjusted parents and linknode for an ellipsis revision.

    Returns (p1node, p2node, linknode) for ``rev`` in ``store``, with
    parents remapped through the precomputed ellipsis DAG stored as
    changelog revnums.
    """
    linkparents = precomputedellipsis[linkrev]
    def local(clrev):
        """Turn a changelog revnum into a local revnum.

        The ellipsis dag is stored as revnums on the changelog,
        but when we're producing ellipsis entries for
        non-changelog revlogs, we need to turn those numbers into
        something local. This does that for us, and during the
        changelog sending phase will also expand the stored
        mappings as needed.
        """
        if clrev == nullrev:
            return nullrev

        if ischangelog:
            return clrev

        # Walk the ellipsis-ized changelog breadth-first looking for a
        # change that has been linked from the current revlog.
        #
        # For a flat manifest revlog only a single step should be necessary
        # as all relevant changelog entries are relevant to the flat
        # manifest.
        #
        # For a filelog or tree manifest dirlog however not every changelog
        # entry will have been relevant, so we need to skip some changelog
        # nodes even after ellipsis-izing.
        walk = [clrev]
        while walk:
            p = walk[0]
            walk = walk[1:]
            if p in clrevtolocalrev:
                return clrevtolocalrev[p]
            elif p in fullclnodes:
                walk.extend([pp for pp in cl.parentrevs(p)
                             if pp != nullrev])
            elif p in precomputedellipsis:
                walk.extend([pp for pp in precomputedellipsis[p]
                             if pp != nullrev])
            else:
                # In this case, we've got an ellipsis with parents
                # outside the current bundle (likely an
                # incremental pull). We "know" that we can use the
                # value of this same revlog at whatever revision
                # is pointed to by linknode. "Know" is in scare
                # quotes because I haven't done enough examination
                # of edge cases to convince myself this is really
                # a fact - it works for all the (admittedly
                # thorough) cases in our testsuite, but I would be
                # somewhat unsurprised to find a case in the wild
                # where this breaks down a bit. That said, I don't
                # know if it would hurt anything.
                for i in pycompat.xrange(rev, 0, -1):
                    if store.linkrev(i) == clrev:
                        return i
                # We failed to resolve a parent for this node, so
                # we crash the changegroup construction.
                raise error.Abort(
                    'unable to resolve parent while packing %r %r'
                    ' for changeset %r' % (store.indexfile, rev, clrev))

        return nullrev

    if not linkparents or (
        store.parentrevs(rev) == (nullrev, nullrev)):
        p1, p2 = nullrev, nullrev
    elif len(linkparents) == 1:
        p1, = sorted(local(p) for p in linkparents)
        p2 = nullrev
    else:
        p1, p2 = sorted(local(p) for p in linkparents)

    p1node, p2node = store.node(p1), store.node(p2)

    return p1node, p2node, linknode
656 638
def deltagroup(repo, store, nodes, ischangelog, lookup, forcedeltaparentprev,
               topic=None,
               ellipses=False, clrevtolocalrev=None, fullclnodes=None,
               precomputedellipsis=None):
    """Calculate deltas for a set of revisions.

    Is a generator of ``revisiondelta`` instances.

    If topic is not None, progress detail will be generated using this
    topic name (e.g. changesets, manifests, etc).
    """
    if not nodes:
        return

    cl = repo.changelog

    if ischangelog:
        # `hg log` shows changesets in storage order. To preserve order
        # across clones, send out changesets in storage order.
        nodesorder = 'storage'
    elif ellipses:
        nodes = _sortnodesellipsis(store, nodes, cl, lookup)
        nodesorder = 'nodes'
    else:
        nodesorder = None

    # Perform ellipses filtering and revision massaging. We do this before
    # emitrevisions() because a) filtering out revisions creates less work
    # for emitrevisions() b) dropping revisions would break emitrevisions()'s
    # assumptions about delta choices and we would possibly send a delta
    # referencing a missing base revision.
    #
    # Also, calling lookup() has side-effects with regards to populating
    # data structures. If we don't call lookup() for each node or if we call
    # lookup() after the first pass through each node, things can break -
    # possibly intermittently depending on the python hash seed! For that
    # reason, we store a mapping of all linknodes during the initial node
    # pass rather than use lookup() on the output side.
    if ellipses:
        filtered = []
        adjustedparents = {}
        linknodes = {}

        for node in nodes:
            rev = store.rev(node)
            linknode = lookup(node)
            linkrev = cl.rev(linknode)
            clrevtolocalrev[linkrev] = rev

            # If linknode is in fullclnodes, it means the corresponding
            # changeset was a full changeset and is being sent unaltered.
            if linknode in fullclnodes:
                linknodes[node] = linknode

            # If the corresponding changeset wasn't in the set computed
            # as relevant to us, it should be dropped outright.
            elif linkrev not in precomputedellipsis:
                continue

            else:
                # We could probably do this later and avoid the dict
                # holding state. But it likely doesn't matter.
                p1node, p2node, linknode = _resolvenarrowrevisioninfo(
                    cl, store, ischangelog, rev, linkrev, linknode,
                    clrevtolocalrev, fullclnodes, precomputedellipsis)

                adjustedparents[node] = (p1node, p2node)
                linknodes[node] = linknode

            filtered.append(node)

        nodes = filtered

    # We expect the first pass to be fast, so we only engage the progress
    # meter for constructing the revision deltas.
    progress = None
    if topic is not None:
        progress = repo.ui.makeprogress(topic, unit=_('chunks'),
                                        total=len(nodes))

    revisions = store.emitrevisions(
        nodes,
        nodesorder=nodesorder,
        revisiondata=True,
        assumehaveparentrevisions=not ellipses,
        deltaprevious=forcedeltaparentprev)

    for i, revision in enumerate(revisions):
        if progress:
            progress.update(i + 1)

        if ellipses:
            linknode = linknodes[revision.node]

            if revision.node in adjustedparents:
                p1node, p2node = adjustedparents[revision.node]
                revision.p1node = p1node
                revision.p2node = p2node
                revision.flags |= revlog.REVIDX_ELLIPSIS

        else:
            linknode = lookup(revision.node)

        revision.linknode = linknode
        yield revision

    if progress:
        progress.complete()
755 747
class cgpacker(object):
    """Produces changegroup data as a stream of byte chunks.

    The entry point is generate(), which yields the changelog, manifest
    and filelog sections of a changegroup, in that order.
    """
    def __init__(self, repo, filematcher, version,
                 builddeltaheader, manifestsend,
                 forcedeltaparentprev=False,
                 bundlecaps=None, ellipses=False,
                 shallow=False, ellipsisroots=None, fullnodes=None):
        """Given a source repo, construct a bundler.

        filematcher is a matcher that matches on files to include in the
        changegroup. Used to facilitate sparse changegroups.

        forcedeltaparentprev indicates whether delta parents must be against
        the previous revision in a delta group. This should only be used for
        compatibility with changegroup version 1.

        builddeltaheader is a callable that constructs the header for a group
        delta.

        manifestsend is a chunk to send after manifests have been fully emitted.

        ellipses indicates whether ellipsis serving mode is enabled.

        bundlecaps is optional and can be used to specify the set of
        capabilities which can be used to build the bundle. While bundlecaps is
        unused in core Mercurial, extensions rely on this feature to communicate
        capabilities to customize the changegroup packer.

        shallow indicates whether shallow data might be sent. The packer may
        need to pack file contents not introduced by the changes being packed.

        fullnodes is the set of changelog nodes which should not be ellipsis
        nodes. We store this rather than the set of nodes that should be
        ellipsis because for very large histories we expect this to be
        significantly smaller.
        """
        assert filematcher
        self._filematcher = filematcher

        self.version = version
        self._forcedeltaparentprev = forcedeltaparentprev
        self._builddeltaheader = builddeltaheader
        self._manifestsend = manifestsend
        self._ellipses = ellipses

        # Set of capabilities we can use to build the bundle.
        if bundlecaps is None:
            bundlecaps = set()
        self._bundlecaps = bundlecaps
        self._isshallow = shallow
        self._fullclnodes = fullnodes

        # Maps ellipsis revs to their roots at the changelog level.
        self._precomputedellipsis = ellipsisroots

        self._repo = repo

        # Only report per-section sizes when they would actually be shown
        # (--verbose without --debug); otherwise note-taking is a no-op.
        if self._repo.ui.verbose and not self._repo.ui.debugflag:
            self._verbosenote = self._repo.ui.note
        else:
            self._verbosenote = lambda s: None

    def generate(self, commonrevs, clnodes, fastpathlinkrev, source,
                 changelog=True):
        """Yield a sequence of changegroup byte chunks.
        If changelog is False, changelog data won't be added to changegroup
        """

        repo = self._repo
        cl = repo.changelog

        self._verbosenote(_('uncompressed size of bundle content:\n'))
        size = 0

        # NOTE: the changelog deltas must be consumed even when
        # changelog=False, because _generatechangelog() populates clstate
        # (clrevorder, manifests, changedfiles) as a side effect of the
        # lookup callback running during iteration.
        clstate, deltas = self._generatechangelog(cl, clnodes)
        for delta in deltas:
            if changelog:
                for chunk in _revisiondeltatochunks(delta,
                                                   self._builddeltaheader):
                    size += len(chunk)
                    yield chunk

        close = closechunk()
        size += len(close)
        # NOTE(review): closechunk() always returns the same terminator
        # chunk, so this fresh call is equivalent to yielding `close`;
        # the manifest/file sections below yield `close` directly.
        yield closechunk()

        self._verbosenote(_('%8.i (changelog)\n') % size)

        clrevorder = clstate['clrevorder']
        manifests = clstate['manifests']
        changedfiles = clstate['changedfiles']

        # We need to make sure that the linkrev in the changegroup refers to
        # the first changeset that introduced the manifest or file revision.
        # The fastpath is usually safer than the slowpath, because the filelogs
        # are walked in revlog order.
        #
        # When taking the slowpath when the manifest revlog uses generaldelta,
        # the manifest may be walked in the "wrong" order. Without 'clrevorder',
        # we would get an incorrect linkrev (see fix in cc0ff93d0c0c).
        #
        # When taking the fastpath, we are only vulnerable to reordering
        # of the changelog itself. The changelog never uses generaldelta and is
        # never reordered. To handle this case, we simply take the slowpath,
        # which already has the 'clrevorder' logic. This was also fixed in
        # cc0ff93d0c0c.

        # Treemanifests don't work correctly with fastpathlinkrev
        # either, because we don't discover which directory nodes to
        # send along with files. This could probably be fixed.
        fastpathlinkrev = fastpathlinkrev and (
            'treemanifest' not in repo.requirements)

        fnodes = {}  # needed file nodes

        size = 0
        it = self.generatemanifests(
            commonrevs, clrevorder, fastpathlinkrev, manifests, fnodes, source,
            clstate['clrevtomanifestrev'])

        for tree, deltas in it:
            if tree:
                # Only changegroup version 03 can carry tree manifest
                # (sub-directory) headers.
                assert self.version == b'03'
                chunk = _fileheader(tree)
                size += len(chunk)
                yield chunk

            for delta in deltas:
                chunks = _revisiondeltatochunks(delta, self._builddeltaheader)
                for chunk in chunks:
                    size += len(chunk)
                    yield chunk

            close = closechunk()
            size += len(close)
            yield close

        self._verbosenote(_('%8.i (manifests)\n') % size)
        yield self._manifestsend

        mfdicts = None
        if self._ellipses and self._isshallow:
            mfdicts = [(self._repo.manifestlog[n].read(), lr)
                       for (n, lr) in manifests.iteritems()]

        manifests.clear()
        clrevs = set(cl.rev(x) for x in clnodes)

        it = self.generatefiles(changedfiles, commonrevs,
                                source, mfdicts, fastpathlinkrev,
                                fnodes, clrevs)

        for path, deltas in it:
            h = _fileheader(path)
            # Size is reported per file, so reset rather than accumulate.
            size = len(h)
            yield h

            for delta in deltas:
                chunks = _revisiondeltatochunks(delta, self._builddeltaheader)
                for chunk in chunks:
                    size += len(chunk)
                    yield chunk

            close = closechunk()
            size += len(close)
            yield close

            self._verbosenote(_('%8.i %s\n') % (size, path))

        yield closechunk()

        if clnodes:
            repo.hook('outgoing', node=hex(clnodes[0]), source=source)

    def _generatechangelog(self, cl, nodes):
        """Generate data for changelog chunks.

        Returns a 2-tuple of a dict containing state and an iterable of
        byte chunks. The state will not be fully populated until the
        chunk stream has been fully consumed.
        """
        clrevorder = {}
        manifests = {}
        mfl = self._repo.manifestlog
        changedfiles = set()
        clrevtomanifestrev = {}

        # Callback for the changelog, used to collect changed files and
        # manifest nodes.
        # Returns the linkrev node (identity in the changelog case).
        def lookupcl(x):
            c = cl.changelogrevision(x)
            clrevorder[x] = len(clrevorder)

            if self._ellipses:
                # Only update manifests if x is going to be sent. Otherwise we
                # end up with bogus linkrevs specified for manifests and
                # we skip some manifest nodes that we should otherwise
                # have sent.
                if (x in self._fullclnodes
                    or cl.rev(x) in self._precomputedellipsis):

                    manifestnode = c.manifest
                    # Record the first changeset introducing this manifest
                    # version.
                    manifests.setdefault(manifestnode, x)
                    # Set this narrow-specific dict so we have the lowest
                    # manifest revnum to look up for this cl revnum. (Part of
                    # mapping changelog ellipsis parents to manifest ellipsis
                    # parents)
                    clrevtomanifestrev.setdefault(
                        cl.rev(x), mfl.rev(manifestnode))
                # We can't trust the changed files list in the changeset if the
                # client requested a shallow clone.
                if self._isshallow:
                    changedfiles.update(mfl[c.manifest].read().keys())
                else:
                    changedfiles.update(c.files)
            else:
                # record the first changeset introducing this manifest version
                manifests.setdefault(c.manifest, x)
                # Record a complete list of potentially-changed files in
                # this manifest.
                changedfiles.update(c.files)

            return x

        state = {
            'clrevorder': clrevorder,
            'manifests': manifests,
            'changedfiles': changedfiles,
            'clrevtomanifestrev': clrevtomanifestrev,
        }

        gen = deltagroup(
            self._repo, cl, nodes, True, lookupcl,
            self._forcedeltaparentprev,
            ellipses=self._ellipses,
            topic=_('changesets'),
            clrevtolocalrev={},
            fullclnodes=self._fullclnodes,
            precomputedellipsis=self._precomputedellipsis)

        return state, gen

    def generatemanifests(self, commonrevs, clrevorder, fastpathlinkrev,
                          manifests, fnodes, source, clrevtolocalrev):
        """Returns an iterator of changegroup chunks containing manifests.

        `source` is unused here, but is used by extensions like remotefilelog to
        change what is sent based in pulls vs pushes, etc.
        """
        repo = self._repo
        mfl = repo.manifestlog
        # Worklist of tree -> {node: linknode}; starts with the root
        # manifest and grows as subdirectory manifests are discovered.
        tmfnodes = {'': manifests}

        # Callback for the manifest, used to collect linkrevs for filelog
        # revisions.
        # Returns the linkrev node (collected in lookupcl).
        def makelookupmflinknode(tree, nodes):
            if fastpathlinkrev:
                assert not tree
                return manifests.__getitem__

            def lookupmflinknode(x):
                """Callback for looking up the linknode for manifests.

                Returns the linkrev node for the specified manifest.

                SIDE EFFECT:

                1) fclnodes gets populated with the list of relevant
                   file nodes if we're not using fastpathlinkrev
                2) When treemanifests are in use, collects treemanifest nodes
                   to send

                Note that this means manifests must be completely sent to
                the client before you can trust the list of files and
                treemanifests to send.
                """
                clnode = nodes[x]
                mdata = mfl.get(tree, x).readfast(shallow=True)
                for p, n, fl in mdata.iterentries():
                    if fl == 't':  # subdirectory manifest
                        subtree = tree + p + '/'
                        tmfclnodes = tmfnodes.setdefault(subtree, {})
                        tmfclnode = tmfclnodes.setdefault(n, clnode)
                        # Always prefer the earliest introducing changeset
                        # as the linknode.
                        if clrevorder[clnode] < clrevorder[tmfclnode]:
                            tmfclnodes[n] = clnode
                    else:
                        f = tree + p
                        fclnodes = fnodes.setdefault(f, {})
                        fclnode = fclnodes.setdefault(n, clnode)
                        if clrevorder[clnode] < clrevorder[fclnode]:
                            fclnodes[n] = clnode
                return clnode
            return lookupmflinknode

        while tmfnodes:
            tree, nodes = tmfnodes.popitem()
            store = mfl.getstorage(tree)

            if not self._filematcher.visitdir(store.tree[:-1] or '.'):
                # No nodes to send because this directory is out of
                # the client's view of the repository (probably
                # because of narrow clones).
                prunednodes = []
            else:
                # Avoid sending any manifest nodes we can prove the
                # client already has by checking linkrevs. See the
                # related comment in generatefiles().
                prunednodes = self._prunemanifests(store, nodes, commonrevs)
            if tree and not prunednodes:
                continue

            lookupfn = makelookupmflinknode(tree, nodes)

            deltas = deltagroup(
                self._repo, store, prunednodes, False, lookupfn,
                self._forcedeltaparentprev,
                ellipses=self._ellipses,
                topic=_('manifests'),
                clrevtolocalrev=clrevtolocalrev,
                fullclnodes=self._fullclnodes,
                precomputedellipsis=self._precomputedellipsis)

            yield tree, deltas

    def _prunemanifests(self, store, nodes, commonrevs):
        # This is split out as a separate method to allow filtering
        # commonrevs in extension code.
        #
        # TODO(augie): this shouldn't be required, instead we should
        # make filtering of revisions to send delegated to the store
        # layer.
        frev, flr = store.rev, store.linkrev
        return [n for n in nodes if flr(frev(n)) not in commonrevs]

    # The 'source' parameter is useful for extensions
    def generatefiles(self, changedfiles, commonrevs, source,
                      mfdicts, fastpathlinkrev, fnodes, clrevs):
        """Yield (path, deltas) pairs for the filelog section.

        Only files matched by self._filematcher are emitted; file nodes
        whose linkrevs are in commonrevs are skipped.
        """
        changedfiles = list(filter(self._filematcher, changedfiles))

        if not fastpathlinkrev:
            def normallinknodes(unused, fname):
                return fnodes.get(fname, {})
        else:
            cln = self._repo.changelog.node

            def normallinknodes(store, fname):
                flinkrev = store.linkrev
                fnode = store.node
                revs = ((r, flinkrev(r)) for r in store)
                return dict((fnode(r), cln(lr))
                            for r, lr in revs if lr in clrevs)

        clrevtolocalrev = {}

        if self._isshallow:
            # In a shallow clone, the linknodes callback needs to also include
            # those file nodes that are in the manifests we sent but weren't
            # introduced by those manifests.
            commonctxs = [self._repo[c] for c in commonrevs]
            clrev = self._repo.changelog.rev

            def linknodes(flog, fname):
                for c in commonctxs:
                    try:
                        fnode = c.filenode(fname)
                        clrevtolocalrev[c.rev()] = flog.rev(fnode)
                    except error.ManifestLookupError:
                        pass
                links = normallinknodes(flog, fname)
                if len(links) != len(mfdicts):
                    for mf, lr in mfdicts:
                        fnode = mf.get(fname, None)
                        if fnode in links:
                            links[fnode] = min(links[fnode], lr, key=clrev)
                        elif fnode:
                            links[fnode] = lr
                return links
        else:
            linknodes = normallinknodes

        repo = self._repo
        progress = repo.ui.makeprogress(_('files'), unit=_('files'),
                                        total=len(changedfiles))
        for i, fname in enumerate(sorted(changedfiles)):
            filerevlog = repo.file(fname)
            if not filerevlog:
                raise error.Abort(_("empty or missing file data for %s") %
                                  fname)

            clrevtolocalrev.clear()

            linkrevnodes = linknodes(filerevlog, fname)
            # Lookup for filenodes, we collected the linkrev nodes above in the
            # fastpath case and with lookupmf in the slowpath case.
            def lookupfilelog(x):
                return linkrevnodes[x]

            frev, flr = filerevlog.rev, filerevlog.linkrev
            # Skip sending any filenode we know the client already
            # has. This avoids over-sending files relatively
            # inexpensively, so it's not a problem if we under-filter
            # here.
            filenodes = [n for n in linkrevnodes
                         if flr(frev(n)) not in commonrevs]

            if not filenodes:
                continue

            progress.update(i + 1, item=fname)

            deltas = deltagroup(
                self._repo, filerevlog, filenodes, False, lookupfilelog,
                self._forcedeltaparentprev,
                ellipses=self._ellipses,
                clrevtolocalrev=clrevtolocalrev,
                fullclnodes=self._fullclnodes,
                precomputedellipsis=self._precomputedellipsis)

            yield fname, deltas

        progress.complete()
1180 1172
def _makecg1packer(repo, filematcher, bundlecaps, ellipses=False,
                   shallow=False, ellipsisroots=None, fullnodes=None):
    """Construct a cgpacker that emits version '01' changegroups."""
    def builddeltaheader(d):
        # cg1 headers carry no explicit delta base node, so the packer
        # is forced to delta against the previous revision.
        return _CHANGEGROUPV1_DELTA_HEADER.pack(
            d.node, d.p1node, d.p2node, d.linknode)

    return cgpacker(repo, filematcher, b'01',
                    builddeltaheader=builddeltaheader,
                    manifestsend=b'',
                    forcedeltaparentprev=True,
                    bundlecaps=bundlecaps,
                    ellipses=ellipses,
                    shallow=shallow,
                    ellipsisroots=ellipsisroots,
                    fullnodes=fullnodes)
1195 1187
def _makecg2packer(repo, filematcher, bundlecaps, ellipses=False,
                   shallow=False, ellipsisroots=None, fullnodes=None):
    """Construct a cgpacker that emits version '02' changegroups."""
    def builddeltaheader(d):
        # Unlike cg1, the cg2 header includes an explicit base node.
        return _CHANGEGROUPV2_DELTA_HEADER.pack(
            d.node, d.p1node, d.p2node, d.basenode, d.linknode)

    return cgpacker(repo, filematcher, b'02',
                    builddeltaheader=builddeltaheader,
                    manifestsend=b'',
                    bundlecaps=bundlecaps,
                    ellipses=ellipses,
                    shallow=shallow,
                    ellipsisroots=ellipsisroots,
                    fullnodes=fullnodes)
1209 1201
def _makecg3packer(repo, filematcher, bundlecaps, ellipses=False,
                   shallow=False, ellipsisroots=None, fullnodes=None):
    """Construct a cgpacker that emits version '03' changegroups."""
    def builddeltaheader(d):
        # cg3 additionally transmits per-revision flags.
        return _CHANGEGROUPV3_DELTA_HEADER.pack(
            d.node, d.p1node, d.p2node, d.basenode, d.linknode, d.flags)

    return cgpacker(repo, filematcher, b'03',
                    builddeltaheader=builddeltaheader,
                    # cg3 terminates the manifest section (which may
                    # contain tree manifests) with an extra close chunk.
                    manifestsend=closechunk(),
                    bundlecaps=bundlecaps,
                    ellipses=ellipses,
                    shallow=shallow,
                    ellipsisroots=ellipsisroots,
                    fullnodes=fullnodes)
1223 1215
# Maps changegroup version string -> (packer factory, unpacker class).
_packermap = {'01': (_makecg1packer, cg1unpacker),
             # cg2 adds support for exchanging generaldelta
             '02': (_makecg2packer, cg2unpacker),
             # cg3 adds support for exchanging revlog flags and treemanifests
             '03': (_makecg3packer, cg3unpacker),
}
1230 1222
def allsupportedversions(repo):
    """Return the set of all changegroup versions this code understands.

    Version '03' is included only when the repo uses tree manifests or
    the experimental config knobs opt into it.
    """
    versions = set(_packermap.keys())
    wantv03 = (repo.ui.configbool('experimental', 'changegroup3')
               or repo.ui.configbool('experimental', 'treemanifest')
               or 'treemanifest' in repo.requirements)
    if not wantv03:
        versions.discard('03')
    return versions
1238 1230
# Changegroup versions that can be applied to the repo
def supportedincomingversions(repo):
    """Return the changegroup versions that can be applied to ``repo``."""
    return allsupportedversions(repo)
1242 1234
# Changegroup versions that can be created from the repo
def supportedoutgoingversions(repo):
    """Return the changegroup versions that can be created from ``repo``."""
    versions = allsupportedversions(repo)

    # Several repo requirements each independently rule out the old
    # formats:
    #
    # - treemanifest: versions 01 and 02 support only flat manifests and
    #   it's just too expensive to convert between the flat manifest and
    #   tree manifest on the fly. Since tree manifests are hashed
    #   differently, all of history would have to be converted. Instead,
    #   we simply don't even pretend to support versions 01 and 02.
    # - narrow: versions 01 and 02 don't support revlog flags, and we
    #   need to support that for stripping and unbundling to work.
    # - lfs: versions 01 and 02 don't support revlog flags, and we need
    #   to mark LFS entries with REVIDX_EXTSTORED.
    if ('treemanifest' in repo.requirements
        or repository.NARROW_REQUIREMENT in repo.requirements
        or LFS_REQUIREMENT in repo.requirements):
        versions.discard('01')
        versions.discard('02')

    return versions
1266 1258
def localversion(repo):
    """Return the best version for bundles that are meant to be used
    locally, such as those from strip and shelve, and temporary bundles.
    """
    return max(supportedoutgoingversions(repo))
1271 1263
def safeversion(repo):
    """Return the smallest version it's safe to assume clients support.

    For example, all hg versions that support generaldelta also support
    changegroup 02, so '01' can be dropped for generaldelta repos.
    """
    candidates = supportedoutgoingversions(repo)
    if 'generaldelta' in repo.requirements:
        candidates.discard('01')
    assert candidates
    return min(candidates)
1281 1273
def getbundler(version, repo, bundlecaps=None, filematcher=None,
               ellipses=False, shallow=False, ellipsisroots=None,
               fullnodes=None):
    """Obtain a changegroup packer for ``version``.

    ``version`` must be in supportedoutgoingversions(repo); the packer
    factory is looked up in ``_packermap``.  ``filematcher`` defaults to
    an always-matcher and is intersected with the repo's narrow matcher.
    """
    assert version in supportedoutgoingversions(repo)

    if filematcher is None:
        filematcher = matchmod.alwaysmatcher(repo.root, '')

    # Version 01 can only ship everything; a sparse matcher here is a
    # caller bug, not a negotiation failure.
    if version == '01' and not filematcher.always():
        raise error.ProgrammingError('version 01 changegroups do not support '
                                     'sparse file matchers')

    if ellipses and version in (b'01', b'02'):
        raise error.Abort(
            _('ellipsis nodes require at least cg3 on client and server, '
              'but negotiated version %s') % version)

    # Requested files could include files not in the local store. So
    # filter those out.
    filematcher = matchmod.intersectmatchers(repo.narrowmatch(),
                                             filematcher)

    fn = _packermap[version][0]
    return fn(repo, filematcher, bundlecaps, ellipses=ellipses,
              shallow=shallow, ellipsisroots=ellipsisroots,
              fullnodes=fullnodes)
1308 1300
def getunbundler(version, fh, alg, extras=None):
    """Return an unpacker for changegroup ``version`` reading from ``fh``."""
    unpackercls = _packermap[version][1]
    return unpackercls(fh, alg, extras=extras)
1311 1303
def _changegroupinfo(repo, nodes, source):
    """Report the changeset count (and, with --debug, each node)."""
    ui = repo.ui
    if ui.verbose or source == 'bundle':
        ui.status(_("%d changesets found\n") % len(nodes))
    if ui.debugflag:
        ui.debug("list of changesets:\n")
        for node in nodes:
            ui.debug("%s\n" % hex(node))
1319 1311
def makechangegroup(repo, outgoing, version, source, fastpath=False,
                    bundlecaps=None):
    """Build a changegroup for ``outgoing`` and wrap it in an unbundler."""
    stream = makestream(repo, outgoing, version, source,
                        fastpath=fastpath, bundlecaps=bundlecaps)
    extras = {'clcount': len(outgoing.missing)}
    return getunbundler(version, util.chunkbuffer(stream), None, extras)
1326 1318
def makestream(repo, outgoing, version, source, fastpath=False,
               bundlecaps=None, filematcher=None):
    """Return an iterator of changegroup chunks for ``outgoing``.

    Fires the 'preoutgoing' hook (which may abort) before generation
    starts and the packer later fires 'outgoing'.
    """
    bundler = getbundler(version, repo, bundlecaps=bundlecaps,
                         filematcher=filematcher)

    repo = repo.unfiltered()
    commonrevs = outgoing.common
    csets = outgoing.missing
    heads = outgoing.missingheads
    # We go through the fast path if we get told to, or if all the
    # (unfiltered) heads have been requested (since we then know all
    # linkrevs will be pulled by the client).
    heads.sort()
    fastpathlinkrev = fastpath or (
        repo.filtername is None and heads == sorted(repo.heads()))

    repo.hook('preoutgoing', throw=True, source=source)
    _changegroupinfo(repo, csets, source)
    return bundler.generate(commonrevs, csets, fastpathlinkrev, source)
1346 1338
def _addchangegroupfiles(repo, source, revmap, trp, expectedfiles, needfiles):
    """Apply the file section of changegroup ``source`` to ``repo``.

    ``revmap`` and ``trp`` (transaction) are passed through to each
    filelog's addgroup().  ``needfiles`` maps filename -> set of nodes
    that must arrive; entries are checked off as revisions are added and
    any node still missing afterwards (and absent locally) aborts.

    Returns a ``(revisions, files)`` tuple of counts.
    """
    revisions = 0
    files = 0
    progress = repo.ui.makeprogress(_('files'), unit=_('files'),
                                    total=expectedfiles)
    # iter(callable, {}) stops when filelogheader() returns the empty
    # dict that terminates the file section.
    for chunkdata in iter(source.filelogheader, {}):
        files += 1
        f = chunkdata["filename"]
        repo.ui.debug("adding %s revisions\n" % f)
        progress.increment()
        fl = repo.file(f)
        o = len(fl)
        try:
            deltas = source.deltaiter()
            if not fl.addgroup(deltas, revmap, trp):
                raise error.Abort(_("received file revlog group is empty"))
        except error.CensoredBaseError as e:
            raise error.Abort(_("received delta base is censored: %s") % e)
        revisions += len(fl) - o
        if f in needfiles:
            needs = needfiles[f]
            # Walk only the newly added revisions [o, len(fl)).
            for new in pycompat.xrange(o, len(fl)):
                n = fl.node(new)
                if n in needs:
                    needs.remove(n)
                else:
                    raise error.Abort(
                        _("received spurious file revlog entry"))
            if not needs:
                del needfiles[f]
    progress.complete()

    # Anything left in needfiles was not in the stream; it's only OK if
    # the revision already exists locally.
    for f, needs in needfiles.iteritems():
        fl = repo.file(f)
        for n in needs:
            try:
                fl.rev(n)
            except error.LookupError:
                raise error.Abort(
                    _('missing file data for %s:%s - run hg verify') %
                    (f, hex(n)))

    return revisions, files
General Comments 0
You need to be logged in to leave comments. Login now