changegroup: remove _clnodetorev...
Gregory Szorc
r39033:812eec3f default
@@ -1,1429 +1,1419 @@
1 1 # changegroup.py - Mercurial changegroup manipulation functions
2 2 #
3 3 # Copyright 2006 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import os
11 11 import struct
12 12 import weakref
13 13
14 14 from .i18n import _
15 15 from .node import (
16 16 hex,
17 17 nullid,
18 18 nullrev,
19 19 short,
20 20 )
21 21
22 22 from .thirdparty import (
23 23 attr,
24 24 )
25 25
26 26 from . import (
27 27 dagutil,
28 28 error,
29 29 manifest,
30 30 match as matchmod,
31 31 mdiff,
32 32 phases,
33 33 pycompat,
34 34 repository,
35 35 revlog,
36 36 util,
37 37 )
38 38
39 39 from .utils import (
40 40 stringutil,
41 41 )
42 42
43 43 _CHANGEGROUPV1_DELTA_HEADER = struct.Struct("20s20s20s20s")
44 44 _CHANGEGROUPV2_DELTA_HEADER = struct.Struct("20s20s20s20s20s")
45 45 _CHANGEGROUPV3_DELTA_HEADER = struct.Struct(">20s20s20s20s20sH")
46 46
47 47 LFS_REQUIREMENT = 'lfs'
48 48
49 49 readexactly = util.readexactly
50 50
51 51 def getchunk(stream):
52 52 """return the next chunk from stream as a string"""
53 53 d = readexactly(stream, 4)
54 54 l = struct.unpack(">l", d)[0]
55 55 if l <= 4:
56 56 if l:
57 57 raise error.Abort(_("invalid chunk length %d") % l)
58 58 return ""
59 59 return readexactly(stream, l - 4)
60 60
61 61 def chunkheader(length):
62 62 """return a changegroup chunk header (string)"""
63 63 return struct.pack(">l", length + 4)
64 64
65 65 def closechunk():
66 66 """return a changegroup chunk header (string) for a zero-length chunk"""
67 67 return struct.pack(">l", 0)
68 68
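# A minimal sketch (not part of this changeset) of how the 4-byte framing
# provided by getchunk()/chunkheader()/closechunk() round-trips: each chunk
# is a big-endian int32 holding len(payload) + 4 followed by the payload,
# and a bare zero length closes a group.
#
#   import io
#   import struct
#
#   payload = b'example'
#   framed = (struct.pack(">l", len(payload) + 4) + payload +
#             struct.pack(">l", 0))
#   stream = io.BytesIO(framed)
#   length = struct.unpack(">l", stream.read(4))[0]
#   assert stream.read(length - 4) == payload
#   assert struct.unpack(">l", stream.read(4))[0] == 0  # closechunk() marker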
69 69 def _fileheader(path):
70 70 """Obtain a changegroup chunk header for a named path."""
71 71 return chunkheader(len(path)) + path
72 72
73 73 def writechunks(ui, chunks, filename, vfs=None):
74 74 """Write chunks to a file and return its filename.
75 75
76 76 The stream is assumed to be a bundle file.
77 77 Existing files will not be overwritten.
78 78 If no filename is specified, a temporary file is created.
79 79 """
80 80 fh = None
81 81 cleanup = None
82 82 try:
83 83 if filename:
84 84 if vfs:
85 85 fh = vfs.open(filename, "wb")
86 86 else:
87 87 # Increase default buffer size because default is usually
88 88 # small (4k is common on Linux).
89 89 fh = open(filename, "wb", 131072)
90 90 else:
91 91 fd, filename = pycompat.mkstemp(prefix="hg-bundle-", suffix=".hg")
92 92 fh = os.fdopen(fd, r"wb")
93 93 cleanup = filename
94 94 for c in chunks:
95 95 fh.write(c)
96 96 cleanup = None
97 97 return filename
98 98 finally:
99 99 if fh is not None:
100 100 fh.close()
101 101 if cleanup is not None:
102 102 if filename and vfs:
103 103 vfs.unlink(cleanup)
104 104 else:
105 105 os.unlink(cleanup)
106 106
107 107 class cg1unpacker(object):
108 108 """Unpacker for cg1 changegroup streams.
109 109
110 110 A changegroup unpacker handles the framing of the revision data in
111 111 the wire format. Most consumers will want to use the apply()
112 112 method to add the changes from the changegroup to a repository.
113 113
114 114 If you're forwarding a changegroup unmodified to another consumer,
115 115 use getchunks(), which returns an iterator of changegroup
116 116 chunks. This is mostly useful for cases where you need to know the
117 117 data stream has ended by observing the end of the changegroup.
118 118
119 119 deltachunk() is useful only if you're applying delta data. Most
120 120 consumers should prefer apply() instead.
121 121
122 122 A few other public methods exist. Those are used only for
123 123 bundlerepo and some debug commands - their use is discouraged.
124 124 """
125 125 deltaheader = _CHANGEGROUPV1_DELTA_HEADER
126 126 deltaheadersize = deltaheader.size
127 127 version = '01'
128 128 _grouplistcount = 1 # One list of files after the manifests
129 129
130 130 def __init__(self, fh, alg, extras=None):
131 131 if alg is None:
132 132 alg = 'UN'
133 133 if alg not in util.compengines.supportedbundletypes:
134 134 raise error.Abort(_('unknown stream compression type: %s')
135 135 % alg)
136 136 if alg == 'BZ':
137 137 alg = '_truncatedBZ'
138 138
139 139 compengine = util.compengines.forbundletype(alg)
140 140 self._stream = compengine.decompressorreader(fh)
141 141 self._type = alg
142 142 self.extras = extras or {}
143 143 self.callback = None
144 144
145 145 # These methods (compressed, read, seek, tell) all appear to only
146 146 # be used by bundlerepo, but it's a little hard to tell.
147 147 def compressed(self):
148 148 return self._type is not None and self._type != 'UN'
149 149 def read(self, l):
150 150 return self._stream.read(l)
151 151 def seek(self, pos):
152 152 return self._stream.seek(pos)
153 153 def tell(self):
154 154 return self._stream.tell()
155 155 def close(self):
156 156 return self._stream.close()
157 157
158 158 def _chunklength(self):
159 159 d = readexactly(self._stream, 4)
160 160 l = struct.unpack(">l", d)[0]
161 161 if l <= 4:
162 162 if l:
163 163 raise error.Abort(_("invalid chunk length %d") % l)
164 164 return 0
165 165 if self.callback:
166 166 self.callback()
167 167 return l - 4
168 168
169 169 def changelogheader(self):
170 170 """v10 does not have a changelog header chunk"""
171 171 return {}
172 172
173 173 def manifestheader(self):
174 174 """v10 does not have a manifest header chunk"""
175 175 return {}
176 176
177 177 def filelogheader(self):
178 178 """return the header of the filelogs chunk, v10 only has the filename"""
179 179 l = self._chunklength()
180 180 if not l:
181 181 return {}
182 182 fname = readexactly(self._stream, l)
183 183 return {'filename': fname}
184 184
185 185 def _deltaheader(self, headertuple, prevnode):
186 186 node, p1, p2, cs = headertuple
187 187 if prevnode is None:
188 188 deltabase = p1
189 189 else:
190 190 deltabase = prevnode
191 191 flags = 0
192 192 return node, p1, p2, deltabase, cs, flags
193 193
194 194 def deltachunk(self, prevnode):
195 195 l = self._chunklength()
196 196 if not l:
197 197 return {}
198 198 headerdata = readexactly(self._stream, self.deltaheadersize)
199 199 header = self.deltaheader.unpack(headerdata)
200 200 delta = readexactly(self._stream, l - self.deltaheadersize)
201 201 node, p1, p2, deltabase, cs, flags = self._deltaheader(header, prevnode)
202 202 return (node, p1, p2, cs, deltabase, delta, flags)
203 203
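# A hedged illustration (not part of this changeset): for cg1 the delta
# header unpacked above is four 20-byte nodes -- node, p1, p2 and the
# linking changeset -- so a raw header splits apart with the module-level
# struct directly:
#
#   import struct
#
#   hdr = struct.Struct("20s20s20s20s")
#   raw = b'\x11' * 20 + b'\x22' * 20 + b'\x33' * 20 + b'\x44' * 20
#   node, p1, p2, cs = hdr.unpack(raw)
#   assert hdr.size == 80 and cs == b'\x44' * 20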
204 204 def getchunks(self):
205 205 """returns all the chunks contains in the bundle
206 206
207 207 Used when you need to forward the binary stream to a file or another
208 208 network API. To do so, it parses the changegroup data; otherwise it
209 209 would block in the case of sshrepo because it doesn't know the end of the stream.
210 210 """
211 211 # For changegroup 1 and 2, we expect 3 parts: changelog, manifestlog,
212 212 # and a list of filelogs. For changegroup 3, we expect 4 parts:
213 213 # changelog, manifestlog, a list of tree manifestlogs, and a list of
214 214 # filelogs.
215 215 #
216 216 # Changelog and manifestlog parts are terminated with empty chunks. The
217 217 # tree and file parts are a list of entry sections. Each entry section
218 218 # is a series of chunks terminating in an empty chunk. The list of these
219 219 # entry sections is terminated in yet another empty chunk, so we know
220 220 # we've reached the end of the tree/file list when we reach an empty
221 221 # chunk that was preceded by no non-empty chunks.
222 222
223 223 parts = 0
224 224 while parts < 2 + self._grouplistcount:
225 225 noentries = True
226 226 while True:
227 227 chunk = getchunk(self)
228 228 if not chunk:
229 229 # The first two empty chunks represent the end of the
230 230 # changelog and the manifestlog portions. The remaining
231 231 # empty chunks represent either A) the end of individual
232 232 # tree or file entries in the file list, or B) the end of
233 233 # the entire list. It's the end of the entire list if there
234 234 # were no entries (i.e. noentries is True).
235 235 if parts < 2:
236 236 parts += 1
237 237 elif noentries:
238 238 parts += 1
239 239 break
240 240 noentries = False
241 241 yield chunkheader(len(chunk))
242 242 pos = 0
243 243 while pos < len(chunk):
244 244 next = pos + 2**20
245 245 yield chunk[pos:next]
246 246 pos = next
247 247 yield closechunk()
248 248
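# Hedged usage sketch (not part of this changeset): forwarding an
# unmodified changegroup, as the class docstring recommends, is just
# copying the framed chunks (the 'unpacker' name is illustrative):
#
#   with open('copy.hg', 'wb') as out:
#       for chunk in unpacker.getchunks():
#           out.write(chunk)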
249 249 def _unpackmanifests(self, repo, revmap, trp, prog):
250 250 self.callback = prog.increment
251 251 # no need to check for empty manifest group here:
252 252 # if the result of the merge of 1 and 2 is the same in 3 and 4,
253 253 # no new manifest will be created and the manifest group will
254 254 # be empty during the pull
255 255 self.manifestheader()
256 256 deltas = self.deltaiter()
257 257 repo.manifestlog.addgroup(deltas, revmap, trp)
258 258 prog.complete()
259 259 self.callback = None
260 260
261 261 def apply(self, repo, tr, srctype, url, targetphase=phases.draft,
262 262 expectedtotal=None):
263 263 """Add the changegroup returned by source.read() to this repo.
264 264 srctype is a string like 'push', 'pull', or 'unbundle'. url is
265 265 the URL of the repo where this changegroup is coming from.
266 266
267 267 Return an integer summarizing the change to this repo:
268 268 - nothing changed or no source: 0
269 269 - more heads than before: 1+added heads (2..n)
270 270 - fewer heads than before: -1-removed heads (-2..-n)
271 271 - number of heads stays the same: 1
272 272 """
273 273 repo = repo.unfiltered()
274 274 def csmap(x):
275 275 repo.ui.debug("add changeset %s\n" % short(x))
276 276 return len(cl)
277 277
278 278 def revmap(x):
279 279 return cl.rev(x)
280 280
281 281 changesets = files = revisions = 0
282 282
283 283 try:
284 284 # The transaction may already carry source information. In this
285 285 # case we use the top level data. We overwrite the argument
286 286 # because we need to use the top level values (if they exist)
287 287 # in this function.
288 288 srctype = tr.hookargs.setdefault('source', srctype)
289 289 url = tr.hookargs.setdefault('url', url)
290 290 repo.hook('prechangegroup',
291 291 throw=True, **pycompat.strkwargs(tr.hookargs))
292 292
293 293 # write changelog data to temp files so concurrent readers
294 294 # will not see an inconsistent view
295 295 cl = repo.changelog
296 296 cl.delayupdate(tr)
297 297 oldheads = set(cl.heads())
298 298
299 299 trp = weakref.proxy(tr)
300 300 # pull off the changeset group
301 301 repo.ui.status(_("adding changesets\n"))
302 302 clstart = len(cl)
303 303 progress = repo.ui.makeprogress(_('changesets'), unit=_('chunks'),
304 304 total=expectedtotal)
305 305 self.callback = progress.increment
306 306
307 307 efiles = set()
308 308 def onchangelog(cl, node):
309 309 efiles.update(cl.readfiles(node))
310 310
311 311 self.changelogheader()
312 312 deltas = self.deltaiter()
313 313 cgnodes = cl.addgroup(deltas, csmap, trp, addrevisioncb=onchangelog)
314 314 efiles = len(efiles)
315 315
316 316 if not cgnodes:
317 317 repo.ui.develwarn('applied empty changegroup',
318 318 config='warn-empty-changegroup')
319 319 clend = len(cl)
320 320 changesets = clend - clstart
321 321 progress.complete()
322 322 self.callback = None
323 323
324 324 # pull off the manifest group
325 325 repo.ui.status(_("adding manifests\n"))
326 326 # We know that we'll never have more manifests than we had
327 327 # changesets.
328 328 progress = repo.ui.makeprogress(_('manifests'), unit=_('chunks'),
329 329 total=changesets)
330 330 self._unpackmanifests(repo, revmap, trp, progress)
331 331
332 332 needfiles = {}
333 333 if repo.ui.configbool('server', 'validate'):
334 334 cl = repo.changelog
335 335 ml = repo.manifestlog
336 336 # validate incoming csets have their manifests
337 337 for cset in pycompat.xrange(clstart, clend):
338 338 mfnode = cl.changelogrevision(cset).manifest
339 339 mfest = ml[mfnode].readdelta()
340 340 # store file cgnodes we must see
341 341 for f, n in mfest.iteritems():
342 342 needfiles.setdefault(f, set()).add(n)
343 343
344 344 # process the files
345 345 repo.ui.status(_("adding file changes\n"))
346 346 newrevs, newfiles = _addchangegroupfiles(
347 347 repo, self, revmap, trp, efiles, needfiles)
348 348 revisions += newrevs
349 349 files += newfiles
350 350
351 351 deltaheads = 0
352 352 if oldheads:
353 353 heads = cl.heads()
354 354 deltaheads = len(heads) - len(oldheads)
355 355 for h in heads:
356 356 if h not in oldheads and repo[h].closesbranch():
357 357 deltaheads -= 1
358 358 htext = ""
359 359 if deltaheads:
360 360 htext = _(" (%+d heads)") % deltaheads
361 361
362 362 repo.ui.status(_("added %d changesets"
363 363 " with %d changes to %d files%s\n")
364 364 % (changesets, revisions, files, htext))
365 365 repo.invalidatevolatilesets()
366 366
367 367 if changesets > 0:
368 368 if 'node' not in tr.hookargs:
369 369 tr.hookargs['node'] = hex(cl.node(clstart))
370 370 tr.hookargs['node_last'] = hex(cl.node(clend - 1))
371 371 hookargs = dict(tr.hookargs)
372 372 else:
373 373 hookargs = dict(tr.hookargs)
374 374 hookargs['node'] = hex(cl.node(clstart))
375 375 hookargs['node_last'] = hex(cl.node(clend - 1))
376 376 repo.hook('pretxnchangegroup',
377 377 throw=True, **pycompat.strkwargs(hookargs))
378 378
379 379 added = [cl.node(r) for r in pycompat.xrange(clstart, clend)]
380 380 phaseall = None
381 381 if srctype in ('push', 'serve'):
382 382 # Old servers can not push the boundary themselves.
383 383 # New servers won't push the boundary if changeset already
384 384 # exists locally as secret
385 385 #
386 386 # We should not use added here but the list of all changes in
387 387 # the bundle
388 388 if repo.publishing():
389 389 targetphase = phaseall = phases.public
390 390 else:
391 391 # closer target phase computation
392 392
393 393 # Those changesets have been pushed from the
394 394 # outside and their phases are going to be pushed
395 395 # alongside. Therefore `targetphase` is
396 396 # ignored.
397 397 targetphase = phaseall = phases.draft
398 398 if added:
399 399 phases.registernew(repo, tr, targetphase, added)
400 400 if phaseall is not None:
401 401 phases.advanceboundary(repo, tr, phaseall, cgnodes)
402 402
403 403 if changesets > 0:
404 404
405 405 def runhooks():
406 406 # These hooks run when the lock releases, not when the
407 407 # transaction closes. So it's possible for the changelog
408 408 # to have changed since we last saw it.
409 409 if clstart >= len(repo):
410 410 return
411 411
412 412 repo.hook("changegroup", **pycompat.strkwargs(hookargs))
413 413
414 414 for n in added:
415 415 args = hookargs.copy()
416 416 args['node'] = hex(n)
417 417 del args['node_last']
418 418 repo.hook("incoming", **pycompat.strkwargs(args))
419 419
420 420 newheads = [h for h in repo.heads()
421 421 if h not in oldheads]
422 422 repo.ui.log("incoming",
423 423 "%d incoming changes - new heads: %s\n",
424 424 len(added),
425 425 ', '.join([hex(c[:6]) for c in newheads]))
426 426
427 427 tr.addpostclose('changegroup-runhooks-%020i' % clstart,
428 428 lambda tr: repo._afterlock(runhooks))
429 429 finally:
430 430 repo.ui.flush()
431 431 # never return 0 here:
432 432 if deltaheads < 0:
433 433 ret = deltaheads - 1
434 434 else:
435 435 ret = deltaheads + 1
436 436 return ret
437 437
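# Hedged sketch (not part of this changeset): interpreting apply()'s return
# value per its docstring ('cg', 'tr' and the URL are illustrative):
#
#   ret = cg.apply(repo, tr, 'unbundle', 'file:bundle.hg')
#   if ret == 0:
#       pass                    # nothing changed
#   elif ret == 1:
#       pass                    # changed, head count unchanged
#   elif ret > 1:
#       pass                    # ret - 1 new heads
#   else:
#       pass                    # -ret - 1 heads removed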
438 438 def deltaiter(self):
439 439 """
440 440 returns an iterator of the deltas in this changegroup
441 441
442 442 Useful for passing to the underlying storage system to be stored.
443 443 """
444 444 chain = None
445 445 for chunkdata in iter(lambda: self.deltachunk(chain), {}):
446 446 # Chunkdata: (node, p1, p2, cs, deltabase, delta, flags)
447 447 yield chunkdata
448 448 chain = chunkdata[0]
449 449
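# Hedged consumer sketch (not part of this changeset): storage addgroup()
# implementations receive the deltaiter() output and see one 7-tuple per
# revision, in delta-chain order (the 'unpacker' name is illustrative):
#
#   for node, p1, p2, cs, deltabase, delta, flags in unpacker.deltaiter():
#       pass  # apply the delta against deltabase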
450 450 class cg2unpacker(cg1unpacker):
451 451 """Unpacker for cg2 streams.
452 452
453 453 cg2 streams add support for generaldelta, so the delta header
454 454 format is slightly different. All other features about the data
455 455 remain the same.
456 456 """
457 457 deltaheader = _CHANGEGROUPV2_DELTA_HEADER
458 458 deltaheadersize = deltaheader.size
459 459 version = '02'
460 460
461 461 def _deltaheader(self, headertuple, prevnode):
462 462 node, p1, p2, deltabase, cs = headertuple
463 463 flags = 0
464 464 return node, p1, p2, deltabase, cs, flags
465 465
466 466 class cg3unpacker(cg2unpacker):
467 467 """Unpacker for cg3 streams.
468 468
469 469 cg3 streams add support for exchanging treemanifests and revlog
470 470 flags. It adds the revlog flags to the delta header and an empty chunk
471 471 separating manifests and files.
472 472 """
473 473 deltaheader = _CHANGEGROUPV3_DELTA_HEADER
474 474 deltaheadersize = deltaheader.size
475 475 version = '03'
476 476 _grouplistcount = 2 # One list of manifests and one list of files
477 477
478 478 def _deltaheader(self, headertuple, prevnode):
479 479 node, p1, p2, deltabase, cs, flags = headertuple
480 480 return node, p1, p2, deltabase, cs, flags
481 481
482 482 def _unpackmanifests(self, repo, revmap, trp, prog):
483 483 super(cg3unpacker, self)._unpackmanifests(repo, revmap, trp, prog)
484 484 for chunkdata in iter(self.filelogheader, {}):
485 485 # If we get here, there are directory manifests in the changegroup
486 486 d = chunkdata["filename"]
487 487 repo.ui.debug("adding %s revisions\n" % d)
488 488 dirlog = repo.manifestlog._revlog.dirlog(d)
489 489 deltas = self.deltaiter()
490 490 if not dirlog.addgroup(deltas, revmap, trp):
491 491 raise error.Abort(_("received dir revlog group is empty"))
492 492
493 493 class headerlessfixup(object):
494 494 def __init__(self, fh, h):
495 495 self._h = h
496 496 self._fh = fh
497 497 def read(self, n):
498 498 if self._h:
499 499 d, self._h = self._h[:n], self._h[n:]
500 500 if len(d) < n:
501 501 d += readexactly(self._fh, n - len(d))
502 502 return d
503 503 return readexactly(self._fh, n)
504 504
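# Hedged sketch (not part of this changeset): headerlessfixup re-attaches
# header bytes that were already consumed, e.g. while sniffing a stream:
#
#   import io
#
#   fh = io.BytesIO(b'HG10UN<rest of stream>')
#   magic = fh.read(6)                  # consumed during sniffing
#   fixed = headerlessfixup(fh, magic)
#   assert fixed.read(6) == b'HG10UN'   # header is readable again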
505 505 @attr.s(slots=True, frozen=True)
506 506 class revisiondelta(object):
507 507 """Describes a delta entry in a changegroup.
508 508
509 509 Captured data is sufficient to serialize the delta into multiple
510 510 formats.
511 511 """
512 512 # 20 byte node of this revision.
513 513 node = attr.ib()
514 514 # 20 byte nodes of parent revisions.
515 515 p1node = attr.ib()
516 516 p2node = attr.ib()
517 517 # 20 byte node of the revision this delta is against.
518 518 basenode = attr.ib()
519 519 # 20 byte node of changeset revision this delta is associated with.
520 520 linknode = attr.ib()
521 521 # 2 bytes of flags to apply to revision data.
522 522 flags = attr.ib()
523 523 # Iterable of chunks holding raw delta data.
524 524 deltachunks = attr.ib()
525 525
526 526 def _sortnodesnormal(store, nodes, reorder):
527 527 """Sort nodes for changegroup generation and turn into revnums."""
528 528 # for generaldelta revlogs, we linearize the revs; this will both be
529 529 # much quicker and generate a much smaller bundle
530 530 if (store._generaldelta and reorder is None) or reorder:
531 531 dag = dagutil.revlogdag(store)
532 532 return dag.linearize(set(store.rev(n) for n in nodes))
533 533 else:
534 534 return sorted([store.rev(n) for n in nodes])
535 535
536 def _sortnodesellipsis(store, nodes, clnodetorev, lookup):
536 def _sortnodesellipsis(store, nodes, cl, lookup):
537 537 """Sort nodes for changegroup generation and turn into revnums."""
538 538 # Ellipses serving mode.
539 539 #
540 540 # In a perfect world, we'd generate better ellipsis-ified graphs
541 541 # for non-changelog revlogs. In practice, we haven't started doing
542 542 # that yet, so the resulting DAGs for the manifestlog and filelogs
543 543 # are actually full of bogus parentage on all the ellipsis
544 544 # nodes. This has the side effect that, while the contents are
545 545 # correct, the individual DAGs might be completely out of whack in
546 546 # a case like 882681bc3166 and its ancestors (back about 10
547 547 # revisions or so) in the main hg repo.
548 548 #
549 549 # The one invariant we *know* holds is that the new (potentially
550 550 # bogus) DAG shape will be valid if we order the nodes in the
551 551 # order that they're introduced in dramatis personae by the
552 552 # changelog, so what we do is we sort the non-changelog histories
553 553 # by the order in which they are used by the changelog.
554 key = lambda n: clnodetorev[lookup(n)]
554 key = lambda n: cl.rev(lookup(n))
555 555 return [store.rev(n) for n in sorted(nodes, key=key)]
556 556
557 557 def _revisiondeltanormal(store, rev, prev, linknode, deltaparentfn):
558 558 """Construct a revision delta for non-ellipses changegroup generation."""
559 559 node = store.node(rev)
560 560 p1, p2 = store.parentrevs(rev)
561 561 base = deltaparentfn(store, rev, p1, p2, prev)
562 562
563 563 prefix = ''
564 564 if store.iscensored(base) or store.iscensored(rev):
565 565 try:
566 566 delta = store.revision(node, raw=True)
567 567 except error.CensoredNodeError as e:
568 568 delta = e.tombstone
569 569 if base == nullrev:
570 570 prefix = mdiff.trivialdiffheader(len(delta))
571 571 else:
572 572 baselen = store.rawsize(base)
573 573 prefix = mdiff.replacediffheader(baselen, len(delta))
574 574 elif base == nullrev:
575 575 delta = store.revision(node, raw=True)
576 576 prefix = mdiff.trivialdiffheader(len(delta))
577 577 else:
578 578 delta = store.revdiff(base, rev)
579 579 p1n, p2n = store.parents(node)
580 580
581 581 return revisiondelta(
582 582 node=node,
583 583 p1node=p1n,
584 584 p2node=p2n,
585 585 basenode=store.node(base),
586 586 linknode=linknode,
587 587 flags=store.flags(rev),
588 588 deltachunks=(prefix, delta),
589 589 )
590 590
591 591 class cgpacker(object):
592 592 def __init__(self, repo, filematcher, version, allowreorder,
593 593 deltaparentfn, builddeltaheader, manifestsend,
594 594 bundlecaps=None, ellipses=False,
595 595 shallow=False, ellipsisroots=None, fullnodes=None):
596 596 """Given a source repo, construct a bundler.
597 597
598 598 filematcher is a matcher that matches on files to include in the
599 599 changegroup. Used to facilitate sparse changegroups.
600 600
601 601 allowreorder controls whether reordering of revisions is allowed.
602 602 This value is used when ``bundle.reorder`` is ``auto`` or isn't
603 603 set.
604 604
605 605 deltaparentfn is a callable that resolves the delta parent for
606 606 a specific revision.
607 607
608 608 builddeltaheader is a callable that constructs the header for a group
609 609 delta.
610 610
611 611 manifestsend is a chunk to send after manifests have been fully emitted.
612 612
613 613 ellipses indicates whether ellipsis serving mode is enabled.
614 614
615 615 bundlecaps is optional and can be used to specify the set of
616 616 capabilities which can be used to build the bundle. While bundlecaps is
617 617 unused in core Mercurial, extensions rely on this feature to communicate
618 618 capabilities to customize the changegroup packer.
619 619
620 620 shallow indicates whether shallow data might be sent. The packer may
621 621 need to pack file contents not introduced by the changes being packed.
622 622
623 623 fullnodes is the set of changelog nodes which should not be ellipsis
624 624 nodes. We store this rather than the set of nodes that should be
625 625 ellipsis because for very large histories we expect this to be
626 626 significantly smaller.
627 627 """
628 628 assert filematcher
629 629 self._filematcher = filematcher
630 630
631 631 self.version = version
632 632 self._deltaparentfn = deltaparentfn
633 633 self._builddeltaheader = builddeltaheader
634 634 self._manifestsend = manifestsend
635 635 self._ellipses = ellipses
636 636
637 637 # Set of capabilities we can use to build the bundle.
638 638 if bundlecaps is None:
639 639 bundlecaps = set()
640 640 self._bundlecaps = bundlecaps
641 641 self._isshallow = shallow
642 642 self._fullclnodes = fullnodes
643 643
644 644 # Maps ellipsis revs to their roots at the changelog level.
645 645 self._precomputedellipsis = ellipsisroots
646 646
647 647 # experimental config: bundle.reorder
648 648 reorder = repo.ui.config('bundle', 'reorder')
649 649 if reorder == 'auto':
650 650 self._reorder = allowreorder
651 651 else:
652 652 self._reorder = stringutil.parsebool(reorder)
653 653
654 654 self._repo = repo
655 655
656 656 if self._repo.ui.verbose and not self._repo.ui.debugflag:
657 657 self._verbosenote = self._repo.ui.note
658 658 else:
659 659 self._verbosenote = lambda s: None
660 660
661 661 # Maps CL revs to per-revlog revisions. Cleared in _close() at
662 662 # the end of each group.
663 663 self._clrevtolocalrev = {}
664 664 self._nextclrevtolocalrev = {}
665 665
666 # Maps changelog nodes to changelog revs. Filled in once
667 # during changelog stage and then left unmodified.
668 self._clnodetorev = {}
669
670 666 def _close(self):
671 667 # Ellipses serving mode.
672 668 self._clrevtolocalrev.clear()
673 669 if self._nextclrevtolocalrev is not None:
674 670 self._clrevtolocalrev = self._nextclrevtolocalrev
675 671 self._nextclrevtolocalrev = None
676 672
677 673 return closechunk()
678 674
679 675 def group(self, revs, store, ischangelog, lookup, units=None):
680 676 """Calculate a delta group, yielding a sequence of changegroup chunks
681 677 (strings).
682 678
683 679 Given a list of changeset revs, return a set of deltas and
684 680 metadata corresponding to nodes. The first delta is
685 681 first parent(revs[0]) -> revs[0], and the receiver is
686 682 guaranteed to have this parent as it has all history before
687 683 these changesets. If the first parent is nullrev, the
688 684 changegroup starts with a full revision.
689 685
690 686 If units is not None, progress detail will be generated; units specifies
691 687 the type of revlog that is touched (changelog, manifest, etc.).
692 688 """
693 689 # if we don't have any revisions touched by these changesets, bail
694 690 if len(revs) == 0:
695 691 yield self._close()
696 692 return
697 693
694 cl = self._repo.changelog
695
698 696 # add the parent of the first rev
699 697 p = store.parentrevs(revs[0])[0]
700 698 revs.insert(0, p)
701 699
702 700 # build deltas
703 701 progress = None
704 702 if units is not None:
705 703 progress = self._repo.ui.makeprogress(_('bundling'), unit=units,
706 704 total=(len(revs) - 1))
707 705 for r in pycompat.xrange(len(revs) - 1):
708 706 if progress:
709 707 progress.update(r + 1)
710 708 prev, curr = revs[r], revs[r + 1]
711 709 linknode = lookup(store.node(curr))
712 710
713 711 if self._ellipses:
714 linkrev = self._clnodetorev[linknode]
712 linkrev = cl.rev(linknode)
715 713 self._clrevtolocalrev[linkrev] = curr
716 714
717 715 # This is a node to send in full, because the changeset it
718 716 # corresponds to was a full changeset.
719 717 if linknode in self._fullclnodes:
720 718 delta = _revisiondeltanormal(store, curr, prev, linknode,
721 719 self._deltaparentfn)
722 720 elif linkrev not in self._precomputedellipsis:
723 721 delta = None
724 722 else:
725 723 delta = self._revisiondeltanarrow(store, ischangelog,
726 724 curr, linkrev, linknode)
727 725 else:
728 726 delta = _revisiondeltanormal(store, curr, prev, linknode,
729 727 self._deltaparentfn)
730 728
731 729 if not delta:
732 730 continue
733 731
734 732 meta = self._builddeltaheader(delta)
735 733 l = len(meta) + sum(len(x) for x in delta.deltachunks)
736 734 yield chunkheader(l)
737 735 yield meta
738 736 for x in delta.deltachunks:
739 737 yield x
740 738
741 739 if progress:
742 740 progress.complete()
743 741 yield self._close()
744 742
745 743 # filter any nodes that claim to be part of the known set
746 744 def _prune(self, store, missing, commonrevs):
747 745 # TODO this violates storage abstraction for manifests.
748 746 if isinstance(store, manifest.manifestrevlog):
749 747 if not self._filematcher.visitdir(store._dir[:-1] or '.'):
750 748 return []
751 749
752 750 rr, rl = store.rev, store.linkrev
753 751 return [n for n in missing if rl(rr(n)) not in commonrevs]
754 752
755 753 def _packmanifests(self, dir, dirlog, revs, lookuplinknode):
756 754 """Pack manifests into a changegroup stream.
757 755
758 756 Encodes the directory name in the output so multiple manifests
759 757 can be sent. Multiple manifests are not supported by cg1 and cg2.
760 758 """
761 759 if dir:
762 760 assert self.version == b'03'
763 761 yield _fileheader(dir)
764 762
765 763 for chunk in self.group(revs, dirlog, False, lookuplinknode,
766 764 units=_('manifests')):
767 765 yield chunk
768 766
769 767 def generate(self, commonrevs, clnodes, fastpathlinkrev, source):
770 768 """Yield a sequence of changegroup byte chunks."""
771 769
772 770 repo = self._repo
773 771 cl = repo.changelog
774 772
775 773 self._verbosenote(_('uncompressed size of bundle content:\n'))
776 774 size = 0
777 775
778 776 clstate, chunks = self._generatechangelog(cl, clnodes)
779 777 for chunk in chunks:
780 778 size += len(chunk)
781 779 yield chunk
782 780
783 781 self._verbosenote(_('%8.i (changelog)\n') % size)
784 782
785 783 clrevorder = clstate['clrevorder']
786 784 mfs = clstate['mfs']
787 785 changedfiles = clstate['changedfiles']
788 786
789 787 # We need to make sure that the linkrev in the changegroup refers to
790 788 # the first changeset that introduced the manifest or file revision.
791 789 # The fastpath is usually safer than the slowpath, because the filelogs
792 790 # are walked in revlog order.
793 791 #
794 792 # When taking the slowpath with reorder=None and the manifest revlog
795 793 # uses generaldelta, the manifest may be walked in the "wrong" order.
796 794 # Without 'clrevorder', we would get an incorrect linkrev (see fix in
797 795 # cc0ff93d0c0c).
798 796 #
799 797 # When taking the fastpath, we are only vulnerable to reordering
800 798 # of the changelog itself. The changelog never uses generaldelta, so
801 799 # it is only reordered when reorder=True. To handle this case, we
802 800 # simply take the slowpath, which already has the 'clrevorder' logic.
803 801 # This was also fixed in cc0ff93d0c0c.
804 802 fastpathlinkrev = fastpathlinkrev and not self._reorder
805 803 # Treemanifests don't work correctly with fastpathlinkrev
806 804 # either, because we don't discover which directory nodes to
807 805 # send along with files. This could probably be fixed.
808 806 fastpathlinkrev = fastpathlinkrev and (
809 807 'treemanifest' not in repo.requirements)
810 808
811 809 fnodes = {} # needed file nodes
812 810
813 811 for chunk in self.generatemanifests(commonrevs, clrevorder,
814 812 fastpathlinkrev, mfs, fnodes, source):
815 813 yield chunk
816 814
817 815 mfdicts = None
818 816 if self._ellipses and self._isshallow:
819 817 mfdicts = [(self._repo.manifestlog[n].read(), lr)
820 818 for (n, lr) in mfs.iteritems()]
821 819
822 820 mfs.clear()
823 821 clrevs = set(cl.rev(x) for x in clnodes)
824 822
825 823 if not fastpathlinkrev:
826 824 def linknodes(unused, fname):
827 825 return fnodes.get(fname, {})
828 826 else:
829 827 cln = cl.node
830 828 def linknodes(filerevlog, fname):
831 829 llr = filerevlog.linkrev
832 830 fln = filerevlog.node
833 831 revs = ((r, llr(r)) for r in filerevlog)
834 832 return dict((fln(r), cln(lr)) for r, lr in revs if lr in clrevs)
835 833
836 834 for chunk in self.generatefiles(changedfiles, linknodes, commonrevs,
837 835 source, mfdicts):
838 836 yield chunk
839 837
840 838 yield self._close()
841 839
842 840 if clnodes:
843 841 repo.hook('outgoing', node=hex(clnodes[0]), source=source)
844 842
845 843 def _generatechangelog(self, cl, nodes):
846 844 """Generate data for changelog chunks.
847 845
848 846 Returns a 2-tuple of a dict containing state and an iterable of
849 847 byte chunks. The state will not be fully populated until the
850 848 chunk stream has been fully consumed.
851 849 """
852 850 clrevorder = {}
853 851 mfs = {} # needed manifests
854 852 mfl = self._repo.manifestlog
855 853 # TODO violates storage abstraction.
856 854 mfrevlog = mfl._revlog
857 855 changedfiles = set()
858 856
859 857 # Callback for the changelog, used to collect changed files and
860 858 # manifest nodes.
861 859 # Returns the linkrev node (identity in the changelog case).
862 860 def lookupcl(x):
863 861 c = cl.read(x)
864 862 clrevorder[x] = len(clrevorder)
865 863
866 864 if self._ellipses:
867 self._clnodetorev[x] = cl.rev(x)
868
869 865 # Only update mfs if x is going to be sent. Otherwise we
870 866 # end up with bogus linkrevs specified for manifests and
871 867 # we skip some manifest nodes that we should otherwise
872 868 # have sent.
873 869 if (x in self._fullclnodes
874 870 or cl.rev(x) in self._precomputedellipsis):
875 871 n = c[0]
876 872 # Record the first changeset introducing this manifest
877 873 # version.
878 874 mfs.setdefault(n, x)
879 875 # Set this narrow-specific dict so we have the lowest
880 876 # manifest revnum to look up for this cl revnum. (Part of
881 877 # mapping changelog ellipsis parents to manifest ellipsis
882 878 # parents)
883 879 self._nextclrevtolocalrev.setdefault(cl.rev(x),
884 880 mfrevlog.rev(n))
885 881 # We can't trust the changed files list in the changeset if the
886 882 # client requested a shallow clone.
887 883 if self._isshallow:
888 884 changedfiles.update(mfl[c[0]].read().keys())
889 885 else:
890 886 changedfiles.update(c[3])
891 887 else:
892 888
893 889 n = c[0]
894 890 # record the first changeset introducing this manifest version
895 891 mfs.setdefault(n, x)
896 892 # Record a complete list of potentially-changed files in
897 893 # this manifest.
898 894 changedfiles.update(c[3])
899 895
900 896 return x
901 897
902 898 # Changelog doesn't benefit from reordering revisions. So send out
903 899 # revisions in store order.
904 900 revs = sorted(cl.rev(n) for n in nodes)
905 901
906 902 state = {
907 903 'clrevorder': clrevorder,
908 904 'mfs': mfs,
909 905 'changedfiles': changedfiles,
910 906 }
911 907
912 908 gen = self.group(revs, cl, True, lookupcl, units=_('changesets'))
913 909
914 910 return state, gen
915 911
916 912 def generatemanifests(self, commonrevs, clrevorder, fastpathlinkrev, mfs,
917 913 fnodes, source):
918 914 """Returns an iterator of changegroup chunks containing manifests.
919 915
920 916 `source` is unused here, but is used by extensions like remotefilelog to
921 917 change what is sent based on pulls vs pushes, etc.
922 918 """
923 919 repo = self._repo
920 cl = repo.changelog
924 921 mfl = repo.manifestlog
925 922 dirlog = mfl._revlog.dirlog
926 923 tmfnodes = {'': mfs}
927 924
928 925 # Callback for the manifest, used to collect linkrevs for filelog
929 926 # revisions.
930 927 # Returns the linkrev node (collected in lookupcl).
931 928 def makelookupmflinknode(dir, nodes):
932 929 if fastpathlinkrev:
933 930 assert not dir
934 931 return mfs.__getitem__
935 932
936 933 def lookupmflinknode(x):
937 934 """Callback for looking up the linknode for manifests.
938 935
939 936 Returns the linkrev node for the specified manifest.
940 937
941 938 SIDE EFFECT:
942 939
943 940 1) fclnodes gets populated with the list of relevant
944 941 file nodes if we're not using fastpathlinkrev
945 942 2) When treemanifests are in use, collects treemanifest nodes
946 943 to send
947 944
948 945 Note that this means manifests must be completely sent to
949 946 the client before you can trust the list of files and
950 947 treemanifests to send.
951 948 """
952 949 clnode = nodes[x]
953 950 mdata = mfl.get(dir, x).readfast(shallow=True)
954 951 for p, n, fl in mdata.iterentries():
955 952 if fl == 't': # subdirectory manifest
956 953 subdir = dir + p + '/'
957 954 tmfclnodes = tmfnodes.setdefault(subdir, {})
958 955 tmfclnode = tmfclnodes.setdefault(n, clnode)
959 956 if clrevorder[clnode] < clrevorder[tmfclnode]:
960 957 tmfclnodes[n] = clnode
961 958 else:
962 959 f = dir + p
963 960 fclnodes = fnodes.setdefault(f, {})
964 961 fclnode = fclnodes.setdefault(n, clnode)
965 962 if clrevorder[clnode] < clrevorder[fclnode]:
966 963 fclnodes[n] = clnode
967 964 return clnode
968 965 return lookupmflinknode
969 966
970 967 size = 0
971 968 while tmfnodes:
972 969 dir, nodes = tmfnodes.popitem()
973 970 store = dirlog(dir)
974 971 prunednodes = self._prune(store, nodes, commonrevs)
975 972 if not dir or prunednodes:
976 973 lookupfn = makelookupmflinknode(dir, nodes)
977 974
978 975 if self._ellipses:
979 revs = _sortnodesellipsis(store, prunednodes,
980 self._clnodetorev, lookupfn)
976 revs = _sortnodesellipsis(store, prunednodes, cl,
977 lookupfn)
981 978 else:
982 979 revs = _sortnodesnormal(store, prunednodes,
983 980 self._reorder)
984 981
985 982 for x in self._packmanifests(dir, store, revs, lookupfn):
986 983 size += len(x)
987 984 yield x
988 985 self._verbosenote(_('%8.i (manifests)\n') % size)
989 986 yield self._manifestsend
990 987
991 988 # The 'source' parameter is useful for extensions
992 989 def generatefiles(self, changedfiles, linknodes, commonrevs, source,
993 990 mfdicts):
994 991 changedfiles = list(filter(self._filematcher, changedfiles))
995 992
996 993 if self._isshallow:
997 994 # In a shallow clone, the linknodes callback needs to also include
998 995 # those file nodes that are in the manifests we sent but weren't
999 996 # introduced by those manifests.
1000 997 commonctxs = [self._repo[c] for c in commonrevs]
1001 998 oldlinknodes = linknodes
1002 999 clrev = self._repo.changelog.rev
1003 1000
1004 1001 # Defining this function has a side-effect of overriding the
1005 1002 # function of the same name that was passed in as an argument.
1006 1003 # TODO have caller pass in appropriate function.
1007 1004 def linknodes(flog, fname):
1008 1005 for c in commonctxs:
1009 1006 try:
1010 1007 fnode = c.filenode(fname)
1011 1008 self._clrevtolocalrev[c.rev()] = flog.rev(fnode)
1012 1009 except error.ManifestLookupError:
1013 1010 pass
1014 1011 links = oldlinknodes(flog, fname)
1015 1012 if len(links) != len(mfdicts):
1016 1013 for mf, lr in mfdicts:
1017 1014 fnode = mf.get(fname, None)
1018 1015 if fnode in links:
1019 1016 links[fnode] = min(links[fnode], lr, key=clrev)
1020 1017 elif fnode:
1021 1018 links[fnode] = lr
1022 1019 return links
1023 1020
1024 1021 return self._generatefiles(changedfiles, linknodes, commonrevs, source)
1025 1022
1026 1023 def _generatefiles(self, changedfiles, linknodes, commonrevs, source):
1027 1024 repo = self._repo
1025 cl = repo.changelog
1028 1026 progress = repo.ui.makeprogress(_('bundling'), unit=_('files'),
1029 1027 total=len(changedfiles))
1030 1028 for i, fname in enumerate(sorted(changedfiles)):
1031 1029 filerevlog = repo.file(fname)
1032 1030 if not filerevlog:
1033 1031 raise error.Abort(_("empty or missing file data for %s") %
1034 1032 fname)
1035 1033
1036 1034 linkrevnodes = linknodes(filerevlog, fname)
1037 1035 # Lookup table for filenodes; we collected the linkrev nodes above in
1038 1036 # the fastpath case and with lookupmf in the slowpath case.
1039 1037 def lookupfilelog(x):
1040 1038 return linkrevnodes[x]
1041 1039
1042 1040 filenodes = self._prune(filerevlog, linkrevnodes, commonrevs)
1043 1041 if filenodes:
1044 1042 if self._ellipses:
1045 1043 revs = _sortnodesellipsis(filerevlog, filenodes,
1046 self._clnodetorev, lookupfilelog)
1044 cl, lookupfilelog)
1047 1045 else:
1048 1046 revs = _sortnodesnormal(filerevlog, filenodes,
1049 1047 self._reorder)
1050 1048
1051 1049 progress.update(i + 1, item=fname)
1052 1050 h = _fileheader(fname)
1053 1051 size = len(h)
1054 1052 yield h
1055 1053 for chunk in self.group(revs, filerevlog, False, lookupfilelog):
1056 1054 size += len(chunk)
1057 1055 yield chunk
1058 1056 self._verbosenote(_('%8.i %s\n') % (size, fname))
1059 1057 progress.complete()
1060 1058
1061 1059 def _revisiondeltanarrow(self, store, ischangelog, rev, linkrev, linknode):
1062 1060 linkparents = self._precomputedellipsis[linkrev]
1063 1061 def local(clrev):
1064 1062 """Turn a changelog revnum into a local revnum.
1065 1063
1066 1064 The ellipsis dag is stored as revnums on the changelog,
1067 1065 but when we're producing ellipsis entries for
1068 1066 non-changelog revlogs, we need to turn those numbers into
1069 1067 something local. This does that for us, and during the
1070 1068 changelog sending phase will also expand the stored
1071 1069 mappings as needed.
1072 1070 """
1073 1071 if clrev == nullrev:
1074 1072 return nullrev
1075 1073
1076 1074 if ischangelog:
1077 # If we're doing the changelog, it's possible that we
1078 # have a parent that is already on the client, and we
1079 # need to store some extra mapping information so that
1080 # our contained ellipsis nodes will be able to resolve
1081 # their parents.
1082 if clrev not in self._clrevtolocalrev:
1083 clnode = store.node(clrev)
1084 self._clnodetorev[clnode] = clrev
1085 1075 return clrev
1086 1076
1087 1077 # Walk the ellipsis-ized changelog breadth-first looking for a
1088 1078 # change that has been linked from the current revlog.
1089 1079 #
1090 1080 # For a flat manifest revlog only a single step should be necessary
1091 1081 # as all relevant changelog entries are relevant to the flat
1092 1082 # manifest.
1093 1083 #
1094 1084 # For a filelog or tree manifest dirlog however not every changelog
1095 1085 # entry will have been relevant, so we need to skip some changelog
1096 1086 # nodes even after ellipsis-izing.
1097 1087 walk = [clrev]
1098 1088 while walk:
1099 1089 p = walk[0]
1100 1090 walk = walk[1:]
1101 1091 if p in self._clrevtolocalrev:
1102 1092 return self._clrevtolocalrev[p]
1103 1093 elif p in self._fullclnodes:
1104 1094 walk.extend([pp for pp in self._repo.changelog.parentrevs(p)
1105 1095 if pp != nullrev])
1106 1096 elif p in self._precomputedellipsis:
1107 1097 walk.extend([pp for pp in self._precomputedellipsis[p]
1108 1098 if pp != nullrev])
1109 1099 else:
1110 1100 # In this case, we've got an ellipsis with parents
1111 1101 # outside the current bundle (likely an
1112 1102 # incremental pull). We "know" that we can use the
1113 1103 # value of this same revlog at whatever revision
1114 1104 # is pointed to by linknode. "Know" is in scare
1115 1105 # quotes because I haven't done enough examination
1116 1106 # of edge cases to convince myself this is really
1117 1107 # a fact - it works for all the (admittedly
1118 1108 # thorough) cases in our testsuite, but I would be
1119 1109 # somewhat unsurprised to find a case in the wild
1120 1110 # where this breaks down a bit. That said, I don't
1121 1111 # know if it would hurt anything.
1122 1112 for i in pycompat.xrange(rev, 0, -1):
1123 1113 if store.linkrev(i) == clrev:
1124 1114 return i
1125 1115 # We failed to resolve a parent for this node, so
1126 1116 # we crash the changegroup construction.
1127 1117 raise error.Abort(
1128 1118 'unable to resolve parent while packing %r %r'
1129 1119 ' for changeset %r' % (store.indexfile, rev, clrev))
1130 1120
1131 1121 return nullrev
1132 1122
1133 1123 if not linkparents or (
1134 1124 store.parentrevs(rev) == (nullrev, nullrev)):
1135 1125 p1, p2 = nullrev, nullrev
1136 1126 elif len(linkparents) == 1:
1137 1127 p1, = sorted(local(p) for p in linkparents)
1138 1128 p2 = nullrev
1139 1129 else:
1140 1130 p1, p2 = sorted(local(p) for p in linkparents)
1141 1131
1142 1132 n = store.node(rev)
1143 1133 p1n, p2n = store.node(p1), store.node(p2)
1144 1134 flags = store.flags(rev)
1145 1135 flags |= revlog.REVIDX_ELLIPSIS
1146 1136
1147 1137 # TODO: try and actually send deltas for ellipsis data blocks
1148 1138 data = store.revision(n)
1149 1139 diffheader = mdiff.trivialdiffheader(len(data))
1150 1140
1151 1141 return revisiondelta(
1152 1142 node=n,
1153 1143 p1node=p1n,
1154 1144 p2node=p2n,
1155 1145 basenode=nullid,
1156 1146 linknode=linknode,
1157 1147 flags=flags,
1158 1148 deltachunks=(diffheader, data),
1159 1149 )
1160 1150
1161 1151 def _deltaparentprev(store, rev, p1, p2, prev):
1162 1152 """Resolve a delta parent to the previous revision.
1163 1153
1164 1154 Used for version 1 changegroups, which don't support generaldelta.
1165 1155 """
1166 1156 return prev
1167 1157
1168 1158 def _deltaparentgeneraldelta(store, rev, p1, p2, prev):
1169 1159 """Resolve a delta parent when general deltas are supported."""
1170 1160 dp = store.deltaparent(rev)
1171 1161 if dp == nullrev and store.storedeltachains:
1172 1162 # Avoid sending full revisions when delta parent is null. Pick prev
1173 1163 # in that case. It's tempting to pick p1 in this case, as p1 will
1174 1164 # be smaller in the common case. However, computing a delta against
1175 1165 # p1 may require resolving the raw text of p1, which could be
1176 1166 # expensive. The revlog caches should have prev cached, meaning
1177 1167 # less CPU for changegroup generation. There is likely room to add
1178 1168 # a flag and/or config option to control this behavior.
1179 1169 base = prev
1180 1170 elif dp == nullrev:
1181 1171 # revlog is configured to use full snapshot for a reason,
1182 1172 # stick to full snapshot.
1183 1173 base = nullrev
1184 1174 elif dp not in (p1, p2, prev):
1185 1175 # Pick prev when we can't be sure remote has the base revision.
1186 1176 return prev
1187 1177 else:
1188 1178 base = dp
1189 1179
1190 1180 if base != nullrev and not store.candelta(base, rev):
1191 1181 base = nullrev
1192 1182
1193 1183 return base
1194 1184
1195 1185 def _deltaparentellipses(store, rev, p1, p2, prev):
1196 1186 """Resolve a delta parent when in ellipses mode."""
1197 1187 # TODO: send better deltas when in narrow mode.
1198 1188 #
1199 1189 # changegroup.group() loops over revisions to send,
1200 1190 # including revisions we'll skip. What this means is that
1201 1191 # `prev` will be a potentially useless delta base for all
1202 1192 # ellipsis nodes, as the client likely won't have it. In
1203 1193 # the future we should do bookkeeping about which nodes
1204 1194 # have been sent to the client, and try to be
1205 1195 # significantly smarter about delta bases. This is
1206 1196 # slightly tricky because this same code has to work for
1207 1197 # all revlogs, and we don't have the linkrev/linknode here.
1208 1198 return p1
1209 1199
1210 1200 def _makecg1packer(repo, filematcher, bundlecaps, ellipses=False,
1211 1201 shallow=False, ellipsisroots=None, fullnodes=None):
1212 1202 builddeltaheader = lambda d: _CHANGEGROUPV1_DELTA_HEADER.pack(
1213 1203 d.node, d.p1node, d.p2node, d.linknode)
1214 1204
1215 1205 return cgpacker(repo, filematcher, b'01',
1216 1206 deltaparentfn=_deltaparentprev,
1217 1207 allowreorder=None,
1218 1208 builddeltaheader=builddeltaheader,
1219 1209 manifestsend=b'',
1220 1210 bundlecaps=bundlecaps,
1221 1211 ellipses=ellipses,
1222 1212 shallow=shallow,
1223 1213 ellipsisroots=ellipsisroots,
1224 1214 fullnodes=fullnodes)
1225 1215
1226 1216 def _makecg2packer(repo, filematcher, bundlecaps, ellipses=False,
1227 1217 shallow=False, ellipsisroots=None, fullnodes=None):
1228 1218 builddeltaheader = lambda d: _CHANGEGROUPV2_DELTA_HEADER.pack(
1229 1219 d.node, d.p1node, d.p2node, d.basenode, d.linknode)
1230 1220
1231 1221 # Since generaldelta is directly supported by cg2, reordering
1232 1222 # generally doesn't help, so we disable it by default (treating
1233 1223 # bundle.reorder=auto just like bundle.reorder=False).
1234 1224 return cgpacker(repo, filematcher, b'02',
1235 1225 deltaparentfn=_deltaparentgeneraldelta,
1236 1226 allowreorder=False,
1237 1227 builddeltaheader=builddeltaheader,
1238 1228 manifestsend=b'',
1239 1229 bundlecaps=bundlecaps,
1240 1230 ellipses=ellipses,
1241 1231 shallow=shallow,
1242 1232 ellipsisroots=ellipsisroots,
1243 1233 fullnodes=fullnodes)
1244 1234
1245 1235 def _makecg3packer(repo, filematcher, bundlecaps, ellipses=False,
1246 1236 shallow=False, ellipsisroots=None, fullnodes=None):
1247 1237 builddeltaheader = lambda d: _CHANGEGROUPV3_DELTA_HEADER.pack(
1248 1238 d.node, d.p1node, d.p2node, d.basenode, d.linknode, d.flags)
1249 1239
1250 1240 deltaparentfn = (_deltaparentellipses if ellipses
1251 1241 else _deltaparentgeneraldelta)
1252 1242
1253 1243 return cgpacker(repo, filematcher, b'03',
1254 1244 deltaparentfn=deltaparentfn,
1255 1245 allowreorder=False,
1256 1246 builddeltaheader=builddeltaheader,
1257 1247 manifestsend=closechunk(),
1258 1248 bundlecaps=bundlecaps,
1259 1249 ellipses=ellipses,
1260 1250 shallow=shallow,
1261 1251 ellipsisroots=ellipsisroots,
1262 1252 fullnodes=fullnodes)
1263 1253
1264 1254 _packermap = {'01': (_makecg1packer, cg1unpacker),
1265 1255 # cg2 adds support for exchanging generaldelta
1266 1256 '02': (_makecg2packer, cg2unpacker),
1267 1257 # cg3 adds support for exchanging revlog flags and treemanifests
1268 1258 '03': (_makecg3packer, cg3unpacker),
1269 1259 }
1270 1260
1271 1261 def allsupportedversions(repo):
1272 1262 versions = set(_packermap.keys())
1273 1263 if not (repo.ui.configbool('experimental', 'changegroup3') or
1274 1264 repo.ui.configbool('experimental', 'treemanifest') or
1275 1265 'treemanifest' in repo.requirements):
1276 1266 versions.discard('03')
1277 1267 return versions
1278 1268
1279 1269 # Changegroup versions that can be applied to the repo
1280 1270 def supportedincomingversions(repo):
1281 1271 return allsupportedversions(repo)
1282 1272
1283 1273 # Changegroup versions that can be created from the repo
1284 1274 def supportedoutgoingversions(repo):
1285 1275 versions = allsupportedversions(repo)
1286 1276 if 'treemanifest' in repo.requirements:
1287 1277 # Versions 01 and 02 support only flat manifests and it's just too
1288 1278 # expensive to convert between the flat manifest and tree manifest on
1289 1279 # the fly. Since tree manifests are hashed differently, all of history
1290 1280 # would have to be converted. Instead, we simply don't even pretend to
1291 1281 # support versions 01 and 02.
1292 1282 versions.discard('01')
1293 1283 versions.discard('02')
1294 1284 if repository.NARROW_REQUIREMENT in repo.requirements:
1295 1285 # Versions 01 and 02 don't support revlog flags, and we need to
1296 1286 # support that for stripping and unbundling to work.
1297 1287 versions.discard('01')
1298 1288 versions.discard('02')
1299 1289 if LFS_REQUIREMENT in repo.requirements:
1300 1290 # Versions 01 and 02 don't support revlog flags, and we need to
1301 1291 # mark LFS entries with REVIDX_EXTSTORED.
1302 1292 versions.discard('01')
1303 1293 versions.discard('02')
1304 1294
1305 1295 return versions
1306 1296
1307 1297 def localversion(repo):
1308 1298 # Finds the best version to use for bundles that are meant to be used
1309 1299 # locally, such as those from strip and shelve, and temporary bundles.
1310 1300 return max(supportedoutgoingversions(repo))
1311 1301
1312 1302 def safeversion(repo):
1313 1303 # Finds the smallest version that it's safe to assume clients of the repo
1314 1304 # will support. For example, all hg versions that support generaldelta also
1315 1305 # support changegroup 02.
1316 1306 versions = supportedoutgoingversions(repo)
1317 1307 if 'generaldelta' in repo.requirements:
1318 1308 versions.discard('01')
1319 1309 assert versions
1320 1310 return min(versions)
1321 1311
1322 1312 def getbundler(version, repo, bundlecaps=None, filematcher=None,
1323 1313 ellipses=False, shallow=False, ellipsisroots=None,
1324 1314 fullnodes=None):
1325 1315 assert version in supportedoutgoingversions(repo)
1326 1316
1327 1317 if filematcher is None:
1328 1318 filematcher = matchmod.alwaysmatcher(repo.root, '')
1329 1319
1330 1320 if version == '01' and not filematcher.always():
1331 1321 raise error.ProgrammingError('version 01 changegroups do not support '
1332 1322 'sparse file matchers')
1333 1323
1334 1324 if ellipses and version in (b'01', b'02'):
1335 1325 raise error.Abort(
1336 1326 _('ellipsis nodes require at least cg3 on client and server, '
1337 1327 'but negotiated version %s') % version)
1338 1328
1339 1329 # Requested files could include files not in the local store. So
1340 1330 # filter those out.
1341 1331 filematcher = matchmod.intersectmatchers(repo.narrowmatch(),
1342 1332 filematcher)
1343 1333
1344 1334 fn = _packermap[version][0]
1345 1335 return fn(repo, filematcher, bundlecaps, ellipses=ellipses,
1346 1336 shallow=shallow, ellipsisroots=ellipsisroots,
1347 1337 fullnodes=fullnodes)
1348 1338
1349 1339 def getunbundler(version, fh, alg, extras=None):
1350 1340 return _packermap[version][1](fh, alg, extras=extras)
1351 1341
1352 1342 def _changegroupinfo(repo, nodes, source):
1353 1343 if repo.ui.verbose or source == 'bundle':
1354 1344 repo.ui.status(_("%d changesets found\n") % len(nodes))
1355 1345 if repo.ui.debugflag:
1356 1346 repo.ui.debug("list of changesets:\n")
1357 1347 for node in nodes:
1358 1348 repo.ui.debug("%s\n" % hex(node))
1359 1349
1360 1350 def makechangegroup(repo, outgoing, version, source, fastpath=False,
1361 1351 bundlecaps=None):
1362 1352 cgstream = makestream(repo, outgoing, version, source,
1363 1353 fastpath=fastpath, bundlecaps=bundlecaps)
1364 1354 return getunbundler(version, util.chunkbuffer(cgstream), None,
1365 1355 {'clcount': len(outgoing.missing) })
1366 1356
1367 1357 def makestream(repo, outgoing, version, source, fastpath=False,
1368 1358 bundlecaps=None, filematcher=None):
1369 1359 bundler = getbundler(version, repo, bundlecaps=bundlecaps,
1370 1360 filematcher=filematcher)
1371 1361
1372 1362 repo = repo.unfiltered()
1373 1363 commonrevs = outgoing.common
1374 1364 csets = outgoing.missing
1375 1365 heads = outgoing.missingheads
1376 1366 # We go through the fast path if we get told to, or if all (unfiltered)
1377 1367 # heads have been requested (since we then know that all linkrevs will
1378 1368 # be pulled by the client).
1379 1369 heads.sort()
1380 1370 fastpathlinkrev = fastpath or (
1381 1371 repo.filtername is None and heads == sorted(repo.heads()))
1382 1372
1383 1373 repo.hook('preoutgoing', throw=True, source=source)
1384 1374 _changegroupinfo(repo, csets, source)
1385 1375 return bundler.generate(commonrevs, csets, fastpathlinkrev, source)
1386 1376
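# Hedged end-to-end sketch (not part of this changeset; argument values are
# illustrative and the discovery.outgoing construction is an assumption):
# the helpers above compose into "write every changeset to a bundle file"
# roughly as
#
#   from mercurial import discovery
#   from mercurial.node import nullid
#
#   out = discovery.outgoing(repo, commonheads=[nullid],
#                            missingheads=repo.heads())
#   stream = makestream(repo, out, '02', 'bundle')
#   writechunks(repo.ui, stream, 'all.hg')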
1387 1377 def _addchangegroupfiles(repo, source, revmap, trp, expectedfiles, needfiles):
1388 1378 revisions = 0
1389 1379 files = 0
1390 1380 progress = repo.ui.makeprogress(_('files'), unit=_('files'),
1391 1381 total=expectedfiles)
1392 1382 for chunkdata in iter(source.filelogheader, {}):
1393 1383 files += 1
1394 1384 f = chunkdata["filename"]
1395 1385 repo.ui.debug("adding %s revisions\n" % f)
1396 1386 progress.increment()
1397 1387 fl = repo.file(f)
1398 1388 o = len(fl)
1399 1389 try:
1400 1390 deltas = source.deltaiter()
1401 1391 if not fl.addgroup(deltas, revmap, trp):
1402 1392 raise error.Abort(_("received file revlog group is empty"))
1403 1393 except error.CensoredBaseError as e:
1404 1394 raise error.Abort(_("received delta base is censored: %s") % e)
1405 1395 revisions += len(fl) - o
1406 1396 if f in needfiles:
1407 1397 needs = needfiles[f]
1408 1398 for new in pycompat.xrange(o, len(fl)):
1409 1399 n = fl.node(new)
1410 1400 if n in needs:
1411 1401 needs.remove(n)
1412 1402 else:
1413 1403 raise error.Abort(
1414 1404 _("received spurious file revlog entry"))
1415 1405 if not needs:
1416 1406 del needfiles[f]
1417 1407 progress.complete()
1418 1408
1419 1409 for f, needs in needfiles.iteritems():
1420 1410 fl = repo.file(f)
1421 1411 for n in needs:
1422 1412 try:
1423 1413 fl.rev(n)
1424 1414 except error.LookupError:
1425 1415 raise error.Abort(
1426 1416 _('missing file data for %s:%s - run hg verify') %
1427 1417 (f, hex(n)))
1428 1418
1429 1419 return revisions, files