changegroup: populate _clnodetorev as part of changelog linknode lookup...
Gregory Szorc
r39030:60760535 default
@@ -1,1436 +1,1431 b''
1 1 # changegroup.py - Mercurial changegroup manipulation functions
2 2 #
3 3 # Copyright 2006 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import os
11 11 import struct
12 12 import weakref
13 13
14 14 from .i18n import _
15 15 from .node import (
16 16 hex,
17 17 nullid,
18 18 nullrev,
19 19 short,
20 20 )
21 21
22 22 from .thirdparty import (
23 23 attr,
24 24 )
25 25
26 26 from . import (
27 27 dagutil,
28 28 error,
29 29 manifest,
30 30 match as matchmod,
31 31 mdiff,
32 32 phases,
33 33 pycompat,
34 34 repository,
35 35 revlog,
36 36 util,
37 37 )
38 38
39 39 from .utils import (
40 40 stringutil,
41 41 )
42 42
43 43 _CHANGEGROUPV1_DELTA_HEADER = struct.Struct("20s20s20s20s")
44 44 _CHANGEGROUPV2_DELTA_HEADER = struct.Struct("20s20s20s20s20s")
45 45 _CHANGEGROUPV3_DELTA_HEADER = struct.Struct(">20s20s20s20s20sH")
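# The formats above imply fixed header sizes: cg1 packs four 20-byte
# nodes, cg2 adds the 20-byte delta base node, and cg3 appends a 2-byte
# big-endian flags field. A hedged sanity check (editorial, not part of
# the module):
#
#     assert _CHANGEGROUPV1_DELTA_HEADER.size == 80
#     assert _CHANGEGROUPV2_DELTA_HEADER.size == 100
#     assert _CHANGEGROUPV3_DELTA_HEADER.size == 102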
46 46
47 47 LFS_REQUIREMENT = 'lfs'
48 48
49 49 readexactly = util.readexactly
50 50
51 51 def getchunk(stream):
52 52 """return the next chunk from stream as a string"""
53 53 d = readexactly(stream, 4)
54 54 l = struct.unpack(">l", d)[0]
55 55 if l <= 4:
56 56 if l:
57 57 raise error.Abort(_("invalid chunk length %d") % l)
58 58 return ""
59 59 return readexactly(stream, l - 4)
60 60
61 61 def chunkheader(length):
62 62 """return a changegroup chunk header (string)"""
63 63 return struct.pack(">l", length + 4)
64 64
65 65 def closechunk():
66 66 """return a changegroup chunk header (string) for a zero-length chunk"""
67 67 return struct.pack(">l", 0)
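# How this framing round-trips, as a hedged, self-contained sketch using
# only the stdlib (``frame`` is a hypothetical helper, not part of this
# module): each chunk is a 4-byte big-endian length that counts itself,
# and a zero-length header terminates a group, as in closechunk().
#
#     import io, struct
#
#     def frame(payloads):
#         out = b''.join(struct.pack(">l", len(p) + 4) + p
#                        for p in payloads)
#         return out + struct.pack(">l", 0)      # terminator
#
#     stream = io.BytesIO(frame([b'first', b'second']))
#     chunks = []
#     while True:
#         l = struct.unpack(">l", stream.read(4))[0]
#         if l <= 4:
#             break                              # empty chunk: group ends
#         chunks.append(stream.read(l - 4))
#     assert chunks == [b'first', b'second']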
68 68
69 69 def _fileheader(path):
70 70 """Obtain a changegroup chunk header for a named path."""
71 71 return chunkheader(len(path)) + path
72 72
73 73 def writechunks(ui, chunks, filename, vfs=None):
74 74 """Write chunks to a file and return its filename.
75 75
76 76 The stream is assumed to be a bundle file.
77 77 Existing files will not be overwritten.
78 78 If no filename is specified, a temporary file is created.
79 79 """
80 80 fh = None
81 81 cleanup = None
82 82 try:
83 83 if filename:
84 84 if vfs:
85 85 fh = vfs.open(filename, "wb")
86 86 else:
87 87 # Increase default buffer size because default is usually
88 88 # small (4k is common on Linux).
89 89 fh = open(filename, "wb", 131072)
90 90 else:
91 91 fd, filename = pycompat.mkstemp(prefix="hg-bundle-", suffix=".hg")
92 92 fh = os.fdopen(fd, r"wb")
93 93 cleanup = filename
94 94 for c in chunks:
95 95 fh.write(c)
96 96 cleanup = None
97 97 return filename
98 98 finally:
99 99 if fh is not None:
100 100 fh.close()
101 101 if cleanup is not None:
102 102 if filename and vfs:
103 103 vfs.unlink(cleanup)
104 104 else:
105 105 os.unlink(cleanup)
106 106
107 107 class cg1unpacker(object):
108 108 """Unpacker for cg1 changegroup streams.
109 109
110 110 A changegroup unpacker handles the framing of the revision data in
111 111 the wire format. Most consumers will want to use the apply()
112 112 method to add the changes from the changegroup to a repository.
113 113
114 114 If you're forwarding a changegroup unmodified to another consumer,
115 115 use getchunks(), which returns an iterator of changegroup
116 116 chunks. This is mostly useful for cases where you need to know the
117 117 data stream has ended by observing the end of the changegroup.
118 118
119 119 deltachunk() is useful only if you're applying delta data. Most
120 120 consumers should prefer apply() instead.
121 121
122 122 A few other public methods exist. Those are used only for
123 123 bundlerepo and some debug commands - their use is discouraged.
124 124 """
125 125 deltaheader = _CHANGEGROUPV1_DELTA_HEADER
126 126 deltaheadersize = deltaheader.size
127 127 version = '01'
128 128 _grouplistcount = 1 # One list of files after the manifests
129 129
130 130 def __init__(self, fh, alg, extras=None):
131 131 if alg is None:
132 132 alg = 'UN'
133 133 if alg not in util.compengines.supportedbundletypes:
134 134 raise error.Abort(_('unknown stream compression type: %s')
135 135 % alg)
136 136 if alg == 'BZ':
137 137 alg = '_truncatedBZ'
138 138
139 139 compengine = util.compengines.forbundletype(alg)
140 140 self._stream = compengine.decompressorreader(fh)
141 141 self._type = alg
142 142 self.extras = extras or {}
143 143 self.callback = None
144 144
145 145 # These methods (compressed, read, seek, tell) all appear to only
146 146 # be used by bundlerepo, but it's a little hard to tell.
147 147 def compressed(self):
148 148 return self._type is not None and self._type != 'UN'
149 149 def read(self, l):
150 150 return self._stream.read(l)
151 151 def seek(self, pos):
152 152 return self._stream.seek(pos)
153 153 def tell(self):
154 154 return self._stream.tell()
155 155 def close(self):
156 156 return self._stream.close()
157 157
158 158 def _chunklength(self):
159 159 d = readexactly(self._stream, 4)
160 160 l = struct.unpack(">l", d)[0]
161 161 if l <= 4:
162 162 if l:
163 163 raise error.Abort(_("invalid chunk length %d") % l)
164 164 return 0
165 165 if self.callback:
166 166 self.callback()
167 167 return l - 4
168 168
169 169 def changelogheader(self):
170 170 """v10 does not have a changelog header chunk"""
171 171 return {}
172 172
173 173 def manifestheader(self):
174 174 """v10 does not have a manifest header chunk"""
175 175 return {}
176 176
177 177 def filelogheader(self):
178 178 """return the header of the filelogs chunk, v10 only has the filename"""
179 179 l = self._chunklength()
180 180 if not l:
181 181 return {}
182 182 fname = readexactly(self._stream, l)
183 183 return {'filename': fname}
184 184
185 185 def _deltaheader(self, headertuple, prevnode):
186 186 node, p1, p2, cs = headertuple
187 187 if prevnode is None:
188 188 deltabase = p1
189 189 else:
190 190 deltabase = prevnode
191 191 flags = 0
192 192 return node, p1, p2, deltabase, cs, flags
193 193
194 194 def deltachunk(self, prevnode):
195 195 l = self._chunklength()
196 196 if not l:
197 197 return {}
198 198 headerdata = readexactly(self._stream, self.deltaheadersize)
199 199 header = self.deltaheader.unpack(headerdata)
200 200 delta = readexactly(self._stream, l - self.deltaheadersize)
201 201 node, p1, p2, deltabase, cs, flags = self._deltaheader(header, prevnode)
202 202 return (node, p1, p2, cs, deltabase, delta, flags)
203 203
204 204 def getchunks(self):
205 205 """returns all the chunks contains in the bundle
206 206
207 207 Used when you need to forward the binary stream to a file or another
208 208 network API. To do so, it parse the changegroup data, otherwise it will
209 209 block in case of sshrepo because it don't know the end of the stream.
210 210 """
211 211 # For changegroup 1 and 2, we expect 3 parts: changelog, manifestlog,
212 212 # and a list of filelogs. For changegroup 3, we expect 4 parts:
213 213 # changelog, manifestlog, a list of tree manifestlogs, and a list of
214 214 # filelogs.
215 215 #
216 216 # Changelog and manifestlog parts are terminated with empty chunks. The
217 217 # tree and file parts are a list of entry sections. Each entry section
218 218 # is a series of chunks terminating in an empty chunk. The list of these
219 219 # entry sections is terminated in yet another empty chunk, so we know
220 220 # we've reached the end of the tree/file list when we reach an empty
221 221 # chunk that was preceded by no non-empty chunks.
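        # A hedged illustration of a cg3 stream carrying one changeset, one
        # tree-manifest directory and one file (each <empty> is a
        # zero-length chunk):
        #
        #   <cl chunk> <empty>                   changelog part
        #   <mf chunk> <empty>                   manifestlog part
        #   <dir name> <chunks> <empty> <empty>  tree entry, then list end
        #   <file name> <chunks> <empty> <empty> file entry, then list end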
222 222
223 223 parts = 0
224 224 while parts < 2 + self._grouplistcount:
225 225 noentries = True
226 226 while True:
227 227 chunk = getchunk(self)
228 228 if not chunk:
229 229 # The first two empty chunks represent the end of the
230 230 # changelog and the manifestlog portions. The remaining
231 231 # empty chunks represent either A) the end of individual
232 232 # tree or file entries in the file list, or B) the end of
233 233 # the entire list. It's the end of the entire list if there
234 234 # were no entries (i.e. noentries is True).
235 235 if parts < 2:
236 236 parts += 1
237 237 elif noentries:
238 238 parts += 1
239 239 break
240 240 noentries = False
241 241 yield chunkheader(len(chunk))
242 242 pos = 0
243 243 while pos < len(chunk):
244 244 next = pos + 2**20
245 245 yield chunk[pos:next]
246 246 pos = next
247 247 yield closechunk()
248 248
249 249 def _unpackmanifests(self, repo, revmap, trp, prog):
250 250 self.callback = prog.increment
251 251 # no need to check for empty manifest group here:
252 252 # if the result of the merge of 1 and 2 is the same in 3 and 4,
253 253 # no new manifest will be created and the manifest group will
254 254 # be empty during the pull
255 255 self.manifestheader()
256 256 deltas = self.deltaiter()
257 257 repo.manifestlog.addgroup(deltas, revmap, trp)
258 258 prog.complete()
259 259 self.callback = None
260 260
261 261 def apply(self, repo, tr, srctype, url, targetphase=phases.draft,
262 262 expectedtotal=None):
263 263 """Add the changegroup returned by source.read() to this repo.
264 264 srctype is a string like 'push', 'pull', or 'unbundle'. url is
265 265 the URL of the repo where this changegroup is coming from.
266 266
267 267 Return an integer summarizing the change to this repo:
268 268 - nothing changed or no source: 0
269 269 - more heads than before: 1+added heads (2..n)
270 270 - fewer heads than before: -1-removed heads (-2..-n)
271 271 - number of heads stays the same: 1
272 272 """
273 273 repo = repo.unfiltered()
274 274 def csmap(x):
275 275 repo.ui.debug("add changeset %s\n" % short(x))
276 276 return len(cl)
277 277
278 278 def revmap(x):
279 279 return cl.rev(x)
280 280
281 281 changesets = files = revisions = 0
282 282
283 283 try:
284 284 # The transaction may already carry source information. In this
285 285 # case we use the top level data. We overwrite the argument
286 286 # because we need to use the top level value (if they exist)
287 287 # in this function.
288 288 srctype = tr.hookargs.setdefault('source', srctype)
289 289 url = tr.hookargs.setdefault('url', url)
290 290 repo.hook('prechangegroup',
291 291 throw=True, **pycompat.strkwargs(tr.hookargs))
292 292
293 293 # write changelog data to temp files so concurrent readers
294 294 # will not see an inconsistent view
295 295 cl = repo.changelog
296 296 cl.delayupdate(tr)
297 297 oldheads = set(cl.heads())
298 298
299 299 trp = weakref.proxy(tr)
300 300 # pull off the changeset group
301 301 repo.ui.status(_("adding changesets\n"))
302 302 clstart = len(cl)
303 303 progress = repo.ui.makeprogress(_('changesets'), unit=_('chunks'),
304 304 total=expectedtotal)
305 305 self.callback = progress.increment
306 306
307 307 efiles = set()
308 308 def onchangelog(cl, node):
309 309 efiles.update(cl.readfiles(node))
310 310
311 311 self.changelogheader()
312 312 deltas = self.deltaiter()
313 313 cgnodes = cl.addgroup(deltas, csmap, trp, addrevisioncb=onchangelog)
314 314 efiles = len(efiles)
315 315
316 316 if not cgnodes:
317 317 repo.ui.develwarn('applied empty changegroup',
318 318 config='warn-empty-changegroup')
319 319 clend = len(cl)
320 320 changesets = clend - clstart
321 321 progress.complete()
322 322 self.callback = None
323 323
324 324 # pull off the manifest group
325 325 repo.ui.status(_("adding manifests\n"))
326 326 # We know that we'll never have more manifests than we had
327 327 # changesets.
328 328 progress = repo.ui.makeprogress(_('manifests'), unit=_('chunks'),
329 329 total=changesets)
330 330 self._unpackmanifests(repo, revmap, trp, progress)
331 331
332 332 needfiles = {}
333 333 if repo.ui.configbool('server', 'validate'):
334 334 cl = repo.changelog
335 335 ml = repo.manifestlog
336 336 # validate incoming csets have their manifests
337 337 for cset in pycompat.xrange(clstart, clend):
338 338 mfnode = cl.changelogrevision(cset).manifest
339 339 mfest = ml[mfnode].readdelta()
340 340 # store file cgnodes we must see
341 341 for f, n in mfest.iteritems():
342 342 needfiles.setdefault(f, set()).add(n)
343 343
344 344 # process the files
345 345 repo.ui.status(_("adding file changes\n"))
346 346 newrevs, newfiles = _addchangegroupfiles(
347 347 repo, self, revmap, trp, efiles, needfiles)
348 348 revisions += newrevs
349 349 files += newfiles
350 350
351 351 deltaheads = 0
352 352 if oldheads:
353 353 heads = cl.heads()
354 354 deltaheads = len(heads) - len(oldheads)
355 355 for h in heads:
356 356 if h not in oldheads and repo[h].closesbranch():
357 357 deltaheads -= 1
358 358 htext = ""
359 359 if deltaheads:
360 360 htext = _(" (%+d heads)") % deltaheads
361 361
362 362 repo.ui.status(_("added %d changesets"
363 363 " with %d changes to %d files%s\n")
364 364 % (changesets, revisions, files, htext))
365 365 repo.invalidatevolatilesets()
366 366
367 367 if changesets > 0:
368 368 if 'node' not in tr.hookargs:
369 369 tr.hookargs['node'] = hex(cl.node(clstart))
370 370 tr.hookargs['node_last'] = hex(cl.node(clend - 1))
371 371 hookargs = dict(tr.hookargs)
372 372 else:
373 373 hookargs = dict(tr.hookargs)
374 374 hookargs['node'] = hex(cl.node(clstart))
375 375 hookargs['node_last'] = hex(cl.node(clend - 1))
376 376 repo.hook('pretxnchangegroup',
377 377 throw=True, **pycompat.strkwargs(hookargs))
378 378
379 379 added = [cl.node(r) for r in pycompat.xrange(clstart, clend)]
380 380 phaseall = None
381 381 if srctype in ('push', 'serve'):
382 382 # Old servers can not push the boundary themselves.
383 383 # New servers won't push the boundary if changeset already
384 384 # exists locally as secret
385 385 #
386 386 # We should not use added here but the list of all changes in
387 387 # the bundle
388 388 if repo.publishing():
389 389 targetphase = phaseall = phases.public
390 390 else:
391 391 # closer target phase computation
392 392
393 393 # Those changesets have been pushed from the
394 394 # outside, their phases are going to be pushed
395 395 # alongside. Therefore `targetphase` is
396 396 # ignored.
397 397 targetphase = phaseall = phases.draft
398 398 if added:
399 399 phases.registernew(repo, tr, targetphase, added)
400 400 if phaseall is not None:
401 401 phases.advanceboundary(repo, tr, phaseall, cgnodes)
402 402
403 403 if changesets > 0:
404 404
405 405 def runhooks():
406 406 # These hooks run when the lock releases, not when the
407 407 # transaction closes. So it's possible for the changelog
408 408 # to have changed since we last saw it.
409 409 if clstart >= len(repo):
410 410 return
411 411
412 412 repo.hook("changegroup", **pycompat.strkwargs(hookargs))
413 413
414 414 for n in added:
415 415 args = hookargs.copy()
416 416 args['node'] = hex(n)
417 417 del args['node_last']
418 418 repo.hook("incoming", **pycompat.strkwargs(args))
419 419
420 420 newheads = [h for h in repo.heads()
421 421 if h not in oldheads]
422 422 repo.ui.log("incoming",
423 423 "%d incoming changes - new heads: %s\n",
424 424 len(added),
425 425 ', '.join([hex(c[:6]) for c in newheads]))
426 426
427 427 tr.addpostclose('changegroup-runhooks-%020i' % clstart,
428 428 lambda tr: repo._afterlock(runhooks))
429 429 finally:
430 430 repo.ui.flush()
431 431 # never return 0 here:
432 432 if deltaheads < 0:
433 433 ret = deltaheads - 1
434 434 else:
435 435 ret = deltaheads + 1
436 436 return ret
437 437
438 438 def deltaiter(self):
439 439 """
440 440 returns an iterator of the deltas in this changegroup
441 441
442 442 Useful for passing to the underlying storage system to be stored.
443 443 """
444 444 chain = None
445 445 for chunkdata in iter(lambda: self.deltachunk(chain), {}):
446 446 # Chunkdata: (node, p1, p2, cs, deltabase, delta, flags)
447 447 yield chunkdata
448 448 chain = chunkdata[0]
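    # Hedged consumer sketch: each yielded item is the 7-tuple produced by
    # deltachunk(), so storage-side callers typically unpack it as:
    #
    #     for node, p1, p2, linknode, deltabase, delta, flags in deltas:
    #         ...  # apply `delta` against `deltabase`, linked to `linknode`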
449 449
450 450 class cg2unpacker(cg1unpacker):
451 451 """Unpacker for cg2 streams.
452 452
453 453 cg2 streams add support for generaldelta, so the delta header
454 454 format is slightly different. All other features about the data
455 455 remain the same.
456 456 """
457 457 deltaheader = _CHANGEGROUPV2_DELTA_HEADER
458 458 deltaheadersize = deltaheader.size
459 459 version = '02'
460 460
461 461 def _deltaheader(self, headertuple, prevnode):
462 462 node, p1, p2, deltabase, cs = headertuple
463 463 flags = 0
464 464 return node, p1, p2, deltabase, cs, flags
465 465
466 466 class cg3unpacker(cg2unpacker):
467 467 """Unpacker for cg3 streams.
468 468
469 469 cg3 streams add support for exchanging treemanifests and revlog
470 470 flags. It adds the revlog flags to the delta header and an empty chunk
471 471 separating manifests and files.
472 472 """
473 473 deltaheader = _CHANGEGROUPV3_DELTA_HEADER
474 474 deltaheadersize = deltaheader.size
475 475 version = '03'
476 476 _grouplistcount = 2 # One list of manifests and one list of files
477 477
478 478 def _deltaheader(self, headertuple, prevnode):
479 479 node, p1, p2, deltabase, cs, flags = headertuple
480 480 return node, p1, p2, deltabase, cs, flags
481 481
482 482 def _unpackmanifests(self, repo, revmap, trp, prog):
483 483 super(cg3unpacker, self)._unpackmanifests(repo, revmap, trp, prog)
484 484 for chunkdata in iter(self.filelogheader, {}):
485 485 # If we get here, there are directory manifests in the changegroup
486 486 d = chunkdata["filename"]
487 487 repo.ui.debug("adding %s revisions\n" % d)
488 488 dirlog = repo.manifestlog._revlog.dirlog(d)
489 489 deltas = self.deltaiter()
490 490 if not dirlog.addgroup(deltas, revmap, trp):
491 491 raise error.Abort(_("received dir revlog group is empty"))
492 492
493 493 class headerlessfixup(object):
494 494 def __init__(self, fh, h):
495 495 self._h = h
496 496 self._fh = fh
497 497 def read(self, n):
498 498 if self._h:
499 499 d, self._h = self._h[:n], self._h[n:]
500 500 if len(d) < n:
501 501 d += readexactly(self._fh, n - len(d))
502 502 return d
503 503 return readexactly(self._fh, n)
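    # Hedged usage sketch: a caller that consumed bytes from ``fh`` while
    # sniffing the stream type can wrap it so those bytes are replayed:
    #
    #     import io
    #     raw = io.BytesIO(b'HG10UNrest')
    #     magic = raw.read(6)                  # consumed during detection
    #     fixed = headerlessfixup(raw, magic)
    #     assert fixed.read(6) == b'HG10UN'    # replayed header first
    #     assert fixed.read(4) == b'rest'      # then the underlying stream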
504 504
505 505 @attr.s(slots=True, frozen=True)
506 506 class revisiondelta(object):
507 507 """Describes a delta entry in a changegroup.
508 508
509 509 Captured data is sufficient to serialize the delta into multiple
510 510 formats.
511 511 """
512 512 # 20 byte node of this revision.
513 513 node = attr.ib()
514 514 # 20 byte nodes of parent revisions.
515 515 p1node = attr.ib()
516 516 p2node = attr.ib()
517 517 # 20 byte node of node this delta is against.
518 518 basenode = attr.ib()
519 519 # 20 byte node of changeset revision this delta is associated with.
520 520 linknode = attr.ib()
521 521 # 2 bytes of flags to apply to revision data.
522 522 flags = attr.ib()
523 523 # Iterable of chunks holding raw delta data.
524 524 deltachunks = attr.ib()
525 525
526 526 def _sortnodesnormal(store, nodes, reorder):
527 527 """Sort nodes for changegroup generation and turn into revnums."""
528 528 # for generaldelta revlogs, we linearize the revs; this will both be
529 529 # much quicker and generate a much smaller bundle
530 530 if (store._generaldelta and reorder is None) or reorder:
531 531 dag = dagutil.revlogdag(store)
532 532 return dag.linearize(set(store.rev(n) for n in nodes))
533 533 else:
534 534 return sorted([store.rev(n) for n in nodes])
535 535
536 536 def _sortnodesellipsis(store, nodes, clnodetorev, lookup):
537 537 """Sort nodes for changegroup generation and turn into revnums."""
538 538 # Ellipses serving mode.
539 539 #
540 540 # In a perfect world, we'd generate better ellipsis-ified graphs
541 541 # for non-changelog revlogs. In practice, we haven't started doing
542 542 # that yet, so the resulting DAGs for the manifestlog and filelogs
543 543 # are actually full of bogus parentage on all the ellipsis
544 544 # nodes. This has the side effect that, while the contents are
545 545 # correct, the individual DAGs might be completely out of whack in
546 546 # a case like 882681bc3166 and its ancestors (back about 10
547 547 # revisions or so) in the main hg repo.
548 548 #
549 549 # The one invariant we *know* holds is that the new (potentially
550 550 # bogus) DAG shape will be valid if we order the nodes in the
551 551 # order that they're introduced in dramatis personae by the
552 552 # changelog, so what we do is we sort the non-changelog histories
553 553 # by the order in which they are used by the changelog.
554 554 key = lambda n: clnodetorev[lookup(n)]
555 555 return [store.rev(n) for n in sorted(nodes, key=key)]
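# Hedged illustration: if lookup() maps file nodes n1, n2, n3 to changelog
# nodes whose revs (via clnodetorev) are 5, 2 and 9, the emitted order is
# n2, n1, n3 -- changelog introduction order rather than local store order.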
556 556
557 557 def _revisiondeltanormal(store, rev, prev, linknode, deltaparentfn):
558 558 """Construct a revision delta for non-ellipses changegroup generation."""
559 559 node = store.node(rev)
560 560 p1, p2 = store.parentrevs(rev)
561 561 base = deltaparentfn(store, rev, p1, p2, prev)
562 562
563 563 prefix = ''
564 564 if store.iscensored(base) or store.iscensored(rev):
565 565 try:
566 566 delta = store.revision(node, raw=True)
567 567 except error.CensoredNodeError as e:
568 568 delta = e.tombstone
569 569 if base == nullrev:
570 570 prefix = mdiff.trivialdiffheader(len(delta))
571 571 else:
572 572 baselen = store.rawsize(base)
573 573 prefix = mdiff.replacediffheader(baselen, len(delta))
574 574 elif base == nullrev:
575 575 delta = store.revision(node, raw=True)
576 576 prefix = mdiff.trivialdiffheader(len(delta))
577 577 else:
578 578 delta = store.revdiff(base, rev)
579 579 p1n, p2n = store.parents(node)
580 580
581 581 return revisiondelta(
582 582 node=node,
583 583 p1node=p1n,
584 584 p2node=p2n,
585 585 basenode=store.node(base),
586 586 linknode=linknode,
587 587 flags=store.flags(rev),
588 588 deltachunks=(prefix, delta),
589 589 )
590 590
591 591 class cgpacker(object):
592 592 def __init__(self, repo, filematcher, version, allowreorder,
593 593 deltaparentfn, builddeltaheader, manifestsend,
594 594 bundlecaps=None, ellipses=False,
595 595 shallow=False, ellipsisroots=None, fullnodes=None):
596 596 """Given a source repo, construct a bundler.
597 597
598 598 filematcher is a matcher that matches on files to include in the
599 599 changegroup. Used to facilitate sparse changegroups.
600 600
601 601 allowreorder controls whether reordering of revisions is allowed.
602 602 This value is used when ``bundle.reorder`` is ``auto`` or isn't
603 603 set.
604 604
605 605 deltaparentfn is a callable that resolves the delta parent for
606 606 a specific revision.
607 607
608 608 builddeltaheader is a callable that constructs the header for a group
609 609 delta.
610 610
611 611 manifestsend is a chunk to send after manifests have been fully emitted.
612 612
613 613 ellipses indicates whether ellipsis serving mode is enabled.
614 614
615 615 bundlecaps is optional and can be used to specify the set of
616 616 capabilities which can be used to build the bundle. While bundlecaps is
617 617 unused in core Mercurial, extensions rely on this feature to communicate
618 618 capabilities to customize the changegroup packer.
619 619
620 620 shallow indicates whether shallow data might be sent. The packer may
621 621 need to pack file contents not introduced by the changes being packed.
622 622
623 623 fullnodes is the list of nodes which should not be ellipsis nodes. We
624 624 store this rather than the set of nodes that should be ellipsis because
625 625 for very large histories we expect this to be significantly smaller.
626 626 """
627 627 assert filematcher
628 628 self._filematcher = filematcher
629 629
630 630 self.version = version
631 631 self._deltaparentfn = deltaparentfn
632 632 self._builddeltaheader = builddeltaheader
633 633 self._manifestsend = manifestsend
634 634 self._ellipses = ellipses
635 635
636 636 # Set of capabilities we can use to build the bundle.
637 637 if bundlecaps is None:
638 638 bundlecaps = set()
639 639 self._bundlecaps = bundlecaps
640 640 self._isshallow = shallow
641 641 self._fullnodes = fullnodes
642 642
643 643 # Maps ellipsis revs to their roots at the changelog level.
644 644 self._precomputedellipsis = ellipsisroots
645 645
646 646 # experimental config: bundle.reorder
647 647 reorder = repo.ui.config('bundle', 'reorder')
648 648 if reorder == 'auto':
649 649 self._reorder = allowreorder
650 650 else:
651 651 self._reorder = stringutil.parsebool(reorder)
652 652
653 653 self._repo = repo
654 654
655 655 if self._repo.ui.verbose and not self._repo.ui.debugflag:
656 656 self._verbosenote = self._repo.ui.note
657 657 else:
658 658 self._verbosenote = lambda s: None
659 659
660 660 # Maps CL revs to per-revlog revisions. Cleared in close() at
661 661 # the end of each group.
662 662 self._clrevtolocalrev = {}
663 663 self._nextclrevtolocalrev = {}
664 664
665 665 # Maps changelog nodes to changelog revs. Filled in once
666 666 # during changelog stage and then left unmodified.
667 667 self._clnodetorev = {}
668 668
669 669 def _close(self):
670 670 # Ellipses serving mode.
671 671 self._clrevtolocalrev.clear()
672 672 if self._nextclrevtolocalrev is not None:
673 673 self._clrevtolocalrev = self._nextclrevtolocalrev
674 674 self._nextclrevtolocalrev = None
675 675
676 676 return closechunk()
677 677
678 678 def group(self, revs, store, ischangelog, lookup, units=None):
679 679 """Calculate a delta group, yielding a sequence of changegroup chunks
680 680 (strings).
681 681
682 682 Given a list of changeset revs, return a set of deltas and
683 683 metadata corresponding to nodes. The first delta is
684 684 first parent(nodelist[0]) -> nodelist[0], the receiver is
685 685 guaranteed to have this parent as it has all history before
686 686 these changesets. In the case firstparent is nullrev the
687 687 changegroup starts with a full revision.
688 688
689 689 If units is not None, progress detail will be generated; units specifies
690 690 the type of revlog that is touched (changelog, manifest, etc.).
691 691 """
692 692 # if we don't have any revisions touched by these changesets, bail
693 693 if len(revs) == 0:
694 694 yield self._close()
695 695 return
696 696
697 697 # add the parent of the first rev
698 698 p = store.parentrevs(revs[0])[0]
699 699 revs.insert(0, p)
700 700
701 701 # build deltas
702 702 progress = None
703 703 if units is not None:
704 704 progress = self._repo.ui.makeprogress(_('bundling'), unit=units,
705 705 total=(len(revs) - 1))
706 706 for r in pycompat.xrange(len(revs) - 1):
707 707 if progress:
708 708 progress.update(r + 1)
709 709 prev, curr = revs[r], revs[r + 1]
710 710 linknode = lookup(store.node(curr))
711 711
712 712 if self._ellipses:
713 713 delta = self._revisiondeltanarrow(store, ischangelog,
714 714 curr, prev, linknode)
715 715 else:
716 716 delta = _revisiondeltanormal(store, curr, prev, linknode,
717 717 self._deltaparentfn)
718 718
719 719 if not delta:
720 720 continue
721 721
722 722 meta = self._builddeltaheader(delta)
723 723 l = len(meta) + sum(len(x) for x in delta.deltachunks)
724 724 yield chunkheader(l)
725 725 yield meta
726 726 for x in delta.deltachunks:
727 727 yield x
728 728
729 729 if progress:
730 730 progress.complete()
731 731 yield self._close()
732 732
733 733 # filter any nodes that claim to be part of the known set
734 734 def _prune(self, store, missing, commonrevs):
735 735 # TODO this violates storage abstraction for manifests.
736 736 if isinstance(store, manifest.manifestrevlog):
737 737 if not self._filematcher.visitdir(store._dir[:-1] or '.'):
738 738 return []
739 739
740 740 rr, rl = store.rev, store.linkrev
741 741 return [n for n in missing if rl(rr(n)) not in commonrevs]
742 742
743 743 def _packmanifests(self, dir, dirlog, revs, lookuplinknode):
744 744 """Pack manifests into a changegroup stream.
745 745
746 746 Encodes the directory name in the output so multiple manifests
747 747 can be sent. Multiple manifests are not supported by cg1 and cg2.
748 748 """
749 749 if dir:
750 750 assert self.version == b'03'
751 751 yield _fileheader(dir)
752 752
753 753 for chunk in self.group(revs, dirlog, False, lookuplinknode,
754 754 units=_('manifests')):
755 755 yield chunk
756 756
757 757 def generate(self, commonrevs, clnodes, fastpathlinkrev, source):
758 758 """Yield a sequence of changegroup byte chunks."""
759 759
760 760 repo = self._repo
761 761 cl = repo.changelog
762 762
763 763 self._verbosenote(_('uncompressed size of bundle content:\n'))
764 764 size = 0
765 765
766 766 clstate, chunks = self._generatechangelog(cl, clnodes)
767 767 for chunk in chunks:
768 768 size += len(chunk)
769 769 yield chunk
770 770
771 771 self._verbosenote(_('%8.i (changelog)\n') % size)
772 772
773 773 clrevorder = clstate['clrevorder']
774 774 mfs = clstate['mfs']
775 775 changedfiles = clstate['changedfiles']
776 776
777 777 # We need to make sure that the linkrev in the changegroup refers to
778 778 # the first changeset that introduced the manifest or file revision.
779 779 # The fastpath is usually safer than the slowpath, because the filelogs
780 780 # are walked in revlog order.
781 781 #
782 782 # When taking the slowpath with reorder=None and the manifest revlog
783 783 # uses generaldelta, the manifest may be walked in the "wrong" order.
784 784 # Without 'clrevorder', we would get an incorrect linkrev (see fix in
785 785 # cc0ff93d0c0c).
786 786 #
787 787 # When taking the fastpath, we are only vulnerable to reordering
788 788 # of the changelog itself. The changelog never uses generaldelta, so
789 789 # it is only reordered when reorder=True. To handle this case, we
790 790 # simply take the slowpath, which already has the 'clrevorder' logic.
791 791 # This was also fixed in cc0ff93d0c0c.
792 792 fastpathlinkrev = fastpathlinkrev and not self._reorder
793 793 # Treemanifests don't work correctly with fastpathlinkrev
794 794 # either, because we don't discover which directory nodes to
795 795 # send along with files. This could probably be fixed.
796 796 fastpathlinkrev = fastpathlinkrev and (
797 797 'treemanifest' not in repo.requirements)
798 798
799 799 fnodes = {} # needed file nodes
800 800
801 801 for chunk in self.generatemanifests(commonrevs, clrevorder,
802 802 fastpathlinkrev, mfs, fnodes, source):
803 803 yield chunk
804 804
805 805 mfdicts = None
806 806 if self._ellipses and self._isshallow:
807 807 mfdicts = [(self._repo.manifestlog[n].read(), lr)
808 808 for (n, lr) in mfs.iteritems()]
809 809
810 810 mfs.clear()
811 811 clrevs = set(cl.rev(x) for x in clnodes)
812 812
813 813 if not fastpathlinkrev:
814 814 def linknodes(unused, fname):
815 815 return fnodes.get(fname, {})
816 816 else:
817 817 cln = cl.node
818 818 def linknodes(filerevlog, fname):
819 819 llr = filerevlog.linkrev
820 820 fln = filerevlog.node
821 821 revs = ((r, llr(r)) for r in filerevlog)
822 822 return dict((fln(r), cln(lr)) for r, lr in revs if lr in clrevs)
823 823
824 824 for chunk in self.generatefiles(changedfiles, linknodes, commonrevs,
825 825 source, mfdicts):
826 826 yield chunk
827 827
828 828 yield self._close()
829 829
830 830 if clnodes:
831 831 repo.hook('outgoing', node=hex(clnodes[0]), source=source)
832 832
833 833 def _generatechangelog(self, cl, nodes):
834 834 """Generate data for changelog chunks.
835 835
836 836 Returns a 2-tuple of a dict containing state and an iterable of
837 837 byte chunks. The state will not be fully populated until the
838 838 chunk stream has been fully consumed.
839 839 """
840 840 clrevorder = {}
841 841 mfs = {} # needed manifests
842 842 mfl = self._repo.manifestlog
843 843 # TODO violates storage abstraction.
844 844 mfrevlog = mfl._revlog
845 845 changedfiles = set()
846 846
847 847 # Callback for the changelog, used to collect changed files and
848 848 # manifest nodes.
849 849 # Returns the linkrev node (identity in the changelog case).
850 850 def lookupcl(x):
851 851 c = cl.read(x)
852 852 clrevorder[x] = len(clrevorder)
853 853
854 854 if self._ellipses:
855 self._clnodetorev[x] = cl.rev(x)
856
855 857 # Only update mfs if x is going to be sent. Otherwise we
856 858 # end up with bogus linkrevs specified for manifests and
857 859 # we skip some manifest nodes that we should otherwise
858 860 # have sent.
859 861 if (x in self._fullnodes
860 862 or cl.rev(x) in self._precomputedellipsis):
861 863 n = c[0]
862 864 # Record the first changeset introducing this manifest
863 865 # version.
864 866 mfs.setdefault(n, x)
865 867 # Set this narrow-specific dict so we have the lowest
866 868 # manifest revnum to look up for this cl revnum. (Part of
867 869 # mapping changelog ellipsis parents to manifest ellipsis
868 870 # parents)
869 871 self._nextclrevtolocalrev.setdefault(cl.rev(x),
870 872 mfrevlog.rev(n))
871 873 # We can't trust the changed files list in the changeset if the
872 874 # client requested a shallow clone.
873 875 if self._isshallow:
874 876 changedfiles.update(mfl[c[0]].read().keys())
875 877 else:
876 878 changedfiles.update(c[3])
877 879 else:
878 880
879 881 n = c[0]
880 882 # record the first changeset introducing this manifest version
881 883 mfs.setdefault(n, x)
882 884 # Record a complete list of potentially-changed files in
883 885 # this manifest.
884 886 changedfiles.update(c[3])
885 887
886 888 return x
887 889
888 890 # Changelog doesn't benefit from reordering revisions. So send out
889 891 # revisions in store order.
890 892 revs = sorted(cl.rev(n) for n in nodes)
891 893
892 894 state = {
893 895 'clrevorder': clrevorder,
894 896 'mfs': mfs,
895 897 'changedfiles': changedfiles,
896 898 }
897 899
898 900 gen = self.group(revs, cl, True, lookupcl, units=_('changesets'))
899 901
900 902 return state, gen
901 903
902 904 def generatemanifests(self, commonrevs, clrevorder, fastpathlinkrev, mfs,
903 905 fnodes, source):
904 906 """Returns an iterator of changegroup chunks containing manifests.
905 907
906 908 `source` is unused here, but is used by extensions like remotefilelog to
907 909 change what is sent based on pulls vs pushes, etc.
908 910 """
909 911 repo = self._repo
910 912 mfl = repo.manifestlog
911 913 dirlog = mfl._revlog.dirlog
912 914 tmfnodes = {'': mfs}
913 915
914 916 # Callback for the manifest, used to collect linkrevs for filelog
915 917 # revisions.
916 918 # Returns the linkrev node (collected in lookupcl).
917 919 def makelookupmflinknode(dir, nodes):
918 920 if fastpathlinkrev:
919 921 assert not dir
920 922 return mfs.__getitem__
921 923
922 924 def lookupmflinknode(x):
923 925 """Callback for looking up the linknode for manifests.
924 926
925 927 Returns the linkrev node for the specified manifest.
926 928
927 929 SIDE EFFECT:
928 930
929 931 1) fclnodes gets populated with the list of relevant
930 932 file nodes if we're not using fastpathlinkrev
931 933 2) When treemanifests are in use, collects treemanifest nodes
932 934 to send
933 935
934 936 Note that this means manifests must be completely sent to
935 937 the client before you can trust the list of files and
936 938 treemanifests to send.
937 939 """
938 940 clnode = nodes[x]
939 941 mdata = mfl.get(dir, x).readfast(shallow=True)
940 942 for p, n, fl in mdata.iterentries():
941 943 if fl == 't': # subdirectory manifest
942 944 subdir = dir + p + '/'
943 945 tmfclnodes = tmfnodes.setdefault(subdir, {})
944 946 tmfclnode = tmfclnodes.setdefault(n, clnode)
945 947 if clrevorder[clnode] < clrevorder[tmfclnode]:
946 948 tmfclnodes[n] = clnode
947 949 else:
948 950 f = dir + p
949 951 fclnodes = fnodes.setdefault(f, {})
950 952 fclnode = fclnodes.setdefault(n, clnode)
951 953 if clrevorder[clnode] < clrevorder[fclnode]:
952 954 fclnodes[n] = clnode
953 955 return clnode
954 956 return lookupmflinknode
955 957
956 958 size = 0
957 959 while tmfnodes:
958 960 dir, nodes = tmfnodes.popitem()
959 961 store = dirlog(dir)
960 962 prunednodes = self._prune(store, nodes, commonrevs)
961 963 if not dir or prunednodes:
962 964 lookupfn = makelookupmflinknode(dir, nodes)
963 965
964 966 if self._ellipses:
965 967 revs = _sortnodesellipsis(store, prunednodes,
966 968 self._clnodetorev, lookupfn)
967 969 else:
968 970 revs = _sortnodesnormal(store, prunednodes,
969 971 self._reorder)
970 972
971 973 for x in self._packmanifests(dir, store, revs, lookupfn):
972 974 size += len(x)
973 975 yield x
974 976 self._verbosenote(_('%8.i (manifests)\n') % size)
975 977 yield self._manifestsend
976 978
977 979 # The 'source' parameter is useful for extensions
978 980 def generatefiles(self, changedfiles, linknodes, commonrevs, source,
979 981 mfdicts):
980 982 changedfiles = list(filter(self._filematcher, changedfiles))
981 983
982 984 if self._isshallow:
983 985 # In a shallow clone, the linknodes callback needs to also include
984 986 # those file nodes that are in the manifests we sent but weren't
985 987 # introduced by those manifests.
986 988 commonctxs = [self._repo[c] for c in commonrevs]
987 989 oldlinknodes = linknodes
988 990 clrev = self._repo.changelog.rev
989 991
990 992 # Defining this function has a side-effect of overriding the
991 993 # function of the same name that was passed in as an argument.
992 994 # TODO have caller pass in appropriate function.
993 995 def linknodes(flog, fname):
994 996 for c in commonctxs:
995 997 try:
996 998 fnode = c.filenode(fname)
997 999 self._clrevtolocalrev[c.rev()] = flog.rev(fnode)
998 1000 except error.ManifestLookupError:
999 1001 pass
1000 1002 links = oldlinknodes(flog, fname)
1001 1003 if len(links) != len(mfdicts):
1002 1004 for mf, lr in mfdicts:
1003 1005 fnode = mf.get(fname, None)
1004 1006 if fnode in links:
1005 1007 links[fnode] = min(links[fnode], lr, key=clrev)
1006 1008 elif fnode:
1007 1009 links[fnode] = lr
1008 1010 return links
1009 1011
1010 1012 return self._generatefiles(changedfiles, linknodes, commonrevs, source)
1011 1013
1012 1014 def _generatefiles(self, changedfiles, linknodes, commonrevs, source):
1013 1015 repo = self._repo
1014 1016 progress = repo.ui.makeprogress(_('bundling'), unit=_('files'),
1015 1017 total=len(changedfiles))
1016 1018 for i, fname in enumerate(sorted(changedfiles)):
1017 1019 filerevlog = repo.file(fname)
1018 1020 if not filerevlog:
1019 1021 raise error.Abort(_("empty or missing file data for %s") %
1020 1022 fname)
1021 1023
1022 1024 linkrevnodes = linknodes(filerevlog, fname)
1023 1025 # Lookup for filenodes, we collected the linkrev nodes above in the
1024 1026 # fastpath case and with lookupmf in the slowpath case.
1025 1027 def lookupfilelog(x):
1026 1028 return linkrevnodes[x]
1027 1029
1028 1030 filenodes = self._prune(filerevlog, linkrevnodes, commonrevs)
1029 1031 if filenodes:
1030 1032 if self._ellipses:
1031 1033 revs = _sortnodesellipsis(filerevlog, filenodes,
1032 1034 self._clnodetorev, lookupfilelog)
1033 1035 else:
1034 1036 revs = _sortnodesnormal(filerevlog, filenodes,
1035 1037 self._reorder)
1036 1038
1037 1039 progress.update(i + 1, item=fname)
1038 1040 h = _fileheader(fname)
1039 1041 size = len(h)
1040 1042 yield h
1041 1043 for chunk in self.group(revs, filerevlog, False, lookupfilelog):
1042 1044 size += len(chunk)
1043 1045 yield chunk
1044 1046 self._verbosenote(_('%8.i %s\n') % (size, fname))
1045 1047 progress.complete()
1046 1048
1047 1049 def _revisiondeltanarrow(self, store, ischangelog, rev, prev, linknode):
1048 # build up some mapping information that's useful later. See
1049 # the local() nested function below.
1050 if ischangelog:
1051 self._clnodetorev[linknode] = rev
1052 linkrev = rev
1053 self._clrevtolocalrev[linkrev] = rev
1054 else:
1055 1050 linkrev = self._clnodetorev[linknode]
1056 1051 self._clrevtolocalrev[linkrev] = rev
1057 1052
1058 1053 # This is a node to send in full, because the changeset it
1059 1054 # corresponds to was a full changeset.
1060 1055 if linknode in self._fullnodes:
1061 1056 return _revisiondeltanormal(store, rev, prev, linknode,
1062 1057 self._deltaparentfn)
1063 1058
1064 1059 # At this point, a node can either be one we should skip or an
1065 1060 # ellipsis. If it's not an ellipsis, bail immediately.
1066 1061 if linkrev not in self._precomputedellipsis:
1067 1062 return
1068 1063
1069 1064 linkparents = self._precomputedellipsis[linkrev]
1070 1065 def local(clrev):
1071 1066 """Turn a changelog revnum into a local revnum.
1072 1067
1073 1068 The ellipsis dag is stored as revnums on the changelog,
1074 1069 but when we're producing ellipsis entries for
1075 1070 non-changelog revlogs, we need to turn those numbers into
1076 1071 something local. This does that for us, and during the
1077 1072 changelog sending phase will also expand the stored
1078 1073 mappings as needed.
1079 1074 """
1080 1075 if clrev == nullrev:
1081 1076 return nullrev
1082 1077
1083 1078 if ischangelog:
1084 1079 # If we're doing the changelog, it's possible that we
1085 1080 # have a parent that is already on the client, and we
1086 1081 # need to store some extra mapping information so that
1087 1082 # our contained ellipsis nodes will be able to resolve
1088 1083 # their parents.
1089 1084 if clrev not in self._clrevtolocalrev:
1090 1085 clnode = store.node(clrev)
1091 1086 self._clnodetorev[clnode] = clrev
1092 1087 return clrev
1093 1088
1094 1089 # Walk the ellipsis-ized changelog breadth-first looking for a
1095 1090 # change that has been linked from the current revlog.
1096 1091 #
1097 1092 # For a flat manifest revlog only a single step should be necessary
1098 1093 # as all relevant changelog entries are relevant to the flat
1099 1094 # manifest.
1100 1095 #
1101 1096 # For a filelog or tree manifest dirlog however not every changelog
1102 1097 # entry will have been relevant, so we need to skip some changelog
1103 1098 # nodes even after ellipsis-izing.
1104 1099 walk = [clrev]
1105 1100 while walk:
1106 1101 p = walk[0]
1107 1102 walk = walk[1:]
1108 1103 if p in self._clrevtolocalrev:
1109 1104 return self._clrevtolocalrev[p]
1110 1105 elif p in self._fullnodes:
1111 1106 walk.extend([pp for pp in self._repo.changelog.parentrevs(p)
1112 1107 if pp != nullrev])
1113 1108 elif p in self._precomputedellipsis:
1114 1109 walk.extend([pp for pp in self._precomputedellipsis[p]
1115 1110 if pp != nullrev])
1116 1111 else:
1117 1112 # In this case, we've got an ellipsis with parents
1118 1113 # outside the current bundle (likely an
1119 1114 # incremental pull). We "know" that we can use the
1120 1115 # value of this same revlog at whatever revision
1121 1116 # is pointed to by linknode. "Know" is in scare
1122 1117 # quotes because I haven't done enough examination
1123 1118 # of edge cases to convince myself this is really
1124 1119 # a fact - it works for all the (admittedly
1125 1120 # thorough) cases in our testsuite, but I would be
1126 1121 # somewhat unsurprised to find a case in the wild
1127 1122 # where this breaks down a bit. That said, I don't
1128 1123 # know if it would hurt anything.
1129 1124 for i in pycompat.xrange(rev, 0, -1):
1130 1125 if store.linkrev(i) == clrev:
1131 1126 return i
1132 1127 # We failed to resolve a parent for this node, so
1133 1128 # we crash the changegroup construction.
1134 1129 raise error.Abort(
1135 1130 'unable to resolve parent while packing %r %r'
1136 1131 ' for changeset %r' % (store.indexfile, rev, clrev))
1137 1132
1138 1133 return nullrev
1139 1134
1140 1135 if not linkparents or (
1141 1136 store.parentrevs(rev) == (nullrev, nullrev)):
1142 1137 p1, p2 = nullrev, nullrev
1143 1138 elif len(linkparents) == 1:
1144 1139 p1, = sorted(local(p) for p in linkparents)
1145 1140 p2 = nullrev
1146 1141 else:
1147 1142 p1, p2 = sorted(local(p) for p in linkparents)
1148 1143
1149 1144 n = store.node(rev)
1150 1145 p1n, p2n = store.node(p1), store.node(p2)
1151 1146 flags = store.flags(rev)
1152 1147 flags |= revlog.REVIDX_ELLIPSIS
1153 1148
1154 1149 # TODO: try and actually send deltas for ellipsis data blocks
1155 1150 data = store.revision(n)
1156 1151 diffheader = mdiff.trivialdiffheader(len(data))
1157 1152
1158 1153 return revisiondelta(
1159 1154 node=n,
1160 1155 p1node=p1n,
1161 1156 p2node=p2n,
1162 1157 basenode=nullid,
1163 1158 linknode=linknode,
1164 1159 flags=flags,
1165 1160 deltachunks=(diffheader, data),
1166 1161 )
1167 1162
1168 1163 def _deltaparentprev(store, rev, p1, p2, prev):
1169 1164 """Resolve a delta parent to the previous revision.
1170 1165
1171 1166 Used for version 1 changegroups, which don't support generaldelta.
1172 1167 """
1173 1168 return prev
1174 1169
1175 1170 def _deltaparentgeneraldelta(store, rev, p1, p2, prev):
1176 1171 """Resolve a delta parent when general deltas are supported."""
1177 1172 dp = store.deltaparent(rev)
1178 1173 if dp == nullrev and store.storedeltachains:
1179 1174 # Avoid sending full revisions when delta parent is null. Pick prev
1180 1175 # in that case. It's tempting to pick p1 in this case, as p1 will
1181 1176 # be smaller in the common case. However, computing a delta against
1182 1177 # p1 may require resolving the raw text of p1, which could be
1183 1178 # expensive. The revlog caches should have prev cached, meaning
1184 1179 # less CPU for changegroup generation. There is likely room to add
1185 1180 # a flag and/or config option to control this behavior.
1186 1181 base = prev
1187 1182 elif dp == nullrev:
1188 1183 # revlog is configured to use full snapshot for a reason,
1189 1184 # stick to full snapshot.
1190 1185 base = nullrev
1191 1186 elif dp not in (p1, p2, prev):
1192 1187 # Pick prev when we can't be sure remote has the base revision.
1193 1188 return prev
1194 1189 else:
1195 1190 base = dp
1196 1191
1197 1192 if base != nullrev and not store.candelta(base, rev):
1198 1193 base = nullrev
1199 1194
1200 1195 return base
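# Summarizing the base selection above as an editorial table:
#
#   deltaparent == nullrev, chains stored      -> prev
#   deltaparent == nullrev, chains not stored  -> nullrev (full snapshot)
#   deltaparent not in (p1, p2, prev)          -> prev (returned directly,
#                                                 skipping the check below)
#   otherwise                                  -> deltaparent
#
# with a final demotion to nullrev when candelta() rejects the pair.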
1201 1196
1202 1197 def _deltaparentellipses(store, rev, p1, p2, prev):
1203 1198 """Resolve a delta parent when in ellipses mode."""
1204 1199 # TODO: send better deltas when in narrow mode.
1205 1200 #
1206 1201 # changegroup.group() loops over revisions to send,
1207 1202 # including revisions we'll skip. What this means is that
1208 1203 # `prev` will be a potentially useless delta base for all
1209 1204 # ellipsis nodes, as the client likely won't have it. In
1210 1205 # the future we should do bookkeeping about which nodes
1211 1206 # have been sent to the client, and try to be
1212 1207 # significantly smarter about delta bases. This is
1213 1208 # slightly tricky because this same code has to work for
1214 1209 # all revlogs, and we don't have the linkrev/linknode here.
1215 1210 return p1
1216 1211
1217 1212 def _makecg1packer(repo, filematcher, bundlecaps, ellipses=False,
1218 1213 shallow=False, ellipsisroots=None, fullnodes=None):
1219 1214 builddeltaheader = lambda d: _CHANGEGROUPV1_DELTA_HEADER.pack(
1220 1215 d.node, d.p1node, d.p2node, d.linknode)
1221 1216
1222 1217 return cgpacker(repo, filematcher, b'01',
1223 1218 deltaparentfn=_deltaparentprev,
1224 1219 allowreorder=None,
1225 1220 builddeltaheader=builddeltaheader,
1226 1221 manifestsend=b'',
1227 1222 bundlecaps=bundlecaps,
1228 1223 ellipses=ellipses,
1229 1224 shallow=shallow,
1230 1225 ellipsisroots=ellipsisroots,
1231 1226 fullnodes=fullnodes)
1232 1227
1233 1228 def _makecg2packer(repo, filematcher, bundlecaps, ellipses=False,
1234 1229 shallow=False, ellipsisroots=None, fullnodes=None):
1235 1230 builddeltaheader = lambda d: _CHANGEGROUPV2_DELTA_HEADER.pack(
1236 1231 d.node, d.p1node, d.p2node, d.basenode, d.linknode)
1237 1232
1238 1233 # Since generaldelta is directly supported by cg2, reordering
1239 1234 # generally doesn't help, so we disable it by default (treating
1240 1235 # bundle.reorder=auto just like bundle.reorder=False).
1241 1236 return cgpacker(repo, filematcher, b'02',
1242 1237 deltaparentfn=_deltaparentgeneraldelta,
1243 1238 allowreorder=False,
1244 1239 builddeltaheader=builddeltaheader,
1245 1240 manifestsend=b'',
1246 1241 bundlecaps=bundlecaps,
1247 1242 ellipses=ellipses,
1248 1243 shallow=shallow,
1249 1244 ellipsisroots=ellipsisroots,
1250 1245 fullnodes=fullnodes)
1251 1246
1252 1247 def _makecg3packer(repo, filematcher, bundlecaps, ellipses=False,
1253 1248 shallow=False, ellipsisroots=None, fullnodes=None):
1254 1249 builddeltaheader = lambda d: _CHANGEGROUPV3_DELTA_HEADER.pack(
1255 1250 d.node, d.p1node, d.p2node, d.basenode, d.linknode, d.flags)
1256 1251
1257 1252 deltaparentfn = (_deltaparentellipses if ellipses
1258 1253 else _deltaparentgeneraldelta)
1259 1254
1260 1255 return cgpacker(repo, filematcher, b'03',
1261 1256 deltaparentfn=deltaparentfn,
1262 1257 allowreorder=False,
1263 1258 builddeltaheader=builddeltaheader,
1264 1259 manifestsend=closechunk(),
1265 1260 bundlecaps=bundlecaps,
1266 1261 ellipses=ellipses,
1267 1262 shallow=shallow,
1268 1263 ellipsisroots=ellipsisroots,
1269 1264 fullnodes=fullnodes)
1270 1265
1271 1266 _packermap = {'01': (_makecg1packer, cg1unpacker),
1272 1267 # cg2 adds support for exchanging generaldelta
1273 1268 '02': (_makecg2packer, cg2unpacker),
1274 1269 # cg3 adds support for exchanging revlog flags and treemanifests
1275 1270 '03': (_makecg3packer, cg3unpacker),
1276 1271 }
1277 1272
1278 1273 def allsupportedversions(repo):
1279 1274 versions = set(_packermap.keys())
1280 1275 if not (repo.ui.configbool('experimental', 'changegroup3') or
1281 1276 repo.ui.configbool('experimental', 'treemanifest') or
1282 1277 'treemanifest' in repo.requirements):
1283 1278 versions.discard('03')
1284 1279 return versions
1285 1280
1286 1281 # Changegroup versions that can be applied to the repo
1287 1282 def supportedincomingversions(repo):
1288 1283 return allsupportedversions(repo)
1289 1284
1290 1285 # Changegroup versions that can be created from the repo
1291 1286 def supportedoutgoingversions(repo):
1292 1287 versions = allsupportedversions(repo)
1293 1288 if 'treemanifest' in repo.requirements:
1294 1289 # Versions 01 and 02 support only flat manifests and it's just too
1295 1290 # expensive to convert between the flat manifest and tree manifest on
1296 1291 # the fly. Since tree manifests are hashed differently, all of history
1297 1292 # would have to be converted. Instead, we simply don't even pretend to
1298 1293 # support versions 01 and 02.
1299 1294 versions.discard('01')
1300 1295 versions.discard('02')
1301 1296 if repository.NARROW_REQUIREMENT in repo.requirements:
1302 1297 # Versions 01 and 02 don't support revlog flags, and we need to
1303 1298 # support that for stripping and unbundling to work.
1304 1299 versions.discard('01')
1305 1300 versions.discard('02')
1306 1301 if LFS_REQUIREMENT in repo.requirements:
1307 1302 # Versions 01 and 02 don't support revlog flags, and we need to
1308 1303 # mark LFS entries with REVIDX_EXTSTORED.
1309 1304 versions.discard('01')
1310 1305 versions.discard('02')
1311 1306
1312 1307 return versions
1313 1308
1314 1309 def localversion(repo):
1315 1310 # Finds the best version to use for bundles that are meant to be used
1316 1311 # locally, such as those from strip and shelve, and temporary bundles.
1317 1312 return max(supportedoutgoingversions(repo))
1318 1313
1319 1314 def safeversion(repo):
1320 1315 # Finds the smallest version that it's safe to assume clients of the repo
1321 1316 # will support. For example, all hg versions that support generaldelta also
1322 1317 # support changegroup 02.
1323 1318 versions = supportedoutgoingversions(repo)
1324 1319 if 'generaldelta' in repo.requirements:
1325 1320 versions.discard('01')
1326 1321 assert versions
1327 1322 return min(versions)
1328 1323
1329 1324 def getbundler(version, repo, bundlecaps=None, filematcher=None,
1330 1325 ellipses=False, shallow=False, ellipsisroots=None,
1331 1326 fullnodes=None):
1332 1327 assert version in supportedoutgoingversions(repo)
1333 1328
1334 1329 if filematcher is None:
1335 1330 filematcher = matchmod.alwaysmatcher(repo.root, '')
1336 1331
1337 1332 if version == '01' and not filematcher.always():
1338 1333 raise error.ProgrammingError('version 01 changegroups do not support '
1339 1334 'sparse file matchers')
1340 1335
1341 1336 if ellipses and version in (b'01', b'02'):
1342 1337 raise error.Abort(
1343 1338 _('ellipsis nodes require at least cg3 on client and server, '
1344 1339 'but negotiated version %s') % version)
1345 1340
1346 1341 # Requested files could include files not in the local store. So
1347 1342 # filter those out.
1348 1343 filematcher = matchmod.intersectmatchers(repo.narrowmatch(),
1349 1344 filematcher)
1350 1345
1351 1346 fn = _packermap[version][0]
1352 1347 return fn(repo, filematcher, bundlecaps, ellipses=ellipses,
1353 1348 shallow=shallow, ellipsisroots=ellipsisroots,
1354 1349 fullnodes=fullnodes)
1355 1350
1356 1351 def getunbundler(version, fh, alg, extras=None):
1357 1352 return _packermap[version][1](fh, alg, extras=extras)
1358 1353
1359 1354 def _changegroupinfo(repo, nodes, source):
1360 1355 if repo.ui.verbose or source == 'bundle':
1361 1356 repo.ui.status(_("%d changesets found\n") % len(nodes))
1362 1357 if repo.ui.debugflag:
1363 1358 repo.ui.debug("list of changesets:\n")
1364 1359 for node in nodes:
1365 1360 repo.ui.debug("%s\n" % hex(node))
1366 1361
1367 1362 def makechangegroup(repo, outgoing, version, source, fastpath=False,
1368 1363 bundlecaps=None):
1369 1364 cgstream = makestream(repo, outgoing, version, source,
1370 1365 fastpath=fastpath, bundlecaps=bundlecaps)
1371 1366 return getunbundler(version, util.chunkbuffer(cgstream), None,
1372 1367 {'clcount': len(outgoing.missing) })
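# Hedged end-to-end sketch (``other`` is an assumed peer object;
# findcommonoutgoing lives in mercurial.discovery):
#
#     from mercurial import discovery
#     outgoing = discovery.findcommonoutgoing(repo, other)
#     chunks = makestream(repo, outgoing, '02', 'bundle')
#     writechunks(repo.ui, chunks, 'example.hg')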
1373 1368
1374 1369 def makestream(repo, outgoing, version, source, fastpath=False,
1375 1370 bundlecaps=None, filematcher=None):
1376 1371 bundler = getbundler(version, repo, bundlecaps=bundlecaps,
1377 1372 filematcher=filematcher)
1378 1373
1379 1374 repo = repo.unfiltered()
1380 1375 commonrevs = outgoing.common
1381 1376 csets = outgoing.missing
1382 1377 heads = outgoing.missingheads
1383 1378 # We go through the fast path if we get told to, or if all (unfiltered)
1384 1379 # heads have been requested (since we then know that all linkrevs will
1385 1380 # be pulled by the client).
1386 1381 heads.sort()
1387 1382 fastpathlinkrev = fastpath or (
1388 1383 repo.filtername is None and heads == sorted(repo.heads()))
1389 1384
1390 1385 repo.hook('preoutgoing', throw=True, source=source)
1391 1386 _changegroupinfo(repo, csets, source)
1392 1387 return bundler.generate(commonrevs, csets, fastpathlinkrev, source)
1393 1388
1394 1389 def _addchangegroupfiles(repo, source, revmap, trp, expectedfiles, needfiles):
1395 1390 revisions = 0
1396 1391 files = 0
1397 1392 progress = repo.ui.makeprogress(_('files'), unit=_('files'),
1398 1393 total=expectedfiles)
1399 1394 for chunkdata in iter(source.filelogheader, {}):
1400 1395 files += 1
1401 1396 f = chunkdata["filename"]
1402 1397 repo.ui.debug("adding %s revisions\n" % f)
1403 1398 progress.increment()
1404 1399 fl = repo.file(f)
1405 1400 o = len(fl)
1406 1401 try:
1407 1402 deltas = source.deltaiter()
1408 1403 if not fl.addgroup(deltas, revmap, trp):
1409 1404 raise error.Abort(_("received file revlog group is empty"))
1410 1405 except error.CensoredBaseError as e:
1411 1406 raise error.Abort(_("received delta base is censored: %s") % e)
1412 1407 revisions += len(fl) - o
1413 1408 if f in needfiles:
1414 1409 needs = needfiles[f]
1415 1410 for new in pycompat.xrange(o, len(fl)):
1416 1411 n = fl.node(new)
1417 1412 if n in needs:
1418 1413 needs.remove(n)
1419 1414 else:
1420 1415 raise error.Abort(
1421 1416 _("received spurious file revlog entry"))
1422 1417 if not needs:
1423 1418 del needfiles[f]
1424 1419 progress.complete()
1425 1420
1426 1421 for f, needs in needfiles.iteritems():
1427 1422 fl = repo.file(f)
1428 1423 for n in needs:
1429 1424 try:
1430 1425 fl.rev(n)
1431 1426 except error.LookupError:
1432 1427 raise error.Abort(
1433 1428 _('missing file data for %s:%s - run hg verify') %
1434 1429 (f, hex(n)))
1435 1430
1436 1431 return revisions, files