##// END OF EJS Templates
changegroup: introduce requests to define delta generation...
Gregory Szorc -
r39054:e793e11e default
parent child Browse files
Show More
@@ -1,1461 +1,1520 b''
1 1 # changegroup.py - Mercurial changegroup manipulation functions
2 2 #
3 3 # Copyright 2006 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import os
11 11 import struct
12 12 import weakref
13 13
14 14 from .i18n import _
15 15 from .node import (
16 16 hex,
17 17 nullid,
18 18 nullrev,
19 19 short,
20 20 )
21 21
22 22 from .thirdparty import (
23 23 attr,
24 24 )
25 25
26 26 from . import (
27 27 dagutil,
28 28 error,
29 29 match as matchmod,
30 30 mdiff,
31 31 phases,
32 32 pycompat,
33 33 repository,
34 34 revlog,
35 35 util,
36 36 )
37 37
38 38 from .utils import (
39 39 stringutil,
40 40 )
41 41
# Fixed-size delta headers, one layout per changegroup version. Each "20s"
# field is a 20-byte node; version 3 appends an unsigned 16-bit flags field
# ("H") and is explicitly big-endian.
_CHANGEGROUPV1_DELTA_HEADER = struct.Struct("20s20s20s20s")
_CHANGEGROUPV2_DELTA_HEADER = struct.Struct("20s20s20s20s20s")
_CHANGEGROUPV3_DELTA_HEADER = struct.Struct(">20s20s20s20s20sH")

LFS_REQUIREMENT = 'lfs'

# Module-level alias so the helpers below can call it unqualified.
readexactly = util.readexactly
49 49
def getchunk(stream):
    """return the next chunk from stream as a string

    The chunk is framed by a 4-byte big-endian signed length that counts
    the length field itself. A length of 0 denotes an empty chunk and
    yields ``""``. Any other length of 4 or less (including negative
    values) is invalid and raises ``error.Abort``.
    """
    d = readexactly(stream, 4)
    l = struct.unpack(">l", d)[0]
    if l <= 4:
        if l:
            raise error.Abort(_("invalid chunk length %d") % l)
        return ""
    # The encoded length includes the 4 header bytes already consumed.
    return readexactly(stream, l - 4)
59 59
def chunkheader(length):
    """return a changegroup chunk header (string)

    ``length`` is the payload size; the encoded value is ``length + 4``
    because the 4-byte big-endian header counts itself.
    """
    return struct.pack(">l", length + 4)
63 63
def closechunk():
    """return a changegroup chunk header (string) for a zero-length chunk"""
    # A literal length of 0 (not 4) is what readers treat as "empty chunk".
    return struct.pack(">l", 0)
67 67
def _fileheader(path):
    """Obtain a changegroup chunk header for a named path.

    Returns the chunk header sized for ``path`` followed by ``path`` itself.
    """
    return chunkheader(len(path)) + path
71 71
def writechunks(ui, chunks, filename, vfs=None):
    """Write chunks to a file and return its filename.

    The stream is assumed to be a bundle file.
    If no filename is specified, a temporary file is created.

    When ``filename`` is given it is opened in ``"wb"`` mode, through
    ``vfs`` if one is supplied and with the builtin ``open`` otherwise.
    If writing fails part-way, the partially written file is removed
    before the exception propagates.
    """
    fh = None
    cleanup = None
    try:
        if filename:
            if vfs:
                fh = vfs.open(filename, "wb")
            else:
                # Increase default buffer size because default is usually
                # small (4k is common on Linux).
                fh = open(filename, "wb", 131072)
        else:
            fd, filename = pycompat.mkstemp(prefix="hg-bundle-", suffix=".hg")
            fh = os.fdopen(fd, r"wb")
        # From here on a failure must not leave a truncated file behind.
        cleanup = filename
        for c in chunks:
            fh.write(c)
        # Everything was written: disarm the cleanup.
        cleanup = None
        return filename
    finally:
        if fh is not None:
            fh.close()
        if cleanup is not None:
            if filename and vfs:
                vfs.unlink(cleanup)
            else:
                os.unlink(cleanup)
105 105
class cg1unpacker(object):
    """Unpacker for cg1 changegroup streams.

    A changegroup unpacker handles the framing of the revision data in
    the wire format. Most consumers will want to use the apply()
    method to add the changes from the changegroup to a repository.

    If you're forwarding a changegroup unmodified to another consumer,
    use getchunks(), which returns an iterator of changegroup
    chunks. This is mostly useful for cases where you need to know the
    data stream has ended by observing the end of the changegroup.

    deltachunk() is useful only if you're applying delta data. Most
    consumers should prefer apply() instead.

    A few other public methods exist. Those are used only for
    bundlerepo and some debug commands - their use is discouraged.
    """
    deltaheader = _CHANGEGROUPV1_DELTA_HEADER
    deltaheadersize = deltaheader.size
    version = '01'
    _grouplistcount = 1 # One list of files after the manifests

    def __init__(self, fh, alg, extras=None):
        """Wrap ``fh`` in a decompressing reader for bundle type ``alg``.

        ``alg`` of None is treated as 'UN'. An unsupported ``alg`` raises
        ``error.Abort``.
        """
        if alg is None:
            alg = 'UN'
        if alg not in util.compengines.supportedbundletypes:
            raise error.Abort(_('unknown stream compression type: %s')
                              % alg)
        if alg == 'BZ':
            alg = '_truncatedBZ'

        compengine = util.compengines.forbundletype(alg)
        self._stream = compengine.decompressorreader(fh)
        self._type = alg
        self.extras = extras or {}
        # Optional zero-argument callable invoked once per non-empty chunk
        # header read by _chunklength(); used for progress reporting.
        self.callback = None

    # These methods (compressed, read, seek, tell) all appear to only
    # be used by bundlerepo, but it's a little hard to tell.
    def compressed(self):
        return self._type is not None and self._type != 'UN'
    def read(self, l):
        return self._stream.read(l)
    def seek(self, pos):
        return self._stream.seek(pos)
    def tell(self):
        return self._stream.tell()
    def close(self):
        return self._stream.close()

    def _chunklength(self):
        """Read a chunk header and return the payload length.

        Returns 0 for an empty chunk. Raises ``error.Abort`` for any other
        encoded length of 4 or less. Invokes ``self.callback`` (if set)
        for every non-empty chunk.
        """
        d = readexactly(self._stream, 4)
        l = struct.unpack(">l", d)[0]
        if l <= 4:
            if l:
                raise error.Abort(_("invalid chunk length %d") % l)
            return 0
        if self.callback:
            self.callback()
        return l - 4

    def changelogheader(self):
        """v10 does not have a changelog header chunk"""
        return {}

    def manifestheader(self):
        """v10 does not have a manifest header chunk"""
        return {}

    def filelogheader(self):
        """return the header of the filelogs chunk, v10 only has the filename"""
        l = self._chunklength()
        if not l:
            return {}
        fname = readexactly(self._stream, l)
        return {'filename': fname}

    def _deltaheader(self, headertuple, prevnode):
        """Normalize an unpacked cg1 delta header.

        cg1 headers carry no delta base: the base is the previous node in
        the group, or p1 for the first entry. Flags are always 0.
        """
        node, p1, p2, cs = headertuple
        if prevnode is None:
            deltabase = p1
        else:
            deltabase = prevnode
        flags = 0
        return node, p1, p2, deltabase, cs, flags

    def deltachunk(self, prevnode):
        """Read one delta chunk.

        Returns ``{}`` at the end of the group, otherwise the tuple
        ``(node, p1, p2, cs, deltabase, delta, flags)``.
        """
        l = self._chunklength()
        if not l:
            return {}
        headerdata = readexactly(self._stream, self.deltaheadersize)
        header = self.deltaheader.unpack(headerdata)
        delta = readexactly(self._stream, l - self.deltaheadersize)
        node, p1, p2, deltabase, cs, flags = self._deltaheader(header, prevnode)
        return (node, p1, p2, cs, deltabase, delta, flags)

    def getchunks(self):
        """return all the chunks contained in the bundle

        Used when you need to forward the binary stream to a file or another
        network API. To do so, it parses the changegroup data, otherwise it
        would block in case of sshrepo because it doesn't know the end of the
        stream.
        """
        # For changegroup 1 and 2, we expect 3 parts: changelog, manifestlog,
        # and a list of filelogs. For changegroup 3, we expect 4 parts:
        # changelog, manifestlog, a list of tree manifestlogs, and a list of
        # filelogs.
        #
        # Changelog and manifestlog parts are terminated with empty chunks. The
        # tree and file parts are a list of entry sections. Each entry section
        # is a series of chunks terminating in an empty chunk. The list of these
        # entry sections is terminated in yet another empty chunk, so we know
        # we've reached the end of the tree/file list when we reach an empty
        # chunk that was preceded by no non-empty chunks.

        parts = 0
        while parts < 2 + self._grouplistcount:
            noentries = True
            while True:
                chunk = getchunk(self)
                if not chunk:
                    # The first two empty chunks represent the end of the
                    # changelog and the manifestlog portions. The remaining
                    # empty chunks represent either A) the end of individual
                    # tree or file entries in the file list, or B) the end of
                    # the entire list. It's the end of the entire list if there
                    # were no entries (i.e. noentries is True).
                    if parts < 2:
                        parts += 1
                    elif noentries:
                        parts += 1
                    break
                noentries = False
                yield chunkheader(len(chunk))
                # Re-emit the payload in slices of at most 1 MiB.
                pos = 0
                while pos < len(chunk):
                    end = pos + 2**20
                    yield chunk[pos:end]
                    pos = end
            yield closechunk()

    def _unpackmanifests(self, repo, revmap, trp, prog):
        """Add the manifest group to ``repo.manifestlog``.

        ``prog.increment`` is installed as the per-chunk callback for the
        duration of the call.
        """
        self.callback = prog.increment
        # no need to check for empty manifest group here:
        # if the result of the merge of 1 and 2 is the same in 3 and 4,
        # no new manifest will be created and the manifest group will
        # be empty during the pull
        self.manifestheader()
        deltas = self.deltaiter()
        repo.manifestlog.addgroup(deltas, revmap, trp)
        prog.complete()
        self.callback = None

    def apply(self, repo, tr, srctype, url, targetphase=phases.draft,
              expectedtotal=None):
        """Add the changegroup returned by source.read() to this repo.
        srctype is a string like 'push', 'pull', or 'unbundle'.  url is
        the URL of the repo where this changegroup is coming from.

        Return an integer summarizing the change to this repo:
        - nothing changed or no source: 0
        - more heads than before: 1+added heads (2..n)
        - fewer heads than before: -1-removed heads (-2..-n)
        - number of heads stays the same: 1
        """
        repo = repo.unfiltered()
        def csmap(x):
            repo.ui.debug("add changeset %s\n" % short(x))
            return len(cl)

        def revmap(x):
            return cl.rev(x)

        changesets = files = revisions = 0

        try:
            # The transaction may already carry source information. In this
            # case we use the top level data. We overwrite the argument
            # because we need to use the top level value (if they exist)
            # in this function.
            srctype = tr.hookargs.setdefault('source', srctype)
            url = tr.hookargs.setdefault('url', url)
            repo.hook('prechangegroup',
                      throw=True, **pycompat.strkwargs(tr.hookargs))

            # write changelog data to temp files so concurrent readers
            # will not see an inconsistent view
            cl = repo.changelog
            cl.delayupdate(tr)
            oldheads = set(cl.heads())

            trp = weakref.proxy(tr)
            # pull off the changeset group
            repo.ui.status(_("adding changesets\n"))
            clstart = len(cl)
            progress = repo.ui.makeprogress(_('changesets'), unit=_('chunks'),
                                            total=expectedtotal)
            self.callback = progress.increment

            efiles = set()
            def onchangelog(cl, node):
                efiles.update(cl.readfiles(node))

            self.changelogheader()
            deltas = self.deltaiter()
            cgnodes = cl.addgroup(deltas, csmap, trp, addrevisioncb=onchangelog)
            # From here on only the number of touched files is needed.
            efiles = len(efiles)

            if not cgnodes:
                repo.ui.develwarn('applied empty changegroup',
                                  config='warn-empty-changegroup')
            clend = len(cl)
            changesets = clend - clstart
            progress.complete()
            self.callback = None

            # pull off the manifest group
            repo.ui.status(_("adding manifests\n"))
            # We know that we'll never have more manifests than we had
            # changesets.
            progress = repo.ui.makeprogress(_('manifests'), unit=_('chunks'),
                                            total=changesets)
            self._unpackmanifests(repo, revmap, trp, progress)

            needfiles = {}
            if repo.ui.configbool('server', 'validate'):
                cl = repo.changelog
                ml = repo.manifestlog
                # validate incoming csets have their manifests
                for cset in pycompat.xrange(clstart, clend):
                    mfnode = cl.changelogrevision(cset).manifest
                    mfest = ml[mfnode].readdelta()
                    # store file cgnodes we must see
                    for f, n in mfest.iteritems():
                        needfiles.setdefault(f, set()).add(n)

            # process the files
            repo.ui.status(_("adding file changes\n"))
            newrevs, newfiles = _addchangegroupfiles(
                repo, self, revmap, trp, efiles, needfiles)
            revisions += newrevs
            files += newfiles

            deltaheads = 0
            if oldheads:
                heads = cl.heads()
                deltaheads = len(heads) - len(oldheads)
                for h in heads:
                    if h not in oldheads and repo[h].closesbranch():
                        deltaheads -= 1
            htext = ""
            if deltaheads:
                htext = _(" (%+d heads)") % deltaheads

            repo.ui.status(_("added %d changesets"
                             " with %d changes to %d files%s\n")
                           % (changesets, revisions, files, htext))
            repo.invalidatevolatilesets()

            if changesets > 0:
                if 'node' not in tr.hookargs:
                    tr.hookargs['node'] = hex(cl.node(clstart))
                    tr.hookargs['node_last'] = hex(cl.node(clend - 1))
                    hookargs = dict(tr.hookargs)
                else:
                    hookargs = dict(tr.hookargs)
                    hookargs['node'] = hex(cl.node(clstart))
                    hookargs['node_last'] = hex(cl.node(clend - 1))
                repo.hook('pretxnchangegroup',
                          throw=True, **pycompat.strkwargs(hookargs))

            added = [cl.node(r) for r in pycompat.xrange(clstart, clend)]
            phaseall = None
            if srctype in ('push', 'serve'):
                # Old servers can not push the boundary themselves.
                # New servers won't push the boundary if changeset already
                # exists locally as secret
                #
                # We should not use added here but the list of all change in
                # the bundle
                if repo.publishing():
                    targetphase = phaseall = phases.public
                else:
                    # closer target phase computation

                    # Those changesets have been pushed from the
                    # outside, their phases are going to be pushed
                    # alongside. Therefore `targetphase` is
                    # ignored.
                    targetphase = phaseall = phases.draft
            if added:
                phases.registernew(repo, tr, targetphase, added)
            if phaseall is not None:
                phases.advanceboundary(repo, tr, phaseall, cgnodes)

            if changesets > 0:

                def runhooks():
                    # These hooks run when the lock releases, not when the
                    # transaction closes. So it's possible for the changelog
                    # to have changed since we last saw it.
                    if clstart >= len(repo):
                        return

                    repo.hook("changegroup", **pycompat.strkwargs(hookargs))

                    for n in added:
                        args = hookargs.copy()
                        args['node'] = hex(n)
                        del args['node_last']
                        repo.hook("incoming", **pycompat.strkwargs(args))

                    newheads = [h for h in repo.heads()
                                if h not in oldheads]
                    repo.ui.log("incoming",
                                "%d incoming changes - new heads: %s\n",
                                len(added),
                                ', '.join([hex(c[:6]) for c in newheads]))

                tr.addpostclose('changegroup-runhooks-%020i' % clstart,
                                lambda tr: repo._afterlock(runhooks))
        finally:
            repo.ui.flush()
        # never return 0 here:
        if deltaheads < 0:
            ret = deltaheads - 1
        else:
            ret = deltaheads + 1
        return ret

    def deltaiter(self):
        """
        returns an iterator of the deltas in this changegroup

        Useful for passing to the underlying storage system to be stored.
        """
        chain = None
        # deltachunk() returns {} at the end of the group, which is the
        # sentinel that stops the two-argument iter().
        for chunkdata in iter(lambda: self.deltachunk(chain), {}):
            # Chunkdata: (node, p1, p2, cs, deltabase, delta, flags)
            yield chunkdata
            chain = chunkdata[0]
448 448
class cg2unpacker(cg1unpacker):
    """Unpacker for cg2 streams.

    cg2 streams add support for generaldelta, so the delta header
    format is slightly different. All other features about the data
    remain the same.
    """
    deltaheader = _CHANGEGROUPV2_DELTA_HEADER
    deltaheadersize = deltaheader.size
    version = '02'

    def _deltaheader(self, headertuple, prevnode):
        """Normalize an unpacked cg2 delta header.

        The delta base is taken from the header itself, so ``prevnode`` is
        unused. Flags are always 0.
        """
        node, p1, p2, deltabase, cs = headertuple
        flags = 0
        return node, p1, p2, deltabase, cs, flags
464 464
class cg3unpacker(cg2unpacker):
    """Unpacker for cg3 streams.

    cg3 streams add support for exchanging treemanifests and revlog
    flags. It adds the revlog flags to the delta header and an empty chunk
    separating manifests and files.
    """
    deltaheader = _CHANGEGROUPV3_DELTA_HEADER
    deltaheadersize = deltaheader.size
    version = '03'
    _grouplistcount = 2 # One list of manifests and one list of files

    def _deltaheader(self, headertuple, prevnode):
        """Return the unpacked cg3 header fields unchanged.

        The header already carries the delta base and flags, so
        ``prevnode`` is unused.
        """
        node, p1, p2, deltabase, cs, flags = headertuple
        return node, p1, p2, deltabase, cs, flags

    def _unpackmanifests(self, repo, revmap, trp, prog):
        """Unpack the root manifest group, then any directory manifests.

        Raises ``error.Abort`` if a directory group adds no revisions.
        """
        super(cg3unpacker, self)._unpackmanifests(repo, revmap, trp, prog)
        # filelogheader() returns {} once the list of directory groups ends.
        for chunkdata in iter(self.filelogheader, {}):
            # If we get here, there are directory manifests in the changegroup
            d = chunkdata["filename"]
            repo.ui.debug("adding %s revisions\n" % d)
            dirlog = repo.manifestlog._revlog.dirlog(d)
            deltas = self.deltaiter()
            if not dirlog.addgroup(deltas, revmap, trp):
                raise error.Abort(_("received dir revlog group is empty"))
491 491
class headerlessfixup(object):
    """File-like reader that replays already-consumed bytes before ``fh``.

    ``h`` holds bytes that were read from ``fh`` earlier; ``read()`` serves
    them first and only then continues reading from ``fh``.
    """
    def __init__(self, fh, h):
        self._h = h
        self._fh = fh
    def read(self, n):
        """Return exactly ``n`` bytes, draining the saved prefix first."""
        if self._h:
            d, self._h = self._h[:n], self._h[n:]
            if len(d) < n:
                # The saved prefix ran out: top up from the real stream.
                d += readexactly(self._fh, n - len(d))
            return d
        return readexactly(self._fh, n)
503 503
@attr.s(slots=True, frozen=True)
class revisiondeltarequest(object):
    """Describes a request to construct a revision delta.

    Instances are converted into ``revisiondelta`` later.
    """
    # Revision whose delta will be generated.
    node = attr.ib()

    # Linknode value.
    linknode = attr.ib()

    # Parent revisions to record in ``revisiondelta`` instance.
    p1node = attr.ib()
    p2node = attr.ib()

    # Base revision that delta should be generated against. If nullrev,
    # the full revision data should be populated. If None, the delta
    # may be generated against any base revision that is an ancestor of
    # this revision. If any other numeric value, the delta should be
    # produced against that revision.
    baserev = attr.ib()

    # Whether this should be marked as an ellipsis revision.
    ellipsis = attr.ib(default=False)
529
@attr.s(slots=True, frozen=True)
class revisiondelta(object):
    """Describes a delta entry in a changegroup.

    Captured data is sufficient to serialize the delta into multiple
    formats.

    ``revision`` and ``delta`` are mutually exclusive.
    """
    # 20 byte node of this revision.
    node = attr.ib()
    # 20 byte nodes of parent revisions.
    p1node = attr.ib()
    p2node = attr.ib()
    # 20 byte node of node this delta is against.
    basenode = attr.ib()
    # 20 byte node of changeset revision this delta is associated with.
    linknode = attr.ib()
    # 2 bytes of flags to apply to revision data.
    flags = attr.ib()
    # Size of base revision this delta is against. May be None if
    # basenode is nullid.
    baserevisionsize = attr.ib()
    # Raw fulltext revision data.
    revision = attr.ib()
    # Delta between the basenode and node.
    delta = attr.ib()
531 557
def _revisiondeltatochunks(delta, headerfn):
    """Serialize a revisiondelta to changegroup chunks.

    ``headerfn`` is called with ``delta`` and must return the encoded
    delta header. Yields the chunk header, the delta header, an optional
    diff prefix and finally the payload.
    """

    # The captured revision delta may be encoded as a delta against
    # a base revision or as a full revision. The changegroup format
    # requires that everything on the wire be deltas. So for full
    # revisions, we need to invent a header that says to rewrite
    # data.

    if delta.delta is not None:
        prefix, data = b'', delta.delta
    elif delta.basenode == nullid:
        data = delta.revision
        prefix = mdiff.trivialdiffheader(len(data))
    else:
        data = delta.revision
        prefix = mdiff.replacediffheader(delta.baserevisionsize,
                                         len(data))

    meta = headerfn(delta)

    yield chunkheader(len(meta) + len(prefix) + len(data))
    yield meta
    if prefix:
        yield prefix
    yield data
558 584
559 585 def _sortnodesnormal(store, nodes, reorder):
560 586 """Sort nodes for changegroup generation and turn into revnums."""
561 587 # for generaldelta revlogs, we linearize the revs; this will both be
562 588 # much quicker and generate a much smaller bundle
563 589 if (store._generaldelta and reorder is None) or reorder:
564 590 dag = dagutil.revlogdag(store)
565 591 return dag.linearize(set(store.rev(n) for n in nodes))
566 592 else:
567 593 return sorted([store.rev(n) for n in nodes])
568 594
569 595 def _sortnodesellipsis(store, nodes, cl, lookup):
570 596 """Sort nodes for changegroup generation and turn into revnums."""
571 597 # Ellipses serving mode.
572 598 #
573 599 # In a perfect world, we'd generate better ellipsis-ified graphs
574 600 # for non-changelog revlogs. In practice, we haven't started doing
575 601 # that yet, so the resulting DAGs for the manifestlog and filelogs
576 602 # are actually full of bogus parentage on all the ellipsis
577 603 # nodes. This has the side effect that, while the contents are
578 604 # correct, the individual DAGs might be completely out of whack in
579 605 # a case like 882681bc3166 and its ancestors (back about 10
580 606 # revisions or so) in the main hg repo.
581 607 #
582 608 # The one invariant we *know* holds is that the new (potentially
583 609 # bogus) DAG shape will be valid if we order the nodes in the
584 610 # order that they're introduced in dramatis personae by the
585 611 # changelog, so what we do is we sort the non-changelog histories
586 612 # by the order in which they are used by the changelog.
587 613 key = lambda n: cl.rev(lookup(n))
588 614 return [store.rev(n) for n in sorted(nodes, key=key)]
589 615
def _handlerevisiondeltarequest(store, request, prev):
    """Obtain a revisiondelta from a revisiondeltarequest

    ``prev`` is the revision emitted immediately before this one; it is
    used as the fallback delta base when the request leaves the choice of
    base open.
    """

    node = request.node
    rev = store.rev(node)

    # Requesting a full revision.
    if request.baserev == nullrev:
        base = nullrev
    # Requesting an explicit revision.
    elif request.baserev is not None:
        base = request.baserev
    # Allowing us to choose.
    else:
        p1, p2 = store.parentrevs(rev)
        dp = store.deltaparent(rev)

        if dp == nullrev and store.storedeltachains:
            # Avoid sending full revisions when delta parent is null. Pick prev
            # in that case. It's tempting to pick p1 in this case, as p1 will
            # be smaller in the common case. However, computing a delta against
            # p1 may require resolving the raw text of p1, which could be
            # expensive. The revlog caches should have prev cached, meaning
            # less CPU for changegroup generation. There is likely room to add
            # a flag and/or config option to control this behavior.
            base = prev
        elif dp == nullrev:
            # revlog is configured to use full snapshot for a reason,
            # stick to full snapshot.
            base = nullrev
        elif dp not in (p1, p2, prev):
            # Pick prev when we can't be sure remote has the base revision.
            base = prev
        else:
            base = dp

        if base != nullrev and not store.candelta(base, rev):
            base = nullrev

    revision = None
    delta = None
    baserevisionsize = None

    if store.iscensored(base) or store.iscensored(rev):
        # Censored data is always sent as a full revision; for a censored
        # node the tombstone carried by the exception is used.
        try:
            revision = store.revision(node, raw=True)
        except error.CensoredNodeError as e:
            revision = e.tombstone

        if base != nullrev:
            baserevisionsize = store.rawsize(base)

    elif base == nullrev:
        revision = store.revision(node, raw=True)
    else:
        delta = store.revdiff(base, rev)

    extraflags = revlog.REVIDX_ELLIPSIS if request.ellipsis else 0

    return revisiondelta(
        node=node,
        p1node=request.p1node,
        p2node=request.p2node,
        linknode=request.linknode,
        basenode=store.node(base),
        flags=store.flags(rev) | extraflags,
        baserevisionsize=baserevisionsize,
        revision=revision,
        delta=delta,
    )
653 686
def _makenarrowdeltarequest(cl, store, ischangelog, rev, node, linkrev,
                            linknode, clrevtolocalrev, fullclnodes,
                            precomputedellipsis):
    """Build the ``revisiondeltarequest`` for an ellipsis revision.

    The parents recorded in the request come from the precomputed
    ellipsis parents of ``linkrev``, translated to revisions of
    ``store``. The request always asks for a full revision
    (``baserev=nullrev``) marked as an ellipsis.
    """
    linkparents = precomputedellipsis[linkrev]
    def local(clrev):
        """Turn a changelog revnum into a local revnum.

        The ellipsis dag is stored as revnums on the changelog,
        but when we're producing ellipsis entries for
        non-changelog revlogs, we need to turn those numbers into
        something local. This does that for us, and during the
        changelog sending phase will also expand the stored
        mappings as needed.
        """
        if clrev == nullrev:
            return nullrev

        if ischangelog:
            return clrev

        # Walk the ellipsis-ized changelog breadth-first looking for a
        # change that has been linked from the current revlog.
        #
        # For a flat manifest revlog only a single step should be necessary
        # as all relevant changelog entries are relevant to the flat
        # manifest.
        #
        # For a filelog or tree manifest dirlog however not every changelog
        # entry will have been relevant, so we need to skip some changelog
        # nodes even after ellipsis-izing.
        walk = [clrev]
        while walk:
            p = walk.pop(0)
            if p in clrevtolocalrev:
                return clrevtolocalrev[p]
            elif p in fullclnodes:
                walk.extend([pp for pp in cl.parentrevs(p)
                             if pp != nullrev])
            elif p in precomputedellipsis:
                walk.extend([pp for pp in precomputedellipsis[p]
                             if pp != nullrev])
            else:
                # In this case, we've got an ellipsis with parents
                # outside the current bundle (likely an
                # incremental pull). We "know" that we can use the
                # value of this same revlog at whatever revision
                # is pointed to by linknode. "Know" is in scare
                # quotes because I haven't done enough examination
                # of edge cases to convince myself this is really
                # a fact - it works for all the (admittedly
                # thorough) cases in our testsuite, but I would be
                # somewhat unsurprised to find a case in the wild
                # where this breaks down a bit. That said, I don't
                # know if it would hurt anything.
                for i in pycompat.xrange(rev, 0, -1):
                    if store.linkrev(i) == clrev:
                        return i
                # We failed to resolve a parent for this node, so
                # we crash the changegroup construction.
                raise error.Abort(
                    'unable to resolve parent while packing %r %r'
                    ' for changeset %r' % (store.indexfile, rev, clrev))

        return nullrev

    if not linkparents or (
        store.parentrevs(rev) == (nullrev, nullrev)):
        p1, p2 = nullrev, nullrev
    elif len(linkparents) == 1:
        p1, = sorted(local(p) for p in linkparents)
        p2 = nullrev
    else:
        p1, p2 = sorted(local(p) for p in linkparents)

    p1node, p2node = store.node(p1), store.node(p2)

    # TODO: try and actually send deltas for ellipsis data blocks
    return revisiondeltarequest(
        node=node,
        p1node=p1node,
        p2node=p2node,
        linknode=linknode,
        baserev=nullrev,
        ellipsis=True,
    )
747 773
def deltagroup(repo, revs, store, ischangelog, lookup, forcedeltaparentprev,
               units=None,
               ellipses=False, clrevtolocalrev=None, fullclnodes=None,
               precomputedellipsis=None):
    """Calculate deltas for a set of revisions.

    Is a generator of ``revisiondelta`` instances.

    If units is not None, progress detail will be generated, units specifies
    the type of revlog that is touched (changelog, manifest, etc.).

    ``revs`` is not modified. When ``ellipses`` is true,
    ``clrevtolocalrev`` is updated in place with the changelog-rev to
    local-rev mapping of every revision visited.
    """
    if not revs:
        return

    # We perform two passes over the revisions whose data we will emit.
    #
    # In the first pass, we obtain information about the deltas that will
    # be generated. This involves computing linknodes and adjusting the
    # request to take shallow fetching into account. The end result of
    # this pass is a list of "request" objects stating which deltas
    # to obtain.
    #
    # The second pass is simply resolving the requested deltas.

    cl = repo.changelog

    # In the first pass, collect info about the deltas we'll be
    # generating.
    requests = []

    # Add the parent of the first rev. Build a new list rather than
    # inserting into ``revs`` so the caller's list is left untouched.
    revs = [store.parentrevs(revs[0])[0]] + list(revs)

    for i in pycompat.xrange(len(revs) - 1):
        prev = revs[i]
        curr = revs[i + 1]

        node = store.node(curr)
        linknode = lookup(node)
        p1node, p2node = store.parents(node)

        if ellipses:
            linkrev = cl.rev(linknode)
            clrevtolocalrev[linkrev] = curr

            # This is a node to send in full, because the changeset it
            # corresponds to was a full changeset.
            if linknode in fullclnodes:
                requests.append(revisiondeltarequest(
                    node=node,
                    p1node=p1node,
                    p2node=p2node,
                    linknode=linknode,
                    baserev=None,
                ))

            elif linkrev not in precomputedellipsis:
                # No request is generated for this revision.
                pass
            else:
                requests.append(_makenarrowdeltarequest(
                    cl, store, ischangelog, curr, node, linkrev, linknode,
                    clrevtolocalrev, fullclnodes,
                    precomputedellipsis))
        else:
            requests.append(revisiondeltarequest(
                node=node,
                p1node=p1node,
                p2node=p2node,
                linknode=linknode,
                baserev=prev if forcedeltaparentprev else None,
            ))

    # We expect the first pass to be fast, so we only engage the progress
    # meter for constructing the revision deltas.
    progress = None
    if units is not None:
        progress = repo.ui.makeprogress(_('bundling'), unit=units,
                                        total=len(requests))

    prevrev = revs[0]
    for i, request in enumerate(requests):
        if progress:
            progress.update(i + 1)

        delta = _handlerevisiondeltarequest(store, request, prevrev)

        yield delta

        # The revision just emitted becomes the "previous" one for the
        # next request.
        prevrev = store.rev(request.node)

    if progress:
        progress.complete()
807 866
class cgpacker(object):
    """Produce changegroup data for a repository.

    ``generate()`` is the entry point. It emits the changelog section, then
    the manifest section(s), then one section per file, each terminated by a
    close chunk.
    """
    def __init__(self, repo, filematcher, version, allowreorder,
                 builddeltaheader, manifestsend,
                 forcedeltaparentprev=False,
                 bundlecaps=None, ellipses=False,
                 shallow=False, ellipsisroots=None, fullnodes=None):
        """Given a source repo, construct a bundler.

        filematcher is a matcher that matches on files to include in the
        changegroup. Used to facilitate sparse changegroups.

        version is the changegroup version identifier. It is stored on the
        instance as ``self.version``.

        allowreorder controls whether reordering of revisions is allowed.
        This value is used when ``bundle.reorder`` is ``auto`` or isn't
        set.

        forcedeltaparentprev indicates whether delta parents must be against
        the previous revision in a delta group. This should only be used for
        compatibility with changegroup version 1.

        builddeltaheader is a callable that constructs the header for a group
        delta.

        manifestsend is a chunk to send after manifests have been fully emitted.

        ellipses indicates whether ellipsis serving mode is enabled.

        bundlecaps is optional and can be used to specify the set of
        capabilities which can be used to build the bundle. While bundlecaps is
        unused in core Mercurial, extensions rely on this feature to communicate
        capabilities to customize the changegroup packer.

        shallow indicates whether shallow data might be sent. The packer may
        need to pack file contents not introduced by the changes being packed.

        ellipsisroots maps ellipsis revs to their roots at the changelog
        level.

        fullnodes is the set of changelog nodes which should not be ellipsis
        nodes. We store this rather than the set of nodes that should be
        ellipsis because for very large histories we expect this to be
        significantly smaller.
        """
        assert filematcher
        self._filematcher = filematcher

        self.version = version
        self._forcedeltaparentprev = forcedeltaparentprev
        self._builddeltaheader = builddeltaheader
        self._manifestsend = manifestsend
        self._ellipses = ellipses

        # Set of capabilities we can use to build the bundle.
        if bundlecaps is None:
            bundlecaps = set()
        self._bundlecaps = bundlecaps
        self._isshallow = shallow
        self._fullclnodes = fullnodes

        # Maps ellipsis revs to their roots at the changelog level.
        self._precomputedellipsis = ellipsisroots

        # experimental config: bundle.reorder
        reorder = repo.ui.config('bundle', 'reorder')
        if reorder == 'auto':
            self._reorder = allowreorder
        else:
            self._reorder = stringutil.parsebool(reorder)

        self._repo = repo

        # Size notes are only emitted in verbose (but not debug) mode;
        # otherwise they are swallowed by a no-op callable.
        if self._repo.ui.verbose and not self._repo.ui.debugflag:
            self._verbosenote = self._repo.ui.note
        else:
            self._verbosenote = lambda s: None

    def generate(self, commonrevs, clnodes, fastpathlinkrev, source):
        """Yield a sequence of changegroup byte chunks.

        commonrevs holds changelog revisions; manifest and file revisions
        whose linkrev is in it are not sent.

        clnodes is the sequence of changelog nodes to send.

        fastpathlinkrev requests the linkrev fast path. It is disabled here
        when reordering is active or the repo uses treemanifests.

        source is forwarded to the manifest/file generators and to the
        ``outgoing`` hook.
        """

        repo = self._repo
        cl = repo.changelog

        self._verbosenote(_('uncompressed size of bundle content:\n'))
        size = 0

        clstate, deltas = self._generatechangelog(cl, clnodes)
        for delta in deltas:
            for chunk in _revisiondeltatochunks(delta, self._builddeltaheader):
                size += len(chunk)
                yield chunk

        close = closechunk()
        size += len(close)
        # Yield the very chunk that was measured, as the manifest and file
        # sections below do, instead of building a second close chunk.
        yield close

        self._verbosenote(_('%8.i (changelog)\n') % size)

        # The state dict is only fully populated now that the changelog
        # deltas have been consumed above.
        clrevorder = clstate['clrevorder']
        mfs = clstate['mfs']
        changedfiles = clstate['changedfiles']

        # We need to make sure that the linkrev in the changegroup refers to
        # the first changeset that introduced the manifest or file revision.
        # The fastpath is usually safer than the slowpath, because the filelogs
        # are walked in revlog order.
        #
        # When taking the slowpath with reorder=None and the manifest revlog
        # uses generaldelta, the manifest may be walked in the "wrong" order.
        # Without 'clrevorder', we would get an incorrect linkrev (see fix in
        # cc0ff93d0c0c).
        #
        # When taking the fastpath, we are only vulnerable to reordering
        # of the changelog itself. The changelog never uses generaldelta, so
        # it is only reordered when reorder=True. To handle this case, we
        # simply take the slowpath, which already has the 'clrevorder' logic.
        # This was also fixed in cc0ff93d0c0c.
        fastpathlinkrev = fastpathlinkrev and not self._reorder
        # Treemanifests don't work correctly with fastpathlinkrev
        # either, because we don't discover which directory nodes to
        # send along with files. This could probably be fixed.
        fastpathlinkrev = fastpathlinkrev and (
                'treemanifest' not in repo.requirements)

        fnodes = {} # needed file nodes

        size = 0
        it = self.generatemanifests(
            commonrevs, clrevorder, fastpathlinkrev, mfs, fnodes, source,
            clstate['clrevtomanifestrev'])

        for dir, deltas in it:
            if dir:
                # Only changegroup version 03 may carry directory manifests.
                assert self.version == b'03'
                chunk = _fileheader(dir)
                size += len(chunk)
                yield chunk

            for delta in deltas:
                chunks = _revisiondeltatochunks(delta, self._builddeltaheader)
                for chunk in chunks:
                    size += len(chunk)
                    yield chunk

            close = closechunk()
            size += len(close)
            yield close

        self._verbosenote(_('%8.i (manifests)\n') % size)
        yield self._manifestsend

        mfdicts = None
        if self._ellipses and self._isshallow:
            mfdicts = [(self._repo.manifestlog[n].read(), lr)
                       for (n, lr) in mfs.iteritems()]

        mfs.clear()
        clrevs = set(cl.rev(x) for x in clnodes)

        it = self.generatefiles(changedfiles, commonrevs,
                                source, mfdicts, fastpathlinkrev,
                                fnodes, clrevs)

        for path, deltas in it:
            h = _fileheader(path)
            # The size reported in the verbose note is per file, so the
            # counter restarts at each file header.
            size = len(h)
            yield h

            for delta in deltas:
                chunks = _revisiondeltatochunks(delta, self._builddeltaheader)
                for chunk in chunks:
                    size += len(chunk)
                    yield chunk

            close = closechunk()
            size += len(close)
            yield close

            self._verbosenote(_('%8.i %s\n') % (size, path))

        # Final close chunk, emitted after all file sections.
        yield closechunk()

        if clnodes:
            repo.hook('outgoing', node=hex(clnodes[0]), source=source)

    def _generatechangelog(self, cl, nodes):
        """Generate data for the changelog section.

        Returns a 2-tuple of a dict containing state and the iterable
        produced by ``deltagroup()`` for the changelog. ``generate()`` turns
        each item of that iterable into byte chunks with
        ``_revisiondeltatochunks()``. The state will not be fully populated
        until the iterable has been fully consumed.

        The state dict has the keys ``clrevorder``, ``mfs``,
        ``changedfiles`` and ``clrevtomanifestrev``.
        """
        clrevorder = {}
        mfs = {} # needed manifests
        mfl = self._repo.manifestlog
        # TODO violates storage abstraction.
        mfrevlog = mfl._revlog
        changedfiles = set()
        clrevtomanifestrev = {}

        # Callback for the changelog, used to collect changed files and
        # manifest nodes.
        # Returns the linkrev node (identity in the changelog case).
        def lookupcl(x):
            c = cl.read(x)
            clrevorder[x] = len(clrevorder)

            if self._ellipses:
                # Only update mfs if x is going to be sent. Otherwise we
                # end up with bogus linkrevs specified for manifests and
                # we skip some manifest nodes that we should otherwise
                # have sent.
                if (x in self._fullclnodes
                    or cl.rev(x) in self._precomputedellipsis):
                    n = c[0]
                    # Record the first changeset introducing this manifest
                    # version.
                    mfs.setdefault(n, x)
                    # Set this narrow-specific dict so we have the lowest
                    # manifest revnum to look up for this cl revnum. (Part of
                    # mapping changelog ellipsis parents to manifest ellipsis
                    # parents)
                    clrevtomanifestrev.setdefault(cl.rev(x), mfrevlog.rev(n))
                # We can't trust the changed files list in the changeset if the
                # client requested a shallow clone.
                if self._isshallow:
                    changedfiles.update(mfl[c[0]].read().keys())
                else:
                    changedfiles.update(c[3])
            else:

                n = c[0]
                # record the first changeset introducing this manifest version
                mfs.setdefault(n, x)
                # Record a complete list of potentially-changed files in
                # this manifest.
                changedfiles.update(c[3])

            return x

        # Changelog doesn't benefit from reordering revisions. So send out
        # revisions in store order.
        revs = sorted(cl.rev(n) for n in nodes)

        state = {
            'clrevorder': clrevorder,
            'mfs': mfs,
            'changedfiles': changedfiles,
            'clrevtomanifestrev': clrevtomanifestrev,
        }

        gen = deltagroup(
            self._repo, revs, cl, True, lookupcl,
            self._forcedeltaparentprev,
            ellipses=self._ellipses,
            units=_('changesets'),
            clrevtolocalrev={},
            fullclnodes=self._fullclnodes,
            precomputedellipsis=self._precomputedellipsis)

        return state, gen

    def generatemanifests(self, commonrevs, clrevorder, fastpathlinkrev, mfs,
                          fnodes, source, clrevtolocalrev):
        """Yield ``(dir, deltas)`` pairs describing manifests to send.

        ``dir`` is the manifest directory ('' for the root manifest) and
        ``deltas`` is the iterable produced by ``deltagroup()`` for that
        directory's store. Unless the linkrev fast path is in use, consuming
        ``deltas`` populates ``fnodes`` and queues subdirectory manifests.

        `source` is unused here, but is used by extensions like remotefilelog to
        change what is sent based in pulls vs pushes, etc.
        """
        repo = self._repo
        cl = repo.changelog
        mfl = repo.manifestlog
        dirlog = mfl._revlog.dirlog
        # Work list of directory -> {manifest node: changelog node}. It grows
        # as subdirectory manifests are discovered by lookupmflinknode().
        tmfnodes = {'': mfs}

        # Callback for the manifest, used to collect linkrevs for filelog
        # revisions.
        # Returns the linkrev node (collected in lookupcl).
        def makelookupmflinknode(dir, nodes):
            if fastpathlinkrev:
                assert not dir
                return mfs.__getitem__

            def lookupmflinknode(x):
                """Callback for looking up the linknode for manifests.

                Returns the linkrev node for the specified manifest.

                SIDE EFFECT:

                1) fclnodes gets populated with the list of relevant
                   file nodes if we're not using fastpathlinkrev
                2) When treemanifests are in use, collects treemanifest nodes
                   to send

                Note that this means manifests must be completely sent to
                the client before you can trust the list of files and
                treemanifests to send.
                """
                clnode = nodes[x]
                mdata = mfl.get(dir, x).readfast(shallow=True)
                for p, n, fl in mdata.iterentries():
                    if fl == 't': # subdirectory manifest
                        subdir = dir + p + '/'
                        tmfclnodes = tmfnodes.setdefault(subdir, {})
                        tmfclnode = tmfclnodes.setdefault(n, clnode)
                        # Keep the changeset that comes first in clrevorder.
                        if clrevorder[clnode] < clrevorder[tmfclnode]:
                            tmfclnodes[n] = clnode
                    else:
                        f = dir + p
                        fclnodes = fnodes.setdefault(f, {})
                        fclnode = fclnodes.setdefault(n, clnode)
                        # Keep the changeset that comes first in clrevorder.
                        if clrevorder[clnode] < clrevorder[fclnode]:
                            fclnodes[n] = clnode
                return clnode
            return lookupmflinknode

        while tmfnodes:
            dir, nodes = tmfnodes.popitem()
            store = dirlog(dir)

            if not self._filematcher.visitdir(store._dir[:-1] or '.'):
                prunednodes = []
            else:
                frev, flr = store.rev, store.linkrev
                prunednodes = [n for n in nodes
                               if flr(frev(n)) not in commonrevs]

            # Subdirectories with nothing to send are skipped entirely; the
            # root manifest ('') always produces a (possibly empty) group.
            if dir and not prunednodes:
                continue

            lookupfn = makelookupmflinknode(dir, nodes)

            if self._ellipses:
                revs = _sortnodesellipsis(store, prunednodes, cl,
                                          lookupfn)
            else:
                revs = _sortnodesnormal(store, prunednodes,
                                        self._reorder)

            deltas = deltagroup(
                self._repo, revs, store, False, lookupfn,
                self._forcedeltaparentprev,
                ellipses=self._ellipses,
                units=_('manifests'),
                clrevtolocalrev=clrevtolocalrev,
                fullclnodes=self._fullclnodes,
                precomputedellipsis=self._precomputedellipsis)

            yield dir, deltas

    # The 'source' parameter is useful for extensions
    def generatefiles(self, changedfiles, commonrevs, source,
                      mfdicts, fastpathlinkrev, fnodes, clrevs):
        """Yield ``(fname, deltas)`` pairs for the files to send.

        Files in ``changedfiles`` that do not pass the file matcher are
        dropped. Files for which every candidate node has a linkrev in
        ``commonrevs`` yield nothing. ``deltas`` is the iterable produced by
        ``deltagroup()`` for that file's revlog.

        Raises ``error.Abort`` if the store for a changed file is empty or
        missing.
        """
        changedfiles = list(filter(self._filematcher, changedfiles))

        if not fastpathlinkrev:
            # Slow path: link nodes were collected in ``fnodes``.
            def normallinknodes(unused, fname):
                return fnodes.get(fname, {})
        else:
            cln = self._repo.changelog.node

            # Fast path: derive link nodes from the filelog's own linkrevs,
            # restricted to the changesets being sent.
            def normallinknodes(store, fname):
                flinkrev = store.linkrev
                fnode = store.node
                revs = ((r, flinkrev(r)) for r in store)
                return dict((fnode(r), cln(lr))
                            for r, lr in revs if lr in clrevs)

        clrevtolocalrev = {}

        if self._isshallow:
            # In a shallow clone, the linknodes callback needs to also include
            # those file nodes that are in the manifests we sent but weren't
            # introduced by those manifests.
            commonctxs = [self._repo[c] for c in commonrevs]
            clrev = self._repo.changelog.rev

            # Shallow variant of the link node lookup: it wraps
            # normallinknodes() and, as a side effect, fills clrevtolocalrev.
            # TODO have caller pass in appropriate function.
            def linknodes(flog, fname):
                for c in commonctxs:
                    try:
                        fnode = c.filenode(fname)
                        clrevtolocalrev[c.rev()] = flog.rev(fnode)
                    except error.ManifestLookupError:
                        # The file is not in this common changeset's manifest.
                        pass
                links = normallinknodes(flog, fname)
                if len(links) != len(mfdicts):
                    for mf, lr in mfdicts:
                        fnode = mf.get(fname, None)
                        if fnode in links:
                            links[fnode] = min(links[fnode], lr, key=clrev)
                        elif fnode:
                            links[fnode] = lr
                return links
        else:
            linknodes = normallinknodes

        repo = self._repo
        cl = repo.changelog
        progress = repo.ui.makeprogress(_('bundling'), unit=_('files'),
                                        total=len(changedfiles))
        for i, fname in enumerate(sorted(changedfiles)):
            filerevlog = repo.file(fname)
            if not filerevlog:
                raise error.Abort(_("empty or missing file data for %s") %
                                  fname)

            # The mapping is shared with deltagroup() and is per file.
            clrevtolocalrev.clear()

            linkrevnodes = linknodes(filerevlog, fname)
            # Lookup for filenodes, we collected the linkrev nodes above in the
            # fastpath case and with lookupmf in the slowpath case.
            def lookupfilelog(x):
                return linkrevnodes[x]

            frev, flr = filerevlog.rev, filerevlog.linkrev
            filenodes = [n for n in linkrevnodes
                         if flr(frev(n)) not in commonrevs]

            if filenodes:
                if self._ellipses:
                    revs = _sortnodesellipsis(filerevlog, filenodes,
                                              cl, lookupfilelog)
                else:
                    revs = _sortnodesnormal(filerevlog, filenodes,
                                            self._reorder)

                progress.update(i + 1, item=fname)

                deltas = deltagroup(
                    self._repo, revs, filerevlog, False, lookupfilelog,
                    self._forcedeltaparentprev,
                    ellipses=self._ellipses,
                    clrevtolocalrev=clrevtolocalrev,
                    fullclnodes=self._fullclnodes,
                    precomputedellipsis=self._precomputedellipsis)

                yield fname, deltas

        progress.complete()

1246 1305
def _makecg1packer(repo, filematcher, bundlecaps, ellipses=False,
                   shallow=False, ellipsisroots=None, fullnodes=None):
    """Construct a ``cgpacker`` for changegroup version 01."""
    def packheader(delta):
        # The version 01 header has four fields and no delta base node.
        return _CHANGEGROUPV1_DELTA_HEADER.pack(
            delta.node, delta.p1node, delta.p2node, delta.linknode)

    return cgpacker(
        repo,
        filematcher,
        b'01',
        allowreorder=None,
        builddeltaheader=packheader,
        manifestsend=b'',
        # Deltas are forced against the previous revision in the group.
        forcedeltaparentprev=True,
        bundlecaps=bundlecaps,
        ellipses=ellipses,
        shallow=shallow,
        ellipsisroots=ellipsisroots,
        fullnodes=fullnodes,
    )

1262 1321
def _makecg2packer(repo, filematcher, bundlecaps, ellipses=False,
                   shallow=False, ellipsisroots=None, fullnodes=None):
    """Construct a ``cgpacker`` for changegroup version 02."""
    def packheader(delta):
        # Unlike version 01, the header also records the delta base node.
        return _CHANGEGROUPV2_DELTA_HEADER.pack(
            delta.node, delta.p1node, delta.p2node, delta.basenode,
            delta.linknode)

    # cg2 supports generaldelta directly, so reordering rarely helps.
    # It is therefore off by default: bundle.reorder=auto behaves like
    # bundle.reorder=False.
    return cgpacker(
        repo,
        filematcher,
        b'02',
        allowreorder=False,
        builddeltaheader=packheader,
        manifestsend=b'',
        bundlecaps=bundlecaps,
        ellipses=ellipses,
        shallow=shallow,
        ellipsisroots=ellipsisroots,
        fullnodes=fullnodes,
    )

1280 1339
def _makecg3packer(repo, filematcher, bundlecaps, ellipses=False,
                   shallow=False, ellipsisroots=None, fullnodes=None):
    """Construct a ``cgpacker`` for changegroup version 03."""
    def packheader(delta):
        # Same fields as version 02, followed by the revision flags.
        return _CHANGEGROUPV3_DELTA_HEADER.pack(
            delta.node, delta.p1node, delta.p2node, delta.basenode,
            delta.linknode, delta.flags)

    return cgpacker(
        repo,
        filematcher,
        b'03',
        allowreorder=False,
        builddeltaheader=packheader,
        # A close chunk is sent once manifests have been fully emitted.
        manifestsend=closechunk(),
        bundlecaps=bundlecaps,
        ellipses=ellipses,
        shallow=shallow,
        ellipsisroots=ellipsisroots,
        fullnodes=fullnodes,
    )

1295 1354
# Maps a changegroup version to its (packer factory, unpacker class) pair.
_packermap = {
    '01': (_makecg1packer, cg1unpacker),
    # cg2 adds support for exchanging generaldelta
    '02': (_makecg2packer, cg2unpacker),
    # cg3 adds support for exchanging revlog flags and treemanifests
    '03': (_makecg3packer, cg3unpacker),
}
1302 1361
def allsupportedversions(repo):
    """Return the set of changegroup versions known for ``repo``.

    Version '03' is only included when one of the experimental
    ``changegroup3``/``treemanifest`` options is set or the repo has the
    ``treemanifest`` requirement.
    """
    known = set(_packermap)
    ui = repo.ui
    cg3enabled = (ui.configbool('experimental', 'changegroup3')
                  or ui.configbool('experimental', 'treemanifest')
                  or 'treemanifest' in repo.requirements)
    if not cg3enabled:
        known.discard('03')
    return known

1310 1369
def supportedincomingversions(repo):
    """Return the changegroup versions that can be applied to the repo."""
    incoming = allsupportedversions(repo)
    return incoming

1314 1373
def supportedoutgoingversions(repo):
    """Return the changegroup versions that can be created from the repo.

    Versions '01' and '02' are removed when the repo has a requirement they
    cannot represent.
    """
    outgoing = allsupportedversions(repo)
    requirements = repo.requirements

    blockers = (
        # Versions 01 and 02 support only flat manifests and it's just too
        # expensive to convert between the flat manifest and tree manifest on
        # the fly. Since tree manifests are hashed differently, all of history
        # would have to be converted. Instead, we simply don't even pretend to
        # support versions 01 and 02.
        'treemanifest',
        # Versions 01 and 02 don't support revlog flags, and we need to
        # support that for stripping and unbundling to work.
        repository.NARROW_REQUIREMENT,
        # Versions 01 and 02 don't support revlog flags, and we need to
        # mark LFS entries with REVIDX_EXTSTORED.
        LFS_REQUIREMENT,
    )
    for requirement in blockers:
        if requirement in requirements:
            outgoing.discard('01')
            outgoing.discard('02')

    return outgoing

1338 1397
def localversion(repo):
    """Return the best changegroup version for locally used bundles.

    This covers bundles such as those from strip and shelve, and temporary
    bundles. The highest supported outgoing version is chosen.
    """
    candidates = supportedoutgoingversions(repo)
    return max(candidates)

1343 1402
def safeversion(repo):
    """Return the smallest version clients of the repo can be assumed to
    support.

    For example, all hg versions that support generaldelta also support
    changegroup 02, so '01' is not considered for generaldelta repos.
    """
    candidates = supportedoutgoingversions(repo)
    usesgeneraldelta = 'generaldelta' in repo.requirements
    if usesgeneraldelta:
        candidates.discard('01')
    assert candidates
    return min(candidates)

1353 1412
def getbundler(version, repo, bundlecaps=None, filematcher=None,
               ellipses=False, shallow=False, ellipsisroots=None,
               fullnodes=None):
    """Return a packer for ``version`` bound to ``repo``.

    When no ``filematcher`` is given, an always-matcher is used. The matcher
    is intersected with the repo's narrow matcher before use.

    Raises ``error.ProgrammingError`` for version 01 with a matcher that does
    not always match, and ``error.Abort`` when ellipsis mode is combined with
    version 01 or 02.
    """
    assert version in supportedoutgoingversions(repo)

    if filematcher is None:
        filematcher = matchmod.alwaysmatcher(repo.root, '')

    issparse = not filematcher.always()
    if version == '01' and issparse:
        raise error.ProgrammingError('version 01 changegroups do not support '
                                     'sparse file matchers')

    if ellipses:
        if version in (b'01', b'02'):
            raise error.Abort(
                _('ellipsis nodes require at least cg3 on client and server, '
                  'but negotiated version %s') % version)

    # Requested files could include files not in the local store. So
    # filter those out.
    narrowed = matchmod.intersectmatchers(repo.narrowmatch(), filematcher)

    makepacker = _packermap[version][0]
    return makepacker(repo, narrowed, bundlecaps,
                      ellipses=ellipses,
                      shallow=shallow,
                      ellipsisroots=ellipsisroots,
                      fullnodes=fullnodes)

1380 1439
def getunbundler(version, fh, alg, extras=None):
    """Instantiate the unpacker registered for ``version``."""
    unpackercls = _packermap[version][1]
    return unpackercls(fh, alg, extras=extras)

1383 1442
def _changegroupinfo(repo, nodes, source):
    """Report how many changesets were found and, in debug mode, list them."""
    ui = repo.ui
    if ui.verbose or source == 'bundle':
        ui.status(_("%d changesets found\n") % len(nodes))

    if not ui.debugflag:
        return

    ui.debug("list of changesets:\n")
    for clnode in nodes:
        ui.debug("%s\n" % hex(clnode))

1391 1450
def makechangegroup(repo, outgoing, version, source, fastpath=False,
                    bundlecaps=None):
    """Build a changegroup for ``outgoing`` and wrap it in an unbundler.

    The number of missing changesets is passed to the unbundler as the
    ``clcount`` extra.
    """
    stream = makestream(repo, outgoing, version, source,
                        fastpath=fastpath, bundlecaps=bundlecaps)
    buffered = util.chunkbuffer(stream)
    extras = {'clcount': len(outgoing.missing)}
    return getunbundler(version, buffered, None, extras)

1398 1457
def makestream(repo, outgoing, version, source, fastpath=False,
               bundlecaps=None, filematcher=None):
    """Return the chunk generator of a changegroup for ``outgoing``.

    Runs the ``preoutgoing`` hook and reports the changesets found before
    handing over to the bundler. ``outgoing.missingheads`` is sorted in
    place.
    """
    bundler = getbundler(version, repo, bundlecaps=bundlecaps,
                         filematcher=filematcher)

    unfi = repo.unfiltered()
    common = outgoing.common
    missing = outgoing.missing
    missingheads = outgoing.missingheads
    missingheads.sort()

    # Take the fast path when told to, or when all (unfiltered) heads have
    # been requested: in that case every linkrev will be pulled by the
    # client.
    if fastpath:
        fastpathlinkrev = fastpath
    else:
        fastpathlinkrev = (unfi.filtername is None
                           and missingheads == sorted(unfi.heads()))

    unfi.hook('preoutgoing', throw=True, source=source)
    _changegroupinfo(unfi, missing, source)
    return bundler.generate(common, missing, fastpathlinkrev, source)

1418 1477
def _addchangegroupfiles(repo, source, revmap, trp, expectedfiles, needfiles):
    """Apply the file sections of an incoming changegroup to ``repo``.

    ``needfiles`` maps a filename to the file nodes expected for it. Entries
    are removed as the nodes arrive. Anything left afterwards must already
    exist in the corresponding filelog.

    Returns a ``(revisions, files)`` tuple: the number of file revisions
    added and the number of file sections processed.

    Raises ``error.Abort`` on an empty file group, a censored delta base, an
    unexpected file revision, or missing file data.
    """
    addedrevs = 0
    filecount = 0
    progress = repo.ui.makeprogress(_('files'), unit=_('files'),
                                    total=expectedfiles)

    while True:
        header = source.filelogheader()
        # An empty header marks the end of the file sections.
        if header == {}:
            break

        filecount += 1
        fname = header["filename"]
        repo.ui.debug("adding %s revisions\n" % fname)
        progress.increment()

        filelog = repo.file(fname)
        startrev = len(filelog)
        try:
            deltas = source.deltaiter()
            if not filelog.addgroup(deltas, revmap, trp):
                raise error.Abort(_("received file revlog group is empty"))
        except error.CensoredBaseError as e:
            raise error.Abort(_("received delta base is censored: %s") % e)

        endrev = len(filelog)
        addedrevs += endrev - startrev

        if fname not in needfiles:
            continue

        # Every newly added revision must be one that was expected.
        expected = needfiles[fname]
        for rev in pycompat.xrange(startrev, endrev):
            node = filelog.node(rev)
            if node not in expected:
                raise error.Abort(
                    _("received spurious file revlog entry"))
            expected.remove(node)
        if not expected:
            del needfiles[fname]

    progress.complete()

    # Nodes still outstanding must be present in the local filelogs.
    for fname, expected in needfiles.iteritems():
        filelog = repo.file(fname)
        for node in expected:
            try:
                filelog.rev(node)
            except error.LookupError:
                raise error.Abort(
                    _('missing file data for %s:%s - run hg verify') %
                    (fname, hex(node)))

    return addedrevs, filecount
General Comments 0
You need to be logged in to leave comments. Login now