changegroup: capture base node instead of rev in delta request...
Gregory Szorc
r39055:d0d197ab default
@@ -1,1520 +1,1520 @@
1 1 # changegroup.py - Mercurial changegroup manipulation functions
2 2 #
3 3 # Copyright 2006 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import os
11 11 import struct
12 12 import weakref
13 13
14 14 from .i18n import _
15 15 from .node import (
16 16 hex,
17 17 nullid,
18 18 nullrev,
19 19 short,
20 20 )
21 21
22 22 from .thirdparty import (
23 23 attr,
24 24 )
25 25
26 26 from . import (
27 27 dagutil,
28 28 error,
29 29 match as matchmod,
30 30 mdiff,
31 31 phases,
32 32 pycompat,
33 33 repository,
34 34 revlog,
35 35 util,
36 36 )
37 37
38 38 from .utils import (
39 39 stringutil,
40 40 )
41 41
42 42 _CHANGEGROUPV1_DELTA_HEADER = struct.Struct("20s20s20s20s")
43 43 _CHANGEGROUPV2_DELTA_HEADER = struct.Struct("20s20s20s20s20s")
44 44 _CHANGEGROUPV3_DELTA_HEADER = struct.Struct(">20s20s20s20s20sH")
45 45
46 46 LFS_REQUIREMENT = 'lfs'
47 47
48 48 readexactly = util.readexactly
49 49
50 50 def getchunk(stream):
51 51 """return the next chunk from stream as a string"""
52 52 d = readexactly(stream, 4)
53 53 l = struct.unpack(">l", d)[0]
54 54 if l <= 4:
55 55 if l:
56 56 raise error.Abort(_("invalid chunk length %d") % l)
57 57 return ""
58 58 return readexactly(stream, l - 4)
59 59
60 60 def chunkheader(length):
61 61 """return a changegroup chunk header (string)"""
62 62 return struct.pack(">l", length + 4)
63 63
64 64 def closechunk():
65 65 """return a changegroup chunk header (string) for a zero-length chunk"""
66 66 return struct.pack(">l", 0)
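
These three helpers define the changegroup chunk framing: every chunk is a big-endian 4-byte length that counts its own prefix, and a zero length terminates a group. A minimal round-trip sketch of that framing, independent of Mercurial's stream classes:

    import struct

    def frame(payload):
        # Length prefix counts its own 4 bytes, matching chunkheader().
        return struct.pack(">l", len(payload) + 4) + payload

    def unframe(buf, pos=0):
        # Returns (payload, newpos); b'' signals a terminating chunk.
        (l,) = struct.unpack_from(">l", buf, pos)
        if l == 0:
            return b'', pos + 4
        return buf[pos + 4:pos + l], pos + l

    stream = frame(b'hello') + struct.pack(">l", 0)
    payload, pos = unframe(stream)
    assert payload == b'hello'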
67 67
68 68 def _fileheader(path):
69 69 """Obtain a changegroup chunk header for a named path."""
70 70 return chunkheader(len(path)) + path
71 71
72 72 def writechunks(ui, chunks, filename, vfs=None):
73 73 """Write chunks to a file and return its filename.
74 74
75 75 The stream is assumed to be a bundle file.
76 76 Existing files will not be overwritten.
77 77 If no filename is specified, a temporary file is created.
78 78 """
79 79 fh = None
80 80 cleanup = None
81 81 try:
82 82 if filename:
83 83 if vfs:
84 84 fh = vfs.open(filename, "wb")
85 85 else:
86 86 # Increase default buffer size because default is usually
87 87 # small (4k is common on Linux).
88 88 fh = open(filename, "wb", 131072)
89 89 else:
90 90 fd, filename = pycompat.mkstemp(prefix="hg-bundle-", suffix=".hg")
91 91 fh = os.fdopen(fd, r"wb")
92 92 cleanup = filename
93 93 for c in chunks:
94 94 fh.write(c)
95 95 cleanup = None
96 96 return filename
97 97 finally:
98 98 if fh is not None:
99 99 fh.close()
100 100 if cleanup is not None:
101 101 if filename and vfs:
102 102 vfs.unlink(cleanup)
103 103 else:
104 104 os.unlink(cleanup)
105 105
106 106 class cg1unpacker(object):
107 107 """Unpacker for cg1 changegroup streams.
108 108
109 109 A changegroup unpacker handles the framing of the revision data in
110 110 the wire format. Most consumers will want to use the apply()
111 111 method to add the changes from the changegroup to a repository.
112 112
113 113 If you're forwarding a changegroup unmodified to another consumer,
114 114 use getchunks(), which returns an iterator of changegroup
115 115 chunks. This is mostly useful for cases where you need to know the
116 116 data stream has ended by observing the end of the changegroup.
117 117
118 118 deltachunk() is useful only if you're applying delta data. Most
119 119 consumers should prefer apply() instead.
120 120
121 121 A few other public methods exist. Those are used only for
122 122 bundlerepo and some debug commands - their use is discouraged.
123 123 """
124 124 deltaheader = _CHANGEGROUPV1_DELTA_HEADER
125 125 deltaheadersize = deltaheader.size
126 126 version = '01'
127 127 _grouplistcount = 1 # One list of files after the manifests
128 128
129 129 def __init__(self, fh, alg, extras=None):
130 130 if alg is None:
131 131 alg = 'UN'
132 132 if alg not in util.compengines.supportedbundletypes:
133 133 raise error.Abort(_('unknown stream compression type: %s')
134 134 % alg)
135 135 if alg == 'BZ':
136 136 alg = '_truncatedBZ'
137 137
138 138 compengine = util.compengines.forbundletype(alg)
139 139 self._stream = compengine.decompressorreader(fh)
140 140 self._type = alg
141 141 self.extras = extras or {}
142 142 self.callback = None
143 143
144 144 # These methods (compressed, read, seek, tell) all appear to only
145 145 # be used by bundlerepo, but it's a little hard to tell.
146 146 def compressed(self):
147 147 return self._type is not None and self._type != 'UN'
148 148 def read(self, l):
149 149 return self._stream.read(l)
150 150 def seek(self, pos):
151 151 return self._stream.seek(pos)
152 152 def tell(self):
153 153 return self._stream.tell()
154 154 def close(self):
155 155 return self._stream.close()
156 156
157 157 def _chunklength(self):
158 158 d = readexactly(self._stream, 4)
159 159 l = struct.unpack(">l", d)[0]
160 160 if l <= 4:
161 161 if l:
162 162 raise error.Abort(_("invalid chunk length %d") % l)
163 163 return 0
164 164 if self.callback:
165 165 self.callback()
166 166 return l - 4
167 167
168 168 def changelogheader(self):
169 169 """v10 does not have a changelog header chunk"""
170 170 return {}
171 171
172 172 def manifestheader(self):
173 173 """v10 does not have a manifest header chunk"""
174 174 return {}
175 175
176 176 def filelogheader(self):
177 177 """return the header of the filelogs chunk, v10 only has the filename"""
178 178 l = self._chunklength()
179 179 if not l:
180 180 return {}
181 181 fname = readexactly(self._stream, l)
182 182 return {'filename': fname}
183 183
184 184 def _deltaheader(self, headertuple, prevnode):
185 185 node, p1, p2, cs = headertuple
186 186 if prevnode is None:
187 187 deltabase = p1
188 188 else:
189 189 deltabase = prevnode
190 190 flags = 0
191 191 return node, p1, p2, deltabase, cs, flags
192 192
193 193 def deltachunk(self, prevnode):
194 194 l = self._chunklength()
195 195 if not l:
196 196 return {}
197 197 headerdata = readexactly(self._stream, self.deltaheadersize)
198 198 header = self.deltaheader.unpack(headerdata)
199 199 delta = readexactly(self._stream, l - self.deltaheadersize)
200 200 node, p1, p2, deltabase, cs, flags = self._deltaheader(header, prevnode)
201 201 return (node, p1, p2, cs, deltabase, delta, flags)
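
A cg1 delta header is four 20-byte nodes (node, p1, p2, linked changeset), and the delta base is implicit: p1 for the first chunk in a group, the previous node afterwards. A small sketch of decoding one header with the same struct layout (the node values are placeholders, not real hashes):

    import struct

    CG1_HEADER = struct.Struct("20s20s20s20s")

    def parse_cg1_header(data, prevnode):
        # Mirrors cg1unpacker._deltaheader: base is p1 or the prev node.
        node, p1, p2, cs = CG1_HEADER.unpack(data[:CG1_HEADER.size])
        deltabase = p1 if prevnode is None else prevnode
        return node, p1, p2, deltabase, cs

    header = b'N' * 20 + b'A' * 20 + b'B' * 20 + b'C' * 20
    node, p1, p2, base, cs = parse_cg1_header(header, None)
    assert base == p1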
202 202
203 203 def getchunks(self):
204 204 """returns all the chunks contained in the bundle
205 205
206 206 Used when you need to forward the binary stream to a file or another
207 207 network API. To do so, it parses the changegroup data; otherwise it
208 208 would block on an sshrepo stream because it doesn't know where it ends.
209 209 """
210 210 # For changegroup 1 and 2, we expect 3 parts: changelog, manifestlog,
211 211 # and a list of filelogs. For changegroup 3, we expect 4 parts:
212 212 # changelog, manifestlog, a list of tree manifestlogs, and a list of
213 213 # filelogs.
214 214 #
215 215 # Changelog and manifestlog parts are terminated with empty chunks. The
216 216 # tree and file parts are a list of entry sections. Each entry section
217 217 # is a series of chunks terminating in an empty chunk. The list of these
218 218 # entry sections is terminated in yet another empty chunk, so we know
219 219 # we've reached the end of the tree/file list when we reach an empty
220 220 # chunk that was preceded by no non-empty chunks.
221 221
222 222 parts = 0
223 223 while parts < 2 + self._grouplistcount:
224 224 noentries = True
225 225 while True:
226 226 chunk = getchunk(self)
227 227 if not chunk:
228 228 # The first two empty chunks represent the end of the
229 229 # changelog and the manifestlog portions. The remaining
230 230 # empty chunks represent either A) the end of individual
231 231 # tree or file entries in the file list, or B) the end of
232 232 # the entire list. It's the end of the entire list if there
233 233 # were no entries (i.e. noentries is True).
234 234 if parts < 2:
235 235 parts += 1
236 236 elif noentries:
237 237 parts += 1
238 238 break
239 239 noentries = False
240 240 yield chunkheader(len(chunk))
241 241 pos = 0
242 242 while pos < len(chunk):
243 243 next = pos + 2**20
244 244 yield chunk[pos:next]
245 245 pos = next
246 246 yield closechunk()
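
A flattened toy version of the terminator bookkeeping above, counting parts the way getchunks() does (a hypothetical helper, not part of this module):

    def count_parts(chunks):
        # First two empty chunks end the changelog and manifest groups.
        # In an entry list, an empty chunk ends one entry; an empty
        # chunk with no preceding entries ends the whole list.
        parts = 0
        noentries = True
        for chunk in chunks:
            if not chunk:
                if parts < 2 or noentries:
                    parts += 1
                noentries = True
            else:
                noentries = False
        return parts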
247 247
248 248 def _unpackmanifests(self, repo, revmap, trp, prog):
249 249 self.callback = prog.increment
250 250 # no need to check for empty manifest group here:
251 251 # if the result of the merge of 1 and 2 is the same in 3 and 4,
252 252 # no new manifest will be created and the manifest group will
253 253 # be empty during the pull
254 254 self.manifestheader()
255 255 deltas = self.deltaiter()
256 256 repo.manifestlog.addgroup(deltas, revmap, trp)
257 257 prog.complete()
258 258 self.callback = None
259 259
260 260 def apply(self, repo, tr, srctype, url, targetphase=phases.draft,
261 261 expectedtotal=None):
262 262 """Add the changegroup returned by source.read() to this repo.
263 263 srctype is a string like 'push', 'pull', or 'unbundle'. url is
264 264 the URL of the repo where this changegroup is coming from.
265 265
266 266 Return an integer summarizing the change to this repo:
267 267 - nothing changed or no source: 0
268 268 - more heads than before: 1+added heads (2..n)
269 269 - fewer heads than before: -1-removed heads (-2..-n)
270 270 - number of heads stays the same: 1
271 271 """
272 272 repo = repo.unfiltered()
273 273 def csmap(x):
274 274 repo.ui.debug("add changeset %s\n" % short(x))
275 275 return len(cl)
276 276
277 277 def revmap(x):
278 278 return cl.rev(x)
279 279
280 280 changesets = files = revisions = 0
281 281
282 282 try:
283 283 # The transaction may already carry source information. In this
284 284 # case we use the top level data. We overwrite the argument
285 285 # because we need to use the top level value (if they exist)
286 286 # in this function.
287 287 srctype = tr.hookargs.setdefault('source', srctype)
288 288 url = tr.hookargs.setdefault('url', url)
289 289 repo.hook('prechangegroup',
290 290 throw=True, **pycompat.strkwargs(tr.hookargs))
291 291
292 292 # write changelog data to temp files so concurrent readers
293 293 # will not see an inconsistent view
294 294 cl = repo.changelog
295 295 cl.delayupdate(tr)
296 296 oldheads = set(cl.heads())
297 297
298 298 trp = weakref.proxy(tr)
299 299 # pull off the changeset group
300 300 repo.ui.status(_("adding changesets\n"))
301 301 clstart = len(cl)
302 302 progress = repo.ui.makeprogress(_('changesets'), unit=_('chunks'),
303 303 total=expectedtotal)
304 304 self.callback = progress.increment
305 305
306 306 efiles = set()
307 307 def onchangelog(cl, node):
308 308 efiles.update(cl.readfiles(node))
309 309
310 310 self.changelogheader()
311 311 deltas = self.deltaiter()
312 312 cgnodes = cl.addgroup(deltas, csmap, trp, addrevisioncb=onchangelog)
313 313 efiles = len(efiles)
314 314
315 315 if not cgnodes:
316 316 repo.ui.develwarn('applied empty changegroup',
317 317 config='warn-empty-changegroup')
318 318 clend = len(cl)
319 319 changesets = clend - clstart
320 320 progress.complete()
321 321 self.callback = None
322 322
323 323 # pull off the manifest group
324 324 repo.ui.status(_("adding manifests\n"))
325 325 # We know that we'll never have more manifests than we had
326 326 # changesets.
327 327 progress = repo.ui.makeprogress(_('manifests'), unit=_('chunks'),
328 328 total=changesets)
329 329 self._unpackmanifests(repo, revmap, trp, progress)
330 330
331 331 needfiles = {}
332 332 if repo.ui.configbool('server', 'validate'):
333 333 cl = repo.changelog
334 334 ml = repo.manifestlog
335 335 # validate incoming csets have their manifests
336 336 for cset in pycompat.xrange(clstart, clend):
337 337 mfnode = cl.changelogrevision(cset).manifest
338 338 mfest = ml[mfnode].readdelta()
339 339 # store file cgnodes we must see
340 340 for f, n in mfest.iteritems():
341 341 needfiles.setdefault(f, set()).add(n)
342 342
343 343 # process the files
344 344 repo.ui.status(_("adding file changes\n"))
345 345 newrevs, newfiles = _addchangegroupfiles(
346 346 repo, self, revmap, trp, efiles, needfiles)
347 347 revisions += newrevs
348 348 files += newfiles
349 349
350 350 deltaheads = 0
351 351 if oldheads:
352 352 heads = cl.heads()
353 353 deltaheads = len(heads) - len(oldheads)
354 354 for h in heads:
355 355 if h not in oldheads and repo[h].closesbranch():
356 356 deltaheads -= 1
357 357 htext = ""
358 358 if deltaheads:
359 359 htext = _(" (%+d heads)") % deltaheads
360 360
361 361 repo.ui.status(_("added %d changesets"
362 362 " with %d changes to %d files%s\n")
363 363 % (changesets, revisions, files, htext))
364 364 repo.invalidatevolatilesets()
365 365
366 366 if changesets > 0:
367 367 if 'node' not in tr.hookargs:
368 368 tr.hookargs['node'] = hex(cl.node(clstart))
369 369 tr.hookargs['node_last'] = hex(cl.node(clend - 1))
370 370 hookargs = dict(tr.hookargs)
371 371 else:
372 372 hookargs = dict(tr.hookargs)
373 373 hookargs['node'] = hex(cl.node(clstart))
374 374 hookargs['node_last'] = hex(cl.node(clend - 1))
375 375 repo.hook('pretxnchangegroup',
376 376 throw=True, **pycompat.strkwargs(hookargs))
377 377
378 378 added = [cl.node(r) for r in pycompat.xrange(clstart, clend)]
379 379 phaseall = None
380 380 if srctype in ('push', 'serve'):
381 381 # Old servers can not push the boundary themselves.
382 382 # New servers won't push the boundary if changeset already
383 383 # exists locally as secret
384 384 #
385 385 # We should not use added here but the list of all changes in
386 386 # the bundle
387 387 if repo.publishing():
388 388 targetphase = phaseall = phases.public
389 389 else:
390 390 # closer target phase computation
391 391
392 392 # Those changesets have been pushed from the
393 393 # outside, their phases are going to be pushed
394 394 # alongside. Therefore `targetphase` is
395 395 # ignored.
396 396 targetphase = phaseall = phases.draft
397 397 if added:
398 398 phases.registernew(repo, tr, targetphase, added)
399 399 if phaseall is not None:
400 400 phases.advanceboundary(repo, tr, phaseall, cgnodes)
401 401
402 402 if changesets > 0:
403 403
404 404 def runhooks():
405 405 # These hooks run when the lock releases, not when the
406 406 # transaction closes. So it's possible for the changelog
407 407 # to have changed since we last saw it.
408 408 if clstart >= len(repo):
409 409 return
410 410
411 411 repo.hook("changegroup", **pycompat.strkwargs(hookargs))
412 412
413 413 for n in added:
414 414 args = hookargs.copy()
415 415 args['node'] = hex(n)
416 416 del args['node_last']
417 417 repo.hook("incoming", **pycompat.strkwargs(args))
418 418
419 419 newheads = [h for h in repo.heads()
420 420 if h not in oldheads]
421 421 repo.ui.log("incoming",
422 422 "%d incoming changes - new heads: %s\n",
423 423 len(added),
424 424 ', '.join([hex(c[:6]) for c in newheads]))
425 425
426 426 tr.addpostclose('changegroup-runhooks-%020i' % clstart,
427 427 lambda tr: repo._afterlock(runhooks))
428 428 finally:
429 429 repo.ui.flush()
430 430 # never return 0 here:
431 431 if deltaheads < 0:
432 432 ret = deltaheads - 1
433 433 else:
434 434 ret = deltaheads + 1
435 435 return ret
436 436
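The return-value encoding above folds the head-count change into a single integer that is never 0 on success, with the sign preserving the direction of the change. A tiny sketch of that encoding:

    def encode_headchange(deltaheads):
        # Mirrors the end of apply(): never 0, sign encodes direction.
        return deltaheads - 1 if deltaheads < 0 else deltaheads + 1

    assert encode_headchange(0) == 1     # head count unchanged
    assert encode_headchange(2) == 3     # two new heads
    assert encode_headchange(-1) == -2   # one head removed
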
437 437 def deltaiter(self):
438 438 """
439 439 returns an iterator of the deltas in this changegroup
440 440
441 441 Useful for passing to the underlying storage system to be stored.
442 442 """
443 443 chain = None
444 444 for chunkdata in iter(lambda: self.deltachunk(chain), {}):
445 445 # Chunkdata: (node, p1, p2, cs, deltabase, delta, flags)
446 446 yield chunkdata
447 447 chain = chunkdata[0]
448 448
449 449 class cg2unpacker(cg1unpacker):
450 450 """Unpacker for cg2 streams.
451 451
452 452 cg2 streams add support for generaldelta, so the delta header
453 453 format is slightly different. All other features about the data
454 454 remain the same.
455 455 """
456 456 deltaheader = _CHANGEGROUPV2_DELTA_HEADER
457 457 deltaheadersize = deltaheader.size
458 458 version = '02'
459 459
460 460 def _deltaheader(self, headertuple, prevnode):
461 461 node, p1, p2, deltabase, cs = headertuple
462 462 flags = 0
463 463 return node, p1, p2, deltabase, cs, flags
464 464
465 465 class cg3unpacker(cg2unpacker):
466 466 """Unpacker for cg3 streams.
467 467
468 468 cg3 streams add support for exchanging treemanifests and revlog
469 469 flags. It adds the revlog flags to the delta header and an empty chunk
470 470 separating manifests and files.
471 471 """
472 472 deltaheader = _CHANGEGROUPV3_DELTA_HEADER
473 473 deltaheadersize = deltaheader.size
474 474 version = '03'
475 475 _grouplistcount = 2 # One list of manifests and one list of files
476 476
477 477 def _deltaheader(self, headertuple, prevnode):
478 478 node, p1, p2, deltabase, cs, flags = headertuple
479 479 return node, p1, p2, deltabase, cs, flags
480 480
481 481 def _unpackmanifests(self, repo, revmap, trp, prog):
482 482 super(cg3unpacker, self)._unpackmanifests(repo, revmap, trp, prog)
483 483 for chunkdata in iter(self.filelogheader, {}):
484 484 # If we get here, there are directory manifests in the changegroup
485 485 d = chunkdata["filename"]
486 486 repo.ui.debug("adding %s revisions\n" % d)
487 487 dirlog = repo.manifestlog._revlog.dirlog(d)
488 488 deltas = self.deltaiter()
489 489 if not dirlog.addgroup(deltas, revmap, trp):
490 490 raise error.Abort(_("received dir revlog group is empty"))
491 491
492 492 class headerlessfixup(object):
493 493 def __init__(self, fh, h):
494 494 self._h = h
495 495 self._fh = fh
496 496 def read(self, n):
497 497 if self._h:
498 498 d, self._h = self._h[:n], self._h[n:]
499 499 if len(d) < n:
500 500 d += readexactly(self._fh, n - len(d))
501 501 return d
502 502 return readexactly(self._fh, n)
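
headerlessfixup serves already-consumed header bytes (typically sniffed to detect the bundle type) before falling through to the real stream. A self-contained demonstration of the same idea, using io.BytesIO and plain read() in place of readexactly():

    import io

    class headerless(object):
        def __init__(self, fh, h):
            self._fh, self._h = fh, h
        def read(self, n):
            if self._h:
                d, self._h = self._h[:n], self._h[n:]
                if len(d) < n:
                    d += self._fh.read(n - len(d))
                return d
            return self._fh.read(n)

    fh = headerless(io.BytesIO(b'rest-of-stream'), b'HG20')
    assert fh.read(6) == b'HG20re'      # header first, then the stream
    assert fh.read(4) == b'st-o'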
503 503
504 504 @attr.s(slots=True, frozen=True)
505 505 class revisiondeltarequest(object):
506 506 """Describes a request to construct a revision delta.
507 507
508 508 Instances are converted into ``revisiondelta`` later.
509 509 """
510 510 # Revision whose delta will be generated.
511 511 node = attr.ib()
512 512
513 513 # Linknode value.
514 514 linknode = attr.ib()
515 515
516 516 # Parent revisions to record in ``revisiondelta`` instance.
517 517 p1node = attr.ib()
518 518 p2node = attr.ib()
519 519
520 # Base revision that delta should be generated against. If nullrev,
520 # Base revision that delta should be generated against. If nullid,
521 521 # the full revision data should be populated. If None, the delta
522 522 # may be generated against any base revision that is an ancestor of
523 # this revision. If any other numeric value, the delta should be
524 # produced against that revision.
525 baserev = attr.ib()
523 # this revision. If any other value, the delta should be produced
524 # against that revision.
525 basenode = attr.ib()
526 526
527 527 # Whether this should be marked as an ellipsis revision.
528 528 ellipsis = attr.ib(default=False)
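
This is the heart of the commit: a request now identifies its delta base by node rather than by revision number, with the three distinct meanings documented above. A sketch of constructing the three flavors (the node values are hypothetical placeholders):

    nullid = b'\0' * 20   # the same constant exposed by mercurial.node
    node, p1, link = b'\x11' * 20, b'\x22' * 20, b'\x33' * 20

    # Force the full revision text to be emitted:
    full = revisiondeltarequest(node=node, linknode=link, p1node=p1,
                                p2node=nullid, basenode=nullid)
    # Let the store pick any ancestor as the base:
    anybase = revisiondeltarequest(node=node, linknode=link, p1node=p1,
                                   p2node=nullid, basenode=None)
    # Pin the delta against a specific node (here, p1):
    pinned = revisiondeltarequest(node=node, linknode=link, p1node=p1,
                                  p2node=nullid, basenode=p1)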
529 529
530 530 @attr.s(slots=True, frozen=True)
531 531 class revisiondelta(object):
532 532 """Describes a delta entry in a changegroup.
533 533
534 534 Captured data is sufficient to serialize the delta into multiple
535 535 formats.
536 536
537 537 ``revision`` and ``delta`` are mutually exclusive.
538 538 """
539 539 # 20 byte node of this revision.
540 540 node = attr.ib()
541 541 # 20 byte nodes of parent revisions.
542 542 p1node = attr.ib()
543 543 p2node = attr.ib()
544 544 # 20 byte node of node this delta is against.
545 545 basenode = attr.ib()
546 546 # 20 byte node of changeset revision this delta is associated with.
547 547 linknode = attr.ib()
548 548 # 2 bytes of flags to apply to revision data.
549 549 flags = attr.ib()
550 550 # Size of base revision this delta is against. May be None if
551 551 # basenode is nullid.
552 552 baserevisionsize = attr.ib()
553 553 # Raw fulltext revision data.
554 554 revision = attr.ib()
555 555 # Delta between the basenode and node.
556 556 delta = attr.ib()
557 557
558 558 def _revisiondeltatochunks(delta, headerfn):
559 559 """Serialize a revisiondelta to changegroup chunks."""
560 560
561 561 # The captured revision delta may be encoded as a delta against
562 562 # a base revision or as a full revision. The changegroup format
563 563 # requires that everything on the wire be deltas. So for full
564 564 # revisions, we need to invent a header that says to rewrite
565 565 # data.
566 566
567 567 if delta.delta is not None:
568 568 prefix, data = b'', delta.delta
569 569 elif delta.basenode == nullid:
570 570 data = delta.revision
571 571 prefix = mdiff.trivialdiffheader(len(data))
572 572 else:
573 573 data = delta.revision
574 574 prefix = mdiff.replacediffheader(delta.baserevisionsize,
575 575 len(data))
576 576
577 577 meta = headerfn(delta)
578 578
579 579 yield chunkheader(len(meta) + len(prefix) + len(data))
580 580 yield meta
581 581 if prefix:
582 582 yield prefix
583 583 yield data
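
Because the wire format only carries deltas, a full revision is wrapped as a delta that replaces the entire base. A sketch mirroring mdiff's bsdiff-style headers (start, end, and new length packed as ">lll"):

    import struct

    def trivialdiffheader(length):
        # Replace bytes [0, 0) of an empty base with `length` new bytes.
        return struct.pack(">lll", 0, 0, length)

    def replacediffheader(oldlen, newlen):
        # Replace the whole old text [0, oldlen) with `newlen` new bytes.
        return struct.pack(">lll", 0, oldlen, newlen)

    fulltext = b'new contents'
    asdelta = trivialdiffheader(len(fulltext)) + fulltext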
584 584
585 585 def _sortnodesnormal(store, nodes, reorder):
586 586 """Sort nodes for changegroup generation and turn into revnums."""
587 587 # for generaldelta revlogs, we linearize the revs; this will both be
588 588 # much quicker and generate a much smaller bundle
589 589 if (store._generaldelta and reorder is None) or reorder:
590 590 dag = dagutil.revlogdag(store)
591 591 return dag.linearize(set(store.rev(n) for n in nodes))
592 592 else:
593 593 return sorted([store.rev(n) for n in nodes])
594 594
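The reorder decision above in condensed form, assuming the same inputs (reorder=None means "auto"):

    def shouldreorder(generaldelta, reorder):
        # Mirrors (store._generaldelta and reorder is None) or reorder:
        # "auto" linearizes only generaldelta stores; an explicit
        # setting always wins.
        return generaldelta if reorder is None else bool(reorder)
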
595 595 def _sortnodesellipsis(store, nodes, cl, lookup):
596 596 """Sort nodes for changegroup generation and turn into revnums."""
597 597 # Ellipses serving mode.
598 598 #
599 599 # In a perfect world, we'd generate better ellipsis-ified graphs
600 600 # for non-changelog revlogs. In practice, we haven't started doing
601 601 # that yet, so the resulting DAGs for the manifestlog and filelogs
602 602 # are actually full of bogus parentage on all the ellipsis
603 603 # nodes. This has the side effect that, while the contents are
604 604 # correct, the individual DAGs might be completely out of whack in
605 605 # a case like 882681bc3166 and its ancestors (back about 10
606 606 # revisions or so) in the main hg repo.
607 607 #
608 608 # The one invariant we *know* holds is that the new (potentially
609 609 # bogus) DAG shape will be valid if we order the nodes in the
610 610 # order that they're introduced in dramatis personae by the
611 611 # changelog, so what we do is we sort the non-changelog histories
612 612 # by the order in which they are used by the changelog.
613 613 key = lambda n: cl.rev(lookup(n))
614 614 return [store.rev(n) for n in sorted(nodes, key=key)]
615 615
616 def _handlerevisiondeltarequest(store, request, prev):
616 def _handlerevisiondeltarequest(store, request, prevnode):
617 617 """Obtain a revisiondelta from a revisiondeltarequest"""
618 618
619 619 node = request.node
620 620 rev = store.rev(node)
621 621
622 622 # Requesting a full revision.
623 if request.baserev == nullrev:
624 base = nullrev
623 if request.basenode == nullid:
624 baserev = nullrev
625 625 # Requesting an explicit revision.
626 elif request.baserev is not None:
627 base = request.baserev
626 elif request.basenode is not None:
627 baserev = store.rev(request.basenode)
628 628 # Allowing us to choose.
629 629 else:
630 630 p1, p2 = store.parentrevs(rev)
631 631 dp = store.deltaparent(rev)
632 632
633 633 if dp == nullrev and store.storedeltachains:
634 634 # Avoid sending full revisions when delta parent is null. Pick prev
635 635 # in that case. It's tempting to pick p1 in this case, as p1 will
636 636 # be smaller in the common case. However, computing a delta against
637 637 # p1 may require resolving the raw text of p1, which could be
638 638 # expensive. The revlog caches should have prev cached, meaning
639 639 # less CPU for changegroup generation. There is likely room to add
640 640 # a flag and/or config option to control this behavior.
641 base = prev
641 baserev = store.rev(prevnode)
642 642 elif dp == nullrev:
643 643 # revlog is configured to use full snapshot for a reason,
644 644 # stick to full snapshot.
645 base = nullrev
646 elif dp not in (p1, p2, prev):
645 baserev = nullrev
646 elif dp not in (p1, p2, store.rev(prevnode)):
647 647 # Pick prev when we can't be sure remote has the base revision.
648 base = prev
648 baserev = store.rev(prevnode)
649 649 else:
650 base = dp
650 baserev = dp
651 651
652 if base != nullrev and not store.candelta(base, rev):
653 base = nullrev
652 if baserev != nullrev and not store.candelta(baserev, rev):
653 baserev = nullrev
654 654
655 655 revision = None
656 656 delta = None
657 657 baserevisionsize = None
658 658
659 if store.iscensored(base) or store.iscensored(rev):
659 if store.iscensored(baserev) or store.iscensored(rev):
660 660 try:
661 661 revision = store.revision(node, raw=True)
662 662 except error.CensoredNodeError as e:
663 663 revision = e.tombstone
664 664
665 if base != nullrev:
666 baserevisionsize = store.rawsize(base)
665 if baserev != nullrev:
666 baserevisionsize = store.rawsize(baserev)
667 667
668 elif base == nullrev:
668 elif baserev == nullrev:
669 669 revision = store.revision(node, raw=True)
670 670 else:
671 delta = store.revdiff(base, rev)
671 delta = store.revdiff(baserev, rev)
672 672
673 673 extraflags = revlog.REVIDX_ELLIPSIS if request.ellipsis else 0
674 674
675 675 return revisiondelta(
676 676 node=node,
677 677 p1node=request.p1node,
678 678 p2node=request.p2node,
679 679 linknode=request.linknode,
680 basenode=store.node(base),
680 basenode=store.node(baserev),
681 681 flags=store.flags(rev) | extraflags,
682 682 baserevisionsize=baserevisionsize,
683 683 revision=revision,
684 684 delta=delta,
685 685 )
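
When the request leaves the base open (basenode is None), the logic above prefers the store's own delta parent but falls back to prev whenever the receiver might not have that parent. A condensed sketch of just that decision, assuming a revlog-like store:

    def choosebase(store, rev, prevrev):
        nullrev = -1   # matches mercurial.node.nullrev
        p1, p2 = store.parentrevs(rev)
        dp = store.deltaparent(rev)
        if dp == nullrev and store.storedeltachains:
            base = prevrev   # avoid a full text; prev is likely cached
        elif dp == nullrev:
            base = nullrev   # the store insists on a full snapshot
        elif dp not in (p1, p2, prevrev):
            base = prevrev   # the remote may not have dp
        else:
            base = dp
        if base != nullrev and not store.candelta(base, rev):
            base = nullrev   # e.g. flag processors can forbid deltas
        return base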
686 686
687 687 def _makenarrowdeltarequest(cl, store, ischangelog, rev, node, linkrev,
688 688 linknode, clrevtolocalrev, fullclnodes,
689 689 precomputedellipsis):
690 690 linkparents = precomputedellipsis[linkrev]
691 691 def local(clrev):
692 692 """Turn a changelog revnum into a local revnum.
693 693
694 694 The ellipsis dag is stored as revnums on the changelog,
695 695 but when we're producing ellipsis entries for
696 696 non-changelog revlogs, we need to turn those numbers into
697 697 something local. This does that for us, and during the
698 698 changelog sending phase will also expand the stored
699 699 mappings as needed.
700 700 """
701 701 if clrev == nullrev:
702 702 return nullrev
703 703
704 704 if ischangelog:
705 705 return clrev
706 706
707 707 # Walk the ellipsis-ized changelog breadth-first looking for a
708 708 # change that has been linked from the current revlog.
709 709 #
710 710 # For a flat manifest revlog only a single step should be necessary
711 711 # as all relevant changelog entries are relevant to the flat
712 712 # manifest.
713 713 #
714 714 # For a filelog or tree manifest dirlog however not every changelog
715 715 # entry will have been relevant, so we need to skip some changelog
716 716 # nodes even after ellipsis-izing.
717 717 walk = [clrev]
718 718 while walk:
719 719 p = walk[0]
720 720 walk = walk[1:]
721 721 if p in clrevtolocalrev:
722 722 return clrevtolocalrev[p]
723 723 elif p in fullclnodes:
724 724 walk.extend([pp for pp in cl.parentrevs(p)
725 725 if pp != nullrev])
726 726 elif p in precomputedellipsis:
727 727 walk.extend([pp for pp in precomputedellipsis[p]
728 728 if pp != nullrev])
729 729 else:
730 730 # In this case, we've got an ellipsis with parents
731 731 # outside the current bundle (likely an
732 732 # incremental pull). We "know" that we can use the
733 733 # value of this same revlog at whatever revision
734 734 # is pointed to by linknode. "Know" is in scare
735 735 # quotes because I haven't done enough examination
736 736 # of edge cases to convince myself this is really
737 737 # a fact - it works for all the (admittedly
738 738 # thorough) cases in our testsuite, but I would be
739 739 # somewhat unsurprised to find a case in the wild
740 740 # where this breaks down a bit. That said, I don't
741 741 # know if it would hurt anything.
742 742 for i in pycompat.xrange(rev, 0, -1):
743 743 if store.linkrev(i) == clrev:
744 744 return i
745 745 # We failed to resolve a parent for this node, so
746 746 # we crash the changegroup construction.
747 747 raise error.Abort(
748 748 'unable to resolve parent while packing %r %r'
749 749 ' for changeset %r' % (store.indexfile, rev, clrev))
750 750
751 751 return nullrev
752 752
753 753 if not linkparents or (
754 754 store.parentrevs(rev) == (nullrev, nullrev)):
755 755 p1, p2 = nullrev, nullrev
756 756 elif len(linkparents) == 1:
757 757 p1, = sorted(local(p) for p in linkparents)
758 758 p2 = nullrev
759 759 else:
760 760 p1, p2 = sorted(local(p) for p in linkparents)
761 761
762 762 p1node, p2node = store.node(p1), store.node(p2)
763 763
764 764 # TODO: try and actually send deltas for ellipsis data blocks
765 765 return revisiondeltarequest(
766 766 node=node,
767 767 p1node=p1node,
768 768 p2node=p2node,
769 769 linknode=linknode,
770 baserev=nullrev,
770 basenode=nullid,
771 771 ellipsis=True,
772 772 )
773 773
774 774 def deltagroup(repo, revs, store, ischangelog, lookup, forcedeltaparentprev,
775 775 units=None,
776 776 ellipses=False, clrevtolocalrev=None, fullclnodes=None,
777 777 precomputedellipsis=None):
778 778 """Calculate deltas for a set of revisions.
779 779
780 780 Is a generator of ``revisiondelta`` instances.
781 781
782 782 If units is not None, progress detail will be generated, units specifies
783 783 the type of revlog that is touched (changelog, manifest, etc.).
784 784 """
785 785 if not revs:
786 786 return
787 787
788 788 # We perform two passes over the revisions whose data we will emit.
789 789 #
790 790 # In the first pass, we obtain information about the deltas that will
791 791 # be generated. This involves computing linknodes and adjusting the
792 792 # request to take shallow fetching into account. The end result of
793 793 # this pass is a list of "request" objects stating which deltas
794 794 # to obtain.
795 795 #
796 796 # The second pass is simply resolving the requested deltas.
797 797
798 798 cl = repo.changelog
799 799
800 800 # In the first pass, collect info about the deltas we'll be
801 801 # generating.
802 802 requests = []
803 803
804 804 # Add the parent of the first rev.
805 805 revs.insert(0, store.parentrevs(revs[0])[0])
806 806
807 807 for i in pycompat.xrange(len(revs) - 1):
808 808 prev = revs[i]
809 809 curr = revs[i + 1]
810 810
811 811 node = store.node(curr)
812 812 linknode = lookup(node)
813 813 p1node, p2node = store.parents(node)
814 814
815 815 if ellipses:
816 816 linkrev = cl.rev(linknode)
817 817 clrevtolocalrev[linkrev] = curr
818 818
819 819 # This is a node to send in full, because the changeset it
820 820 # corresponds to was a full changeset.
821 821 if linknode in fullclnodes:
822 822 requests.append(revisiondeltarequest(
823 823 node=node,
824 824 p1node=p1node,
825 825 p2node=p2node,
826 826 linknode=linknode,
827 baserev=None,
827 basenode=None,
828 828 ))
829 829
830 830 elif linkrev not in precomputedellipsis:
831 831 pass
832 832 else:
833 833 requests.append(_makenarrowdeltarequest(
834 834 cl, store, ischangelog, curr, node, linkrev, linknode,
835 835 clrevtolocalrev, fullclnodes,
836 836 precomputedellipsis))
837 837 else:
838 838 requests.append(revisiondeltarequest(
839 839 node=node,
840 840 p1node=p1node,
841 841 p2node=p2node,
842 842 linknode=linknode,
843 baserev=prev if forcedeltaparentprev else None,
843 basenode=store.node(prev) if forcedeltaparentprev else None,
844 844 ))
845 845
846 846 # We expect the first pass to be fast, so we only engage the progress
847 847 # meter for constructing the revision deltas.
848 848 progress = None
849 849 if units is not None:
850 850 progress = repo.ui.makeprogress(_('bundling'), unit=units,
851 851 total=len(requests))
852 852
853 prevrev = revs[0]
853 prevnode = store.node(revs[0])
854 854 for i, request in enumerate(requests):
855 855 if progress:
856 856 progress.update(i + 1)
857 857
858 delta = _handlerevisiondeltarequest(store, request, prevrev)
858 delta = _handlerevisiondeltarequest(store, request, prevnode)
859 859
860 860 yield delta
861 861
862 prevrev = store.rev(request.node)
862 prevnode = request.node
863 863
864 864 if progress:
865 865 progress.complete()
866 866
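deltagroup() prepends the first revision's parent so that every emitted revision has a well-defined prev to delta against. A toy version of that windowing, assuming a store with parentrevs():

    def withprev(store, revs):
        # Pair each rev with its predecessor in emission order; the
        # first rev's predecessor is its first parent.
        revs = [store.parentrevs(revs[0])[0]] + list(revs)
        for prev, curr in zip(revs, revs[1:]):
            yield prev, curr
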
867 867 class cgpacker(object):
868 868 def __init__(self, repo, filematcher, version, allowreorder,
869 869 builddeltaheader, manifestsend,
870 870 forcedeltaparentprev=False,
871 871 bundlecaps=None, ellipses=False,
872 872 shallow=False, ellipsisroots=None, fullnodes=None):
873 873 """Given a source repo, construct a bundler.
874 874
875 875 filematcher is a matcher that matches on files to include in the
876 876 changegroup. Used to facilitate sparse changegroups.
877 877
878 878 allowreorder controls whether reordering of revisions is allowed.
879 879 This value is used when ``bundle.reorder`` is ``auto`` or isn't
880 880 set.
881 881
882 882 forcedeltaparentprev indicates whether delta parents must be against
883 883 the previous revision in a delta group. This should only be used for
884 884 compatibility with changegroup version 1.
885 885
886 886 builddeltaheader is a callable that constructs the header for a group
887 887 delta.
888 888
889 889 manifestsend is a chunk to send after manifests have been fully emitted.
890 890
891 891 ellipses indicates whether ellipsis serving mode is enabled.
892 892
893 893 bundlecaps is optional and can be used to specify the set of
894 894 capabilities which can be used to build the bundle. While bundlecaps is
895 895 unused in core Mercurial, extensions rely on this feature to communicate
896 896 capabilities to customize the changegroup packer.
897 897
898 898 shallow indicates whether shallow data might be sent. The packer may
899 899 need to pack file contents not introduced by the changes being packed.
900 900
901 901 fullnodes is the set of changelog nodes which should not be ellipsis
902 902 nodes. We store this rather than the set of nodes that should be
903 903 ellipsis because for very large histories we expect this to be
904 904 significantly smaller.
905 905 """
906 906 assert filematcher
907 907 self._filematcher = filematcher
908 908
909 909 self.version = version
910 910 self._forcedeltaparentprev = forcedeltaparentprev
911 911 self._builddeltaheader = builddeltaheader
912 912 self._manifestsend = manifestsend
913 913 self._ellipses = ellipses
914 914
915 915 # Set of capabilities we can use to build the bundle.
916 916 if bundlecaps is None:
917 917 bundlecaps = set()
918 918 self._bundlecaps = bundlecaps
919 919 self._isshallow = shallow
920 920 self._fullclnodes = fullnodes
921 921
922 922 # Maps ellipsis revs to their roots at the changelog level.
923 923 self._precomputedellipsis = ellipsisroots
924 924
925 925 # experimental config: bundle.reorder
926 926 reorder = repo.ui.config('bundle', 'reorder')
927 927 if reorder == 'auto':
928 928 self._reorder = allowreorder
929 929 else:
930 930 self._reorder = stringutil.parsebool(reorder)
931 931
932 932 self._repo = repo
933 933
934 934 if self._repo.ui.verbose and not self._repo.ui.debugflag:
935 935 self._verbosenote = self._repo.ui.note
936 936 else:
937 937 self._verbosenote = lambda s: None
938 938
939 939 def generate(self, commonrevs, clnodes, fastpathlinkrev, source):
940 940 """Yield a sequence of changegroup byte chunks."""
941 941
942 942 repo = self._repo
943 943 cl = repo.changelog
944 944
945 945 self._verbosenote(_('uncompressed size of bundle content:\n'))
946 946 size = 0
947 947
948 948 clstate, deltas = self._generatechangelog(cl, clnodes)
949 949 for delta in deltas:
950 950 for chunk in _revisiondeltatochunks(delta, self._builddeltaheader):
951 951 size += len(chunk)
952 952 yield chunk
953 953
954 954 close = closechunk()
955 955 size += len(close)
956 956 yield closechunk()
957 957
958 958 self._verbosenote(_('%8.i (changelog)\n') % size)
959 959
960 960 clrevorder = clstate['clrevorder']
961 961 mfs = clstate['mfs']
962 962 changedfiles = clstate['changedfiles']
963 963
964 964 # We need to make sure that the linkrev in the changegroup refers to
965 965 # the first changeset that introduced the manifest or file revision.
966 966 # The fastpath is usually safer than the slowpath, because the filelogs
967 967 # are walked in revlog order.
968 968 #
969 969 # When taking the slowpath with reorder=None and the manifest revlog
970 970 # uses generaldelta, the manifest may be walked in the "wrong" order.
971 971 # Without 'clrevorder', we would get an incorrect linkrev (see fix in
972 972 # cc0ff93d0c0c).
973 973 #
974 974 # When taking the fastpath, we are only vulnerable to reordering
975 975 # of the changelog itself. The changelog never uses generaldelta, so
976 976 # it is only reordered when reorder=True. To handle this case, we
977 977 # simply take the slowpath, which already has the 'clrevorder' logic.
978 978 # This was also fixed in cc0ff93d0c0c.
979 979 fastpathlinkrev = fastpathlinkrev and not self._reorder
980 980 # Treemanifests don't work correctly with fastpathlinkrev
981 981 # either, because we don't discover which directory nodes to
982 982 # send along with files. This could probably be fixed.
983 983 fastpathlinkrev = fastpathlinkrev and (
984 984 'treemanifest' not in repo.requirements)
985 985
986 986 fnodes = {} # needed file nodes
987 987
988 988 size = 0
989 989 it = self.generatemanifests(
990 990 commonrevs, clrevorder, fastpathlinkrev, mfs, fnodes, source,
991 991 clstate['clrevtomanifestrev'])
992 992
993 993 for dir, deltas in it:
994 994 if dir:
995 995 assert self.version == b'03'
996 996 chunk = _fileheader(dir)
997 997 size += len(chunk)
998 998 yield chunk
999 999
1000 1000 for delta in deltas:
1001 1001 chunks = _revisiondeltatochunks(delta, self._builddeltaheader)
1002 1002 for chunk in chunks:
1003 1003 size += len(chunk)
1004 1004 yield chunk
1005 1005
1006 1006 close = closechunk()
1007 1007 size += len(close)
1008 1008 yield close
1009 1009
1010 1010 self._verbosenote(_('%8.i (manifests)\n') % size)
1011 1011 yield self._manifestsend
1012 1012
1013 1013 mfdicts = None
1014 1014 if self._ellipses and self._isshallow:
1015 1015 mfdicts = [(self._repo.manifestlog[n].read(), lr)
1016 1016 for (n, lr) in mfs.iteritems()]
1017 1017
1018 1018 mfs.clear()
1019 1019 clrevs = set(cl.rev(x) for x in clnodes)
1020 1020
1021 1021 it = self.generatefiles(changedfiles, commonrevs,
1022 1022 source, mfdicts, fastpathlinkrev,
1023 1023 fnodes, clrevs)
1024 1024
1025 1025 for path, deltas in it:
1026 1026 h = _fileheader(path)
1027 1027 size = len(h)
1028 1028 yield h
1029 1029
1030 1030 for delta in deltas:
1031 1031 chunks = _revisiondeltatochunks(delta, self._builddeltaheader)
1032 1032 for chunk in chunks:
1033 1033 size += len(chunk)
1034 1034 yield chunk
1035 1035
1036 1036 close = closechunk()
1037 1037 size += len(close)
1038 1038 yield close
1039 1039
1040 1040 self._verbosenote(_('%8.i %s\n') % (size, path))
1041 1041
1042 1042 yield closechunk()
1043 1043
1044 1044 if clnodes:
1045 1045 repo.hook('outgoing', node=hex(clnodes[0]), source=source)
1046 1046
1047 1047 def _generatechangelog(self, cl, nodes):
1048 1048 """Generate data for changelog chunks.
1049 1049
1050 1050 Returns a 2-tuple of a dict containing state and an iterable of
1051 1051 byte chunks. The state will not be fully populated until the
1052 1052 chunk stream has been fully consumed.
1053 1053 """
1054 1054 clrevorder = {}
1055 1055 mfs = {} # needed manifests
1056 1056 mfl = self._repo.manifestlog
1057 1057 # TODO violates storage abstraction.
1058 1058 mfrevlog = mfl._revlog
1059 1059 changedfiles = set()
1060 1060 clrevtomanifestrev = {}
1061 1061
1062 1062 # Callback for the changelog, used to collect changed files and
1063 1063 # manifest nodes.
1064 1064 # Returns the linkrev node (identity in the changelog case).
1065 1065 def lookupcl(x):
1066 1066 c = cl.read(x)
1067 1067 clrevorder[x] = len(clrevorder)
1068 1068
1069 1069 if self._ellipses:
1070 1070 # Only update mfs if x is going to be sent. Otherwise we
1071 1071 # end up with bogus linkrevs specified for manifests and
1072 1072 # we skip some manifest nodes that we should otherwise
1073 1073 # have sent.
1074 1074 if (x in self._fullclnodes
1075 1075 or cl.rev(x) in self._precomputedellipsis):
1076 1076 n = c[0]
1077 1077 # Record the first changeset introducing this manifest
1078 1078 # version.
1079 1079 mfs.setdefault(n, x)
1080 1080 # Set this narrow-specific dict so we have the lowest
1081 1081 # manifest revnum to look up for this cl revnum. (Part of
1082 1082 # mapping changelog ellipsis parents to manifest ellipsis
1083 1083 # parents)
1084 1084 clrevtomanifestrev.setdefault(cl.rev(x), mfrevlog.rev(n))
1085 1085 # We can't trust the changed files list in the changeset if the
1086 1086 # client requested a shallow clone.
1087 1087 if self._isshallow:
1088 1088 changedfiles.update(mfl[c[0]].read().keys())
1089 1089 else:
1090 1090 changedfiles.update(c[3])
1091 1091 else:
1092 1092
1093 1093 n = c[0]
1094 1094 # record the first changeset introducing this manifest version
1095 1095 mfs.setdefault(n, x)
1096 1096 # Record a complete list of potentially-changed files in
1097 1097 # this manifest.
1098 1098 changedfiles.update(c[3])
1099 1099
1100 1100 return x
1101 1101
1102 1102 # Changelog doesn't benefit from reordering revisions. So send out
1103 1103 # revisions in store order.
1104 1104 revs = sorted(cl.rev(n) for n in nodes)
1105 1105
1106 1106 state = {
1107 1107 'clrevorder': clrevorder,
1108 1108 'mfs': mfs,
1109 1109 'changedfiles': changedfiles,
1110 1110 'clrevtomanifestrev': clrevtomanifestrev,
1111 1111 }
1112 1112
1113 1113 gen = deltagroup(
1114 1114 self._repo, revs, cl, True, lookupcl,
1115 1115 self._forcedeltaparentprev,
1116 1116 ellipses=self._ellipses,
1117 1117 units=_('changesets'),
1118 1118 clrevtolocalrev={},
1119 1119 fullclnodes=self._fullclnodes,
1120 1120 precomputedellipsis=self._precomputedellipsis)
1121 1121
1122 1122 return state, gen
1123 1123
1124 1124 def generatemanifests(self, commonrevs, clrevorder, fastpathlinkrev, mfs,
1125 1125 fnodes, source, clrevtolocalrev):
1126 1126 """Returns an iterator of changegroup chunks containing manifests.
1127 1127
1128 1128 `source` is unused here, but is used by extensions like remotefilelog to
1129 1129 change what is sent based on pulls vs pushes, etc.
1130 1130 """
1131 1131 repo = self._repo
1132 1132 cl = repo.changelog
1133 1133 mfl = repo.manifestlog
1134 1134 dirlog = mfl._revlog.dirlog
1135 1135 tmfnodes = {'': mfs}
1136 1136
1137 1137 # Callback for the manifest, used to collect linkrevs for filelog
1138 1138 # revisions.
1139 1139 # Returns the linkrev node (collected in lookupcl).
1140 1140 def makelookupmflinknode(dir, nodes):
1141 1141 if fastpathlinkrev:
1142 1142 assert not dir
1143 1143 return mfs.__getitem__
1144 1144
1145 1145 def lookupmflinknode(x):
1146 1146 """Callback for looking up the linknode for manifests.
1147 1147
1148 1148 Returns the linkrev node for the specified manifest.
1149 1149
1150 1150 SIDE EFFECT:
1151 1151
1152 1152 1) fclnodes gets populated with the list of relevant
1153 1153 file nodes if we're not using fastpathlinkrev
1154 1154 2) When treemanifests are in use, collects treemanifest nodes
1155 1155 to send
1156 1156
1157 1157 Note that this means manifests must be completely sent to
1158 1158 the client before you can trust the list of files and
1159 1159 treemanifests to send.
1160 1160 """
1161 1161 clnode = nodes[x]
1162 1162 mdata = mfl.get(dir, x).readfast(shallow=True)
1163 1163 for p, n, fl in mdata.iterentries():
1164 1164 if fl == 't': # subdirectory manifest
1165 1165 subdir = dir + p + '/'
1166 1166 tmfclnodes = tmfnodes.setdefault(subdir, {})
1167 1167 tmfclnode = tmfclnodes.setdefault(n, clnode)
1168 1168 if clrevorder[clnode] < clrevorder[tmfclnode]:
1169 1169 tmfclnodes[n] = clnode
1170 1170 else:
1171 1171 f = dir + p
1172 1172 fclnodes = fnodes.setdefault(f, {})
1173 1173 fclnode = fclnodes.setdefault(n, clnode)
1174 1174 if clrevorder[clnode] < clrevorder[fclnode]:
1175 1175 fclnodes[n] = clnode
1176 1176 return clnode
1177 1177 return lookupmflinknode
1178 1178
1179 1179 while tmfnodes:
1180 1180 dir, nodes = tmfnodes.popitem()
1181 1181 store = dirlog(dir)
1182 1182
1183 1183 if not self._filematcher.visitdir(store._dir[:-1] or '.'):
1184 1184 prunednodes = []
1185 1185 else:
1186 1186 frev, flr = store.rev, store.linkrev
1187 1187 prunednodes = [n for n in nodes
1188 1188 if flr(frev(n)) not in commonrevs]
1189 1189
1190 1190 if dir and not prunednodes:
1191 1191 continue
1192 1192
1193 1193 lookupfn = makelookupmflinknode(dir, nodes)
1194 1194
1195 1195 if self._ellipses:
1196 1196 revs = _sortnodesellipsis(store, prunednodes, cl,
1197 1197 lookupfn)
1198 1198 else:
1199 1199 revs = _sortnodesnormal(store, prunednodes,
1200 1200 self._reorder)
1201 1201
1202 1202 deltas = deltagroup(
1203 1203 self._repo, revs, store, False, lookupfn,
1204 1204 self._forcedeltaparentprev,
1205 1205 ellipses=self._ellipses,
1206 1206 units=_('manifests'),
1207 1207 clrevtolocalrev=clrevtolocalrev,
1208 1208 fullclnodes=self._fullclnodes,
1209 1209 precomputedellipsis=self._precomputedellipsis)
1210 1210
1211 1211 yield dir, deltas
1212 1212
1213 1213 # The 'source' parameter is useful for extensions
1214 1214 def generatefiles(self, changedfiles, commonrevs, source,
1215 1215 mfdicts, fastpathlinkrev, fnodes, clrevs):
1216 1216 changedfiles = list(filter(self._filematcher, changedfiles))
1217 1217
1218 1218 if not fastpathlinkrev:
1219 1219 def normallinknodes(unused, fname):
1220 1220 return fnodes.get(fname, {})
1221 1221 else:
1222 1222 cln = self._repo.changelog.node
1223 1223
1224 1224 def normallinknodes(store, fname):
1225 1225 flinkrev = store.linkrev
1226 1226 fnode = store.node
1227 1227 revs = ((r, flinkrev(r)) for r in store)
1228 1228 return dict((fnode(r), cln(lr))
1229 1229 for r, lr in revs if lr in clrevs)
1230 1230
1231 1231 clrevtolocalrev = {}
1232 1232
1233 1233 if self._isshallow:
1234 1234 # In a shallow clone, the linknodes callback needs to also include
1235 1235 # those file nodes that are in the manifests we sent but weren't
1236 1236 # introduced by those manifests.
1237 1237 commonctxs = [self._repo[c] for c in commonrevs]
1238 1238 clrev = self._repo.changelog.rev
1239 1239
1240 1240 # Defining this function has a side-effect of overriding the
1241 1241 # function of the same name that was passed in as an argument.
1242 1242 # TODO have caller pass in appropriate function.
1243 1243 def linknodes(flog, fname):
1244 1244 for c in commonctxs:
1245 1245 try:
1246 1246 fnode = c.filenode(fname)
1247 1247 clrevtolocalrev[c.rev()] = flog.rev(fnode)
1248 1248 except error.ManifestLookupError:
1249 1249 pass
1250 1250 links = normallinknodes(flog, fname)
1251 1251 if len(links) != len(mfdicts):
1252 1252 for mf, lr in mfdicts:
1253 1253 fnode = mf.get(fname, None)
1254 1254 if fnode in links:
1255 1255 links[fnode] = min(links[fnode], lr, key=clrev)
1256 1256 elif fnode:
1257 1257 links[fnode] = lr
1258 1258 return links
1259 1259 else:
1260 1260 linknodes = normallinknodes
1261 1261
1262 1262 repo = self._repo
1263 1263 cl = repo.changelog
1264 1264 progress = repo.ui.makeprogress(_('bundling'), unit=_('files'),
1265 1265 total=len(changedfiles))
1266 1266 for i, fname in enumerate(sorted(changedfiles)):
1267 1267 filerevlog = repo.file(fname)
1268 1268 if not filerevlog:
1269 1269 raise error.Abort(_("empty or missing file data for %s") %
1270 1270 fname)
1271 1271
1272 1272 clrevtolocalrev.clear()
1273 1273
1274 1274 linkrevnodes = linknodes(filerevlog, fname)
1275 1275 # Lookup for filenodes, we collected the linkrev nodes above in the
1276 1276 # fastpath case and with lookupmf in the slowpath case.
1277 1277 def lookupfilelog(x):
1278 1278 return linkrevnodes[x]
1279 1279
1280 1280 frev, flr = filerevlog.rev, filerevlog.linkrev
1281 1281 filenodes = [n for n in linkrevnodes
1282 1282 if flr(frev(n)) not in commonrevs]
1283 1283
1284 1284 if filenodes:
1285 1285 if self._ellipses:
1286 1286 revs = _sortnodesellipsis(filerevlog, filenodes,
1287 1287 cl, lookupfilelog)
1288 1288 else:
1289 1289 revs = _sortnodesnormal(filerevlog, filenodes,
1290 1290 self._reorder)
1291 1291
1292 1292 progress.update(i + 1, item=fname)
1293 1293
1294 1294 deltas = deltagroup(
1295 1295 self._repo, revs, filerevlog, False, lookupfilelog,
1296 1296 self._forcedeltaparentprev,
1297 1297 ellipses=self._ellipses,
1298 1298 clrevtolocalrev=clrevtolocalrev,
1299 1299 fullclnodes=self._fullclnodes,
1300 1300 precomputedellipsis=self._precomputedellipsis)
1301 1301
1302 1302 yield fname, deltas
1303 1303
1304 1304 progress.complete()
1305 1305
1306 1306 def _makecg1packer(repo, filematcher, bundlecaps, ellipses=False,
1307 1307 shallow=False, ellipsisroots=None, fullnodes=None):
1308 1308 builddeltaheader = lambda d: _CHANGEGROUPV1_DELTA_HEADER.pack(
1309 1309 d.node, d.p1node, d.p2node, d.linknode)
1310 1310
1311 1311 return cgpacker(repo, filematcher, b'01',
1312 1312 allowreorder=None,
1313 1313 builddeltaheader=builddeltaheader,
1314 1314 manifestsend=b'',
1315 1315 forcedeltaparentprev=True,
1316 1316 bundlecaps=bundlecaps,
1317 1317 ellipses=ellipses,
1318 1318 shallow=shallow,
1319 1319 ellipsisroots=ellipsisroots,
1320 1320 fullnodes=fullnodes)
1321 1321
1322 1322 def _makecg2packer(repo, filematcher, bundlecaps, ellipses=False,
1323 1323 shallow=False, ellipsisroots=None, fullnodes=None):
1324 1324 builddeltaheader = lambda d: _CHANGEGROUPV2_DELTA_HEADER.pack(
1325 1325 d.node, d.p1node, d.p2node, d.basenode, d.linknode)
1326 1326
1327 1327 # Since generaldelta is directly supported by cg2, reordering
1328 1328 # generally doesn't help, so we disable it by default (treating
1329 1329 # bundle.reorder=auto just like bundle.reorder=False).
1330 1330 return cgpacker(repo, filematcher, b'02',
1331 1331 allowreorder=False,
1332 1332 builddeltaheader=builddeltaheader,
1333 1333 manifestsend=b'',
1334 1334 bundlecaps=bundlecaps,
1335 1335 ellipses=ellipses,
1336 1336 shallow=shallow,
1337 1337 ellipsisroots=ellipsisroots,
1338 1338 fullnodes=fullnodes)
1339 1339
1340 1340 def _makecg3packer(repo, filematcher, bundlecaps, ellipses=False,
1341 1341 shallow=False, ellipsisroots=None, fullnodes=None):
1342 1342 builddeltaheader = lambda d: _CHANGEGROUPV3_DELTA_HEADER.pack(
1343 1343 d.node, d.p1node, d.p2node, d.basenode, d.linknode, d.flags)
1344 1344
1345 1345 return cgpacker(repo, filematcher, b'03',
1346 1346 allowreorder=False,
1347 1347 builddeltaheader=builddeltaheader,
1348 1348 manifestsend=closechunk(),
1349 1349 bundlecaps=bundlecaps,
1350 1350 ellipses=ellipses,
1351 1351 shallow=shallow,
1352 1352 ellipsisroots=ellipsisroots,
1353 1353 fullnodes=fullnodes)
1354 1354
1355 1355 _packermap = {'01': (_makecg1packer, cg1unpacker),
1356 1356 # cg2 adds support for exchanging generaldelta
1357 1357 '02': (_makecg2packer, cg2unpacker),
1358 1358 # cg3 adds support for exchanging revlog flags and treemanifests
1359 1359 '03': (_makecg3packer, cg3unpacker),
1360 1360 }
1361 1361
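The three delta headers differ only in whether they carry an explicit base node (cg2 and later) and a flags field (cg3): 80, 100, and 102 bytes respectively. A quick size check from the struct formats defined at the top of this file:

    import struct

    assert struct.Struct("20s20s20s20s").size == 80        # cg1
    assert struct.Struct("20s20s20s20s20s").size == 100    # cg2: +basenode
    assert struct.Struct(">20s20s20s20s20sH").size == 102  # cg3: +flags
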
1362 1362 def allsupportedversions(repo):
1363 1363 versions = set(_packermap.keys())
1364 1364 if not (repo.ui.configbool('experimental', 'changegroup3') or
1365 1365 repo.ui.configbool('experimental', 'treemanifest') or
1366 1366 'treemanifest' in repo.requirements):
1367 1367 versions.discard('03')
1368 1368 return versions
1369 1369
1370 1370 # Changegroup versions that can be applied to the repo
1371 1371 def supportedincomingversions(repo):
1372 1372 return allsupportedversions(repo)
1373 1373
1374 1374 # Changegroup versions that can be created from the repo
1375 1375 def supportedoutgoingversions(repo):
1376 1376 versions = allsupportedversions(repo)
1377 1377 if 'treemanifest' in repo.requirements:
1378 1378 # Versions 01 and 02 support only flat manifests and it's just too
1379 1379 # expensive to convert between the flat manifest and tree manifest on
1380 1380 # the fly. Since tree manifests are hashed differently, all of history
1381 1381 # would have to be converted. Instead, we simply don't even pretend to
1382 1382 # support versions 01 and 02.
1383 1383 versions.discard('01')
1384 1384 versions.discard('02')
1385 1385 if repository.NARROW_REQUIREMENT in repo.requirements:
1386 1386 # Versions 01 and 02 don't support revlog flags, and we need to
1387 1387 # support that for stripping and unbundling to work.
1388 1388 versions.discard('01')
1389 1389 versions.discard('02')
1390 1390 if LFS_REQUIREMENT in repo.requirements:
1391 1391 # Versions 01 and 02 don't support revlog flags, and we need to
1392 1392 # mark LFS entries with REVIDX_EXTSTORED.
1393 1393 versions.discard('01')
1394 1394 versions.discard('02')
1395 1395
1396 1396 return versions
1397 1397
1398 1398 def localversion(repo):
1399 1399 # Finds the best version to use for bundles that are meant to be used
1400 1400 # locally, such as those from strip and shelve, and temporary bundles.
1401 1401 return max(supportedoutgoingversions(repo))
1402 1402
1403 1403 def safeversion(repo):
1404 1404 # Finds the smallest version that it's safe to assume clients of the repo
1405 1405 # will support. For example, all hg versions that support generaldelta also
1406 1406 # support changegroup 02.
1407 1407 versions = supportedoutgoingversions(repo)
1408 1408 if 'generaldelta' in repo.requirements:
1409 1409 versions.discard('01')
1410 1410 assert versions
1411 1411 return min(versions)
1412 1412
1413 1413 def getbundler(version, repo, bundlecaps=None, filematcher=None,
1414 1414 ellipses=False, shallow=False, ellipsisroots=None,
1415 1415 fullnodes=None):
1416 1416 assert version in supportedoutgoingversions(repo)
1417 1417
1418 1418 if filematcher is None:
1419 1419 filematcher = matchmod.alwaysmatcher(repo.root, '')
1420 1420
1421 1421 if version == '01' and not filematcher.always():
1422 1422 raise error.ProgrammingError('version 01 changegroups do not support '
1423 1423 'sparse file matchers')
1424 1424
1425 1425 if ellipses and version in (b'01', b'02'):
1426 1426 raise error.Abort(
1427 1427 _('ellipsis nodes require at least cg3 on client and server, '
1428 1428 'but negotiated version %s') % version)
1429 1429
1430 1430 # Requested files could include files not in the local store. So
1431 1431 # filter those out.
1432 1432 filematcher = matchmod.intersectmatchers(repo.narrowmatch(),
1433 1433 filematcher)
1434 1434
1435 1435 fn = _packermap[version][0]
1436 1436 return fn(repo, filematcher, bundlecaps, ellipses=ellipses,
1437 1437 shallow=shallow, ellipsisroots=ellipsisroots,
1438 1438 fullnodes=fullnodes)
1439 1439
1440 1440 def getunbundler(version, fh, alg, extras=None):
1441 1441 return _packermap[version][1](fh, alg, extras=extras)
1442 1442
1443 1443 def _changegroupinfo(repo, nodes, source):
1444 1444 if repo.ui.verbose or source == 'bundle':
1445 1445 repo.ui.status(_("%d changesets found\n") % len(nodes))
1446 1446 if repo.ui.debugflag:
1447 1447 repo.ui.debug("list of changesets:\n")
1448 1448 for node in nodes:
1449 1449 repo.ui.debug("%s\n" % hex(node))
1450 1450
1451 1451 def makechangegroup(repo, outgoing, version, source, fastpath=False,
1452 1452 bundlecaps=None):
1453 1453 cgstream = makestream(repo, outgoing, version, source,
1454 1454 fastpath=fastpath, bundlecaps=bundlecaps)
1455 1455 return getunbundler(version, util.chunkbuffer(cgstream), None,
1456 1456 {'clcount': len(outgoing.missing) })
1457 1457
1458 1458 def makestream(repo, outgoing, version, source, fastpath=False,
1459 1459 bundlecaps=None, filematcher=None):
1460 1460 bundler = getbundler(version, repo, bundlecaps=bundlecaps,
1461 1461 filematcher=filematcher)
1462 1462
1463 1463 repo = repo.unfiltered()
1464 1464 commonrevs = outgoing.common
1465 1465 csets = outgoing.missing
1466 1466 heads = outgoing.missingheads
1467 1467 # We go through the fast path if we get told to, or if all (unfiltered)
1468 1468 # heads have been requested (since we then know that all linkrevs will
1469 1469 # be pulled by the client).
1470 1470 heads.sort()
1471 1471 fastpathlinkrev = fastpath or (
1472 1472 repo.filtername is None and heads == sorted(repo.heads()))
1473 1473
1474 1474 repo.hook('preoutgoing', throw=True, source=source)
1475 1475 _changegroupinfo(repo, csets, source)
1476 1476 return bundler.generate(commonrevs, csets, fastpathlinkrev, source)
1477 1477
1478 1478 def _addchangegroupfiles(repo, source, revmap, trp, expectedfiles, needfiles):
1479 1479 revisions = 0
1480 1480 files = 0
1481 1481 progress = repo.ui.makeprogress(_('files'), unit=_('files'),
1482 1482 total=expectedfiles)
1483 1483 for chunkdata in iter(source.filelogheader, {}):
1484 1484 files += 1
1485 1485 f = chunkdata["filename"]
1486 1486 repo.ui.debug("adding %s revisions\n" % f)
1487 1487 progress.increment()
1488 1488 fl = repo.file(f)
1489 1489 o = len(fl)
1490 1490 try:
1491 1491 deltas = source.deltaiter()
1492 1492 if not fl.addgroup(deltas, revmap, trp):
1493 1493 raise error.Abort(_("received file revlog group is empty"))
1494 1494 except error.CensoredBaseError as e:
1495 1495 raise error.Abort(_("received delta base is censored: %s") % e)
1496 1496 revisions += len(fl) - o
1497 1497 if f in needfiles:
1498 1498 needs = needfiles[f]
1499 1499 for new in pycompat.xrange(o, len(fl)):
1500 1500 n = fl.node(new)
1501 1501 if n in needs:
1502 1502 needs.remove(n)
1503 1503 else:
1504 1504 raise error.Abort(
1505 1505 _("received spurious file revlog entry"))
1506 1506 if not needs:
1507 1507 del needfiles[f]
1508 1508 progress.complete()
1509 1509
1510 1510 for f, needs in needfiles.iteritems():
1511 1511 fl = repo.file(f)
1512 1512 for n in needs:
1513 1513 try:
1514 1514 fl.rev(n)
1515 1515 except error.LookupError:
1516 1516 raise error.Abort(
1517 1517 _('missing file data for %s:%s - run hg verify') %
1518 1518 (f, hex(n)))
1519 1519
1520 1520 return revisions, files