##// END OF EJS Templates
sidedata: use only changegroup3 if sidedata is in use...
marmoute -
r43402:8f807a83 default
parent child Browse files
Show More
@@ -1,1686 +1,1691 b''
1 1 # changegroup.py - Mercurial changegroup manipulation functions
2 2 #
3 3 # Copyright 2006 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import os
11 11 import struct
12 12 import weakref
13 13
14 14 from .i18n import _
15 15 from .node import (
16 16 hex,
17 17 nullid,
18 18 nullrev,
19 19 short,
20 20 )
21 21 from .pycompat import open
22 22
23 23 from . import (
24 24 error,
25 25 match as matchmod,
26 26 mdiff,
27 27 phases,
28 28 pycompat,
29 29 util,
30 30 )
31 31
32 32 from .interfaces import repository
33 33
# Fixed-size delta header layouts for each changegroup version (see the
# corresponding cg*unpacker._deltaheader implementations below):
#   cg1: node, p1, p2, linknode (cs)
_CHANGEGROUPV1_DELTA_HEADER = struct.Struct(b"20s20s20s20s")
#   cg2: adds an explicit delta-base node
_CHANGEGROUPV2_DELTA_HEADER = struct.Struct(b"20s20s20s20s20s")
#   cg3: additionally carries a big-endian 16-bit revlog flags field
_CHANGEGROUPV3_DELTA_HEADER = struct.Struct(b">20s20s20s20s20sH")

LFS_REQUIREMENT = b'lfs'

# local alias; used pervasively below to read exact byte counts from streams
readexactly = util.readexactly
41 41
42 42
def getchunk(stream):
    """return the next chunk from stream as a string"""
    header = readexactly(stream, 4)
    (length,) = struct.unpack(b">l", header)
    # A stored length of exactly 4 would be an empty chunk; anything
    # smaller (but non-zero) cannot be a valid length prefix.
    if length <= 4:
        if length:
            raise error.Abort(_(b"invalid chunk length %d") % length)
        return b""
    return readexactly(stream, length - 4)
52 52
53 53
def chunkheader(length):
    """return a changegroup chunk header (string)"""
    # The encoded length counts the 4-byte header itself in addition to
    # the payload.
    return struct.pack(b">l", 4 + length)
57 57
58 58
def closechunk():
    """return a changegroup chunk header (string) for a zero-length chunk"""
    # Equivalent to struct.pack(b">l", 0): four NUL bytes terminate a group.
    return b"\x00\x00\x00\x00"
62 62
63 63
def _fileheader(path):
    """Obtain a changegroup chunk header for a named path."""
    header = chunkheader(len(path))
    return header + path
67 67
68 68
def writechunks(ui, chunks, filename, vfs=None):
    """Write chunks to a file and return its filename.

    The stream is assumed to be a bundle file.
    Existing files will not be overwritten.
    If no filename is specified, a temporary file is created.
    """
    fh = None
    # ``cleanup`` doubles as a sentinel: it holds the path to unlink if we
    # fail partway through writing, and is reset to None once all chunks
    # have been written successfully.
    cleanup = None
    try:
        if filename:
            if vfs:
                fh = vfs.open(filename, b"wb")
            else:
                # Increase default buffer size because default is usually
                # small (4k is common on Linux).
                fh = open(filename, b"wb", 131072)
        else:
            # no name given: write to a fresh temp file and return its path
            fd, filename = pycompat.mkstemp(prefix=b"hg-bundle-", suffix=b".hg")
            fh = os.fdopen(fd, r"wb")
        cleanup = filename
        for c in chunks:
            fh.write(c)
        # success: disarm the cleanup so the finally block keeps the file
        cleanup = None
        return filename
    finally:
        if fh is not None:
            fh.close()
        if cleanup is not None:
            # writing failed; remove the partial file through whichever
            # layer created it
            if filename and vfs:
                vfs.unlink(cleanup)
            else:
                os.unlink(cleanup)
102 102
103 103
class cg1unpacker(object):
    """Unpacker for cg1 changegroup streams.

    A changegroup unpacker handles the framing of the revision data in
    the wire format. Most consumers will want to use the apply()
    method to add the changes from the changegroup to a repository.

    If you're forwarding a changegroup unmodified to another consumer,
    use getchunks(), which returns an iterator of changegroup
    chunks. This is mostly useful for cases where you need to know the
    data stream has ended by observing the end of the changegroup.

    deltachunk() is useful only if you're applying delta data. Most
    consumers should prefer apply() instead.

    A few other public methods exist. Those are used only for
    bundlerepo and some debug commands - their use is discouraged.
    """

    deltaheader = _CHANGEGROUPV1_DELTA_HEADER
    deltaheadersize = deltaheader.size
    version = b'01'
    _grouplistcount = 1  # One list of files after the manifests

    def __init__(self, fh, alg, extras=None):
        """``fh`` is a file-like object yielding the (possibly compressed)
        stream; ``alg`` is the bundle compression type identifier, or None
        for uncompressed (b'UN').
        """
        if alg is None:
            alg = b'UN'
        if alg not in util.compengines.supportedbundletypes:
            raise error.Abort(_(b'unknown stream compression type: %s') % alg)
        if alg == b'BZ':
            # NOTE(review): 'BZ' is remapped to a "truncated" decompressor —
            # presumably because the stream's magic header has already been
            # consumed by the caller; confirm against the bundle read path.
            alg = b'_truncatedBZ'

        compengine = util.compengines.forbundletype(alg)
        self._stream = compengine.decompressorreader(fh)
        self._type = alg
        self.extras = extras or {}
        # optional per-chunk progress hook, invoked by _chunklength()
        self.callback = None

    # These methods (compressed, read, seek, tell) all appear to only
    # be used by bundlerepo, but it's a little hard to tell.
    def compressed(self):
        return self._type is not None and self._type != b'UN'

    def read(self, l):
        return self._stream.read(l)

    def seek(self, pos):
        return self._stream.seek(pos)

    def tell(self):
        return self._stream.tell()

    def close(self):
        return self._stream.close()

    def _chunklength(self):
        """Read a 4-byte length prefix; return the payload size (0 at a
        group terminator) and fire the progress callback, if any."""
        d = readexactly(self._stream, 4)
        l = struct.unpack(b">l", d)[0]
        if l <= 4:
            if l:
                raise error.Abort(_(b"invalid chunk length %d") % l)
            return 0
        if self.callback:
            self.callback()
        return l - 4

    def changelogheader(self):
        """v10 does not have a changelog header chunk"""
        return {}

    def manifestheader(self):
        """v10 does not have a manifest header chunk"""
        return {}

    def filelogheader(self):
        """return the header of the filelogs chunk, v10 only has the filename"""
        l = self._chunklength()
        if not l:
            return {}
        fname = readexactly(self._stream, l)
        return {b'filename': fname}

    def _deltaheader(self, headertuple, prevnode):
        """Split a parsed delta header. cg1 deltas are implicitly against
        the previous node in the stream (or p1 for the first chunk)."""
        node, p1, p2, cs = headertuple
        if prevnode is None:
            deltabase = p1
        else:
            deltabase = prevnode
        flags = 0
        return node, p1, p2, deltabase, cs, flags

    def deltachunk(self, prevnode):
        """Read one delta chunk; return {} at the end of a group, otherwise
        a (node, p1, p2, cs, deltabase, delta, flags) tuple."""
        l = self._chunklength()
        if not l:
            return {}
        headerdata = readexactly(self._stream, self.deltaheadersize)
        header = self.deltaheader.unpack(headerdata)
        delta = readexactly(self._stream, l - self.deltaheadersize)
        node, p1, p2, deltabase, cs, flags = self._deltaheader(header, prevnode)
        return (node, p1, p2, cs, deltabase, delta, flags)

    def getchunks(self):
        """returns all the chunks contains in the bundle

        Used when you need to forward the binary stream to a file or another
        network API. To do so, it parse the changegroup data, otherwise it will
        block in case of sshrepo because it don't know the end of the stream.
        """
        # For changegroup 1 and 2, we expect 3 parts: changelog, manifestlog,
        # and a list of filelogs. For changegroup 3, we expect 4 parts:
        # changelog, manifestlog, a list of tree manifestlogs, and a list of
        # filelogs.
        #
        # Changelog and manifestlog parts are terminated with empty chunks. The
        # tree and file parts are a list of entry sections. Each entry section
        # is a series of chunks terminating in an empty chunk. The list of these
        # entry sections is terminated in yet another empty chunk, so we know
        # we've reached the end of the tree/file list when we reach an empty
        # chunk that was proceeded by no non-empty chunks.

        parts = 0
        while parts < 2 + self._grouplistcount:
            noentries = True
            while True:
                chunk = getchunk(self)
                if not chunk:
                    # The first two empty chunks represent the end of the
                    # changelog and the manifestlog portions. The remaining
                    # empty chunks represent either A) the end of individual
                    # tree or file entries in the file list, or B) the end of
                    # the entire list. It's the end of the entire list if there
                    # were no entries (i.e. noentries is True).
                    if parts < 2:
                        parts += 1
                    elif noentries:
                        parts += 1
                    break
                noentries = False
                yield chunkheader(len(chunk))
                pos = 0
                # re-emit the payload in at most 1MB slices
                while pos < len(chunk):
                    next = pos + 2 ** 20
                    yield chunk[pos:next]
                    pos = next
        yield closechunk()

    def _unpackmanifests(self, repo, revmap, trp, prog):
        """Consume the manifest group and add it to the repo's manifest log."""
        self.callback = prog.increment
        # no need to check for empty manifest group here:
        # if the result of the merge of 1 and 2 is the same in 3 and 4,
        # no new manifest will be created and the manifest group will
        # be empty during the pull
        self.manifestheader()
        deltas = self.deltaiter()
        repo.manifestlog.getstorage(b'').addgroup(deltas, revmap, trp)
        prog.complete()
        self.callback = None

    def apply(
        self,
        repo,
        tr,
        srctype,
        url,
        targetphase=phases.draft,
        expectedtotal=None,
    ):
        """Add the changegroup returned by source.read() to this repo.
        srctype is a string like 'push', 'pull', or 'unbundle'. url is
        the URL of the repo where this changegroup is coming from.

        Return an integer summarizing the change to this repo:
        - nothing changed or no source: 0
        - more heads than before: 1+added heads (2..n)
        - fewer heads than before: -1-removed heads (-2..-n)
        - number of heads stays the same: 1
        """
        repo = repo.unfiltered()

        def csmap(x):
            repo.ui.debug(b"add changeset %s\n" % short(x))
            return len(cl)

        def revmap(x):
            return cl.rev(x)

        changesets = 0

        try:
            # The transaction may already carry source information. In this
            # case we use the top level data. We overwrite the argument
            # because we need to use the top level value (if they exist)
            # in this function.
            srctype = tr.hookargs.setdefault(b'source', srctype)
            tr.hookargs.setdefault(b'url', url)
            repo.hook(
                b'prechangegroup', throw=True, **pycompat.strkwargs(tr.hookargs)
            )

            # write changelog data to temp files so concurrent readers
            # will not see an inconsistent view
            cl = repo.changelog
            cl.delayupdate(tr)
            oldheads = set(cl.heads())

            trp = weakref.proxy(tr)
            # pull off the changeset group
            repo.ui.status(_(b"adding changesets\n"))
            clstart = len(cl)
            progress = repo.ui.makeprogress(
                _(b'changesets'), unit=_(b'chunks'), total=expectedtotal
            )
            self.callback = progress.increment

            efiles = set()

            def onchangelog(cl, node):
                # accumulate every file touched by an incoming changeset
                efiles.update(cl.readfiles(node))

            self.changelogheader()
            deltas = self.deltaiter()
            cgnodes = cl.addgroup(deltas, csmap, trp, addrevisioncb=onchangelog)
            efiles = len(efiles)

            if not cgnodes:
                repo.ui.develwarn(
                    b'applied empty changelog from changegroup',
                    config=b'warn-empty-changegroup',
                )
            clend = len(cl)
            changesets = clend - clstart
            progress.complete()
            self.callback = None

            # pull off the manifest group
            repo.ui.status(_(b"adding manifests\n"))
            # We know that we'll never have more manifests than we had
            # changesets.
            progress = repo.ui.makeprogress(
                _(b'manifests'), unit=_(b'chunks'), total=changesets
            )
            self._unpackmanifests(repo, revmap, trp, progress)

            needfiles = {}
            if repo.ui.configbool(b'server', b'validate'):
                cl = repo.changelog
                ml = repo.manifestlog
                # validate incoming csets have their manifests
                for cset in pycompat.xrange(clstart, clend):
                    mfnode = cl.changelogrevision(cset).manifest
                    mfest = ml[mfnode].readdelta()
                    # store file cgnodes we must see
                    for f, n in pycompat.iteritems(mfest):
                        needfiles.setdefault(f, set()).add(n)

            # process the files
            repo.ui.status(_(b"adding file changes\n"))
            newrevs, newfiles = _addchangegroupfiles(
                repo, self, revmap, trp, efiles, needfiles
            )

            # making sure the value exists
            tr.changes.setdefault(b'changegroup-count-changesets', 0)
            tr.changes.setdefault(b'changegroup-count-revisions', 0)
            tr.changes.setdefault(b'changegroup-count-files', 0)
            tr.changes.setdefault(b'changegroup-count-heads', 0)

            # some code use bundle operation for internal purpose. They usually
            # set `ui.quiet` to do this outside of user sight. Size the report
            # of such operation now happens at the end of the transaction, that
            # ui.quiet has not direct effect on the output.
            #
            # To preserve this intend use an inelegant hack, we fail to report
            # the change if `quiet` is set. We should probably move to
            # something better, but this is a good first step to allow the "end
            # of transaction report" to pass tests.
            if not repo.ui.quiet:
                tr.changes[b'changegroup-count-changesets'] += changesets
                tr.changes[b'changegroup-count-revisions'] += newrevs
                tr.changes[b'changegroup-count-files'] += newfiles

            deltaheads = 0
            if oldheads:
                heads = cl.heads()
                deltaheads += len(heads) - len(oldheads)
                for h in heads:
                    # a new head that closes a branch doesn't count
                    if h not in oldheads and repo[h].closesbranch():
                        deltaheads -= 1

            # see previous comment about checking ui.quiet
            if not repo.ui.quiet:
                tr.changes[b'changegroup-count-heads'] += deltaheads
            repo.invalidatevolatilesets()

            if changesets > 0:
                if b'node' not in tr.hookargs:
                    tr.hookargs[b'node'] = hex(cl.node(clstart))
                    tr.hookargs[b'node_last'] = hex(cl.node(clend - 1))
                    hookargs = dict(tr.hookargs)
                else:
                    hookargs = dict(tr.hookargs)
                    hookargs[b'node'] = hex(cl.node(clstart))
                    hookargs[b'node_last'] = hex(cl.node(clend - 1))
                repo.hook(
                    b'pretxnchangegroup',
                    throw=True,
                    **pycompat.strkwargs(hookargs)
                )

            added = [cl.node(r) for r in pycompat.xrange(clstart, clend)]
            phaseall = None
            if srctype in (b'push', b'serve'):
                # Old servers can not push the boundary themselves.
                # New servers won't push the boundary if changeset already
                # exists locally as secret
                #
                # We should not use added here but the list of all change in
                # the bundle
                if repo.publishing():
                    targetphase = phaseall = phases.public
                else:
                    # closer target phase computation

                    # Those changesets have been pushed from the
                    # outside, their phases are going to be pushed
                    # alongside. Therefor `targetphase` is
                    # ignored.
                    targetphase = phaseall = phases.draft
            if added:
                phases.registernew(repo, tr, targetphase, added)
            if phaseall is not None:
                phases.advanceboundary(repo, tr, phaseall, cgnodes)

            if changesets > 0:

                def runhooks():
                    # These hooks run when the lock releases, not when the
                    # transaction closes. So it's possible for the changelog
                    # to have changed since we last saw it.
                    if clstart >= len(repo):
                        return

                    repo.hook(b"changegroup", **pycompat.strkwargs(hookargs))

                    for n in added:
                        args = hookargs.copy()
                        args[b'node'] = hex(n)
                        del args[b'node_last']
                        repo.hook(b"incoming", **pycompat.strkwargs(args))

                    newheads = [h for h in repo.heads() if h not in oldheads]
                    repo.ui.log(
                        b"incoming",
                        b"%d incoming changes - new heads: %s\n",
                        len(added),
                        b', '.join([hex(c[:6]) for c in newheads]),
                    )

                tr.addpostclose(
                    b'changegroup-runhooks-%020i' % clstart,
                    lambda tr: repo._afterlock(runhooks),
                )
        finally:
            repo.ui.flush()
        # never return 0 here:
        if deltaheads < 0:
            ret = deltaheads - 1
        else:
            ret = deltaheads + 1
        return ret

    def deltaiter(self):
        """
        returns an iterator of the deltas in this changegroup

        Useful for passing to the underlying storage system to be stored.
        """
        chain = None
        for chunkdata in iter(lambda: self.deltachunk(chain), {}):
            # Chunkdata: (node, p1, p2, cs, deltabase, delta, flags)
            yield chunkdata
            chain = chunkdata[0]
487 487
class cg2unpacker(cg1unpacker):
    """Unpacker for cg2 streams.

    cg2 streams differ from cg1 only in the delta header: generaldelta
    support means the delta base node travels on the wire explicitly.
    Everything else about the data is unchanged.
    """

    deltaheader = _CHANGEGROUPV2_DELTA_HEADER
    deltaheadersize = deltaheader.size
    version = b'02'

    def _deltaheader(self, headertuple, prevnode):
        # The base is carried in the header itself, so prevnode is unused.
        node, p1, p2, deltabase, cs = headertuple
        return node, p1, p2, deltabase, cs, 0
504 504
505 505
class cg3unpacker(cg2unpacker):
    """Unpacker for cg3 streams.

    cg3 streams add support for exchanging treemanifests and revlog
    flags. It adds the revlog flags to the delta header and an empty chunk
    separating manifests and files.
    """

    deltaheader = _CHANGEGROUPV3_DELTA_HEADER
    deltaheadersize = deltaheader.size
    version = b'03'
    _grouplistcount = 2  # One list of manifests and one list of files

    def _deltaheader(self, headertuple, prevnode):
        # cg3 headers already contain all six fields, flags included.
        node, p1, p2, deltabase, cs, flags = headertuple
        return node, p1, p2, deltabase, cs, flags

    def _unpackmanifests(self, repo, revmap, trp, prog):
        super(cg3unpacker, self)._unpackmanifests(repo, revmap, trp, prog)
        # Any filelog-style headers at this point are directory manifests.
        for chunkdata in iter(self.filelogheader, {}):
            dirname = chunkdata[b"filename"]
            repo.ui.debug(b"adding %s revisions\n" % dirname)
            deltas = self.deltaiter()
            storage = repo.manifestlog.getstorage(dirname)
            if not storage.addgroup(deltas, revmap, trp):
                raise error.Abort(_(b"received dir revlog group is empty"))
532 532
533 533
class headerlessfixup(object):
    """File-like wrapper that replays already-consumed leading bytes.

    ``h`` holds data that was read off ``fh`` before this object took
    over; read() drains that buffer first, then falls through to ``fh``.
    """

    def __init__(self, fh, h):
        self._h = h
        self._fh = fh

    def read(self, n):
        buffered = self._h
        if not buffered:
            return readexactly(self._fh, n)
        d, self._h = buffered[:n], buffered[n:]
        if len(d) < n:
            # buffer exhausted mid-read; top up from the real stream
            d += readexactly(self._fh, n - len(d))
        return d
546 546
547 547
def _revisiondeltatochunks(delta, headerfn):
    """Serialize a revisiondelta to changegroup chunks."""

    # The changegroup wire format only carries deltas. A revision that
    # was captured in full therefore needs a synthetic diff header that
    # says "replace everything with this data".
    if delta.delta is not None:
        data = delta.delta
        prefix = b''
    else:
        data = delta.revision
        if delta.basenode == nullid:
            prefix = mdiff.trivialdiffheader(len(data))
        else:
            prefix = mdiff.replacediffheader(delta.baserevisionsize, len(data))

    meta = headerfn(delta)

    yield chunkheader(len(meta) + len(prefix) + len(data))
    yield meta
    if prefix:
        yield prefix
    yield data
573 573
574 574
575 575 def _sortnodesellipsis(store, nodes, cl, lookup):
576 576 """Sort nodes for changegroup generation."""
577 577 # Ellipses serving mode.
578 578 #
579 579 # In a perfect world, we'd generate better ellipsis-ified graphs
580 580 # for non-changelog revlogs. In practice, we haven't started doing
581 581 # that yet, so the resulting DAGs for the manifestlog and filelogs
582 582 # are actually full of bogus parentage on all the ellipsis
583 583 # nodes. This has the side effect that, while the contents are
584 584 # correct, the individual DAGs might be completely out of whack in
585 585 # a case like 882681bc3166 and its ancestors (back about 10
586 586 # revisions or so) in the main hg repo.
587 587 #
588 588 # The one invariant we *know* holds is that the new (potentially
589 589 # bogus) DAG shape will be valid if we order the nodes in the
590 590 # order that they're introduced in dramatis personae by the
591 591 # changelog, so what we do is we sort the non-changelog histories
592 592 # by the order in which they are used by the changelog.
593 593 key = lambda n: cl.rev(lookup(n))
594 594 return sorted(nodes, key=key)
595 595
596 596
def _resolvenarrowrevisioninfo(
    cl,
    store,
    ischangelog,
    rev,
    linkrev,
    linknode,
    clrevtolocalrev,
    fullclnodes,
    precomputedellipsis,
):
    """Resolve parents and linknode for an ellipsis revision of ``store``.

    The changelog-level ellipsis parents recorded in
    ``precomputedellipsis[linkrev]`` are remapped onto revisions of
    ``store`` itself. Returns a ``(p1node, p2node, linknode)`` tuple.
    """
    linkparents = precomputedellipsis[linkrev]

    def local(clrev):
        """Turn a changelog revnum into a local revnum.

        The ellipsis dag is stored as revnums on the changelog,
        but when we're producing ellipsis entries for
        non-changelog revlogs, we need to turn those numbers into
        something local. This does that for us, and during the
        changelog sending phase will also expand the stored
        mappings as needed.
        """
        if clrev == nullrev:
            return nullrev

        if ischangelog:
            return clrev

        # Walk the ellipsis-ized changelog breadth-first looking for a
        # change that has been linked from the current revlog.
        #
        # For a flat manifest revlog only a single step should be necessary
        # as all relevant changelog entries are relevant to the flat
        # manifest.
        #
        # For a filelog or tree manifest dirlog however not every changelog
        # entry will have been relevant, so we need to skip some changelog
        # nodes even after ellipsis-izing.
        walk = [clrev]
        while walk:
            p = walk[0]
            walk = walk[1:]
            if p in clrevtolocalrev:
                return clrevtolocalrev[p]
            elif p in fullclnodes:
                walk.extend([pp for pp in cl.parentrevs(p) if pp != nullrev])
            elif p in precomputedellipsis:
                walk.extend(
                    [pp for pp in precomputedellipsis[p] if pp != nullrev]
                )
            else:
                # In this case, we've got an ellipsis with parents
                # outside the current bundle (likely an
                # incremental pull). We "know" that we can use the
                # value of this same revlog at whatever revision
                # is pointed to by linknode. "Know" is in scare
                # quotes because I haven't done enough examination
                # of edge cases to convince myself this is really
                # a fact - it works for all the (admittedly
                # thorough) cases in our testsuite, but I would be
                # somewhat unsurprised to find a case in the wild
                # where this breaks down a bit. That said, I don't
                # know if it would hurt anything.
                for i in pycompat.xrange(rev, 0, -1):
                    if store.linkrev(i) == clrev:
                        return i
                # We failed to resolve a parent for this node, so
                # we crash the changegroup construction.
                raise error.Abort(
                    b'unable to resolve parent while packing %r %r'
                    b' for changeset %r' % (store.indexfile, rev, clrev)
                )

        return nullrev

    if not linkparents or (store.parentrevs(rev) == (nullrev, nullrev)):
        p1, p2 = nullrev, nullrev
    elif len(linkparents) == 1:
        (p1,) = sorted(local(p) for p in linkparents)
        p2 = nullrev
    else:
        p1, p2 = sorted(local(p) for p in linkparents)

    p1node, p2node = store.node(p1), store.node(p2)

    return p1node, p2node, linknode
684 684
685 685
def deltagroup(
    repo,
    store,
    nodes,
    ischangelog,
    lookup,
    forcedeltaparentprev,
    topic=None,
    ellipses=False,
    clrevtolocalrev=None,
    fullclnodes=None,
    precomputedellipsis=None,
):
    """Calculate deltas for a set of revisions.

    Is a generator of ``revisiondelta`` instances.

    If topic is not None, progress detail will be generated using this
    topic name (e.g. changesets, manifests, etc).
    """
    if not nodes:
        return

    cl = repo.changelog

    if ischangelog:
        # `hg log` shows changesets in storage order. To preserve order
        # across clones, send out changesets in storage order.
        nodesorder = b'storage'
    elif ellipses:
        nodes = _sortnodesellipsis(store, nodes, cl, lookup)
        nodesorder = b'nodes'
    else:
        nodesorder = None

    # Perform ellipses filtering and revision massaging. We do this before
    # emitrevisions() because a) filtering out revisions creates less work
    # for emitrevisions() b) dropping revisions would break emitrevisions()'s
    # assumptions about delta choices and we would possibly send a delta
    # referencing a missing base revision.
    #
    # Also, calling lookup() has side-effects with regards to populating
    # data structures. If we don't call lookup() for each node or if we call
    # lookup() after the first pass through each node, things can break -
    # possibly intermittently depending on the python hash seed! For that
    # reason, we store a mapping of all linknodes during the initial node
    # pass rather than use lookup() on the output side.
    if ellipses:
        filtered = []
        adjustedparents = {}
        linknodes = {}

        for node in nodes:
            rev = store.rev(node)
            linknode = lookup(node)
            linkrev = cl.rev(linknode)
            clrevtolocalrev[linkrev] = rev

            # If linknode is in fullclnodes, it means the corresponding
            # changeset was a full changeset and is being sent unaltered.
            if linknode in fullclnodes:
                linknodes[node] = linknode

            # If the corresponding changeset wasn't in the set computed
            # as relevant to us, it should be dropped outright.
            elif linkrev not in precomputedellipsis:
                continue

            else:
                # We could probably do this later and avoid the dict
                # holding state. But it likely doesn't matter.
                p1node, p2node, linknode = _resolvenarrowrevisioninfo(
                    cl,
                    store,
                    ischangelog,
                    rev,
                    linkrev,
                    linknode,
                    clrevtolocalrev,
                    fullclnodes,
                    precomputedellipsis,
                )

                adjustedparents[node] = (p1node, p2node)
                linknodes[node] = linknode

            filtered.append(node)

        nodes = filtered

    # We expect the first pass to be fast, so we only engage the progress
    # meter for constructing the revision deltas.
    progress = None
    if topic is not None:
        progress = repo.ui.makeprogress(
            topic, unit=_(b'chunks'), total=len(nodes)
        )

    configtarget = repo.ui.config(b'devel', b'bundle.delta')
    if configtarget not in (b'', b'p1', b'full'):
        # Must be a bytes literal: every other _() call in this module
        # passes bytes, and %-formatting a str with the bytes
        # ``configtarget`` would fail on Python 3.
        msg = _(b"""config "devel.bundle.delta" as unknown value: %s""")
        repo.ui.warn(msg % configtarget)

    deltamode = repository.CG_DELTAMODE_STD
    if forcedeltaparentprev:
        deltamode = repository.CG_DELTAMODE_PREV
    elif configtarget == b'p1':
        deltamode = repository.CG_DELTAMODE_P1
    elif configtarget == b'full':
        deltamode = repository.CG_DELTAMODE_FULL

    revisions = store.emitrevisions(
        nodes,
        nodesorder=nodesorder,
        revisiondata=True,
        assumehaveparentrevisions=not ellipses,
        deltamode=deltamode,
    )

    for i, revision in enumerate(revisions):
        if progress:
            progress.update(i + 1)

        if ellipses:
            linknode = linknodes[revision.node]

            if revision.node in adjustedparents:
                p1node, p2node = adjustedparents[revision.node]
                revision.p1node = p1node
                revision.p2node = p2node
                revision.flags |= repository.REVISION_FLAG_ELLIPSIS

        else:
            linknode = lookup(revision.node)

        revision.linknode = linknode
        yield revision

    if progress:
        progress.complete()
826 826
827 827
828 828 class cgpacker(object):
    def __init__(
        self,
        repo,
        oldmatcher,
        matcher,
        version,
        builddeltaheader,
        manifestsend,
        forcedeltaparentprev=False,
        bundlecaps=None,
        ellipses=False,
        shallow=False,
        ellipsisroots=None,
        fullnodes=None,
    ):
        """Given a source repo, construct a bundler.

        oldmatcher is a matcher that matches on files the client already has.
        These will not be included in the changegroup.

        matcher is a matcher that matches on files to include in the
        changegroup. Used to facilitate sparse changegroups.

        version is the changegroup version string being generated
        (e.g. b'01', b'02', b'03').

        forcedeltaparentprev indicates whether delta parents must be against
        the previous revision in a delta group. This should only be used for
        compatibility with changegroup version 1.

        builddeltaheader is a callable that constructs the header for a group
        delta.

        manifestsend is a chunk to send after manifests have been fully emitted.

        ellipses indicates whether ellipsis serving mode is enabled.

        bundlecaps is optional and can be used to specify the set of
        capabilities which can be used to build the bundle. While bundlecaps is
        unused in core Mercurial, extensions rely on this feature to communicate
        capabilities to customize the changegroup packer.

        shallow indicates whether shallow data might be sent. The packer may
        need to pack file contents not introduced by the changes being packed.

        ellipsisroots maps ellipsis revisions to their roots at the
        changelog level; only meaningful in ellipses mode.

        fullnodes is the set of changelog nodes which should not be ellipsis
        nodes. We store this rather than the set of nodes that should be
        ellipsis because for very large histories we expect this to be
        significantly smaller.
        """
        assert oldmatcher
        assert matcher
        self._oldmatcher = oldmatcher
        self._matcher = matcher

        self.version = version
        self._forcedeltaparentprev = forcedeltaparentprev
        self._builddeltaheader = builddeltaheader
        self._manifestsend = manifestsend
        self._ellipses = ellipses

        # Set of capabilities we can use to build the bundle.
        if bundlecaps is None:
            bundlecaps = set()
        self._bundlecaps = bundlecaps
        self._isshallow = shallow
        self._fullclnodes = fullnodes

        # Maps ellipsis revs to their roots at the changelog level.
        self._precomputedellipsis = ellipsisroots

        self._repo = repo

        # Only chatter about bundle sizes in verbose (non-debug) mode.
        if self._repo.ui.verbose and not self._repo.ui.debugflag:
            self._verbosenote = self._repo.ui.note
        else:
            self._verbosenote = lambda s: None
903 903
904 904 def generate(
905 905 self, commonrevs, clnodes, fastpathlinkrev, source, changelog=True
906 906 ):
907 907 """Yield a sequence of changegroup byte chunks.
908 908 If changelog is False, changelog data won't be added to changegroup
909 909 """
910 910
911 911 repo = self._repo
912 912 cl = repo.changelog
913 913
914 914 self._verbosenote(_(b'uncompressed size of bundle content:\n'))
915 915 size = 0
916 916
917 917 clstate, deltas = self._generatechangelog(
918 918 cl, clnodes, generate=changelog
919 919 )
920 920 for delta in deltas:
921 921 for chunk in _revisiondeltatochunks(delta, self._builddeltaheader):
922 922 size += len(chunk)
923 923 yield chunk
924 924
925 925 close = closechunk()
926 926 size += len(close)
927 927 yield closechunk()
928 928
929 929 self._verbosenote(_(b'%8.i (changelog)\n') % size)
930 930
931 931 clrevorder = clstate[b'clrevorder']
932 932 manifests = clstate[b'manifests']
933 933 changedfiles = clstate[b'changedfiles']
934 934
935 935 # We need to make sure that the linkrev in the changegroup refers to
936 936 # the first changeset that introduced the manifest or file revision.
937 937 # The fastpath is usually safer than the slowpath, because the filelogs
938 938 # are walked in revlog order.
939 939 #
940 940 # When taking the slowpath when the manifest revlog uses generaldelta,
941 941 # the manifest may be walked in the "wrong" order. Without 'clrevorder',
942 942 # we would get an incorrect linkrev (see fix in cc0ff93d0c0c).
943 943 #
944 944 # When taking the fastpath, we are only vulnerable to reordering
945 945 # of the changelog itself. The changelog never uses generaldelta and is
946 946 # never reordered. To handle this case, we simply take the slowpath,
947 947 # which already has the 'clrevorder' logic. This was also fixed in
948 948 # cc0ff93d0c0c.
949 949
950 950 # Treemanifests don't work correctly with fastpathlinkrev
951 951 # either, because we don't discover which directory nodes to
952 952 # send along with files. This could probably be fixed.
953 953 fastpathlinkrev = fastpathlinkrev and (
954 954 b'treemanifest' not in repo.requirements
955 955 )
956 956
957 957 fnodes = {} # needed file nodes
958 958
959 959 size = 0
960 960 it = self.generatemanifests(
961 961 commonrevs,
962 962 clrevorder,
963 963 fastpathlinkrev,
964 964 manifests,
965 965 fnodes,
966 966 source,
967 967 clstate[b'clrevtomanifestrev'],
968 968 )
969 969
970 970 for tree, deltas in it:
971 971 if tree:
972 972 assert self.version == b'03'
973 973 chunk = _fileheader(tree)
974 974 size += len(chunk)
975 975 yield chunk
976 976
977 977 for delta in deltas:
978 978 chunks = _revisiondeltatochunks(delta, self._builddeltaheader)
979 979 for chunk in chunks:
980 980 size += len(chunk)
981 981 yield chunk
982 982
983 983 close = closechunk()
984 984 size += len(close)
985 985 yield close
986 986
987 987 self._verbosenote(_(b'%8.i (manifests)\n') % size)
988 988 yield self._manifestsend
989 989
990 990 mfdicts = None
991 991 if self._ellipses and self._isshallow:
992 992 mfdicts = [
993 993 (self._repo.manifestlog[n].read(), lr)
994 994 for (n, lr) in pycompat.iteritems(manifests)
995 995 ]
996 996
997 997 manifests.clear()
998 998 clrevs = set(cl.rev(x) for x in clnodes)
999 999
1000 1000 it = self.generatefiles(
1001 1001 changedfiles,
1002 1002 commonrevs,
1003 1003 source,
1004 1004 mfdicts,
1005 1005 fastpathlinkrev,
1006 1006 fnodes,
1007 1007 clrevs,
1008 1008 )
1009 1009
1010 1010 for path, deltas in it:
1011 1011 h = _fileheader(path)
1012 1012 size = len(h)
1013 1013 yield h
1014 1014
1015 1015 for delta in deltas:
1016 1016 chunks = _revisiondeltatochunks(delta, self._builddeltaheader)
1017 1017 for chunk in chunks:
1018 1018 size += len(chunk)
1019 1019 yield chunk
1020 1020
1021 1021 close = closechunk()
1022 1022 size += len(close)
1023 1023 yield close
1024 1024
1025 1025 self._verbosenote(_(b'%8.i %s\n') % (size, path))
1026 1026
1027 1027 yield closechunk()
1028 1028
1029 1029 if clnodes:
1030 1030 repo.hook(b'outgoing', node=hex(clnodes[0]), source=source)
1031 1031
    def _generatechangelog(self, cl, nodes, generate=True):
        """Generate data for changelog chunks.

        Returns a 2-tuple of a dict containing state and an iterable of
        byte chunks. The state will not be fully populated until the
        chunk stream has been fully consumed.

        The state dict holds:
          b'clrevorder'         node -> emission index
          b'manifests'          manifest node -> first changelog node
                                introducing it
          b'changedfiles'       set of file names touched
          b'clrevtomanifestrev' changelog rev -> manifest rev (only
                                populated in ellipsis mode)

        if generate is False, the state will be fully populated and no chunk
        stream will be yielded
        """
        clrevorder = {}
        manifests = {}
        mfl = self._repo.manifestlog
        changedfiles = set()
        clrevtomanifestrev = {}

        state = {
            b'clrevorder': clrevorder,
            b'manifests': manifests,
            b'changedfiles': changedfiles,
            b'clrevtomanifestrev': clrevtomanifestrev,
        }

        if not (generate or self._ellipses):
            # No chunks wanted and no ellipsis bookkeeping needed: fill in
            # the state eagerly and return an empty chunk stream.
            # sort the nodes in storage order
            nodes = sorted(nodes, key=cl.rev)
            for node in nodes:
                c = cl.changelogrevision(node)
                clrevorder[node] = len(clrevorder)
                # record the first changeset introducing this manifest version
                manifests.setdefault(c.manifest, node)
                # Record a complete list of potentially-changed files in
                # this manifest.
                changedfiles.update(c.files)

            return state, ()

        # Callback for the changelog, used to collect changed files and
        # manifest nodes.
        # Returns the linkrev node (identity in the changelog case).
        def lookupcl(x):
            c = cl.changelogrevision(x)
            clrevorder[x] = len(clrevorder)

            if self._ellipses:
                # Only update manifests if x is going to be sent. Otherwise we
                # end up with bogus linkrevs specified for manifests and
                # we skip some manifest nodes that we should otherwise
                # have sent.
                if (
                    x in self._fullclnodes
                    or cl.rev(x) in self._precomputedellipsis
                ):

                    manifestnode = c.manifest
                    # Record the first changeset introducing this manifest
                    # version.
                    manifests.setdefault(manifestnode, x)
                    # Set this narrow-specific dict so we have the lowest
                    # manifest revnum to look up for this cl revnum. (Part of
                    # mapping changelog ellipsis parents to manifest ellipsis
                    # parents)
                    clrevtomanifestrev.setdefault(
                        cl.rev(x), mfl.rev(manifestnode)
                    )
                # We can't trust the changed files list in the changeset if the
                # client requested a shallow clone.
                if self._isshallow:
                    changedfiles.update(mfl[c.manifest].read().keys())
                else:
                    changedfiles.update(c.files)
            else:
                # record the first changeset introducing this manifest version
                manifests.setdefault(c.manifest, x)
                # Record a complete list of potentially-changed files in
                # this manifest.
                changedfiles.update(c.files)

            return x

        # The returned generator fills in `state` via lookupcl as a side
        # effect while it is consumed.
        gen = deltagroup(
            self._repo,
            cl,
            nodes,
            True,
            lookupcl,
            self._forcedeltaparentprev,
            ellipses=self._ellipses,
            topic=_(b'changesets'),
            clrevtolocalrev={},
            fullclnodes=self._fullclnodes,
            precomputedellipsis=self._precomputedellipsis,
        )

        return state, gen
1127 1127
    def generatemanifests(
        self,
        commonrevs,
        clrevorder,
        fastpathlinkrev,
        manifests,
        fnodes,
        source,
        clrevtolocalrev,
    ):
        """Returns an iterator of changegroup chunks containing manifests.

        Yields (tree, deltas) pairs; the root manifest uses tree == b''
        and subdirectory manifests (treemanifest repos) use the directory
        path with a trailing b'/'.

        `source` is unused here, but is used by extensions like remotefilelog to
        change what is sent based in pulls vs pushes, etc.
        """
        repo = self._repo
        mfl = repo.manifestlog
        # Work queue of tree -> {node -> linknode}; subtree entries are
        # discovered while walking their parent manifests (see the b't'
        # flag handling below).
        tmfnodes = {b'': manifests}

        # Callback for the manifest, used to collect linkrevs for filelog
        # revisions.
        # Returns the linkrev node (collected in lookupcl).
        def makelookupmflinknode(tree, nodes):
            if fastpathlinkrev:
                assert not tree
                return manifests.__getitem__

            def lookupmflinknode(x):
                """Callback for looking up the linknode for manifests.

                Returns the linkrev node for the specified manifest.

                SIDE EFFECT:

                1) fclnodes gets populated with the list of relevant
                file nodes if we're not using fastpathlinkrev
                2) When treemanifests are in use, collects treemanifest nodes
                to send

                Note that this means manifests must be completely sent to
                the client before you can trust the list of files and
                treemanifests to send.
                """
                clnode = nodes[x]
                mdata = mfl.get(tree, x).readfast(shallow=True)
                for p, n, fl in mdata.iterentries():
                    if fl == b't': # subdirectory manifest
                        subtree = tree + p + b'/'
                        tmfclnodes = tmfnodes.setdefault(subtree, {})
                        tmfclnode = tmfclnodes.setdefault(n, clnode)
                        # Keep the earliest (lowest clrevorder) linknode.
                        if clrevorder[clnode] < clrevorder[tmfclnode]:
                            tmfclnodes[n] = clnode
                    else:
                        f = tree + p
                        fclnodes = fnodes.setdefault(f, {})
                        fclnode = fclnodes.setdefault(n, clnode)
                        if clrevorder[clnode] < clrevorder[fclnode]:
                            fclnodes[n] = clnode
                return clnode

            return lookupmflinknode

        while tmfnodes:
            tree, nodes = tmfnodes.popitem()

            should_visit = self._matcher.visitdir(tree[:-1])
            if tree and not should_visit:
                continue

            store = mfl.getstorage(tree)

            if not should_visit:
                # No nodes to send because this directory is out of
                # the client's view of the repository (probably
                # because of narrow clones). Do this even for the root
                # directory (tree=='')
                prunednodes = []
            else:
                # Avoid sending any manifest nodes we can prove the
                # client already has by checking linkrevs. See the
                # related comment in generatefiles().
                prunednodes = self._prunemanifests(store, nodes, commonrevs)

            if tree and not prunednodes:
                continue

            lookupfn = makelookupmflinknode(tree, nodes)

            deltas = deltagroup(
                self._repo,
                store,
                prunednodes,
                False,
                lookupfn,
                self._forcedeltaparentprev,
                ellipses=self._ellipses,
                topic=_(b'manifests'),
                clrevtolocalrev=clrevtolocalrev,
                fullclnodes=self._fullclnodes,
                precomputedellipsis=self._precomputedellipsis,
            )

            if not self._oldmatcher.visitdir(store.tree[:-1]):
                yield tree, deltas
            else:
                # 'deltas' is a generator and we need to consume it even if
                # we are not going to send it because a side-effect is that
                # it updates tmfnodes (via lookupfn)
                for d in deltas:
                    pass
                if not tree:
                    yield tree, []
1240 1240
1241 1241 def _prunemanifests(self, store, nodes, commonrevs):
1242 1242 if not self._ellipses:
1243 1243 # In non-ellipses case and large repositories, it is better to
1244 1244 # prevent calling of store.rev and store.linkrev on a lot of
1245 1245 # nodes as compared to sending some extra data
1246 1246 return nodes.copy()
1247 1247 # This is split out as a separate method to allow filtering
1248 1248 # commonrevs in extension code.
1249 1249 #
1250 1250 # TODO(augie): this shouldn't be required, instead we should
1251 1251 # make filtering of revisions to send delegated to the store
1252 1252 # layer.
1253 1253 frev, flr = store.rev, store.linkrev
1254 1254 return [n for n in nodes if flr(frev(n)) not in commonrevs]
1255 1255
    # The 'source' parameter is useful for extensions
    def generatefiles(
        self,
        changedfiles,
        commonrevs,
        source,
        mfdicts,
        fastpathlinkrev,
        fnodes,
        clrevs,
    ):
        """Yield (path, deltas) pairs for changed files, sorted by path.

        changedfiles is the candidate file list collected from the
        changelog; it is first narrowed to files selected by the new
        matcher but not the old one. commonrevs is used to skip
        filenodes whose linkrev the client already has. fnodes
        (filename -> {node -> linknode}) was populated by the manifest
        walk and is used in the non-fastpath case. mfdicts is only set
        for shallow ellipsis serving (see generate()) and carries
        (manifest dict, linkrev) pairs used to repair linknodes.
        """
        # Narrow clones: only send files newly visible to the client.
        changedfiles = [
            f
            for f in changedfiles
            if self._matcher(f) and not self._oldmatcher(f)
        ]

        if not fastpathlinkrev:

            def normallinknodes(unused, fname):
                # Linknodes were collected into fnodes by the manifest walk.
                return fnodes.get(fname, {})

        else:
            cln = self._repo.changelog.node

            def normallinknodes(store, fname):
                # Fastpath: derive linknodes straight from the filelog's
                # own linkrevs, restricted to outgoing changesets (clrevs).
                flinkrev = store.linkrev
                fnode = store.node
                revs = ((r, flinkrev(r)) for r in store)
                return dict(
                    (fnode(r), cln(lr)) for r, lr in revs if lr in clrevs
                )

        clrevtolocalrev = {}

        if self._isshallow:
            # In a shallow clone, the linknodes callback needs to also include
            # those file nodes that are in the manifests we sent but weren't
            # introduced by those manifests.
            commonctxs = [self._repo[c] for c in commonrevs]
            clrev = self._repo.changelog.rev

            def linknodes(flog, fname):
                for c in commonctxs:
                    try:
                        fnode = c.filenode(fname)
                        clrevtolocalrev[c.rev()] = flog.rev(fnode)
                    except error.ManifestLookupError:
                        pass
                links = normallinknodes(flog, fname)
                if len(links) != len(mfdicts):
                    for mf, lr in mfdicts:
                        fnode = mf.get(fname, None)
                        if fnode in links:
                            # Keep the earliest linkrev for this filenode.
                            links[fnode] = min(links[fnode], lr, key=clrev)
                        elif fnode:
                            links[fnode] = lr
                return links

        else:
            linknodes = normallinknodes

        repo = self._repo
        progress = repo.ui.makeprogress(
            _(b'files'), unit=_(b'files'), total=len(changedfiles)
        )
        for i, fname in enumerate(sorted(changedfiles)):
            filerevlog = repo.file(fname)
            if not filerevlog:
                raise error.Abort(
                    _(b"empty or missing file data for %s") % fname
                )

            clrevtolocalrev.clear()

            linkrevnodes = linknodes(filerevlog, fname)
            # Lookup for filenodes, we collected the linkrev nodes above in the
            # fastpath case and with lookupmf in the slowpath case.
            def lookupfilelog(x):
                return linkrevnodes[x]

            frev, flr = filerevlog.rev, filerevlog.linkrev
            # Skip sending any filenode we know the client already
            # has. This avoids over-sending files relatively
            # inexpensively, so it's not a problem if we under-filter
            # here.
            filenodes = [
                n for n in linkrevnodes if flr(frev(n)) not in commonrevs
            ]

            if not filenodes:
                continue

            progress.update(i + 1, item=fname)

            deltas = deltagroup(
                self._repo,
                filerevlog,
                filenodes,
                False,
                lookupfilelog,
                self._forcedeltaparentprev,
                ellipses=self._ellipses,
                clrevtolocalrev=clrevtolocalrev,
                fullclnodes=self._fullclnodes,
                precomputedellipsis=self._precomputedellipsis,
            )

            yield fname, deltas

        progress.complete()
1367 1367
1368 1368
def _makecg1packer(
    repo,
    oldmatcher,
    matcher,
    bundlecaps,
    ellipses=False,
    shallow=False,
    ellipsisroots=None,
    fullnodes=None,
):
    """Construct a cgpacker that emits version 01 changegroups."""

    def builddeltaheader(d):
        # The cg1 header carries no delta base node, hence
        # forcedeltaparentprev=True below.
        return _CHANGEGROUPV1_DELTA_HEADER.pack(
            d.node, d.p1node, d.p2node, d.linknode
        )

    return cgpacker(
        repo,
        oldmatcher,
        matcher,
        b'01',
        builddeltaheader=builddeltaheader,
        manifestsend=b'',
        forcedeltaparentprev=True,
        bundlecaps=bundlecaps,
        ellipses=ellipses,
        shallow=shallow,
        ellipsisroots=ellipsisroots,
        fullnodes=fullnodes,
    )
1397 1397
1398 1398
def _makecg2packer(
    repo,
    oldmatcher,
    matcher,
    bundlecaps,
    ellipses=False,
    shallow=False,
    ellipsisroots=None,
    fullnodes=None,
):
    """Construct a cgpacker that emits version 02 changegroups."""

    def builddeltaheader(d):
        # The cg2 header extends cg1's with an explicit delta base node.
        return _CHANGEGROUPV2_DELTA_HEADER.pack(
            d.node, d.p1node, d.p2node, d.basenode, d.linknode
        )

    return cgpacker(
        repo,
        oldmatcher,
        matcher,
        b'02',
        builddeltaheader=builddeltaheader,
        manifestsend=b'',
        bundlecaps=bundlecaps,
        ellipses=ellipses,
        shallow=shallow,
        ellipsisroots=ellipsisroots,
        fullnodes=fullnodes,
    )
1426 1426
1427 1427
def _makecg3packer(
    repo,
    oldmatcher,
    matcher,
    bundlecaps,
    ellipses=False,
    shallow=False,
    ellipsisroots=None,
    fullnodes=None,
):
    """Construct a cgpacker that emits version 03 changegroups."""

    def builddeltaheader(d):
        # The cg3 header extends cg2's with a 16-bit revlog flags field.
        return _CHANGEGROUPV3_DELTA_HEADER.pack(
            d.node, d.p1node, d.p2node, d.basenode, d.linknode, d.flags
        )

    return cgpacker(
        repo,
        oldmatcher,
        matcher,
        b'03',
        builddeltaheader=builddeltaheader,
        # cg3 terminates the manifest section with an extra empty chunk.
        manifestsend=closechunk(),
        bundlecaps=bundlecaps,
        ellipses=ellipses,
        shallow=shallow,
        ellipsisroots=ellipsisroots,
        fullnodes=fullnodes,
    )
1455 1455
1456 1456
# Maps a changegroup version identifier to a (packer factory, unpacker
# class) pair for that version.
_packermap = {
    b'01': (_makecg1packer, cg1unpacker),
    # cg2 adds support for exchanging generaldelta
    b'02': (_makecg2packer, cg2unpacker),
    # cg3 adds support for exchanging revlog flags and treemanifests
    b'03': (_makecg3packer, cg3unpacker),
}
1464 1464
1465 1465
def allsupportedversions(repo):
    """Return the set of all changegroup versions usable with this repo."""
    versions = set(_packermap.keys())
    needv03 = False
    if (
        repo.ui.configbool(b'experimental', b'changegroup3')
        or repo.ui.configbool(b'experimental', b'treemanifest')
        or b'treemanifest' in repo.requirements
    ):
        # we keep version 03 because we need it to exchange treemanifest data
        #
        # we also keep version 01 and 02, because it is possible for a repo to
        # contain both normal and tree manifests at the same time. so using an
        # older version to pull data is viable
        #
        # (or even to push subset of history)
        needv03 = True
    if b'exp-sidedata-flag' in repo.requirements:
        needv03 = True
        # don't attempt to use 01/02 until we do sidedata cleaning
        versions.discard(b'01')
        versions.discard(b'02')
    if not needv03:
        versions.discard(b'03')
    return versions
1485 1490
1486 1491
# Changegroup versions that can be applied to the repo
def supportedincomingversions(repo):
    """Return the set of changegroup versions this repo can receive."""
    return allsupportedversions(repo)
1490 1495
1491 1496
# Changegroup versions that can be created from the repo
def supportedoutgoingversions(repo):
    """Return the set of changegroup versions this repo can generate."""
    versions = allsupportedversions(repo)
    if b'treemanifest' in repo.requirements:
        # Versions 01 and 02 support only flat manifests and it's just too
        # expensive to convert between the flat manifest and tree manifest on
        # the fly. Since tree manifests are hashed differently, all of history
        # would have to be converted. Instead, we simply don't even pretend to
        # support versions 01 and 02.
        versions.discard(b'01')
        versions.discard(b'02')
    if repository.NARROW_REQUIREMENT in repo.requirements:
        # Versions 01 and 02 don't support revlog flags, and we need to
        # support that for stripping and unbundling to work.
        versions.discard(b'01')
        versions.discard(b'02')
    if LFS_REQUIREMENT in repo.requirements:
        # Versions 01 and 02 don't support revlog flags, and we need to
        # mark LFS entries with REVIDX_EXTSTORED.
        versions.discard(b'01')
        versions.discard(b'02')

    return versions
1515 1520
1516 1521
def localversion(repo):
    """Return the best changegroup version for local-only bundles."""
    # Finds the best version to use for bundles that are meant to be used
    # locally, such as those from strip and shelve, and temporary bundles.
    return max(supportedoutgoingversions(repo))
1521 1526
1522 1527
def safeversion(repo):
    """Return the smallest version it's safe to assume clients support."""
    # Finds the smallest version that it's safe to assume clients of the repo
    # will support. For example, all hg versions that support generaldelta also
    # support changegroup 02.
    versions = supportedoutgoingversions(repo)
    if b'generaldelta' in repo.requirements:
        versions.discard(b'01')
    assert versions
    return min(versions)
1532 1537
1533 1538
def getbundler(
    version,
    repo,
    bundlecaps=None,
    oldmatcher=None,
    matcher=None,
    ellipses=False,
    shallow=False,
    ellipsisroots=None,
    fullnodes=None,
):
    """Return a changegroup packer for the requested version.

    matcher selects the files to include (defaults to always());
    oldmatcher describes what the receiver already matched previously
    (defaults to never()), so only the difference is sent.

    Raises ProgrammingError if version 01 is combined with a sparse
    matcher, and Abort if ellipsis serving is requested with a version
    older than 03.
    """
    assert version in supportedoutgoingversions(repo)

    if matcher is None:
        matcher = matchmod.always()
    if oldmatcher is None:
        oldmatcher = matchmod.never()

    if version == b'01' and not matcher.always():
        raise error.ProgrammingError(
            b'version 01 changegroups do not support sparse file matchers'
        )

    if ellipses and version in (b'01', b'02'):
        raise error.Abort(
            _(
                b'ellipsis nodes require at least cg3 on client and server, '
                b'but negotiated version %s'
            )
            % version
        )

    # Requested files could include files not in the local store. So
    # filter those out.
    matcher = repo.narrowmatch(matcher)

    fn = _packermap[version][0]
    return fn(
        repo,
        oldmatcher,
        matcher,
        bundlecaps,
        ellipses=ellipses,
        shallow=shallow,
        ellipsisroots=ellipsisroots,
        fullnodes=fullnodes,
    )
1581 1586
1582 1587
def getunbundler(version, fh, alg, extras=None):
    """Return an unpacker for `version` reading from file object `fh`.

    `alg` and `extras` are forwarded to the unpacker's constructor.
    """
    return _packermap[version][1](fh, alg, extras=extras)
1585 1590
1586 1591
def _changegroupinfo(repo, nodes, source):
    """Report on the outgoing changesets (verbose mode or 'bundle' source).

    In debug mode, additionally lists every changeset node.
    """
    ui = repo.ui
    if not (ui.verbose or source == b'bundle'):
        return
    ui.status(_(b"%d changesets found\n") % len(nodes))
    if ui.debugflag:
        ui.debug(b"list of changesets:\n")
        for node in nodes:
            ui.debug(b"%s\n" % hex(node))
1594 1599
1595 1600
def makechangegroup(
    repo, outgoing, version, source, fastpath=False, bundlecaps=None
):
    """Generate a changegroup for `outgoing` and wrap it in an unpacker.

    The unpacker reads from a chunkbuffer over the generated stream;
    the b'clcount' extra records the number of changesets included.
    """
    cgstream = makestream(
        repo,
        outgoing,
        version,
        source,
        fastpath=fastpath,
        bundlecaps=bundlecaps,
    )
    return getunbundler(
        version,
        util.chunkbuffer(cgstream),
        None,
        {b'clcount': len(outgoing.missing)},
    )
1613 1618
1614 1619
def makestream(
    repo,
    outgoing,
    version,
    source,
    fastpath=False,
    bundlecaps=None,
    matcher=None,
):
    """Return a generator of changegroup chunks for `outgoing` changesets.

    Fires the 'preoutgoing' hook (which may abort) before generation.
    """
    bundler = getbundler(version, repo, bundlecaps=bundlecaps, matcher=matcher)

    repo = repo.unfiltered()
    commonrevs = outgoing.common
    csets = outgoing.missing
    heads = outgoing.missingheads
    # We go through the fast path if we get told to, or if all unfiltered
    # heads have been requested (since we then know all linkrevs will
    # be pulled by the client).
    heads.sort()
    fastpathlinkrev = fastpath or (
        repo.filtername is None and heads == sorted(repo.heads())
    )

    repo.hook(b'preoutgoing', throw=True, source=source)
    _changegroupinfo(repo, csets, source)
    return bundler.generate(commonrevs, csets, fastpathlinkrev, source)
1641 1646
1642 1647
def _addchangegroupfiles(repo, source, revmap, trp, expectedfiles, needfiles):
    """Apply the file-revision section of a changegroup to the repo.

    `source` is an unbundler positioned at the file section. `revmap`
    and `trp` are passed straight through to filelog.addgroup (revmap is
    presumably the link-node mapper and trp the transaction — confirm
    with callers). `needfiles` maps file name -> set of nodes that must
    be present after the transfer; Abort is raised on spurious or
    missing entries.

    Returns a (revisions, files) tuple counting what was added.
    """
    revisions = 0
    files = 0
    progress = repo.ui.makeprogress(
        _(b'files'), unit=_(b'files'), total=expectedfiles
    )
    # iter() sentinel: an empty header dict marks the end of the section.
    for chunkdata in iter(source.filelogheader, {}):
        files += 1
        f = chunkdata[b"filename"]
        repo.ui.debug(b"adding %s revisions\n" % f)
        progress.increment()
        fl = repo.file(f)
        # Remember the old length so we can count the added revisions.
        o = len(fl)
        try:
            deltas = source.deltaiter()
            if not fl.addgroup(deltas, revmap, trp):
                raise error.Abort(_(b"received file revlog group is empty"))
        except error.CensoredBaseError as e:
            raise error.Abort(_(b"received delta base is censored: %s") % e)
        revisions += len(fl) - o
        if f in needfiles:
            needs = needfiles[f]
            for new in pycompat.xrange(o, len(fl)):
                n = fl.node(new)
                if n in needs:
                    needs.remove(n)
                else:
                    raise error.Abort(_(b"received spurious file revlog entry"))
            if not needs:
                del needfiles[f]
    progress.complete()

    # Anything still listed in needfiles must already exist locally;
    # otherwise the incoming data was incomplete.
    for f, needs in pycompat.iteritems(needfiles):
        fl = repo.file(f)
        for n in needs:
            try:
                fl.rev(n)
            except error.LookupError:
                raise error.Abort(
                    _(b'missing file data for %s:%s - run hg verify')
                    % (f, hex(n))
                )

    return revisions, files
General Comments 0
You need to be logged in to leave comments. Login now