##// END OF EJS Templates
changegroup: suppress pytype error that's wrong...
Augie Fackler -
r43791:5b5e62c2 default
parent child Browse files
Show More
@@ -1,1691 +1,1693 b''
1 1 # changegroup.py - Mercurial changegroup manipulation functions
2 2 #
3 3 # Copyright 2006 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import os
11 11 import struct
12 12 import weakref
13 13
14 14 from .i18n import _
15 15 from .node import (
16 16 hex,
17 17 nullid,
18 18 nullrev,
19 19 short,
20 20 )
21 21 from .pycompat import open
22 22
23 23 from . import (
24 24 error,
25 25 match as matchmod,
26 26 mdiff,
27 27 phases,
28 28 pycompat,
29 29 util,
30 30 )
31 31
32 32 from .interfaces import repository
33 33
# Wire-format delta headers, one per changegroup version:
#   v1: node, p1, p2, linknode (delta base is implicit - prev node or p1)
#   v2: node, p1, p2, deltabase, linknode (explicit delta base)
#   v3: as v2 plus a 16-bit flags field (big-endian)
_CHANGEGROUPV1_DELTA_HEADER = struct.Struct(b"20s20s20s20s")
_CHANGEGROUPV2_DELTA_HEADER = struct.Struct(b"20s20s20s20s20s")
_CHANGEGROUPV3_DELTA_HEADER = struct.Struct(b">20s20s20s20s20sH")

# Repository requirement string associated with LFS usage.
LFS_REQUIREMENT = b'lfs'

# Local alias for the stream-reading helper used throughout this module.
readexactly = util.readexactly
41 41
42 42
def getchunk(stream):
    """Return the next framed chunk from ``stream`` as a byte string.

    A chunk is a big-endian 4-byte length (which counts itself) followed
    by the payload. An empty chunk (length 0) yields ``b""``; a length of
    1-4 is corrupt and aborts.
    """
    lengthdata = readexactly(stream, 4)
    (length,) = struct.unpack(b">l", lengthdata)
    if length > 4:
        return readexactly(stream, length - 4)
    if length:
        raise error.Abort(_(b"invalid chunk length %d") % length)
    return b""
52 52
53 53
def chunkheader(length):
    """Return the framing header (bytes) for a chunk of ``length`` payload bytes.

    The on-the-wire length field counts itself, hence the +4.
    """
    return struct.pack(b">l", 4 + length)
57 57
58 58
def closechunk():
    """Return the header (bytes) of a zero-length chunk, used as a terminator."""
    return struct.pack(b">l", 0)
62 62
63 63
64 64 def _fileheader(path):
65 65 """Obtain a changegroup chunk header for a named path."""
66 66 return chunkheader(len(path)) + path
67 67
68 68
def writechunks(ui, chunks, filename, vfs=None):
    """Write chunks to a file and return its filename.

    The stream is assumed to be a bundle file.
    Existing files will not be overwritten.
    If no filename is specified, a temporary file is created.
    On failure, the partially-written file is removed.
    """
    fh = None
    cleanup = None
    try:
        if not filename:
            fd, filename = pycompat.mkstemp(prefix=b"hg-bundle-", suffix=b".hg")
            fh = os.fdopen(fd, r"wb")
        elif vfs:
            fh = vfs.open(filename, b"wb")
        else:
            # Increase default buffer size because default is usually
            # small (4k is common on Linux).
            fh = open(filename, b"wb", 131072)
        cleanup = filename
        for chunk in chunks:
            fh.write(chunk)
        # All chunks written: disarm the error-path unlink below.
        cleanup = None
        return filename
    finally:
        if fh is not None:
            fh.close()
        if cleanup is not None:
            if filename and vfs:
                vfs.unlink(cleanup)
            else:
                os.unlink(cleanup)
102 102
103 103
class cg1unpacker(object):
    """Unpacker for cg1 changegroup streams.

    A changegroup unpacker handles the framing of the revision data in
    the wire format. Most consumers will want to use the apply()
    method to add the changes from the changegroup to a repository.

    If you're forwarding a changegroup unmodified to another consumer,
    use getchunks(), which returns an iterator of changegroup
    chunks. This is mostly useful for cases where you need to know the
    data stream has ended by observing the end of the changegroup.

    deltachunk() is useful only if you're applying delta data. Most
    consumers should prefer apply() instead.

    A few other public methods exist. Those are used only for
    bundlerepo and some debug commands - their use is discouraged.
    """

    deltaheader = _CHANGEGROUPV1_DELTA_HEADER
    deltaheadersize = deltaheader.size
    version = b'01'
    _grouplistcount = 1  # One list of files after the manifests

    def __init__(self, fh, alg, extras=None):
        """Wrap stream ``fh``, decompressing with bundle compression ``alg``.

        ``alg`` may be None, which is treated as b'UN' (uncompressed).
        Raises error.Abort for an unknown compression type. ``extras``
        is an optional dict stashed on the instance for callers.
        """
        if alg is None:
            alg = b'UN'
        if alg not in util.compengines.supportedbundletypes:
            raise error.Abort(_(b'unknown stream compression type: %s') % alg)
        if alg == b'BZ':
            # NOTE(review): presumably the b'BZ' magic has already been
            # consumed from the stream by the caller - confirm.
            alg = b'_truncatedBZ'

        compengine = util.compengines.forbundletype(alg)
        self._stream = compengine.decompressorreader(fh)
        self._type = alg
        self.extras = extras or {}
        # Optional zero-argument callable fired once per non-empty chunk
        # read (see _chunklength); used for progress reporting.
        self.callback = None

    # These methods (compressed, read, seek, tell) all appear to only
    # be used by bundlerepo, but it's a little hard to tell.
    def compressed(self):
        return self._type is not None and self._type != b'UN'

    def read(self, l):
        return self._stream.read(l)

    def seek(self, pos):
        return self._stream.seek(pos)

    def tell(self):
        return self._stream.tell()

    def close(self):
        return self._stream.close()

    def _chunklength(self):
        """Read a chunk length prefix and return the payload length.

        Returns 0 for the empty chunk that terminates a group; aborts on
        a corrupt (1-4 byte) length. Fires self.callback, if set, for
        each non-empty chunk.
        """
        d = readexactly(self._stream, 4)
        l = struct.unpack(b">l", d)[0]
        if l <= 4:
            if l:
                raise error.Abort(_(b"invalid chunk length %d") % l)
            return 0
        if self.callback:
            self.callback()
        # The on-the-wire length includes the 4-byte prefix itself.
        return l - 4

    def changelogheader(self):
        """v10 does not have a changelog header chunk"""
        return {}

    def manifestheader(self):
        """v10 does not have a manifest header chunk"""
        return {}

    def filelogheader(self):
        """return the header of the filelogs chunk, v10 only has the filename"""
        l = self._chunklength()
        if not l:
            return {}
        fname = readexactly(self._stream, l)
        return {b'filename': fname}

    def _deltaheader(self, headertuple, prevnode):
        """Expand an unpacked cg1 delta header.

        cg1 carries no explicit delta base: deltas chain against the
        previous node in the stream, or against p1 for the first entry.
        """
        node, p1, p2, cs = headertuple
        if prevnode is None:
            deltabase = p1
        else:
            deltabase = prevnode
        flags = 0
        return node, p1, p2, deltabase, cs, flags

    def deltachunk(self, prevnode):
        """Read one delta chunk from the stream.

        Returns {} at the end of a group, otherwise the tuple
        (node, p1, p2, cs, deltabase, delta, flags).
        """
        l = self._chunklength()
        if not l:
            return {}
        headerdata = readexactly(self._stream, self.deltaheadersize)
        header = self.deltaheader.unpack(headerdata)
        delta = readexactly(self._stream, l - self.deltaheadersize)
        node, p1, p2, deltabase, cs, flags = self._deltaheader(header, prevnode)
        return (node, p1, p2, cs, deltabase, delta, flags)

    def getchunks(self):
        """returns all the chunks contains in the bundle

        Used when you need to forward the binary stream to a file or another
        network API. To do so, it parse the changegroup data, otherwise it will
        block in case of sshrepo because it don't know the end of the stream.
        """
        # For changegroup 1 and 2, we expect 3 parts: changelog, manifestlog,
        # and a list of filelogs. For changegroup 3, we expect 4 parts:
        # changelog, manifestlog, a list of tree manifestlogs, and a list of
        # filelogs.
        #
        # Changelog and manifestlog parts are terminated with empty chunks. The
        # tree and file parts are a list of entry sections. Each entry section
        # is a series of chunks terminating in an empty chunk. The list of these
        # entry sections is terminated in yet another empty chunk, so we know
        # we've reached the end of the tree/file list when we reach an empty
        # chunk that was proceeded by no non-empty chunks.

        parts = 0
        while parts < 2 + self._grouplistcount:
            noentries = True
            while True:
                chunk = getchunk(self)
                if not chunk:
                    # The first two empty chunks represent the end of the
                    # changelog and the manifestlog portions. The remaining
                    # empty chunks represent either A) the end of individual
                    # tree or file entries in the file list, or B) the end of
                    # the entire list. It's the end of the entire list if there
                    # were no entries (i.e. noentries is True).
                    if parts < 2:
                        parts += 1
                    elif noentries:
                        parts += 1
                    break
                noentries = False
                yield chunkheader(len(chunk))
                pos = 0
                # Re-emit the payload in 1MB slices to bound memory use.
                while pos < len(chunk):
                    next = pos + 2 ** 20
                    yield chunk[pos:next]
                    pos = next
        yield closechunk()

    def _unpackmanifests(self, repo, revmap, trp, prog):
        """Apply the manifest portion of the stream to ``repo``."""
        self.callback = prog.increment
        # no need to check for empty manifest group here:
        # if the result of the merge of 1 and 2 is the same in 3 and 4,
        # no new manifest will be created and the manifest group will
        # be empty during the pull
        self.manifestheader()
        deltas = self.deltaiter()
        repo.manifestlog.getstorage(b'').addgroup(deltas, revmap, trp)
        prog.complete()
        self.callback = None

    def apply(
        self,
        repo,
        tr,
        srctype,
        url,
        targetphase=phases.draft,
        expectedtotal=None,
    ):
        """Add the changegroup returned by source.read() to this repo.
        srctype is a string like 'push', 'pull', or 'unbundle'. url is
        the URL of the repo where this changegroup is coming from.

        Return an integer summarizing the change to this repo:
        - nothing changed or no source: 0
        - more heads than before: 1+added heads (2..n)
        - fewer heads than before: -1-removed heads (-2..-n)
        - number of heads stays the same: 1
        """
        repo = repo.unfiltered()

        def csmap(x):
            repo.ui.debug(b"add changeset %s\n" % short(x))
            return len(cl)

        def revmap(x):
            return cl.rev(x)

        changesets = 0

        try:
            # The transaction may already carry source information. In this
            # case we use the top level data. We overwrite the argument
            # because we need to use the top level value (if they exist)
            # in this function.
            srctype = tr.hookargs.setdefault(b'source', srctype)
            tr.hookargs.setdefault(b'url', url)
            repo.hook(
                b'prechangegroup', throw=True, **pycompat.strkwargs(tr.hookargs)
            )

            # write changelog data to temp files so concurrent readers
            # will not see an inconsistent view
            cl = repo.changelog
            cl.delayupdate(tr)
            oldheads = set(cl.heads())

            trp = weakref.proxy(tr)
            # pull off the changeset group
            repo.ui.status(_(b"adding changesets\n"))
            clstart = len(cl)
            progress = repo.ui.makeprogress(
                _(b'changesets'), unit=_(b'chunks'), total=expectedtotal
            )
            self.callback = progress.increment

            # Accumulates the file names touched by incoming changesets;
            # its size feeds the "adding file changes" accounting below.
            efilesset = set()

            def onchangelog(cl, node):
                efilesset.update(cl.readfiles(node))

            self.changelogheader()
            deltas = self.deltaiter()
            cgnodes = cl.addgroup(deltas, csmap, trp, addrevisioncb=onchangelog)
            efiles = len(efilesset)

            if not cgnodes:
                repo.ui.develwarn(
                    b'applied empty changelog from changegroup',
                    config=b'warn-empty-changegroup',
                )
            clend = len(cl)
            changesets = clend - clstart
            progress.complete()
            self.callback = None

            # pull off the manifest group
            repo.ui.status(_(b"adding manifests\n"))
            # We know that we'll never have more manifests than we had
            # changesets.
            progress = repo.ui.makeprogress(
                _(b'manifests'), unit=_(b'chunks'), total=changesets
            )
            self._unpackmanifests(repo, revmap, trp, progress)

            needfiles = {}
            if repo.ui.configbool(b'server', b'validate'):
                cl = repo.changelog
                ml = repo.manifestlog
                # validate incoming csets have their manifests
                for cset in pycompat.xrange(clstart, clend):
                    mfnode = cl.changelogrevision(cset).manifest
                    mfest = ml[mfnode].readdelta()
                    # store file cgnodes we must see
                    for f, n in pycompat.iteritems(mfest):
                        needfiles.setdefault(f, set()).add(n)

            # process the files
            repo.ui.status(_(b"adding file changes\n"))
            newrevs, newfiles = _addchangegroupfiles(
                repo, self, revmap, trp, efiles, needfiles
            )

            # making sure the value exists
            tr.changes.setdefault(b'changegroup-count-changesets', 0)
            tr.changes.setdefault(b'changegroup-count-revisions', 0)
            tr.changes.setdefault(b'changegroup-count-files', 0)
            tr.changes.setdefault(b'changegroup-count-heads', 0)

            # some code use bundle operation for internal purpose. They usually
            # set `ui.quiet` to do this outside of user sight. Size the report
            # of such operation now happens at the end of the transaction, that
            # ui.quiet has not direct effect on the output.
            #
            # To preserve this intend use an inelegant hack, we fail to report
            # the change if `quiet` is set. We should probably move to
            # something better, but this is a good first step to allow the "end
            # of transaction report" to pass tests.
            if not repo.ui.quiet:
                tr.changes[b'changegroup-count-changesets'] += changesets
                tr.changes[b'changegroup-count-revisions'] += newrevs
                tr.changes[b'changegroup-count-files'] += newfiles

            deltaheads = 0
            if oldheads:
                heads = cl.heads()
                deltaheads += len(heads) - len(oldheads)
                for h in heads:
                    # A new head that closes a branch does not count as a
                    # head gained for reporting purposes.
                    if h not in oldheads and repo[h].closesbranch():
                        deltaheads -= 1

            # see previous comment about checking ui.quiet
            if not repo.ui.quiet:
                tr.changes[b'changegroup-count-heads'] += deltaheads
            repo.invalidatevolatilesets()

            if changesets > 0:
                if b'node' not in tr.hookargs:
                    tr.hookargs[b'node'] = hex(cl.node(clstart))
                    tr.hookargs[b'node_last'] = hex(cl.node(clend - 1))
                    hookargs = dict(tr.hookargs)
                else:
                    hookargs = dict(tr.hookargs)
                    hookargs[b'node'] = hex(cl.node(clstart))
                    hookargs[b'node_last'] = hex(cl.node(clend - 1))
                repo.hook(
                    b'pretxnchangegroup',
                    throw=True,
                    **pycompat.strkwargs(hookargs)
                )

            added = [cl.node(r) for r in pycompat.xrange(clstart, clend)]
            phaseall = None
            if srctype in (b'push', b'serve'):
                # Old servers can not push the boundary themselves.
                # New servers won't push the boundary if changeset already
                # exists locally as secret
                #
                # We should not use added here but the list of all change in
                # the bundle
                if repo.publishing():
                    targetphase = phaseall = phases.public
                else:
                    # closer target phase computation

                    # Those changesets have been pushed from the
                    # outside, their phases are going to be pushed
                    # alongside. Therefor `targetphase` is
                    # ignored.
                    targetphase = phaseall = phases.draft
            if added:
                phases.registernew(repo, tr, targetphase, added)
            if phaseall is not None:
                phases.advanceboundary(repo, tr, phaseall, cgnodes)

            if changesets > 0:

                def runhooks():
                    # These hooks run when the lock releases, not when the
                    # transaction closes. So it's possible for the changelog
                    # to have changed since we last saw it.
                    if clstart >= len(repo):
                        return

                    repo.hook(b"changegroup", **pycompat.strkwargs(hookargs))

                    for n in added:
                        args = hookargs.copy()
                        args[b'node'] = hex(n)
                        del args[b'node_last']
                        repo.hook(b"incoming", **pycompat.strkwargs(args))

                    newheads = [h for h in repo.heads() if h not in oldheads]
                    repo.ui.log(
                        b"incoming",
                        b"%d incoming changes - new heads: %s\n",
                        len(added),
                        b', '.join([hex(c[:6]) for c in newheads]),
                    )

                tr.addpostclose(
                    b'changegroup-runhooks-%020i' % clstart,
                    lambda tr: repo._afterlock(runhooks),
                )
        finally:
            repo.ui.flush()
        # never return 0 here:
        if deltaheads < 0:
            ret = deltaheads - 1
        else:
            ret = deltaheads + 1
        return ret

    def deltaiter(self):
        """
        returns an iterator of the deltas in this changegroup

        Useful for passing to the underlying storage system to be stored.
        """
        chain = None
        for chunkdata in iter(lambda: self.deltachunk(chain), {}):
            # Chunkdata: (node, p1, p2, cs, deltabase, delta, flags)
            yield chunkdata
            chain = chunkdata[0]
486 486
487 487
class cg2unpacker(cg1unpacker):
    """Unpacker for cg2 streams.

    cg2 streams add support for generaldelta: the delta base is named
    explicitly in the header rather than inferred from stream order, so
    only the delta header layout differs from cg1. Everything else about
    the data remains the same.
    """

    deltaheader = _CHANGEGROUPV2_DELTA_HEADER
    deltaheadersize = deltaheader.size
    version = b'02'

    def _deltaheader(self, headertuple, prevnode):
        # prevnode is ignored here: the base is carried on the wire.
        node, p1, p2, deltabase, cs = headertuple
        return node, p1, p2, deltabase, cs, 0
504 504
505 505
class cg3unpacker(cg2unpacker):
    """Unpacker for cg3 streams.

    cg3 streams add support for exchanging treemanifests and revlog
    flags. It adds the revlog flags to the delta header and an empty
    chunk separating manifests and files.
    """

    deltaheader = _CHANGEGROUPV3_DELTA_HEADER
    deltaheadersize = deltaheader.size
    version = b'03'
    _grouplistcount = 2  # One list of manifests and one list of files

    def _deltaheader(self, headertuple, prevnode):
        # The v3 wire header already matches our return shape exactly.
        node, p1, p2, deltabase, cs, flags = headertuple
        return node, p1, p2, deltabase, cs, flags

    def _unpackmanifests(self, repo, revmap, trp, prog):
        # Flat manifests first (handled by the base class), then any
        # directory (tree) manifest groups that follow.
        super(cg3unpacker, self)._unpackmanifests(repo, revmap, trp, prog)
        for chunkdata in iter(self.filelogheader, {}):
            # If we get here, there are directory manifests in the changegroup
            dirname = chunkdata[b"filename"]
            repo.ui.debug(b"adding %s revisions\n" % dirname)
            deltagen = self.deltaiter()
            storage = repo.manifestlog.getstorage(dirname)
            if not storage.addgroup(deltagen, revmap, trp):
                raise error.Abort(_(b"received dir revlog group is empty"))
532 532
533 533
class headerlessfixup(object):
    """File-like wrapper that replays already-consumed header bytes.

    ``read`` serves bytes out of ``h`` first, then falls through to the
    underlying stream ``fh``.
    """

    def __init__(self, fh, h):
        self._h = h
        self._fh = fh

    def read(self, n):
        if not self._h:
            return readexactly(self._fh, n)
        buffered, self._h = self._h[:n], self._h[n:]
        if len(buffered) < n:
            buffered += readexactly(self._fh, n - len(buffered))
        return buffered
546 546
547 547
def _revisiondeltatochunks(delta, headerfn):
    """Serialize a revisiondelta to changegroup chunks.

    ``headerfn`` builds the per-version delta header from ``delta``.
    Yields the chunk length header, the delta header, and the payload.
    """

    # The changegroup format requires that everything on the wire be a
    # delta, but the captured revision may be a full text. Full texts
    # are encoded as a synthetic diff that replaces the entire content.
    if delta.delta is not None:
        diffprefix = b''
        payload = delta.delta
    elif delta.basenode == nullid:
        payload = delta.revision
        diffprefix = mdiff.trivialdiffheader(len(payload))
    else:
        payload = delta.revision
        diffprefix = mdiff.replacediffheader(delta.baserevisionsize, len(payload))

    header = headerfn(delta)

    yield chunkheader(len(header) + len(diffprefix) + len(payload))
    yield header
    if diffprefix:
        yield diffprefix
    yield payload
573 573
574 574
575 575 def _sortnodesellipsis(store, nodes, cl, lookup):
576 576 """Sort nodes for changegroup generation."""
577 577 # Ellipses serving mode.
578 578 #
579 579 # In a perfect world, we'd generate better ellipsis-ified graphs
580 580 # for non-changelog revlogs. In practice, we haven't started doing
581 581 # that yet, so the resulting DAGs for the manifestlog and filelogs
582 582 # are actually full of bogus parentage on all the ellipsis
583 583 # nodes. This has the side effect that, while the contents are
584 584 # correct, the individual DAGs might be completely out of whack in
585 585 # a case like 882681bc3166 and its ancestors (back about 10
586 586 # revisions or so) in the main hg repo.
587 587 #
588 588 # The one invariant we *know* holds is that the new (potentially
589 589 # bogus) DAG shape will be valid if we order the nodes in the
590 590 # order that they're introduced in dramatis personae by the
591 591 # changelog, so what we do is we sort the non-changelog histories
592 592 # by the order in which they are used by the changelog.
593 593 key = lambda n: cl.rev(lookup(n))
594 594 return sorted(nodes, key=key)
595 595
596 596
def _resolvenarrowrevisioninfo(
    cl,
    store,
    ischangelog,
    rev,
    linkrev,
    linknode,
    clrevtolocalrev,
    fullclnodes,
    precomputedellipsis,
):
    """Compute (p1node, p2node, linknode) for an ellipsis revision.

    ``precomputedellipsis[linkrev]`` supplies the revision's parents in
    the ellipsis-ized changelog; they are translated into revisions of
    ``store`` via ``local()`` below so the emitted delta carries valid
    parentage even though intermediate revisions are elided.
    """
    linkparents = precomputedellipsis[linkrev]

    def local(clrev):
        """Turn a changelog revnum into a local revnum.

        The ellipsis dag is stored as revnums on the changelog,
        but when we're producing ellipsis entries for
        non-changelog revlogs, we need to turn those numbers into
        something local. This does that for us, and during the
        changelog sending phase will also expand the stored
        mappings as needed.
        """
        if clrev == nullrev:
            return nullrev

        if ischangelog:
            return clrev

        # Walk the ellipsis-ized changelog breadth-first looking for a
        # change that has been linked from the current revlog.
        #
        # For a flat manifest revlog only a single step should be necessary
        # as all relevant changelog entries are relevant to the flat
        # manifest.
        #
        # For a filelog or tree manifest dirlog however not every changelog
        # entry will have been relevant, so we need to skip some changelog
        # nodes even after ellipsis-izing.
        walk = [clrev]
        while walk:
            p = walk[0]
            walk = walk[1:]
            if p in clrevtolocalrev:
                return clrevtolocalrev[p]
            elif p in fullclnodes:
                # NOTE(review): ``fullclnodes`` appears to hold changelog
                # *nodes* elsewhere, while ``p`` is a revnum here - confirm
                # this membership test is intended.
                walk.extend([pp for pp in cl.parentrevs(p) if pp != nullrev])
            elif p in precomputedellipsis:
                walk.extend(
                    [pp for pp in precomputedellipsis[p] if pp != nullrev]
                )
            else:
                # In this case, we've got an ellipsis with parents
                # outside the current bundle (likely an
                # incremental pull). We "know" that we can use the
                # value of this same revlog at whatever revision
                # is pointed to by linknode. "Know" is in scare
                # quotes because I haven't done enough examination
                # of edge cases to convince myself this is really
                # a fact - it works for all the (admittedly
                # thorough) cases in our testsuite, but I would be
                # somewhat unsurprised to find a case in the wild
                # where this breaks down a bit. That said, I don't
                # know if it would hurt anything.
                for i in pycompat.xrange(rev, 0, -1):
                    if store.linkrev(i) == clrev:
                        return i
                # We failed to resolve a parent for this node, so
                # we crash the changegroup construction.
                raise error.Abort(
                    b'unable to resolve parent while packing %r %r'
                    b' for changeset %r' % (store.indexfile, rev, clrev)
                )

        return nullrev

    if not linkparents or (store.parentrevs(rev) == (nullrev, nullrev)):
        p1, p2 = nullrev, nullrev
    elif len(linkparents) == 1:
        (p1,) = sorted(local(p) for p in linkparents)
        p2 = nullrev
    else:
        p1, p2 = sorted(local(p) for p in linkparents)

    p1node, p2node = store.node(p1), store.node(p2)

    return p1node, p2node, linknode
684 684
685 685
def deltagroup(
    repo,
    store,
    nodes,
    ischangelog,
    lookup,
    forcedeltaparentprev,
    topic=None,
    ellipses=False,
    clrevtolocalrev=None,
    fullclnodes=None,
    precomputedellipsis=None,
):
    """Calculate deltas for a set of revisions.

    Is a generator of ``revisiondelta`` instances.

    If topic is not None, progress detail will be generated using this
    topic name (e.g. changesets, manifests, etc).
    """
    if not nodes:
        return

    cl = repo.changelog

    if ischangelog:
        # `hg log` shows changesets in storage order. To preserve order
        # across clones, send out changesets in storage order.
        nodesorder = b'storage'
    elif ellipses:
        nodes = _sortnodesellipsis(store, nodes, cl, lookup)
        nodesorder = b'nodes'
    else:
        nodesorder = None

    # Perform ellipses filtering and revision massaging. We do this before
    # emitrevisions() because a) filtering out revisions creates less work
    # for emitrevisions() b) dropping revisions would break emitrevisions()'s
    # assumptions about delta choices and we would possibly send a delta
    # referencing a missing base revision.
    #
    # Also, calling lookup() has side-effects with regards to populating
    # data structures. If we don't call lookup() for each node or if we call
    # lookup() after the first pass through each node, things can break -
    # possibly intermittently depending on the python hash seed! For that
    # reason, we store a mapping of all linknodes during the initial node
    # pass rather than use lookup() on the output side.
    if ellipses:
        filtered = []
        adjustedparents = {}
        linknodes = {}

        for node in nodes:
            rev = store.rev(node)
            linknode = lookup(node)
            linkrev = cl.rev(linknode)
            clrevtolocalrev[linkrev] = rev

            # If linknode is in fullclnodes, it means the corresponding
            # changeset was a full changeset and is being sent unaltered.
            if linknode in fullclnodes:
                linknodes[node] = linknode

            # If the corresponding changeset wasn't in the set computed
            # as relevant to us, it should be dropped outright.
            elif linkrev not in precomputedellipsis:
                continue

            else:
                # We could probably do this later and avoid the dict
                # holding state. But it likely doesn't matter.
                p1node, p2node, linknode = _resolvenarrowrevisioninfo(
                    cl,
                    store,
                    ischangelog,
                    rev,
                    linkrev,
                    linknode,
                    clrevtolocalrev,
                    fullclnodes,
                    precomputedellipsis,
                )

                adjustedparents[node] = (p1node, p2node)
                linknodes[node] = linknode

            filtered.append(node)

        nodes = filtered

    # We expect the first pass to be fast, so we only engage the progress
    # meter for constructing the revision deltas.
    progress = None
    if topic is not None:
        progress = repo.ui.makeprogress(
            topic, unit=_(b'chunks'), total=len(nodes)
        )

    configtarget = repo.ui.config(b'devel', b'bundle.delta')
    if configtarget not in (b'', b'p1', b'full'):
        msg = _("""config "devel.bundle.delta" as unknown value: %s""")
        repo.ui.warn(msg % configtarget)

    deltamode = repository.CG_DELTAMODE_STD
    if forcedeltaparentprev:
        deltamode = repository.CG_DELTAMODE_PREV
    elif configtarget == b'p1':
        deltamode = repository.CG_DELTAMODE_P1
    elif configtarget == b'full':
        deltamode = repository.CG_DELTAMODE_FULL

    revisions = store.emitrevisions(
        nodes,
        nodesorder=nodesorder,
        revisiondata=True,
        assumehaveparentrevisions=not ellipses,
        deltamode=deltamode,
    )

    for i, revision in enumerate(revisions):
        if progress:
            progress.update(i + 1)

        if ellipses:
            linknode = linknodes[revision.node]

            if revision.node in adjustedparents:
                p1node, p2node = adjustedparents[revision.node]
                # pytype (wrongly) flags these attribute assignments;
                # suppress the false positive.
                # pytype: disable=attribute-error
                revision.p1node = p1node
                revision.p2node = p2node
                revision.flags |= repository.REVISION_FLAG_ELLIPSIS
                # pytype: enable=attribute-error

        else:
            linknode = lookup(revision.node)

        revision.linknode = linknode
        yield revision

    if progress:
        progress.complete()
826 826
827 827
828 828 class cgpacker(object):
829 829 def __init__(
830 830 self,
831 831 repo,
832 832 oldmatcher,
833 833 matcher,
834 834 version,
835 835 builddeltaheader,
836 836 manifestsend,
837 837 forcedeltaparentprev=False,
838 838 bundlecaps=None,
839 839 ellipses=False,
840 840 shallow=False,
841 841 ellipsisroots=None,
842 842 fullnodes=None,
843 843 ):
844 844 """Given a source repo, construct a bundler.
845 845
846 846 oldmatcher is a matcher that matches on files the client already has.
847 847 These will not be included in the changegroup.
848 848
849 849 matcher is a matcher that matches on files to include in the
850 850 changegroup. Used to facilitate sparse changegroups.
851 851
852 852 forcedeltaparentprev indicates whether delta parents must be against
853 853 the previous revision in a delta group. This should only be used for
854 854 compatibility with changegroup version 1.
855 855
856 856 builddeltaheader is a callable that constructs the header for a group
857 857 delta.
858 858
859 859 manifestsend is a chunk to send after manifests have been fully emitted.
860 860
861 861 ellipses indicates whether ellipsis serving mode is enabled.
862 862
863 863 bundlecaps is optional and can be used to specify the set of
864 864 capabilities which can be used to build the bundle. While bundlecaps is
865 865 unused in core Mercurial, extensions rely on this feature to communicate
866 866 capabilities to customize the changegroup packer.
867 867
868 868 shallow indicates whether shallow data might be sent. The packer may
869 869 need to pack file contents not introduced by the changes being packed.
870 870
871 871 fullnodes is the set of changelog nodes which should not be ellipsis
872 872 nodes. We store this rather than the set of nodes that should be
873 873 ellipsis because for very large histories we expect this to be
874 874 significantly smaller.
875 875 """
876 876 assert oldmatcher
877 877 assert matcher
878 878 self._oldmatcher = oldmatcher
879 879 self._matcher = matcher
880 880
881 881 self.version = version
882 882 self._forcedeltaparentprev = forcedeltaparentprev
883 883 self._builddeltaheader = builddeltaheader
884 884 self._manifestsend = manifestsend
885 885 self._ellipses = ellipses
886 886
887 887 # Set of capabilities we can use to build the bundle.
888 888 if bundlecaps is None:
889 889 bundlecaps = set()
890 890 self._bundlecaps = bundlecaps
891 891 self._isshallow = shallow
892 892 self._fullclnodes = fullnodes
893 893
894 894 # Maps ellipsis revs to their roots at the changelog level.
895 895 self._precomputedellipsis = ellipsisroots
896 896
897 897 self._repo = repo
898 898
899 899 if self._repo.ui.verbose and not self._repo.ui.debugflag:
900 900 self._verbosenote = self._repo.ui.note
901 901 else:
902 902 self._verbosenote = lambda s: None
903 903
    def generate(
        self, commonrevs, clnodes, fastpathlinkrev, source, changelog=True
    ):
        """Yield a sequence of changegroup byte chunks.

        The stream is emitted in wire order: changelog deltas, a close
        chunk, manifest deltas (per tree), the manifest terminator, then
        one group per changed file, and a final close chunk.

        If changelog is False, changelog data won't be added to changegroup.
        The changelog state is still computed (via _generatechangelog) because
        manifest and file generation depend on it.
        """

        repo = self._repo
        cl = repo.changelog

        self._verbosenote(_(b'uncompressed size of bundle content:\n'))
        size = 0

        clstate, deltas = self._generatechangelog(
            cl, clnodes, generate=changelog
        )
        for delta in deltas:
            for chunk in _revisiondeltatochunks(delta, self._builddeltaheader):
                size += len(chunk)
                yield chunk

        close = closechunk()
        size += len(close)
        yield closechunk()

        self._verbosenote(_(b'%8.i (changelog)\n') % size)

        # NOTE: clstate is only fully populated once the delta stream above
        # has been consumed (lookup callbacks fill it as a side effect).
        clrevorder = clstate[b'clrevorder']
        manifests = clstate[b'manifests']
        changedfiles = clstate[b'changedfiles']

        # We need to make sure that the linkrev in the changegroup refers to
        # the first changeset that introduced the manifest or file revision.
        # The fastpath is usually safer than the slowpath, because the filelogs
        # are walked in revlog order.
        #
        # When taking the slowpath when the manifest revlog uses generaldelta,
        # the manifest may be walked in the "wrong" order. Without 'clrevorder',
        # we would get an incorrect linkrev (see fix in cc0ff93d0c0c).
        #
        # When taking the fastpath, we are only vulnerable to reordering
        # of the changelog itself. The changelog never uses generaldelta and is
        # never reordered. To handle this case, we simply take the slowpath,
        # which already has the 'clrevorder' logic. This was also fixed in
        # cc0ff93d0c0c.

        # Treemanifests don't work correctly with fastpathlinkrev
        # either, because we don't discover which directory nodes to
        # send along with files. This could probably be fixed.
        fastpathlinkrev = fastpathlinkrev and (
            b'treemanifest' not in repo.requirements
        )

        fnodes = {}  # needed file nodes

        size = 0
        it = self.generatemanifests(
            commonrevs,
            clrevorder,
            fastpathlinkrev,
            manifests,
            fnodes,
            source,
            clstate[b'clrevtomanifestrev'],
        )

        for tree, deltas in it:
            if tree:
                # Per-tree file headers only exist in changegroup v3
                # (treemanifest support).
                assert self.version == b'03'
                chunk = _fileheader(tree)
                size += len(chunk)
                yield chunk

            for delta in deltas:
                chunks = _revisiondeltatochunks(delta, self._builddeltaheader)
                for chunk in chunks:
                    size += len(chunk)
                    yield chunk

            close = closechunk()
            size += len(close)
            yield close

        self._verbosenote(_(b'%8.i (manifests)\n') % size)
        yield self._manifestsend

        mfdicts = None
        if self._ellipses and self._isshallow:
            mfdicts = [
                (self._repo.manifestlog[n].read(), lr)
                for (n, lr) in pycompat.iteritems(manifests)
            ]

        manifests.clear()
        clrevs = set(cl.rev(x) for x in clnodes)

        it = self.generatefiles(
            changedfiles,
            commonrevs,
            source,
            mfdicts,
            fastpathlinkrev,
            fnodes,
            clrevs,
        )

        for path, deltas in it:
            h = _fileheader(path)
            size = len(h)
            yield h

            for delta in deltas:
                chunks = _revisiondeltatochunks(delta, self._builddeltaheader)
                for chunk in chunks:
                    size += len(chunk)
                    yield chunk

            close = closechunk()
            size += len(close)
            yield close

            self._verbosenote(_(b'%8.i %s\n') % (size, path))

        yield closechunk()

        if clnodes:
            repo.hook(b'outgoing', node=hex(clnodes[0]), source=source)
1031 1031
    def _generatechangelog(self, cl, nodes, generate=True):
        """Generate data for changelog chunks.

        Returns a 2-tuple of a dict containing state and an iterable of
        byte chunks. The state will not be fully populated until the
        chunk stream has been fully consumed.

        if generate is False, the state will be fully populated and no chunk
        stream will be yielded
        """
        clrevorder = {}
        manifests = {}
        mfl = self._repo.manifestlog
        changedfiles = set()
        clrevtomanifestrev = {}

        state = {
            b'clrevorder': clrevorder,
            b'manifests': manifests,
            b'changedfiles': changedfiles,
            b'clrevtomanifestrev': clrevtomanifestrev,
        }

        if not (generate or self._ellipses):
            # No chunk stream needed: populate the state eagerly by walking
            # the nodes in storage order and return an empty iterable.
            nodes = sorted(nodes, key=cl.rev)
            for node in nodes:
                c = cl.changelogrevision(node)
                clrevorder[node] = len(clrevorder)
                # record the first changeset introducing this manifest version
                manifests.setdefault(c.manifest, node)
                # Record a complete list of potentially-changed files in
                # this manifest.
                changedfiles.update(c.files)

            return state, ()

        # Callback for the changelog, used to collect changed files and
        # manifest nodes.
        # Returns the linkrev node (identity in the changelog case).
        def lookupcl(x):
            c = cl.changelogrevision(x)
            clrevorder[x] = len(clrevorder)

            if self._ellipses:
                # Only update manifests if x is going to be sent. Otherwise we
                # end up with bogus linkrevs specified for manifests and
                # we skip some manifest nodes that we should otherwise
                # have sent.
                if (
                    x in self._fullclnodes
                    or cl.rev(x) in self._precomputedellipsis
                ):

                    manifestnode = c.manifest
                    # Record the first changeset introducing this manifest
                    # version.
                    manifests.setdefault(manifestnode, x)
                    # Set this narrow-specific dict so we have the lowest
                    # manifest revnum to look up for this cl revnum. (Part of
                    # mapping changelog ellipsis parents to manifest ellipsis
                    # parents)
                    clrevtomanifestrev.setdefault(
                        cl.rev(x), mfl.rev(manifestnode)
                    )
                # We can't trust the changed files list in the changeset if the
                # client requested a shallow clone.
                if self._isshallow:
                    changedfiles.update(mfl[c.manifest].read().keys())
                else:
                    changedfiles.update(c.files)
            else:
                # record the first changeset introducing this manifest version
                manifests.setdefault(c.manifest, x)
                # Record a complete list of potentially-changed files in
                # this manifest.
                changedfiles.update(c.files)

            return x

        gen = deltagroup(
            self._repo,
            cl,
            nodes,
            True,
            lookupcl,
            self._forcedeltaparentprev,
            ellipses=self._ellipses,
            topic=_(b'changesets'),
            clrevtolocalrev={},
            fullclnodes=self._fullclnodes,
            precomputedellipsis=self._precomputedellipsis,
        )

        return state, gen
1127 1127
    def generatemanifests(
        self,
        commonrevs,
        clrevorder,
        fastpathlinkrev,
        manifests,
        fnodes,
        source,
        clrevtolocalrev,
    ):
        """Returns an iterator of changegroup chunks containing manifests.

        Yields (tree, deltas) pairs, starting with the root manifest
        (tree == b'') and then any subdirectory manifests discovered while
        reading the parents. As a side effect, populates ``fnodes`` with the
        file nodes (and their linkrev nodes) that generatefiles() must send.

        `source` is unused here, but is used by extensions like remotefilelog to
        change what is sent based in pulls vs pushes, etc.
        """
        repo = self._repo
        mfl = repo.manifestlog
        # Worklist of manifest trees to emit, seeded with the root manifest.
        tmfnodes = {b'': manifests}

        # Callback for the manifest, used to collect linkrevs for filelog
        # revisions.
        # Returns the linkrev node (collected in lookupcl).
        def makelookupmflinknode(tree, nodes):
            if fastpathlinkrev:
                assert not tree
                return (
                    manifests.__getitem__  # pytype: disable=unsupported-operands
                )

            def lookupmflinknode(x):
                """Callback for looking up the linknode for manifests.

                Returns the linkrev node for the specified manifest.

                SIDE EFFECT:

                  1) fclnodes gets populated with the list of relevant
                     file nodes if we're not using fastpathlinkrev
                  2) When treemanifests are in use, collects treemanifest nodes
                     to send

                Note that this means manifests must be completely sent to
                the client before you can trust the list of files and
                treemanifests to send.
                """
                clnode = nodes[x]
                mdata = mfl.get(tree, x).readfast(shallow=True)
                for p, n, fl in mdata.iterentries():
                    if fl == b't':  # subdirectory manifest
                        subtree = tree + p + b'/'
                        tmfclnodes = tmfnodes.setdefault(subtree, {})
                        tmfclnode = tmfclnodes.setdefault(n, clnode)
                        if clrevorder[clnode] < clrevorder[tmfclnode]:
                            tmfclnodes[n] = clnode
                    else:
                        f = tree + p
                        fclnodes = fnodes.setdefault(f, {})
                        fclnode = fclnodes.setdefault(n, clnode)
                        if clrevorder[clnode] < clrevorder[fclnode]:
                            fclnodes[n] = clnode
                return clnode

            return lookupmflinknode

        while tmfnodes:
            tree, nodes = tmfnodes.popitem()

            should_visit = self._matcher.visitdir(tree[:-1])
            if tree and not should_visit:
                continue

            store = mfl.getstorage(tree)

            if not should_visit:
                # No nodes to send because this directory is out of
                # the client's view of the repository (probably
                # because of narrow clones). Do this even for the root
                # directory (tree=='')
                prunednodes = []
            else:
                # Avoid sending any manifest nodes we can prove the
                # client already has by checking linkrevs. See the
                # related comment in generatefiles().
                prunednodes = self._prunemanifests(store, nodes, commonrevs)

            if tree and not prunednodes:
                continue

            lookupfn = makelookupmflinknode(tree, nodes)

            deltas = deltagroup(
                self._repo,
                store,
                prunednodes,
                False,
                lookupfn,
                self._forcedeltaparentprev,
                ellipses=self._ellipses,
                topic=_(b'manifests'),
                clrevtolocalrev=clrevtolocalrev,
                fullclnodes=self._fullclnodes,
                precomputedellipsis=self._precomputedellipsis,
            )

            if not self._oldmatcher.visitdir(store.tree[:-1]):
                yield tree, deltas
            else:
                # 'deltas' is a generator and we need to consume it even if
                # we are not going to send it because a side-effect is that
                # it updates tmdnodes (via lookupfn)
                for d in deltas:
                    pass
                if not tree:
                    yield tree, []
1240 1242
1241 1243 def _prunemanifests(self, store, nodes, commonrevs):
1242 1244 if not self._ellipses:
1243 1245 # In non-ellipses case and large repositories, it is better to
1244 1246 # prevent calling of store.rev and store.linkrev on a lot of
1245 1247 # nodes as compared to sending some extra data
1246 1248 return nodes.copy()
1247 1249 # This is split out as a separate method to allow filtering
1248 1250 # commonrevs in extension code.
1249 1251 #
1250 1252 # TODO(augie): this shouldn't be required, instead we should
1251 1253 # make filtering of revisions to send delegated to the store
1252 1254 # layer.
1253 1255 frev, flr = store.rev, store.linkrev
1254 1256 return [n for n in nodes if flr(frev(n)) not in commonrevs]
1255 1257
    # The 'source' parameter is useful for extensions
    def generatefiles(
        self,
        changedfiles,
        commonrevs,
        source,
        mfdicts,
        fastpathlinkrev,
        fnodes,
        clrevs,
    ):
        """Yield (path, deltas) pairs for changed files, in sorted order.

        Only files matched by the current matcher but not the old one are
        considered (narrow-aware). Filenodes whose linkrev is in
        ``commonrevs`` are filtered out since the client should already
        have them.
        """
        changedfiles = [
            f
            for f in changedfiles
            if self._matcher(f) and not self._oldmatcher(f)
        ]

        if not fastpathlinkrev:

            def normallinknodes(unused, fname):
                # Slow path: linkrev nodes were collected into fnodes while
                # generating manifests.
                return fnodes.get(fname, {})

        else:
            cln = self._repo.changelog.node

            def normallinknodes(store, fname):
                # Fast path: walk the filelog and keep revisions whose
                # linkrev is among the changesets being sent.
                flinkrev = store.linkrev
                fnode = store.node
                revs = ((r, flinkrev(r)) for r in store)
                return dict(
                    (fnode(r), cln(lr)) for r, lr in revs if lr in clrevs
                )

        clrevtolocalrev = {}

        if self._isshallow:
            # In a shallow clone, the linknodes callback needs to also include
            # those file nodes that are in the manifests we sent but weren't
            # introduced by those manifests.
            commonctxs = [self._repo[c] for c in commonrevs]
            clrev = self._repo.changelog.rev

            def linknodes(flog, fname):
                for c in commonctxs:
                    try:
                        fnode = c.filenode(fname)
                        clrevtolocalrev[c.rev()] = flog.rev(fnode)
                    except error.ManifestLookupError:
                        pass
                links = normallinknodes(flog, fname)
                if len(links) != len(mfdicts):
                    for mf, lr in mfdicts:
                        fnode = mf.get(fname, None)
                        if fnode in links:
                            links[fnode] = min(links[fnode], lr, key=clrev)
                        elif fnode:
                            links[fnode] = lr
                return links

        else:
            linknodes = normallinknodes

        repo = self._repo
        progress = repo.ui.makeprogress(
            _(b'files'), unit=_(b'files'), total=len(changedfiles)
        )
        for i, fname in enumerate(sorted(changedfiles)):
            filerevlog = repo.file(fname)
            if not filerevlog:
                raise error.Abort(
                    _(b"empty or missing file data for %s") % fname
                )

            clrevtolocalrev.clear()

            linkrevnodes = linknodes(filerevlog, fname)
            # Lookup for filenodes, we collected the linkrev nodes above in the
            # fastpath case and with lookupmf in the slowpath case.
            def lookupfilelog(x):
                return linkrevnodes[x]

            frev, flr = filerevlog.rev, filerevlog.linkrev
            # Skip sending any filenode we know the client already
            # has. This avoids over-sending files relatively
            # inexpensively, so it's not a problem if we under-filter
            # here.
            filenodes = [
                n for n in linkrevnodes if flr(frev(n)) not in commonrevs
            ]

            if not filenodes:
                continue

            progress.update(i + 1, item=fname)

            deltas = deltagroup(
                self._repo,
                filerevlog,
                filenodes,
                False,
                lookupfilelog,
                self._forcedeltaparentprev,
                ellipses=self._ellipses,
                clrevtolocalrev=clrevtolocalrev,
                fullclnodes=self._fullclnodes,
                precomputedellipsis=self._precomputedellipsis,
            )

            yield fname, deltas

        progress.complete()
1367 1369
1368 1370
def _makecg1packer(
    repo,
    oldmatcher,
    matcher,
    bundlecaps,
    ellipses=False,
    shallow=False,
    ellipsisroots=None,
    fullnodes=None,
):
    """Build a cgpacker that emits version '01' changegroups."""

    def builddeltaheader(d):
        # v1 headers carry no explicit delta base; the base is implicitly
        # the previous revision in the group (forcedeltaparentprev below).
        return _CHANGEGROUPV1_DELTA_HEADER.pack(
            d.node, d.p1node, d.p2node, d.linknode
        )

    return cgpacker(
        repo,
        oldmatcher,
        matcher,
        b'01',
        builddeltaheader=builddeltaheader,
        manifestsend=b'',
        forcedeltaparentprev=True,
        bundlecaps=bundlecaps,
        ellipses=ellipses,
        shallow=shallow,
        ellipsisroots=ellipsisroots,
        fullnodes=fullnodes,
    )
1397 1399
1398 1400
def _makecg2packer(
    repo,
    oldmatcher,
    matcher,
    bundlecaps,
    ellipses=False,
    shallow=False,
    ellipsisroots=None,
    fullnodes=None,
):
    """Build a cgpacker that emits version '02' changegroups."""

    def builddeltaheader(d):
        # v2 adds an explicit delta base node to the header.
        return _CHANGEGROUPV2_DELTA_HEADER.pack(
            d.node, d.p1node, d.p2node, d.basenode, d.linknode
        )

    return cgpacker(
        repo,
        oldmatcher,
        matcher,
        b'02',
        builddeltaheader=builddeltaheader,
        manifestsend=b'',
        bundlecaps=bundlecaps,
        ellipses=ellipses,
        shallow=shallow,
        ellipsisroots=ellipsisroots,
        fullnodes=fullnodes,
    )
1426 1428
1427 1429
def _makecg3packer(
    repo,
    oldmatcher,
    matcher,
    bundlecaps,
    ellipses=False,
    shallow=False,
    ellipsisroots=None,
    fullnodes=None,
):
    """Build a cgpacker that emits version '03' changegroups."""

    def builddeltaheader(d):
        # v3 extends the v2 header with revlog flags.
        return _CHANGEGROUPV3_DELTA_HEADER.pack(
            d.node, d.p1node, d.p2node, d.basenode, d.linknode, d.flags
        )

    return cgpacker(
        repo,
        oldmatcher,
        matcher,
        b'03',
        builddeltaheader=builddeltaheader,
        manifestsend=closechunk(),
        bundlecaps=bundlecaps,
        ellipses=ellipses,
        shallow=shallow,
        ellipsisroots=ellipsisroots,
        fullnodes=fullnodes,
    )
1455 1457
1456 1458
# Maps changegroup version -> (packer factory, unpacker class).
_packermap = {
    b'01': (_makecg1packer, cg1unpacker),
    # cg2 adds support for exchanging generaldelta
    b'02': (_makecg2packer, cg2unpacker),
    # cg3 adds support for exchanging revlog flags and treemanifests
    b'03': (_makecg3packer, cg3unpacker),
}
1464 1466
1465 1467
def allsupportedversions(repo):
    """Return the set of changegroup versions supported for ``repo``.

    Starts from every known version and drops '03' unless tree manifests
    (or the experimental flags enabling them) require it; sidedata repos
    additionally drop '01'/'02'.
    """
    versions = set(_packermap.keys())
    needv03 = False
    if (
        repo.ui.configbool(b'experimental', b'changegroup3')
        or repo.ui.configbool(b'experimental', b'treemanifest')
        or b'treemanifest' in repo.requirements
    ):
        # we keep version 03 because we need it to exchange treemanifest data
        #
        # we also keep versions 01 and 02, because it is possible for repo to
        # contains both normal and tree manifest at the same time. so using
        # older version to pull data is viable
        #
        # (or even to push subset of history)
        needv03 = True
    if b'exp-sidedata-flag' in repo.requirements:
        needv03 = True
        # don't attempt to use 01/02 until we do sidedata cleaning
        versions.discard(b'01')
        versions.discard(b'02')
    if not needv03:
        versions.discard(b'03')
    return versions
1490 1492
1491 1493
# Changegroup versions that can be applied to the repo
def supportedincomingversions(repo):
    """Return the changegroup versions ``repo`` can receive.

    Currently the full supported set; only the outgoing side
    (supportedoutgoingversions) applies extra restrictions.
    """
    return allsupportedversions(repo)
1495 1497
1496 1498
# Changegroup versions that can be created from the repo
def supportedoutgoingversions(repo):
    """Return the changegroup versions ``repo`` can produce."""
    versions = allsupportedversions(repo)
    requirements = repo.requirements
    dropold = False
    if b'treemanifest' in requirements:
        # Versions 01 and 02 support only flat manifests and it's just too
        # expensive to convert between the flat manifest and tree manifest on
        # the fly. Since tree manifests are hashed differently, all of history
        # would have to be converted. Instead, we simply don't even pretend to
        # support versions 01 and 02.
        dropold = True
    if repository.NARROW_REQUIREMENT in requirements:
        # Versions 01 and 02 don't support revlog flags, and we need to
        # support that for stripping and unbundling to work.
        dropold = True
    if LFS_REQUIREMENT in requirements:
        # Versions 01 and 02 don't support revlog flags, and we need to
        # mark LFS entries with REVIDX_EXTSTORED.
        dropold = True

    if dropold:
        versions.discard(b'01')
        versions.discard(b'02')

    return versions
1520 1522
1521 1523
def localversion(repo):
    """Return the best changegroup version for purely local bundles.

    Used by strip, shelve and temporary bundles, which never leave this
    host, so the newest version this repo can produce is always safe.
    """
    candidates = supportedoutgoingversions(repo)
    return max(candidates)
1526 1528
1527 1529
def safeversion(repo):
    """Return the smallest version assumed safe for all of this repo's clients.

    For example, every hg version that supports generaldelta also supports
    changegroup 02, so generaldelta repos can drop 01.
    """
    candidates = supportedoutgoingversions(repo)
    if b'generaldelta' in repo.requirements:
        candidates.discard(b'01')
    assert candidates
    return min(candidates)
1537 1539
1538 1540
def getbundler(
    version,
    repo,
    bundlecaps=None,
    oldmatcher=None,
    matcher=None,
    ellipses=False,
    shallow=False,
    ellipsisroots=None,
    fullnodes=None,
):
    """Instantiate the changegroup packer for ``version``.

    Validates that the requested version supports the requested features
    (sparse matchers need >= 02, ellipsis nodes need 03) before delegating
    to the per-version factory from _packermap.
    """
    assert version in supportedoutgoingversions(repo)

    if matcher is None:
        matcher = matchmod.always()
    if oldmatcher is None:
        oldmatcher = matchmod.never()

    if version == b'01' and not matcher.always():
        raise error.ProgrammingError(
            b'version 01 changegroups do not support sparse file matchers'
        )

    if ellipses and version in (b'01', b'02'):
        raise error.Abort(
            _(
                b'ellipsis nodes require at least cg3 on client and server, '
                b'but negotiated version %s'
            )
            % version
        )

    # Requested files could include files not in the local store. So
    # filter those out.
    matcher = repo.narrowmatch(matcher)

    fn = _packermap[version][0]
    return fn(
        repo,
        oldmatcher,
        matcher,
        bundlecaps,
        ellipses=ellipses,
        shallow=shallow,
        ellipsisroots=ellipsisroots,
        fullnodes=fullnodes,
    )
1586 1588
1587 1589
def getunbundler(version, fh, alg, extras=None):
    """Instantiate the changegroup unpacker registered for ``version``."""
    unpackercls = _packermap[version][1]
    return unpackercls(fh, alg, extras=extras)
1590 1592
1591 1593
def _changegroupinfo(repo, nodes, source):
    """Report how many (and, when debugging, which) changesets will be sent."""
    ui = repo.ui
    if ui.verbose or source == b'bundle':
        ui.status(_(b"%d changesets found\n") % len(nodes))
    if ui.debugflag:
        ui.debug(b"list of changesets:\n")
        for node in nodes:
            ui.debug(b"%s\n" % hex(node))
1599 1601
1600 1602
def makechangegroup(
    repo, outgoing, version, source, fastpath=False, bundlecaps=None
):
    """Build a changegroup stream and wrap it in the matching unbundler."""
    stream = makestream(
        repo,
        outgoing,
        version,
        source,
        fastpath=fastpath,
        bundlecaps=bundlecaps,
    )
    extras = {b'clcount': len(outgoing.missing)}
    return getunbundler(version, util.chunkbuffer(stream), None, extras)
1618 1620
1619 1621
def makestream(
    repo,
    outgoing,
    version,
    source,
    fastpath=False,
    bundlecaps=None,
    matcher=None,
):
    """Return a changegroup chunk stream for the outgoing changesets.

    Fires the 'preoutgoing' hook (which may abort) and reports the
    changeset count before handing off to the packer's generate().
    """
    bundler = getbundler(version, repo, bundlecaps=bundlecaps, matcher=matcher)

    repo = repo.unfiltered()
    commonrevs = outgoing.common
    csets = outgoing.missing
    heads = outgoing.missingheads
    # We go through the fast path if we get told to, or if all (unfiltered)
    # heads have been requested (since we then know all linkrevs will
    # be pulled by the client).
    heads.sort()
    fastpathlinkrev = fastpath or (
        repo.filtername is None and heads == sorted(repo.heads())
    )

    repo.hook(b'preoutgoing', throw=True, source=source)
    _changegroupinfo(repo, csets, source)
    return bundler.generate(commonrevs, csets, fastpathlinkrev, source)
1646 1648
1647 1649
def _addchangegroupfiles(repo, source, revmap, trp, expectedfiles, needfiles):
    """Apply the file portion of a changegroup to the local repository.

    Reads filelog groups from ``source`` until the empty terminator,
    adding each group to the corresponding filelog via ``trp`` (a
    transaction proxy). ``needfiles`` maps filename -> set of expected
    nodes; every received node is checked off and any leftover entries
    are verified to already exist locally.

    Returns a (revisions, files) tuple of how much was added. Raises
    error.Abort on empty groups, spurious entries, censored delta bases,
    or missing file data.
    """
    revisions = 0
    files = 0
    progress = repo.ui.makeprogress(
        _(b'files'), unit=_(b'files'), total=expectedfiles
    )
    # iter() sentinel: an empty header dict marks the end of the file groups.
    for chunkdata in iter(source.filelogheader, {}):
        files += 1
        f = chunkdata[b"filename"]
        repo.ui.debug(b"adding %s revisions\n" % f)
        progress.increment()
        fl = repo.file(f)
        o = len(fl)
        try:
            deltas = source.deltaiter()
            if not fl.addgroup(deltas, revmap, trp):
                raise error.Abort(_(b"received file revlog group is empty"))
        except error.CensoredBaseError as e:
            raise error.Abort(_(b"received delta base is censored: %s") % e)
        revisions += len(fl) - o
        if f in needfiles:
            needs = needfiles[f]
            # Check off each newly added node against the expected set.
            for new in pycompat.xrange(o, len(fl)):
                n = fl.node(new)
                if n in needs:
                    needs.remove(n)
                else:
                    raise error.Abort(_(b"received spurious file revlog entry"))
            if not needs:
                del needfiles[f]
    progress.complete()

    # Anything still listed as needed must already be present locally;
    # otherwise the incoming data was incomplete.
    for f, needs in pycompat.iteritems(needfiles):
        fl = repo.file(f)
        for n in needs:
            try:
                fl.rev(n)
            except error.LookupError:
                raise error.Abort(
                    _(b'missing file data for %s:%s - run hg verify')
                    % (f, hex(n))
                )

    return revisions, files
General Comments 0
You need to be logged in to leave comments. Login now