##// END OF EJS Templates
changegroup: use the local variable instead of reaching through self...
Raphaël Gomès -
r47369:357d2ea9 default
parent child Browse files
Show More
@@ -1,1710 +1,1710 b''
1 1 # changegroup.py - Mercurial changegroup manipulation functions
2 2 #
3 3 # Copyright 2006 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import os
11 11 import struct
12 12 import weakref
13 13
14 14 from .i18n import _
15 15 from .node import (
16 16 hex,
17 17 nullid,
18 18 nullrev,
19 19 short,
20 20 )
21 21 from .pycompat import open
22 22
23 23 from . import (
24 24 error,
25 25 match as matchmod,
26 26 mdiff,
27 27 phases,
28 28 pycompat,
29 29 requirements,
30 30 scmutil,
31 31 util,
32 32 )
33 33
34 34 from .interfaces import repository
35 35
# Fixed-size binary headers that precede every delta in a changegroup, one
# layout per changegroup version. Each "20s" field is a 20-byte string.
#
# v1 carries four fields, unpacked by cg1unpacker as (node, p1, p2, cs).
_CHANGEGROUPV1_DELTA_HEADER = struct.Struct(b"20s20s20s20s")
# v2 carries five fields, unpacked by cg2unpacker as
# (node, p1, p2, deltabase, cs).
_CHANGEGROUPV2_DELTA_HEADER = struct.Struct(b"20s20s20s20s20s")
# v3 is the v2 layout followed by an unsigned 16-bit flags field, big-endian.
_CHANGEGROUPV3_DELTA_HEADER = struct.Struct(b">20s20s20s20s20sH")

LFS_REQUIREMENT = b'lfs'

# Module-level alias so the framing helpers can call it without the prefix.
readexactly = util.readexactly
43 43
44 44
def getchunk(stream):
    """Read one framed chunk from ``stream`` and return its payload.

    A chunk starts with a 4-byte big-endian signed length that counts the
    length field itself. A length of 0 marks an empty chunk, for which
    ``b""`` is returned. Any other length of 4 or less is rejected with
    ``error.Abort``.
    """
    (length,) = struct.unpack(b">l", readexactly(stream, 4))
    if length > 4:
        # The payload is whatever follows the 4 length bytes.
        return readexactly(stream, length - 4)
    if length:
        raise error.Abort(_(b"invalid chunk length %d") % length)
    return b""
54 54
55 55
def chunkheader(length):
    """Return the 4-byte header for a chunk carrying ``length`` payload bytes.

    The encoded value is ``length + 4`` because the length field counts
    itself; it is packed as a big-endian signed 32-bit integer.
    """
    framed = length + 4
    return struct.pack(b">l", framed)
59 59
60 60
def closechunk():
    """Return the header of a zero-length chunk.

    This is a big-endian signed 32-bit zero, i.e. four NUL bytes.
    """
    emptylength = 0
    return struct.pack(b">l", emptylength)
64 64
65 65
def _fileheader(path):
    """Return a complete chunk (header followed by ``path``) naming a path."""
    header = chunkheader(len(path))
    return header + path
69 69
70 70
def writechunks(ui, chunks, filename, vfs=None):
    """Write every item of ``chunks`` to a file and return the file's name.

    The stream is assumed to be a bundle file.

    When ``filename`` is set, the file is opened in ``wb`` mode, through
    ``vfs`` if one is given and directly (with a 128 KiB buffer) otherwise.
    When ``filename`` is empty, a temporary ``hg-bundle-*.hg`` file is
    created and its name is returned.

    If writing fails part-way, the partially written file is unlinked
    before the exception propagates. ``ui`` is not used by this function.
    """
    out = None
    # Name of the file to delete if we do not reach the end of the writes.
    partial = None
    try:
        if not filename:
            fd, filename = pycompat.mkstemp(prefix=b"hg-bundle-", suffix=b".hg")
            out = os.fdopen(fd, "wb")
        elif vfs:
            out = vfs.open(filename, b"wb")
        else:
            # Increase default buffer size because default is usually
            # small (4k is common on Linux).
            out = open(filename, b"wb", 131072)
        partial = filename
        for piece in chunks:
            out.write(piece)
        # Everything was written: keep the file.
        partial = None
        return filename
    finally:
        if out is not None:
            out.close()
        if partial is not None:
            if filename and vfs:
                vfs.unlink(partial)
            else:
                os.unlink(partial)
104 104
105 105
class cg1unpacker(object):
    """Unpacker for cg1 changegroup streams.

    An unpacker deals with the wire framing of revision data. The usual
    entry point is apply(), which adds the changes carried by the
    changegroup to a repository.

    To forward a changegroup unmodified to another consumer, use
    getchunks(): it yields the raw chunks and stops at the end of the
    changegroup, which is how a caller learns that the stream has ended.

    deltachunk() is only useful when applying delta data; most consumers
    should prefer apply().

    The few remaining public methods exist for bundlerepo and some debug
    commands - their use is discouraged.
    """

    deltaheader = _CHANGEGROUPV1_DELTA_HEADER
    deltaheadersize = deltaheader.size
    version = b'01'
    _grouplistcount = 1  # One list of files after the manifests

    def __init__(self, fh, alg, extras=None):
        """Wrap ``fh`` in a decompressing reader for bundle type ``alg``.

        ``alg`` of None means uncompressed (b'UN'). An unsupported type
        raises ``error.Abort``. b'BZ' is mapped to the b'_truncatedBZ'
        engine name before the engine lookup.
        """
        if alg is None:
            alg = b'UN'
        if alg not in util.compengines.supportedbundletypes:
            raise error.Abort(_(b'unknown stream compression type: %s') % alg)
        if alg == b'BZ':
            alg = b'_truncatedBZ'

        engine = util.compengines.forbundletype(alg)
        self._stream = engine.decompressorreader(fh)
        self._type = alg
        self.extras = extras if extras else {}
        # Optional zero-argument callable invoked once per non-empty chunk.
        self.callback = None

    # These methods (compressed, read, seek, tell) all appear to only
    # be used by bundlerepo, but it's a little hard to tell.
    def compressed(self):
        """Return True unless the stream type is unset or b'UN'."""
        return self._type not in (None, b'UN')

    def read(self, l):
        """Read ``l`` bytes from the decompressed stream."""
        return self._stream.read(l)

    def seek(self, pos):
        """Seek the decompressed stream to ``pos``."""
        return self._stream.seek(pos)

    def tell(self):
        """Return the current position in the decompressed stream."""
        return self._stream.tell()

    def close(self):
        """Close the underlying decompressed stream."""
        return self._stream.close()

    def _chunklength(self):
        """Read a chunk header and return the payload length that follows.

        Returns 0 for an empty chunk. A non-zero length of 4 or less raises
        ``error.Abort``. ``self.callback`` (when set) is called for every
        non-empty chunk.
        """
        (length,) = struct.unpack(b">l", readexactly(self._stream, 4))
        if length > 4:
            if self.callback:
                self.callback()
            return length - 4
        if length:
            raise error.Abort(_(b"invalid chunk length %d") % length)
        return 0

    def changelogheader(self):
        """v10 does not have a changelog header chunk"""
        return {}

    def manifestheader(self):
        """v10 does not have a manifest header chunk"""
        return {}

    def filelogheader(self):
        """Return the filelog header chunk as ``{b'filename': name}``.

        v10 only carries the filename. An empty chunk yields ``{}``.
        """
        namelen = self._chunklength()
        if namelen:
            return {b'filename': readexactly(self._stream, namelen)}
        return {}

    def _deltaheader(self, headertuple, prevnode):
        """Normalize a v1 header to (node, p1, p2, deltabase, cs, flags).

        v1 headers carry no delta base: the base is the previous node of
        the group, or p1 for the first delta. Flags are always 0.
        """
        node, p1, p2, cs = headertuple
        deltabase = p1 if prevnode is None else prevnode
        return node, p1, p2, deltabase, cs, 0

    def deltachunk(self, prevnode):
        """Read one delta chunk.

        Returns ``{}`` at the end of a group, otherwise the tuple
        (node, p1, p2, cs, deltabase, delta, flags).
        """
        chunklen = self._chunklength()
        if not chunklen:
            return {}
        rawheader = readexactly(self._stream, self.deltaheadersize)
        fields = self.deltaheader.unpack(rawheader)
        delta = readexactly(self._stream, chunklen - self.deltaheadersize)
        node, p1, p2, deltabase, cs, flags = self._deltaheader(
            fields, prevnode
        )
        return (node, p1, p2, cs, deltabase, delta, flags)

    def getchunks(self):
        """Yield all the chunks contained in the bundle.

        Used to forward the binary stream to a file or another network
        API. The changegroup data is parsed while doing so, because the
        end of the stream is otherwise unknown and reading would block
        (e.g. with sshrepo).
        """
        # For changegroup 1 and 2, we expect 3 parts: changelog, manifestlog,
        # and a list of filelogs. For changegroup 3, we expect 4 parts:
        # changelog, manifestlog, a list of tree manifestlogs, and a list of
        # filelogs.
        #
        # Changelog and manifestlog parts are terminated with empty chunks.
        # The tree and file parts are a list of entry sections. Each entry
        # section is a series of chunks terminating in an empty chunk. The
        # list of these entry sections is terminated in yet another empty
        # chunk, so we know we've reached the end of the tree/file list when
        # we reach an empty chunk that was preceded by no non-empty chunks.
        piecesize = 2 ** 20
        expectedparts = 2 + self._grouplistcount

        parts = 0
        while parts < expectedparts:
            noentries = True
            while True:
                chunk = getchunk(self)
                if not chunk:
                    # The first two empty chunks end the changelog and the
                    # manifestlog. Later empty chunks end either a single
                    # tree/file entry or, when nothing preceded them
                    # (noentries is True), the entire list.
                    if parts < 2 or noentries:
                        parts += 1
                    break
                noentries = False
                yield chunkheader(len(chunk))
                # Re-emit the payload in pieces of at most 1 MiB.
                for start in pycompat.xrange(0, len(chunk), piecesize):
                    yield chunk[start : start + piecesize]
            yield closechunk()

    def _unpackmanifests(self, repo, revmap, trp, prog):
        """Add the manifest group to the root manifest storage.

        ``prog.increment`` is installed as the per-chunk callback for the
        duration of the call.
        """
        self.callback = prog.increment
        # no need to check for empty manifest group here:
        # if the result of the merge of 1 and 2 is the same in 3 and 4,
        # no new manifest will be created and the manifest group will
        # be empty during the pull
        self.manifestheader()
        rootstorage = repo.manifestlog.getstorage(b'')
        rootstorage.addgroup(self.deltaiter(), revmap, trp)
        prog.complete()
        self.callback = None

    def apply(
        self,
        repo,
        tr,
        srctype,
        url,
        targetphase=phases.draft,
        expectedtotal=None,
    ):
        """Add the changegroup read from this unpacker to ``repo``.

        ``srctype`` is a string like 'push', 'pull', or 'unbundle'. ``url``
        is the URL of the repo where this changegroup is coming from. Both
        are only defaults: values already present in ``tr.hookargs`` win.

        Returns an integer derived from the change in the number of heads
        (new heads that close a branch are not counted):
        - fewer heads than before: -1-removed heads (-2..-n)
        - number of heads stays the same: 1
        - more heads than before: 1+added heads (2..n)
        The value returned at the end of this method is never 0.
        """
        repo = repo.unfiltered()

        # Both helpers read ``cl``, which is bound further down, before
        # either of them can be called.
        def csmap(node):
            repo.ui.debug(b"add changeset %s\n" % short(node))
            return len(cl)

        def revmap(node):
            return cl.rev(node)

        try:
            # The transaction may already carry source information. In this
            # case we use the top level data. We overwrite the argument
            # because we need to use the top level value (if they exist)
            # in this function.
            srctype = tr.hookargs.setdefault(b'source', srctype)
            tr.hookargs.setdefault(b'url', url)
            repo.hook(
                b'prechangegroup', throw=True, **pycompat.strkwargs(tr.hookargs)
            )

            # write changelog data to temp files so concurrent readers
            # will not see an inconsistent view
            cl = repo.changelog
            cl.delayupdate(tr)
            oldheads = set(cl.heads())

            trp = weakref.proxy(tr)
            # pull off the changeset group
            repo.ui.status(_(b"adding changesets\n"))
            clstart = len(cl)
            progress = repo.ui.makeprogress(
                _(b'changesets'), unit=_(b'chunks'), total=expectedtotal
            )
            self.callback = progress.increment

            efilesset = set()
            duprevs = []

            def ondupchangelog(cl, rev):
                # Only revisions that predate this changegroup are recorded.
                if rev < clstart:
                    duprevs.append(rev)

            def onchangelog(cl, rev):
                ctx = cl.changelogrevision(rev)
                efilesset.update(ctx.files)
                repo.register_changeset(rev, ctx)

            self.changelogheader()
            deltas = self.deltaiter()
            if not cl.addgroup(
                deltas,
                csmap,
                trp,
                alwayscache=True,
                addrevisioncb=onchangelog,
                duplicaterevisioncb=ondupchangelog,
            ):
                repo.ui.develwarn(
                    b'applied empty changelog from changegroup',
                    config=b'warn-empty-changegroup',
                )
            efiles = len(efilesset)
            clend = len(cl)
            changesets = clend - clstart
            progress.complete()
            del deltas
            # TODO Python 2.7 removal
            # del efilesset
            efilesset = None
            self.callback = None

            # pull off the manifest group
            repo.ui.status(_(b"adding manifests\n"))
            # We know that we'll never have more manifests than we had
            # changesets.
            progress = repo.ui.makeprogress(
                _(b'manifests'), unit=_(b'chunks'), total=changesets
            )
            self._unpackmanifests(repo, revmap, trp, progress)

            needfiles = {}
            if repo.ui.configbool(b'server', b'validate'):
                cl = repo.changelog
                ml = repo.manifestlog
                # validate incoming csets have their manifests
                for csrev in pycompat.xrange(clstart, clend):
                    mfnode = cl.changelogrevision(csrev).manifest
                    mfdelta = ml[mfnode].readdelta()
                    # store file nodes we must see
                    for fname, fnode in pycompat.iteritems(mfdelta):
                        needfiles.setdefault(fname, set()).add(fnode)

            # process the files
            repo.ui.status(_(b"adding file changes\n"))
            newrevs, newfiles = _addchangegroupfiles(
                repo, self, revmap, trp, efiles, needfiles
            )

            # making sure the value exists
            for counter in (
                b'changegroup-count-changesets',
                b'changegroup-count-revisions',
                b'changegroup-count-files',
                b'changegroup-count-heads',
            ):
                tr.changes.setdefault(counter, 0)

            # Some code uses bundle operations for internal purposes and
            # sets `ui.quiet` to keep them out of the user's sight. Since
            # the report of such operations now happens at the end of the
            # transaction, ui.quiet has no direct effect on the output.
            #
            # To preserve that intent we use an inelegant hack: the change
            # is not reported when `quiet` is set. We should probably move
            # to something better, but this is a good first step to allow
            # the "end of transaction report" to pass tests.
            if not repo.ui.quiet:
                tr.changes[b'changegroup-count-changesets'] += changesets
                tr.changes[b'changegroup-count-revisions'] += newrevs
                tr.changes[b'changegroup-count-files'] += newfiles

            deltaheads = 0
            if oldheads:
                heads = cl.heads()
                deltaheads += len(heads) - len(oldheads)
                # New heads that close their branch do not count as added.
                deltaheads -= sum(
                    1
                    for h in heads
                    if h not in oldheads and repo[h].closesbranch()
                )

            # see previous comment about checking ui.quiet
            if not repo.ui.quiet:
                tr.changes[b'changegroup-count-heads'] += deltaheads
            repo.invalidatevolatilesets()

            if changesets > 0:
                firstnode = hex(cl.node(clstart))
                lastnode = hex(cl.node(clend - 1))
                if b'node' not in tr.hookargs:
                    # First changegroup of the transaction: record the node
                    # range on the transaction itself.
                    tr.hookargs[b'node'] = firstnode
                    tr.hookargs[b'node_last'] = lastnode
                    hookargs = dict(tr.hookargs)
                else:
                    # Leave the transaction's values alone and only
                    # override them in our private copy.
                    hookargs = dict(tr.hookargs)
                    hookargs[b'node'] = firstnode
                    hookargs[b'node_last'] = lastnode
                repo.hook(
                    b'pretxnchangegroup',
                    throw=True,
                    **pycompat.strkwargs(hookargs)
                )

            added = pycompat.xrange(clstart, clend)
            phaseall = None
            if srctype in (b'push', b'serve'):
                # Old servers can not push the boundary themselves.
                # New servers won't push the boundary if changeset already
                # exists locally as secret
                #
                # We should not use added here but the list of all change in
                # the bundle
                if repo.publishing():
                    targetphase = phaseall = phases.public
                else:
                    # closer target phase computation

                    # Those changesets have been pushed from the
                    # outside, their phases are going to be pushed
                    # alongside. Therefore `targetphase` is
                    # ignored.
                    targetphase = phaseall = phases.draft
            if added:
                phases.registernew(repo, tr, targetphase, added)
            if phaseall is not None:
                if duprevs:
                    duprevs.extend(added)
                else:
                    duprevs = added
                phases.advanceboundary(repo, tr, phaseall, [], revs=duprevs)
                duprevs = []

            if changesets > 0:

                def runhooks(unused_success):
                    # These hooks run when the lock releases, not when the
                    # transaction closes. So it's possible for the changelog
                    # to have changed since we last saw it.
                    if clstart >= len(repo):
                        return

                    repo.hook(b"changegroup", **pycompat.strkwargs(hookargs))

                    for rev in added:
                        args = hookargs.copy()
                        args[b'node'] = hex(cl.node(rev))
                        del args[b'node_last']
                        repo.hook(b"incoming", **pycompat.strkwargs(args))

                    newheads = [h for h in repo.heads() if h not in oldheads]
                    repo.ui.log(
                        b"incoming",
                        b"%d incoming changes - new heads: %s\n",
                        len(added),
                        b', '.join([hex(c[:6]) for c in newheads]),
                    )

                tr.addpostclose(
                    b'changegroup-runhooks-%020i' % clstart,
                    lambda tr: repo._afterlock(runhooks),
                )
        finally:
            repo.ui.flush()
        # never return 0 here:
        if deltaheads < 0:
            return deltaheads - 1
        return deltaheads + 1

    def deltaiter(self):
        """Return an iterator over the deltas of the current group.

        Useful for passing to the underlying storage system to be stored.
        Iteration stops at the empty chunk that ends the group.
        """
        prevnode = None
        while True:
            chunkdata = self.deltachunk(prevnode)
            if chunkdata == {}:
                return
            # Chunkdata: (node, p1, p2, cs, deltabase, delta, flags)
            yield chunkdata
            prevnode = chunkdata[0]
507 507
508 508
class cg2unpacker(cg1unpacker):
    """Unpacker for cg2 streams.

    cg2 adds generaldelta support: the delta base is carried explicitly
    in the delta header, so the header format differs slightly. Everything
    else about the data is unchanged from cg1.
    """

    deltaheader = _CHANGEGROUPV2_DELTA_HEADER
    deltaheadersize = deltaheader.size
    version = b'02'

    def _deltaheader(self, headertuple, prevnode):
        """Normalize a v2 header to (node, p1, p2, deltabase, cs, flags).

        The delta base comes from the header; ``prevnode`` is not used.
        Flags are always 0.
        """
        node, p1, p2, deltabase, cs = headertuple
        return node, p1, p2, deltabase, cs, 0
525 525
526 526
class cg3unpacker(cg2unpacker):
    """Unpacker for cg3 streams.

    cg3 adds support for exchanging treemanifests and revlog flags. The
    revlog flags are appended to the delta header, and an empty chunk
    separates manifests from files.
    """

    deltaheader = _CHANGEGROUPV3_DELTA_HEADER
    deltaheadersize = deltaheader.size
    version = b'03'
    _grouplistcount = 2  # One list of manifests and one list of files

    def _deltaheader(self, headertuple, prevnode):
        """Return the v3 header fields unchanged.

        A v3 header already has the (node, p1, p2, deltabase, cs, flags)
        shape; ``prevnode`` is not used.
        """
        node, p1, p2, deltabase, cs, flags = headertuple
        return node, p1, p2, deltabase, cs, flags

    def _unpackmanifests(self, repo, revmap, trp, prog):
        """Add the root manifest group, then any directory manifest groups.

        Raises ``error.Abort`` when a named directory group adds nothing.
        """
        super(cg3unpacker, self)._unpackmanifests(repo, revmap, trp, prog)
        while True:
            header = self.filelogheader()
            if header == {}:
                # Empty header: end of the list of directory manifests.
                break
            # If we get here, there are directory manifests in the changegroup
            dirname = header[b"filename"]
            repo.ui.debug(b"adding %s revisions\n" % dirname)
            dirstorage = repo.manifestlog.getstorage(dirname)
            if not dirstorage.addgroup(self.deltaiter(), revmap, trp):
                raise error.Abort(_(b"received dir revlog group is empty"))
553 553
554 554
class headerlessfixup(object):
    """File-like reader that serves the bytes ``h`` before reading ``fh``."""

    def __init__(self, fh, h):
        self._h = h
        self._fh = fh

    def read(self, n):
        """Return exactly ``n`` bytes, draining the buffered prefix first."""
        pending = self._h
        if not pending:
            return readexactly(self._fh, n)
        data, self._h = pending[:n], pending[n:]
        shortfall = n - len(data)
        if shortfall > 0:
            # The prefix ran out: complete the read from the real stream.
            data += readexactly(self._fh, shortfall)
        return data
567 567
568 568
def _revisiondeltatochunks(delta, headerfn):
    """Serialize a revisiondelta to changegroup chunks.

    Yields the chunk header, the delta header built by ``headerfn(delta)``,
    an optional diff header, and finally the payload.
    """
    # The captured revision delta may be encoded as a delta against
    # a base revision or as a full revision. The changegroup format
    # requires that everything on the wire be deltas. So for full
    # revisions, we need to invent a header that says to rewrite
    # data.
    payload = delta.delta
    if payload is not None:
        diffheader = b''
    else:
        payload = delta.revision
        if delta.basenode == nullid:
            diffheader = mdiff.trivialdiffheader(len(payload))
        else:
            diffheader = mdiff.replacediffheader(
                delta.baserevisionsize, len(payload)
            )

    meta = headerfn(delta)

    yield chunkheader(len(meta) + len(diffheader) + len(payload))
    yield meta
    if diffheader:
        yield diffheader
    yield payload
594 594
595 595
596 596 def _sortnodesellipsis(store, nodes, cl, lookup):
597 597 """Sort nodes for changegroup generation."""
598 598 # Ellipses serving mode.
599 599 #
600 600 # In a perfect world, we'd generate better ellipsis-ified graphs
601 601 # for non-changelog revlogs. In practice, we haven't started doing
602 602 # that yet, so the resulting DAGs for the manifestlog and filelogs
603 603 # are actually full of bogus parentage on all the ellipsis
604 604 # nodes. This has the side effect that, while the contents are
605 605 # correct, the individual DAGs might be completely out of whack in
606 606 # a case like 882681bc3166 and its ancestors (back about 10
607 607 # revisions or so) in the main hg repo.
608 608 #
609 609 # The one invariant we *know* holds is that the new (potentially
610 610 # bogus) DAG shape will be valid if we order the nodes in the
611 611 # order that they're introduced in dramatis personae by the
612 612 # changelog, so what we do is we sort the non-changelog histories
613 613 # by the order in which they are used by the changelog.
614 614 key = lambda n: cl.rev(lookup(n))
615 615 return sorted(nodes, key=key)
616 616
617 617
def _resolvenarrowrevisioninfo(
    cl,
    store,
    ischangelog,
    rev,
    linkrev,
    linknode,
    clrevtolocalrev,
    fullclnodes,
    precomputedellipsis,
):
    """Compute the parents to advertise for an ellipsis revision.

    Returns ``(p1node, p2node, linknode)``, where the parent nodes are
    looked up in ``store`` and ``linknode`` is returned unchanged.

    Raises ``error.Abort`` when a parent cannot be mapped to a revision of
    ``store``.
    """
    linkparents = precomputedellipsis[linkrev]

    def tolocalrev(clrev):
        """Turn a changelog revnum into a local revnum.

        The ellipsis dag is stored as revnums on the changelog,
        but when we're producing ellipsis entries for
        non-changelog revlogs, we need to turn those numbers into
        something local. This does that for us, and during the
        changelog sending phase will also expand the stored
        mappings as needed.
        """
        if clrev == nullrev:
            return nullrev

        if ischangelog:
            return clrev

        # Walk the ellipsis-ized changelog breadth-first looking for a
        # change that has been linked from the current revlog.
        #
        # For a flat manifest revlog only a single step should be necessary
        # as all relevant changelog entries are relevant to the flat
        # manifest.
        #
        # For a filelog or tree manifest dirlog however not every changelog
        # entry will have been relevant, so we need to skip some changelog
        # nodes even after ellipsis-izing.
        pending = [clrev]
        while pending:
            current = pending.pop(0)
            if current in clrevtolocalrev:
                return clrevtolocalrev[current]
            # NOTE(review): `current` is a changelog revision number here,
            # while deltagroup() tests a linknode against fullclnodes -
            # confirm which kind of key this set is meant to hold.
            if current in fullclnodes:
                pending.extend(
                    [pp for pp in cl.parentrevs(current) if pp != nullrev]
                )
            elif current in precomputedellipsis:
                pending.extend(
                    [pp for pp in precomputedellipsis[current] if pp != nullrev]
                )
            else:
                # In this case, we've got an ellipsis with parents
                # outside the current bundle (likely an
                # incremental pull). We "know" that we can use the
                # value of this same revlog at whatever revision
                # is pointed to by linknode. "Know" is in scare
                # quotes because I haven't done enough examination
                # of edge cases to convince myself this is really
                # a fact - it works for all the (admittedly
                # thorough) cases in our testsuite, but I would be
                # somewhat unsurprised to find a case in the wild
                # where this breaks down a bit. That said, I don't
                # know if it would hurt anything.
                for candidate in pycompat.xrange(rev, 0, -1):
                    if store.linkrev(candidate) == clrev:
                        return candidate
                # We failed to resolve a parent for this node, so
                # we crash the changegroup construction.
                raise error.Abort(
                    b"unable to resolve parent while packing '%s' %r"
                    b' for changeset %r' % (store.indexfile, rev, clrev)
                )

        return nullrev

    if not linkparents or store.parentrevs(rev) == (nullrev, nullrev):
        p1 = p2 = nullrev
    else:
        localparents = sorted(tolocalrev(p) for p in linkparents)
        if len(linkparents) == 1:
            (p1,) = localparents
            p2 = nullrev
        else:
            p1, p2 = localparents

    return store.node(p1), store.node(p2), linknode
705 705
706 706
def deltagroup(
    repo,
    store,
    nodes,
    ischangelog,
    lookup,
    forcedeltaparentprev,
    topic=None,
    ellipses=False,
    clrevtolocalrev=None,
    fullclnodes=None,
    precomputedellipsis=None,
):
    """Calculate deltas for a set of revisions.

    Is a generator of ``revisiondelta`` instances, each with its
    ``linknode`` attribute set. Nothing is yielded when ``nodes`` is empty.

    ``lookup`` maps a node of ``store`` to its linknode. It is called
    exactly once per node that is looked at.

    ``forcedeltaparentprev`` selects the "previous revision" delta mode;
    otherwise the ``devel.bundle.delta`` config (b'', b'p1' or b'full')
    selects it. An unrecognized config value triggers a warning and the
    standard mode is used.

    If topic is not None, progress detail will be generated using this
    topic name (e.g. changesets, manifests, etc).

    When ``ellipses`` is true, ``clrevtolocalrev`` (updated in place),
    ``fullclnodes`` and ``precomputedellipsis`` are used to drop
    irrelevant revisions and to rewrite the parents of ellipsis revisions.
    """
    if not nodes:
        return

    cl = repo.changelog

    if ischangelog:
        # `hg log` shows changesets in storage order. To preserve order
        # across clones, send out changesets in storage order.
        nodesorder = b'storage'
    elif ellipses:
        nodes = _sortnodesellipsis(store, nodes, cl, lookup)
        nodesorder = b'nodes'
    else:
        nodesorder = None

    # Perform ellipses filtering and revision massaging. We do this before
    # emitrevisions() because a) filtering out revisions creates less work
    # for emitrevisions() b) dropping revisions would break emitrevisions()'s
    # assumptions about delta choices and we would possibly send a delta
    # referencing a missing base revision.
    #
    # Also, calling lookup() has side-effects with regards to populating
    # data structures. If we don't call lookup() for each node or if we call
    # lookup() after the first pass through each node, things can break -
    # possibly intermittently depending on the python hash seed! For that
    # reason, we store a mapping of all linknodes during the initial node
    # pass rather than use lookup() on the output side.
    if ellipses:
        filtered = []
        adjustedparents = {}
        linknodes = {}

        for node in nodes:
            rev = store.rev(node)
            linknode = lookup(node)
            linkrev = cl.rev(linknode)
            clrevtolocalrev[linkrev] = rev

            # If linknode is in fullclnodes, it means the corresponding
            # changeset was a full changeset and is being sent unaltered.
            if linknode in fullclnodes:
                linknodes[node] = linknode

            # If the corresponding changeset wasn't in the set computed
            # as relevant to us, it should be dropped outright.
            elif linkrev not in precomputedellipsis:
                continue

            else:
                # We could probably do this later and avoid the dict
                # holding state. But it likely doesn't matter.
                p1node, p2node, linknode = _resolvenarrowrevisioninfo(
                    cl,
                    store,
                    ischangelog,
                    rev,
                    linkrev,
                    linknode,
                    clrevtolocalrev,
                    fullclnodes,
                    precomputedellipsis,
                )

                adjustedparents[node] = (p1node, p2node)
                linknodes[node] = linknode

            filtered.append(node)

        nodes = filtered

    # We expect the first pass to be fast, so we only engage the progress
    # meter for constructing the revision deltas.
    progress = None
    if topic is not None:
        progress = repo.ui.makeprogress(
            topic, unit=_(b'chunks'), total=len(nodes)
        )

    configtarget = repo.ui.config(b'devel', b'bundle.delta')
    if configtarget not in (b'', b'p1', b'full'):
        # The message must be bytes like every other ui message in this
        # module: a native str template would embed the repr of the bytes
        # config value and hand a str to ui.warn().
        msg = _(b'config "devel.bundle.delta" has unknown value: %s\n')
        repo.ui.warn(msg % configtarget)

    deltamode = repository.CG_DELTAMODE_STD
    if forcedeltaparentprev:
        deltamode = repository.CG_DELTAMODE_PREV
    elif configtarget == b'p1':
        deltamode = repository.CG_DELTAMODE_P1
    elif configtarget == b'full':
        deltamode = repository.CG_DELTAMODE_FULL

    revisions = store.emitrevisions(
        nodes,
        nodesorder=nodesorder,
        revisiondata=True,
        assumehaveparentrevisions=not ellipses,
        deltamode=deltamode,
    )

    for i, revision in enumerate(revisions):
        if progress:
            progress.update(i + 1)

        if ellipses:
            # Use the linknode recorded during the first pass instead of
            # calling lookup() a second time.
            linknode = linknodes[revision.node]

            if revision.node in adjustedparents:
                p1node, p2node = adjustedparents[revision.node]
                revision.p1node = p1node
                revision.p2node = p2node
                revision.flags |= repository.REVISION_FLAG_ELLIPSIS

        else:
            linknode = lookup(revision.node)

        revision.linknode = linknode
        yield revision

    if progress:
        progress.complete()
847 847
848 848
849 849 class cgpacker(object):
850 850 def __init__(
851 851 self,
852 852 repo,
853 853 oldmatcher,
854 854 matcher,
855 855 version,
856 856 builddeltaheader,
857 857 manifestsend,
858 858 forcedeltaparentprev=False,
859 859 bundlecaps=None,
860 860 ellipses=False,
861 861 shallow=False,
862 862 ellipsisroots=None,
863 863 fullnodes=None,
864 864 ):
865 865 """Given a source repo, construct a bundler.
866 866
867 867 oldmatcher is a matcher that matches on files the client already has.
868 868 These will not be included in the changegroup.
869 869
870 870 matcher is a matcher that matches on files to include in the
871 871 changegroup. Used to facilitate sparse changegroups.
872 872
873 873 forcedeltaparentprev indicates whether delta parents must be against
874 874 the previous revision in a delta group. This should only be used for
875 875 compatibility with changegroup version 1.
876 876
877 877 builddeltaheader is a callable that constructs the header for a group
878 878 delta.
879 879
880 880 manifestsend is a chunk to send after manifests have been fully emitted.
881 881
882 882 ellipses indicates whether ellipsis serving mode is enabled.
883 883
884 884 bundlecaps is optional and can be used to specify the set of
885 885 capabilities which can be used to build the bundle. While bundlecaps is
886 886 unused in core Mercurial, extensions rely on this feature to communicate
887 887 capabilities to customize the changegroup packer.
888 888
889 889 shallow indicates whether shallow data might be sent. The packer may
890 890 need to pack file contents not introduced by the changes being packed.
891 891
892 892 fullnodes is the set of changelog nodes which should not be ellipsis
893 893 nodes. We store this rather than the set of nodes that should be
894 894 ellipsis because for very large histories we expect this to be
895 895 significantly smaller.
896 896 """
897 897 assert oldmatcher
898 898 assert matcher
899 899 self._oldmatcher = oldmatcher
900 900 self._matcher = matcher
901 901
902 902 self.version = version
903 903 self._forcedeltaparentprev = forcedeltaparentprev
904 904 self._builddeltaheader = builddeltaheader
905 905 self._manifestsend = manifestsend
906 906 self._ellipses = ellipses
907 907
908 908 # Set of capabilities we can use to build the bundle.
909 909 if bundlecaps is None:
910 910 bundlecaps = set()
911 911 self._bundlecaps = bundlecaps
912 912 self._isshallow = shallow
913 913 self._fullclnodes = fullnodes
914 914
915 915 # Maps ellipsis revs to their roots at the changelog level.
916 916 self._precomputedellipsis = ellipsisroots
917 917
918 918 self._repo = repo
919 919
920 920 if self._repo.ui.verbose and not self._repo.ui.debugflag:
921 921 self._verbosenote = self._repo.ui.note
922 922 else:
923 923 self._verbosenote = lambda s: None
924 924
def generate(
    self, commonrevs, clnodes, fastpathlinkrev, source, changelog=True
):
    """Yield the byte chunks of a changegroup, section by section.

    The stream is emitted in this order: the changelog group, the
    manifest groups (one per tree), ``self._manifestsend``, one group per
    file, and a final closing chunk. Every group ends with a closing
    chunk. Once everything has been yielded, and if ``clnodes`` is not
    empty, the ``outgoing`` hook is fired.

    ``changelog`` is forwarded to ``_generatechangelog`` as ``generate``;
    if it is False, changelog data won't be added to the changegroup.
    """
    repo = self._repo
    cl = repo.changelog

    self._verbosenote(_(b'uncompressed size of bundle content:\n'))

    # ---- changelog -------------------------------------------------
    clstate, cldeltas = self._generatechangelog(
        cl, clnodes, generate=changelog
    )
    size = 0
    for delta in cldeltas:
        for chunk in _revisiondeltatochunks(delta, self._builddeltaheader):
            size += len(chunk)
            yield chunk

    terminator = closechunk()
    size += len(terminator)
    yield terminator

    self._verbosenote(_(b'%8.i (changelog)\n') % size)

    # These containers are filled in while the changelog deltas above are
    # being consumed, so they are only meaningful from this point on.
    clrevorder = clstate[b'clrevorder']
    manifests = clstate[b'manifests']
    changedfiles = clstate[b'changedfiles']

    # We need to make sure that the linkrev in the changegroup refers to
    # the first changeset that introduced the manifest or file revision.
    # The fastpath is usually safer than the slowpath, because the
    # filelogs are walked in revlog order.
    #
    # When taking the slowpath when the manifest revlog uses
    # generaldelta, the manifest may be walked in the "wrong" order.
    # Without 'clrevorder', we would get an incorrect linkrev (see fix in
    # cc0ff93d0c0c).
    #
    # When taking the fastpath, we are only vulnerable to reordering of
    # the changelog itself. The changelog never uses generaldelta and is
    # never reordered. To handle this case, we simply take the slowpath,
    # which already has the 'clrevorder' logic. This was also fixed in
    # cc0ff93d0c0c.

    # Treemanifests don't work correctly with fastpathlinkrev either,
    # because we don't discover which directory nodes to send along with
    # files. This could probably be fixed.
    if fastpathlinkrev:
        fastpathlinkrev = not scmutil.istreemanifest(repo)

    # ---- manifests -------------------------------------------------
    fnodes = {}  # needed file nodes

    size = 0
    manifestgroups = self.generatemanifests(
        commonrevs,
        clrevorder,
        fastpathlinkrev,
        manifests,
        fnodes,
        source,
        clstate[b'clrevtomanifestrev'],
    )

    for tree, mfdeltas in manifestgroups:
        if tree:
            # Sub-tree manifests get a named header; only version 03 is
            # expected to produce them.
            assert self.version == b'03'
            header = _fileheader(tree)
            size += len(header)
            yield header

        for delta in mfdeltas:
            for chunk in _revisiondeltatochunks(delta, self._builddeltaheader):
                size += len(chunk)
                yield chunk

        terminator = closechunk()
        size += len(terminator)
        yield terminator

    self._verbosenote(_(b'%8.i (manifests)\n') % size)
    yield self._manifestsend

    # ---- files -----------------------------------------------------
    if self._ellipses and self._isshallow:
        # Must be built before 'manifests' is emptied just below.
        mfdicts = [
            (repo.manifestlog[n].read(), lr)
            for (n, lr) in pycompat.iteritems(manifests)
        ]
    else:
        mfdicts = None

    manifests.clear()
    clrevs = {cl.rev(x) for x in clnodes}

    filegroups = self.generatefiles(
        changedfiles,
        commonrevs,
        source,
        mfdicts,
        fastpathlinkrev,
        fnodes,
        clrevs,
    )

    for path, filedeltas in filegroups:
        header = _fileheader(path)
        # The size is reported per file, so it restarts at every header.
        size = len(header)
        yield header

        for delta in filedeltas:
            for chunk in _revisiondeltatochunks(delta, self._builddeltaheader):
                size += len(chunk)
                yield chunk

        terminator = closechunk()
        size += len(terminator)
        yield terminator

        self._verbosenote(_(b'%8.i %s\n') % (size, path))

    # End of the files section.
    yield closechunk()

    if clnodes:
        repo.hook(b'outgoing', node=hex(clnodes[0]), source=source)
1050 1050
1051 1051 def _generatechangelog(self, cl, nodes, generate=True):
1052 1052 """Generate data for changelog chunks.
1053 1053
1054 1054 Returns a 2-tuple of a dict containing state and an iterable of
1055 1055 byte chunks. The state will not be fully populated until the
1056 1056 chunk stream has been fully consumed.
1057 1057
1058 1058 if generate is False, the state will be fully populated and no chunk
1059 1059 stream will be yielded
1060 1060 """
1061 1061 clrevorder = {}
1062 1062 manifests = {}
1063 1063 mfl = self._repo.manifestlog
1064 1064 changedfiles = set()
1065 1065 clrevtomanifestrev = {}
1066 1066
1067 1067 state = {
1068 1068 b'clrevorder': clrevorder,
1069 1069 b'manifests': manifests,
1070 1070 b'changedfiles': changedfiles,
1071 1071 b'clrevtomanifestrev': clrevtomanifestrev,
1072 1072 }
1073 1073
1074 1074 if not (generate or self._ellipses):
1075 1075 # sort the nodes in storage order
1076 1076 nodes = sorted(nodes, key=cl.rev)
1077 1077 for node in nodes:
1078 1078 c = cl.changelogrevision(node)
1079 1079 clrevorder[node] = len(clrevorder)
1080 1080 # record the first changeset introducing this manifest version
1081 1081 manifests.setdefault(c.manifest, node)
1082 1082 # Record a complete list of potentially-changed files in
1083 1083 # this manifest.
1084 1084 changedfiles.update(c.files)
1085 1085
1086 1086 return state, ()
1087 1087
1088 1088 # Callback for the changelog, used to collect changed files and
1089 1089 # manifest nodes.
1090 1090 # Returns the linkrev node (identity in the changelog case).
1091 1091 def lookupcl(x):
1092 1092 c = cl.changelogrevision(x)
1093 1093 clrevorder[x] = len(clrevorder)
1094 1094
1095 1095 if self._ellipses:
1096 1096 # Only update manifests if x is going to be sent. Otherwise we
1097 1097 # end up with bogus linkrevs specified for manifests and
1098 1098 # we skip some manifest nodes that we should otherwise
1099 1099 # have sent.
1100 1100 if (
1101 1101 x in self._fullclnodes
1102 1102 or cl.rev(x) in self._precomputedellipsis
1103 1103 ):
1104 1104
1105 1105 manifestnode = c.manifest
1106 1106 # Record the first changeset introducing this manifest
1107 1107 # version.
1108 1108 manifests.setdefault(manifestnode, x)
1109 1109 # Set this narrow-specific dict so we have the lowest
1110 1110 # manifest revnum to look up for this cl revnum. (Part of
1111 1111 # mapping changelog ellipsis parents to manifest ellipsis
1112 1112 # parents)
1113 1113 clrevtomanifestrev.setdefault(
1114 1114 cl.rev(x), mfl.rev(manifestnode)
1115 1115 )
1116 1116 # We can't trust the changed files list in the changeset if the
1117 1117 # client requested a shallow clone.
1118 1118 if self._isshallow:
1119 1119 changedfiles.update(mfl[c.manifest].read().keys())
1120 1120 else:
1121 1121 changedfiles.update(c.files)
1122 1122 else:
1123 1123 # record the first changeset introducing this manifest version
1124 1124 manifests.setdefault(c.manifest, x)
1125 1125 # Record a complete list of potentially-changed files in
1126 1126 # this manifest.
1127 1127 changedfiles.update(c.files)
1128 1128
1129 1129 return x
1130 1130
1131 1131 gen = deltagroup(
1132 1132 self._repo,
1133 1133 cl,
1134 1134 nodes,
1135 1135 True,
1136 1136 lookupcl,
1137 1137 self._forcedeltaparentprev,
1138 1138 ellipses=self._ellipses,
1139 1139 topic=_(b'changesets'),
1140 1140 clrevtolocalrev={},
1141 1141 fullclnodes=self._fullclnodes,
1142 1142 precomputedellipsis=self._precomputedellipsis,
1143 1143 )
1144 1144
1145 1145 return state, gen
1146 1146
def generatemanifests(
    self,
    commonrevs,
    clrevorder,
    fastpathlinkrev,
    manifests,
    fnodes,
    source,
    clrevtolocalrev,
):
    """Yield ``(tree, deltas)`` pairs, one per manifest group to send.

    ``tree`` is ``b''`` for the root manifest and a directory path ending
    in ``b'/'`` for sub-tree manifests discovered along the way.

    `source` is unused here, but is used by extensions like remotefilelog
    to change what is sent based on pulls vs pushes, etc.
    """
    repo = self._repo
    mfl = repo.manifestlog
    # Pending work: tree name -> {manifest node: changelog node}. It grows
    # while manifests are being read (see lookupmflinknode).
    tmfnodes = {b'': manifests}

    def makelookupmflinknode(tree, nodes):
        """Build the linknode lookup callback for one manifest tree."""
        if fastpathlinkrev:
            assert not tree
            return (
                manifests.__getitem__
            )  # pytype: disable=unsupported-operands

        def lookupmflinknode(x):
            """Return the linkrev node for manifest node ``x``.

            SIDE EFFECTS:

            1) ``fnodes`` gets populated with the relevant file nodes
               (this callback is only used when fastpathlinkrev is off)
            2) when treemanifests are in use, ``tmfnodes`` collects the
               sub-tree manifest nodes to send

            This means manifests must be completely sent to the client
            before the list of files and treemanifests to send can be
            trusted.
            """
            clnode = nodes[x]
            mdata = mfl.get(tree, x).readfast(shallow=True)
            for name, node, flag in mdata.iterentries():
                if flag == b't':  # subdirectory manifest
                    linkmap = tmfnodes.setdefault(tree + name + b'/', {})
                else:
                    linkmap = fnodes.setdefault(tree + name, {})
                # Keep whichever changeset comes first in clrevorder.
                known = linkmap.setdefault(node, clnode)
                if clrevorder[clnode] < clrevorder[known]:
                    linkmap[node] = clnode
            return clnode

        return lookupmflinknode

    while tmfnodes:
        tree, nodes = tmfnodes.popitem()

        should_visit = self._matcher.visitdir(tree[:-1])
        if tree and not should_visit:
            continue

        store = mfl.getstorage(tree)

        if should_visit:
            # Avoid sending any manifest nodes we can prove the client
            # already has by checking linkrevs. See the related comment
            # in generatefiles().
            prunednodes = self._prunemanifests(store, nodes, commonrevs)
        else:
            # No nodes to send because this directory is out of the
            # client's view of the repository (probably because of narrow
            # clones). Do this even for the root directory (tree=='')
            prunednodes = []

        if tree and not prunednodes:
            continue

        deltas = deltagroup(
            repo,
            store,
            prunednodes,
            False,
            makelookupmflinknode(tree, nodes),
            self._forcedeltaparentprev,
            ellipses=self._ellipses,
            topic=_(b'manifests'),
            clrevtolocalrev=clrevtolocalrev,
            fullclnodes=self._fullclnodes,
            precomputedellipsis=self._precomputedellipsis,
        )

        if not self._oldmatcher.visitdir(store.tree[:-1]):
            yield tree, deltas
            continue

        # 'deltas' is a generator and we need to consume it even if we are
        # not going to send it, because a side effect is that it updates
        # tmfnodes (via the lookup callback).
        for _delta in deltas:
            pass
        if not tree:
            # The root manifest group is always announced, even when empty.
            yield tree, []
1261 1261
1262 1262 def _prunemanifests(self, store, nodes, commonrevs):
1263 1263 if not self._ellipses:
1264 1264 # In non-ellipses case and large repositories, it is better to
1265 1265 # prevent calling of store.rev and store.linkrev on a lot of
1266 1266 # nodes as compared to sending some extra data
1267 1267 return nodes.copy()
1268 1268 # This is split out as a separate method to allow filtering
1269 1269 # commonrevs in extension code.
1270 1270 #
1271 1271 # TODO(augie): this shouldn't be required, instead we should
1272 1272 # make filtering of revisions to send delegated to the store
1273 1273 # layer.
1274 1274 frev, flr = store.rev, store.linkrev
1275 1275 return [n for n in nodes if flr(frev(n)) not in commonrevs]
1276 1276
1277 1277 # The 'source' parameter is useful for extensions
def generatefiles(
    self,
    changedfiles,
    commonrevs,
    source,
    mfdicts,
    fastpathlinkrev,
    fnodes,
    clrevs,
):
    """Yield ``(filename, deltas)`` pairs for the file groups to send.

    Only files accepted by ``self._matcher`` and not by
    ``self._oldmatcher`` are considered, in sorted order. A file with no
    revision left to send is skipped. Aborts if a selected file has an
    empty or missing filelog.

    ``source`` is not used here; it is useful for extensions.
    """
    selected = []
    for f in changedfiles:
        if self._matcher(f) and not self._oldmatcher(f):
            selected.append(f)
    changedfiles = selected

    if fastpathlinkrev:
        cln = self._repo.changelog.node

        def normallinknodes(store, fname):
            # Derive the linknodes straight from the filelog's linkrevs,
            # restricted to the changesets being bundled.
            flinkrev = store.linkrev
            fnode = store.node
            links = {}
            for rev in store:
                linkrev = flinkrev(rev)
                if linkrev in clrevs:
                    links[fnode(rev)] = cln(linkrev)
            return links

    else:

        def normallinknodes(unused, fname):
            # Slow path: use what was collected while reading manifests.
            return fnodes.get(fname, {})

    clrevtolocalrev = {}

    if self._isshallow:
        # In a shallow clone, the linknodes callback needs to also include
        # those file nodes that are in the manifests we sent but weren't
        # introduced by those manifests.
        commonctxs = [self._repo[c] for c in commonrevs]
        clrev = self._repo.changelog.rev

        def linknodes(flog, fname):
            for ctx in commonctxs:
                try:
                    fnode = ctx.filenode(fname)
                    clrevtolocalrev[ctx.rev()] = flog.rev(fnode)
                except error.ManifestLookupError:
                    pass
            links = normallinknodes(flog, fname)
            if len(links) != len(mfdicts):
                for mf, lr in mfdicts:
                    fnode = mf.get(fname, None)
                    if fnode in links:
                        links[fnode] = min(links[fnode], lr, key=clrev)
                    elif fnode:
                        links[fnode] = lr
            return links

    else:
        linknodes = normallinknodes

    repo = self._repo
    progress = repo.ui.makeprogress(
        _(b'files'), unit=_(b'files'), total=len(changedfiles)
    )
    for pos, fname in enumerate(sorted(changedfiles), 1):
        filerevlog = repo.file(fname)
        if not filerevlog:
            raise error.Abort(
                _(b"empty or missing file data for %s") % fname
            )

        # This mapping is shared with the delta generator and is per-file.
        clrevtolocalrev.clear()

        linkrevnodes = linknodes(filerevlog, fname)

        # Lookup for filenodes, we collected the linkrev nodes above in the
        # fastpath case and with lookupmf in the slowpath case.
        def lookupfilelog(x):
            return linkrevnodes[x]

        torev, tolinkrev = filerevlog.rev, filerevlog.linkrev
        # Skip sending any filenode we know the client already has. This
        # avoids over-sending files relatively inexpensively, so it's not
        # a problem if we under-filter here.
        filenodes = []
        for node in linkrevnodes:
            if tolinkrev(torev(node)) not in commonrevs:
                filenodes.append(node)

        if not filenodes:
            continue

        progress.update(pos, item=fname)

        deltas = deltagroup(
            repo,
            filerevlog,
            filenodes,
            False,
            lookupfilelog,
            self._forcedeltaparentprev,
            ellipses=self._ellipses,
            clrevtolocalrev=clrevtolocalrev,
            fullclnodes=self._fullclnodes,
            precomputedellipsis=self._precomputedellipsis,
        )

        yield fname, deltas

    progress.complete()
1386 1386
1387 1387
def _makecg1packer(
    repo,
    oldmatcher,
    matcher,
    bundlecaps,
    ellipses=False,
    shallow=False,
    ellipsisroots=None,
    fullnodes=None,
):
    """Return a ``cgpacker`` configured for changegroup version 01.

    The delta header holds node, p1, p2 and linknode, no chunk is sent
    after the manifests, and deltas are forced against the previous
    revision of the group.
    """

    def builddeltaheader(d):
        return _CHANGEGROUPV1_DELTA_HEADER.pack(
            d.node, d.p1node, d.p2node, d.linknode
        )

    return cgpacker(
        repo,
        oldmatcher,
        matcher,
        b'01',
        builddeltaheader=builddeltaheader,
        manifestsend=b'',
        forcedeltaparentprev=True,
        bundlecaps=bundlecaps,
        ellipses=ellipses,
        shallow=shallow,
        ellipsisroots=ellipsisroots,
        fullnodes=fullnodes,
    )
1416 1416
1417 1417
def _makecg2packer(
    repo,
    oldmatcher,
    matcher,
    bundlecaps,
    ellipses=False,
    shallow=False,
    ellipsisroots=None,
    fullnodes=None,
):
    """Return a ``cgpacker`` configured for changegroup version 02.

    The delta header holds node, p1, p2, the delta base node and the
    linknode; no chunk is sent after the manifests.
    """

    def builddeltaheader(d):
        return _CHANGEGROUPV2_DELTA_HEADER.pack(
            d.node, d.p1node, d.p2node, d.basenode, d.linknode
        )

    return cgpacker(
        repo,
        oldmatcher,
        matcher,
        b'02',
        builddeltaheader=builddeltaheader,
        manifestsend=b'',
        bundlecaps=bundlecaps,
        ellipses=ellipses,
        shallow=shallow,
        ellipsisroots=ellipsisroots,
        fullnodes=fullnodes,
    )
1445 1445
1446 1446
def _makecg3packer(
    repo,
    oldmatcher,
    matcher,
    bundlecaps,
    ellipses=False,
    shallow=False,
    ellipsisroots=None,
    fullnodes=None,
):
    """Return a ``cgpacker`` configured for changegroup version 03.

    The delta header holds node, p1, p2, the delta base node, the
    linknode and the revision flags; a closing chunk is sent once all
    manifests have been emitted.
    """

    def builddeltaheader(d):
        return _CHANGEGROUPV3_DELTA_HEADER.pack(
            d.node, d.p1node, d.p2node, d.basenode, d.linknode, d.flags
        )

    return cgpacker(
        repo,
        oldmatcher,
        matcher,
        b'03',
        builddeltaheader=builddeltaheader,
        manifestsend=closechunk(),
        bundlecaps=bundlecaps,
        ellipses=ellipses,
        shallow=shallow,
        ellipsisroots=ellipsisroots,
        fullnodes=fullnodes,
    )
1474 1474
1475 1475
# Changegroup version -> (packer factory, unpacker class).
_packermap = {
    # cg1: the original format
    b'01': (_makecg1packer, cg1unpacker),
    # cg2: can additionally exchange generaldelta
    b'02': (_makecg2packer, cg2unpacker),
    # cg3: can additionally exchange revlog flags and treemanifests
    b'03': (_makecg3packer, cg3unpacker),
}
1483 1483
1484 1484
def allsupportedversions(repo):
    """Return the set of changegroup versions known to work with ``repo``.

    Version 03 is only kept when something requires it: one of the
    experimental ``changegroup3``/``treemanifest`` options, a treemanifest
    repository, or the ``exp-sidedata-flag`` requirement.
    """
    ui = repo.ui
    versions = set(_packermap)

    # We keep version 03 when we need to exchange treemanifest data.
    #
    # Versions 01 and 02 are kept as well in that case, because it is
    # possible for a repo to contain both normal and tree manifests at the
    # same time, so using an older version to pull data (or even to push a
    # subset of history) is viable.
    needv03 = (
        ui.configbool(b'experimental', b'changegroup3')
        or ui.configbool(b'experimental', b'treemanifest')
        or scmutil.istreemanifest(repo)
    )

    if b'exp-sidedata-flag' in repo.requirements:
        needv03 = True
        # don't attempt to use 01/02 until we do sidedata cleaning
        versions.difference_update((b'01', b'02'))

    if not needv03:
        versions.discard(b'03')
    return versions
1509 1509
1510 1510
1511 1511 # Changegroup versions that can be applied to the repo
def supportedincomingversions(repo):
    """Return the changegroup versions that can be applied to ``repo``."""
    incoming = allsupportedversions(repo)
    return incoming
1514 1514
1515 1515
1516 1516 # Changegroup versions that can be created from the repo
def supportedoutgoingversions(repo):
    """Return the changegroup versions that can be created from ``repo``.

    Starts from ``allsupportedversions(repo)`` and drops versions 01 and
    02 when the repository cannot be represented with them.
    """
    versions = allsupportedversions(repo)

    # Any one of these rules out versions 01 and 02:
    #
    # - tree manifests: 01 and 02 support only flat manifests and it's
    #   just too expensive to convert between the flat manifest and tree
    #   manifest on the fly. Since tree manifests are hashed differently,
    #   all of history would have to be converted. Instead, we simply
    #   don't even pretend to support versions 01 and 02.
    # - narrow: 01 and 02 don't support revlog flags, and we need to
    #   support that for stripping and unbundling to work.
    # - LFS: 01 and 02 don't support revlog flags, and we need to mark
    #   LFS entries with REVIDX_EXTSTORED.
    needsnewer = (
        scmutil.istreemanifest(repo)
        or requirements.NARROW_REQUIREMENT in repo.requirements
        or LFS_REQUIREMENT in repo.requirements
    )
    if needsnewer:
        versions.difference_update((b'01', b'02'))

    return versions
1539 1539
1540 1540
def localversion(repo):
    """Return the highest changegroup version ``repo`` can produce.

    Finds the best version to use for bundles that are meant to be used
    locally, such as those from strip and shelve, and temporary bundles.
    """
    candidates = supportedoutgoingversions(repo)
    return max(candidates)
1545 1545
1546 1546
def safeversion(repo):
    """Return the lowest changegroup version clients can be assumed to know.

    Finds the smallest version that it's safe to assume clients of the
    repo will support. For example, all hg versions that support
    generaldelta also support changegroup 02, so version 01 is ruled out
    for generaldelta repositories.
    """
    candidates = supportedoutgoingversions(repo)
    if b'generaldelta' in repo.requirements:
        candidates.discard(b'01')
    assert candidates
    return min(candidates)
1556 1556
1557 1557
def getbundler(
    version,
    repo,
    bundlecaps=None,
    oldmatcher=None,
    matcher=None,
    ellipses=False,
    shallow=False,
    ellipsisroots=None,
    fullnodes=None,
):
    """Return a changegroup packer for ``version`` built from ``repo``.

    ``matcher`` defaults to a matcher accepting everything and
    ``oldmatcher`` to one accepting nothing. The matcher is narrowed with
    ``repo.narrowmatch`` before the packer is built.

    Raises ``error.ProgrammingError`` when version 01 is requested with a
    matcher that does not match everything, and ``error.Abort`` when
    ellipsis nodes are requested with version 01 or 02. ``version`` is
    asserted to be one of ``supportedoutgoingversions(repo)``.
    """
    assert version in supportedoutgoingversions(repo)

    matcher = matchmod.always() if matcher is None else matcher
    oldmatcher = matchmod.never() if oldmatcher is None else oldmatcher

    if version == b'01' and not matcher.always():
        raise error.ProgrammingError(
            b'version 01 changegroups do not support sparse file matchers'
        )

    if ellipses and version in (b'01', b'02'):
        msg = _(
            b'ellipsis nodes require at least cg3 on client and server, '
            b'but negotiated version %s'
        )
        raise error.Abort(msg % version)

    # Requested files could include files not in the local store. So
    # filter those out.
    matcher = repo.narrowmatch(matcher)

    makepacker = _packermap[version][0]
    return makepacker(
        repo,
        oldmatcher,
        matcher,
        bundlecaps,
        ellipses=ellipses,
        shallow=shallow,
        ellipsisroots=ellipsisroots,
        fullnodes=fullnodes,
    )
1605 1605
1606 1606
def getunbundler(version, fh, alg, extras=None):
    """Instantiate the unpacker registered for ``version``.

    ``fh``, ``alg`` and ``extras`` are handed to the unpacker class as-is.
    """
    unpackercls = _packermap[version][1]
    return unpackercls(fh, alg, extras=extras)
1609 1609
1610 1610
def _changegroupinfo(repo, nodes, source):
    """Report the changesets about to be bundled through ``repo.ui``.

    The count is shown in verbose mode or when ``source`` is ``b'bundle'``;
    the hex id of every node is listed in debug mode.
    """
    ui = repo.ui
    if ui.verbose or source == b'bundle':
        ui.status(_(b"%d changesets found\n") % len(nodes))

    if not ui.debugflag:
        return

    ui.debug(b"list of changesets:\n")
    for node in nodes:
        ui.debug(b"%s\n" % hex(node))
1618 1618
1619 1619
def makechangegroup(
    repo, outgoing, version, source, fastpath=False, bundlecaps=None
):
    """Return an unbundler reading the changegroup for ``outgoing``.

    The chunk stream from ``makestream`` is wrapped in a
    ``util.chunkbuffer`` and handed to ``getunbundler`` with no
    compression algorithm (``None``) and a ``b'clcount'`` extra holding
    the number of missing changesets.
    """
    cgstream = makestream(
        repo,
        outgoing,
        version,
        source,
        fastpath=fastpath,
        bundlecaps=bundlecaps,
    )
    buffered = util.chunkbuffer(cgstream)
    extras = {b'clcount': len(outgoing.missing)}
    return getunbundler(version, buffered, None, extras)
1637 1637
1638 1638
def makestream(
    repo,
    outgoing,
    version,
    source,
    fastpath=False,
    bundlecaps=None,
    matcher=None,
):
    """Return a generator of changegroup chunks for ``outgoing``.

    ``version`` selects the changegroup format and ``source`` is passed to
    the ``preoutgoing`` hook and to the bundler. ``bundlecaps`` and
    ``matcher`` are forwarded to ``getbundler``.

    The linkrev fast path is used when ``fastpath`` is true, or when the
    unfiltered repository is in use and the requested heads are exactly
    all of its heads.

    ``outgoing`` is only read; its ``ancestorsof`` list is left untouched.
    """
    # The bundler is created from the repo as given, before switching to
    # the unfiltered view.
    bundler = getbundler(version, repo, bundlecaps=bundlecaps, matcher=matcher)

    repo = repo.unfiltered()
    commonrevs = outgoing.common
    csets = outgoing.missing
    # Work on a sorted copy: 'outgoing' belongs to the caller and must not
    # be reordered as a side effect of building a stream.
    heads = sorted(outgoing.ancestorsof)
    # We go through the fast path if we get told to, or if all (unfiltered)
    # heads have been requested (since we then know that all linkrevs will
    # be pulled by the client).
    fastpathlinkrev = fastpath or (
        repo.filtername is None and heads == sorted(repo.heads())
    )

    repo.hook(b'preoutgoing', throw=True, source=source)
    _changegroupinfo(repo, csets, source)
    return bundler.generate(commonrevs, csets, fastpathlinkrev, source)
1665 1665
1666 1666
def _addchangegroupfiles(repo, source, revmap, trp, expectedfiles, needfiles):
    """Apply the file groups read from ``source`` to the filelogs of ``repo``.

    ``needfiles`` maps a filename to the file nodes still expected for it.
    Received nodes are removed from it, and files with nothing left are
    dropped from the mapping. Whatever remains afterwards must already be
    present in the repository.

    Returns a ``(revisions, files)`` pair: the number of file revisions
    added and the number of file groups processed.

    Aborts on an empty file group, a censored delta base, a file revision
    that was not expected, or file data that is still missing at the end.
    """
    revisions = 0
    files = 0
    progress = repo.ui.makeprogress(
        _(b'files'), unit=_(b'files'), total=expectedfiles
    )
    # filelogheader() returns an empty dict once there are no more files.
    for chunkdata in iter(source.filelogheader, {}):
        files += 1
        fname = chunkdata[b"filename"]
        repo.ui.debug(b"adding %s revisions\n" % fname)
        progress.increment()
        flog = repo.file(fname)
        oldlen = len(flog)
        try:
            added = flog.addgroup(source.deltaiter(), revmap, trp)
        except error.CensoredBaseError as e:
            raise error.Abort(_(b"received delta base is censored: %s") % e)
        if not added:
            raise error.Abort(_(b"received file revlog group is empty"))

        newlen = len(flog)
        revisions += newlen - oldlen
        if fname not in needfiles:
            continue

        # Every revision just added must be one that was expected.
        needs = needfiles[fname]
        for rev in pycompat.xrange(oldlen, newlen):
            node = flog.node(rev)
            if node not in needs:
                raise error.Abort(_(b"received spurious file revlog entry"))
            needs.remove(node)
        if not needs:
            del needfiles[fname]
    progress.complete()

    # Anything still listed was not part of the changegroup; it has to be
    # in the repository already.
    for fname, needs in pycompat.iteritems(needfiles):
        flog = repo.file(fname)
        for node in needs:
            try:
                flog.rev(node)
            except error.LookupError:
                raise error.Abort(
                    _(b'missing file data for %s:%s - run hg verify')
                    % (fname, hex(node))
                )

    return revisions, files
General Comments 0
You need to be logged in to leave comments. Login now