##// END OF EJS Templates
changegroup: don't convert revisions to node for duplicate handling...
Joerg Sonnenberger -
r47260:fa7ae7aa default
parent child Browse files
Show More
@@ -1,1706 +1,1710 b''
1 1 # changegroup.py - Mercurial changegroup manipulation functions
2 2 #
3 3 # Copyright 2006 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import os
11 11 import struct
12 12 import weakref
13 13
14 14 from .i18n import _
15 15 from .node import (
16 16 hex,
17 17 nullid,
18 18 nullrev,
19 19 short,
20 20 )
21 21 from .pycompat import open
22 22
23 23 from . import (
24 24 error,
25 25 match as matchmod,
26 26 mdiff,
27 27 phases,
28 28 pycompat,
29 29 requirements,
30 30 scmutil,
31 31 util,
32 32 )
33 33
34 34 from .interfaces import repository
35 35
# Fixed-width delta-header layouts for each changegroup version.
# cg1: node, p1, p2, linknode (see cg1unpacker._deltaheader).
_CHANGEGROUPV1_DELTA_HEADER = struct.Struct(b"20s20s20s20s")
# cg2 adds an explicit delta base node.
_CHANGEGROUPV2_DELTA_HEADER = struct.Struct(b"20s20s20s20s20s")
# cg3 additionally carries a 16-bit flags field (note the explicit
# big-endian marker for the non-bytes member).
_CHANGEGROUPV3_DELTA_HEADER = struct.Struct(b">20s20s20s20s20sH")

# Repository requirement identifier for LFS support.
LFS_REQUIREMENT = b'lfs'

# Local alias: called for every chunk read below.
readexactly = util.readexactly
43 43
44 44
def getchunk(stream):
    """Return the payload of the next framed chunk from *stream*.

    An empty byte string signals a terminating (zero-length) chunk.
    """
    header = readexactly(stream, 4)
    (length,) = struct.unpack(b">l", header)
    if length > 4:
        # The advertised length includes the 4-byte header itself.
        return readexactly(stream, length - 4)
    if length:
        raise error.Abort(_(b"invalid chunk length %d") % length)
    return b""
54 54
55 55
def chunkheader(length):
    """Return the 4-byte big-endian framing header for a *length*-byte payload."""
    return struct.pack(b">l", 4 + length)
59 59
60 60
def closechunk():
    """Return the header of a zero-length chunk, used as a group terminator."""
    # struct.pack(b">l", 0) is a constant four zero bytes; spell it out.
    return b"\x00\x00\x00\x00"
64 64
65 65
66 66 def _fileheader(path):
67 67 """Obtain a changegroup chunk header for a named path."""
68 68 return chunkheader(len(path)) + path
69 69
70 70
def writechunks(ui, chunks, filename, vfs=None):
    """Write chunks to a file and return its filename.

    The stream is assumed to be a bundle file.
    Existing files will not be overwritten.
    If no filename is specified, a temporary file is created.
    """
    out = None
    pending_unlink = None
    try:
        if filename:
            if vfs:
                out = vfs.open(filename, b"wb")
            else:
                # Bump the buffer size: the platform default (commonly
                # 4k on Linux) is small for bundle-sized writes.
                out = open(filename, b"wb", 131072)
        else:
            fd, filename = pycompat.mkstemp(prefix=b"hg-bundle-", suffix=b".hg")
            out = os.fdopen(fd, "wb")
        pending_unlink = filename
        for chunk in chunks:
            out.write(chunk)
        # Every chunk was written; keep the file.
        pending_unlink = None
        return filename
    finally:
        if out is not None:
            out.close()
        if pending_unlink is not None:
            # A partial file was left behind by an error; remove it.
            if filename and vfs:
                vfs.unlink(pending_unlink)
            else:
                os.unlink(pending_unlink)
104 104
105 105
class cg1unpacker(object):
    """Unpacker for cg1 changegroup streams.

    A changegroup unpacker handles the framing of the revision data in
    the wire format. Most consumers will want to use the apply()
    method to add the changes from the changegroup to a repository.

    If you're forwarding a changegroup unmodified to another consumer,
    use getchunks(), which returns an iterator of changegroup
    chunks. This is mostly useful for cases where you need to know the
    data stream has ended by observing the end of the changegroup.

    deltachunk() is useful only if you're applying delta data. Most
    consumers should prefer apply() instead.

    A few other public methods exist. Those are used only for
    bundlerepo and some debug commands - their use is discouraged.
    """

    # Wire-format parameters for this changegroup version; subclasses
    # (cg2unpacker, cg3unpacker) override these.
    deltaheader = _CHANGEGROUPV1_DELTA_HEADER
    deltaheadersize = deltaheader.size
    version = b'01'
    _grouplistcount = 1  # One list of files after the manifests

    def __init__(self, fh, alg, extras=None):
        # `alg` names the bundle compression; None means uncompressed.
        if alg is None:
            alg = b'UN'
        if alg not in util.compengines.supportedbundletypes:
            raise error.Abort(_(b'unknown stream compression type: %s') % alg)
        if alg == b'BZ':
            alg = b'_truncatedBZ'

        compengine = util.compengines.forbundletype(alg)
        self._stream = compengine.decompressorreader(fh)
        self._type = alg
        self.extras = extras or {}
        # Optional progress hook, invoked once per chunk by _chunklength().
        self.callback = None

    # These methods (compressed, read, seek, tell) all appear to only
    # be used by bundlerepo, but it's a little hard to tell.
    def compressed(self):
        return self._type is not None and self._type != b'UN'

    def read(self, l):
        return self._stream.read(l)

    def seek(self, pos):
        return self._stream.seek(pos)

    def tell(self):
        return self._stream.tell()

    def close(self):
        return self._stream.close()

    def _chunklength(self):
        """Read a chunk framing header and return the payload length.

        Returns 0 for a terminating (zero-length) chunk and aborts on a
        nonsensical advertised length.
        """
        d = readexactly(self._stream, 4)
        l = struct.unpack(b">l", d)[0]
        if l <= 4:
            if l:
                raise error.Abort(_(b"invalid chunk length %d") % l)
            return 0
        if self.callback:
            self.callback()
        # The advertised length includes the 4-byte header itself.
        return l - 4

    def changelogheader(self):
        """v10 does not have a changelog header chunk"""
        return {}

    def manifestheader(self):
        """v10 does not have a manifest header chunk"""
        return {}

    def filelogheader(self):
        """return the header of the filelogs chunk, v10 only has the filename"""
        l = self._chunklength()
        if not l:
            return {}
        fname = readexactly(self._stream, l)
        return {b'filename': fname}

    def _deltaheader(self, headertuple, prevnode):
        # cg1 carries no explicit delta base: a delta applies against p1,
        # or against the previously sent revision within a group.
        node, p1, p2, cs = headertuple
        if prevnode is None:
            deltabase = p1
        else:
            deltabase = prevnode
        flags = 0
        return node, p1, p2, deltabase, cs, flags

    def deltachunk(self, prevnode):
        """Read one delta chunk; returns {} at the end of a group."""
        l = self._chunklength()
        if not l:
            return {}
        headerdata = readexactly(self._stream, self.deltaheadersize)
        header = self.deltaheader.unpack(headerdata)
        delta = readexactly(self._stream, l - self.deltaheadersize)
        node, p1, p2, deltabase, cs, flags = self._deltaheader(header, prevnode)
        return (node, p1, p2, cs, deltabase, delta, flags)

    def getchunks(self):
        """returns all the chunks contained in the bundle

        Used when you need to forward the binary stream to a file or another
        network API. To do so, it parses the changegroup data; otherwise it
        would block in case of sshrepo because it doesn't know the end of the
        stream.
        """
        # For changegroup 1 and 2, we expect 3 parts: changelog, manifestlog,
        # and a list of filelogs. For changegroup 3, we expect 4 parts:
        # changelog, manifestlog, a list of tree manifestlogs, and a list of
        # filelogs.
        #
        # Changelog and manifestlog parts are terminated with empty chunks. The
        # tree and file parts are a list of entry sections. Each entry section
        # is a series of chunks terminating in an empty chunk. The list of these
        # entry sections is terminated in yet another empty chunk, so we know
        # we've reached the end of the tree/file list when we reach an empty
        # chunk that was preceded by no non-empty chunks.

        parts = 0
        while parts < 2 + self._grouplistcount:
            noentries = True
            while True:
                chunk = getchunk(self)
                if not chunk:
                    # The first two empty chunks represent the end of the
                    # changelog and the manifestlog portions. The remaining
                    # empty chunks represent either A) the end of individual
                    # tree or file entries in the file list, or B) the end of
                    # the entire list. It's the end of the entire list if there
                    # were no entries (i.e. noentries is True).
                    if parts < 2:
                        parts += 1
                    elif noentries:
                        parts += 1
                    break
                noentries = False
                yield chunkheader(len(chunk))
                pos = 0
                while pos < len(chunk):
                    next = pos + 2 ** 20
                    yield chunk[pos:next]
                    pos = next
            yield closechunk()

    def _unpackmanifests(self, repo, revmap, trp, prog):
        """Apply the manifest portion of the stream to the repository."""
        self.callback = prog.increment
        # no need to check for empty manifest group here:
        # if the result of the merge of 1 and 2 is the same in 3 and 4,
        # no new manifest will be created and the manifest group will
        # be empty during the pull
        self.manifestheader()
        deltas = self.deltaiter()
        repo.manifestlog.getstorage(b'').addgroup(deltas, revmap, trp)
        prog.complete()
        self.callback = None

    def apply(
        self,
        repo,
        tr,
        srctype,
        url,
        targetphase=phases.draft,
        expectedtotal=None,
    ):
        """Add the changegroup returned by source.read() to this repo.
        srctype is a string like 'push', 'pull', or 'unbundle'. url is
        the URL of the repo where this changegroup is coming from.

        Return an integer summarizing the change to this repo:
        - nothing changed or no source: 0
        - more heads than before: 1+added heads (2..n)
        - fewer heads than before: -1-removed heads (-2..-n)
        - number of heads stays the same: 1
        """
        repo = repo.unfiltered()

        def csmap(x):
            repo.ui.debug(b"add changeset %s\n" % short(x))
            return len(cl)

        def revmap(x):
            return cl.rev(x)

        try:
            # The transaction may already carry source information. In this
            # case we use the top level data. We overwrite the argument
            # because we need to use the top level value (if they exist)
            # in this function.
            srctype = tr.hookargs.setdefault(b'source', srctype)
            tr.hookargs.setdefault(b'url', url)
            repo.hook(
                b'prechangegroup', throw=True, **pycompat.strkwargs(tr.hookargs)
            )

            # write changelog data to temp files so concurrent readers
            # will not see an inconsistent view
            cl = repo.changelog
            cl.delayupdate(tr)
            oldheads = set(cl.heads())

            trp = weakref.proxy(tr)
            # pull off the changeset group
            repo.ui.status(_(b"adding changesets\n"))
            clstart = len(cl)
            progress = repo.ui.makeprogress(
                _(b'changesets'), unit=_(b'chunks'), total=expectedtotal
            )
            self.callback = progress.increment

            efilesset = set()
            # Revision numbers of changesets the changegroup carried that
            # already existed locally (reported via duplicaterevisioncb).
            duprevs = []

            def ondupchangelog(cl, rev):
                if rev < clstart:
                    duprevs.append(rev)

            def onchangelog(cl, rev):
                ctx = cl.changelogrevision(rev)
                efilesset.update(ctx.files)
                repo.register_changeset(rev, ctx)

            self.changelogheader()
            deltas = self.deltaiter()
            if not cl.addgroup(
                deltas,
                csmap,
                trp,
                alwayscache=True,
                addrevisioncb=onchangelog,
                duplicaterevisioncb=ondupchangelog,
            ):
                repo.ui.develwarn(
                    b'applied empty changelog from changegroup',
                    config=b'warn-empty-changegroup',
                )
            efiles = len(efilesset)
            clend = len(cl)
            changesets = clend - clstart
            progress.complete()
            del deltas
            # TODO Python 2.7 removal
            # del efilesset
            efilesset = None
            self.callback = None

            # pull off the manifest group
            repo.ui.status(_(b"adding manifests\n"))
            # We know that we'll never have more manifests than we had
            # changesets.
            progress = repo.ui.makeprogress(
                _(b'manifests'), unit=_(b'chunks'), total=changesets
            )
            self._unpackmanifests(repo, revmap, trp, progress)

            needfiles = {}
            if repo.ui.configbool(b'server', b'validate'):
                cl = repo.changelog
                ml = repo.manifestlog
                # validate incoming csets have their manifests
                for cset in pycompat.xrange(clstart, clend):
                    mfnode = cl.changelogrevision(cset).manifest
                    mfest = ml[mfnode].readdelta()
                    # store file nodes we must see
                    for f, n in pycompat.iteritems(mfest):
                        needfiles.setdefault(f, set()).add(n)

            # process the files
            repo.ui.status(_(b"adding file changes\n"))
            newrevs, newfiles = _addchangegroupfiles(
                repo, self, revmap, trp, efiles, needfiles
            )

            # making sure the value exists
            tr.changes.setdefault(b'changegroup-count-changesets', 0)
            tr.changes.setdefault(b'changegroup-count-revisions', 0)
            tr.changes.setdefault(b'changegroup-count-files', 0)
            tr.changes.setdefault(b'changegroup-count-heads', 0)

            # some code use bundle operation for internal purpose. They usually
            # set `ui.quiet` to do this outside of user sight. Since the report
            # of such operation now happens at the end of the transaction,
            # ui.quiet has no direct effect on the output.
            #
            # To preserve this intent we use an inelegant hack: we fail to
            # report the change if `quiet` is set. We should probably move to
            # something better, but this is a good first step to allow the "end
            # of transaction report" to pass tests.
            if not repo.ui.quiet:
                tr.changes[b'changegroup-count-changesets'] += changesets
                tr.changes[b'changegroup-count-revisions'] += newrevs
                tr.changes[b'changegroup-count-files'] += newfiles

            deltaheads = 0
            if oldheads:
                heads = cl.heads()
                deltaheads += len(heads) - len(oldheads)
                for h in heads:
                    if h not in oldheads and repo[h].closesbranch():
                        deltaheads -= 1

            # see previous comment about checking ui.quiet
            if not repo.ui.quiet:
                tr.changes[b'changegroup-count-heads'] += deltaheads
            repo.invalidatevolatilesets()

            if changesets > 0:
                if b'node' not in tr.hookargs:
                    tr.hookargs[b'node'] = hex(cl.node(clstart))
                    tr.hookargs[b'node_last'] = hex(cl.node(clend - 1))
                    hookargs = dict(tr.hookargs)
                else:
                    hookargs = dict(tr.hookargs)
                    hookargs[b'node'] = hex(cl.node(clstart))
                    hookargs[b'node_last'] = hex(cl.node(clend - 1))
                repo.hook(
                    b'pretxnchangegroup',
                    throw=True,
                    **pycompat.strkwargs(hookargs)
                )

            added = pycompat.xrange(clstart, clend)
            phaseall = None
            if srctype in (b'push', b'serve'):
                # Old servers can not push the boundary themselves.
                # New servers won't push the boundary if changeset already
                # exists locally as secret
                #
                # We should not use added here but the list of all changes in
                # the bundle
                if repo.publishing():
                    targetphase = phaseall = phases.public
                else:
                    # closer target phase computation

                    # Those changesets have been pushed from the
                    # outside, their phases are going to be pushed
                    # alongside. Therefore `targetphase` is
                    # ignored.
                    targetphase = phaseall = phases.draft
                if added:
                    phases.registernew(repo, tr, targetphase, added)
            if phaseall is not None:
                # Move the phase boundary over both the re-sent duplicate
                # revisions and the freshly added ones.
                if duprevs:
                    duprevs.extend(added)
                else:
                    duprevs = added
                phases.advanceboundary(repo, tr, phaseall, [], revs=duprevs)
                duprevs = []

            if changesets > 0:

                def runhooks(unused_success):
                    # These hooks run when the lock releases, not when the
                    # transaction closes. So it's possible for the changelog
                    # to have changed since we last saw it.
                    if clstart >= len(repo):
                        return

                    repo.hook(b"changegroup", **pycompat.strkwargs(hookargs))

                    for rev in added:
                        args = hookargs.copy()
                        args[b'node'] = hex(cl.node(rev))
                        del args[b'node_last']
                        repo.hook(b"incoming", **pycompat.strkwargs(args))

                    newheads = [h for h in repo.heads() if h not in oldheads]
                    repo.ui.log(
                        b"incoming",
                        b"%d incoming changes - new heads: %s\n",
                        len(added),
                        b', '.join([hex(c[:6]) for c in newheads]),
                    )

                tr.addpostclose(
                    b'changegroup-runhooks-%020i' % clstart,
                    lambda tr: repo._afterlock(runhooks),
                )
        finally:
            repo.ui.flush()
        # never return 0 here:
        if deltaheads < 0:
            ret = deltaheads - 1
        else:
            ret = deltaheads + 1
        return ret

    def deltaiter(self):
        """
        returns an iterator of the deltas in this changegroup

        Useful for passing to the underlying storage system to be stored.
        """
        chain = None
        for chunkdata in iter(lambda: self.deltachunk(chain), {}):
            # Chunkdata: (node, p1, p2, cs, deltabase, delta, flags)
            yield chunkdata
            chain = chunkdata[0]
503 507
504 508
class cg2unpacker(cg1unpacker):
    """Unpacker for cg2 streams.

    cg2 adds generaldelta support: each delta header names its delta
    base explicitly, so only the header layout differs from cg1. All
    other aspects of the data remain the same.
    """

    deltaheader = _CHANGEGROUPV2_DELTA_HEADER
    deltaheadersize = deltaheader.size
    version = b'02'

    def _deltaheader(self, headertuple, prevnode):
        # prevnode is irrelevant here: the base comes off the wire.
        node, p1, p2, deltabase, cs = headertuple
        return node, p1, p2, deltabase, cs, 0
521 525
522 526
class cg3unpacker(cg2unpacker):
    """Unpacker for cg3 streams.

    cg3 adds support for exchanging treemanifests and revlog flags: the
    delta header gains a flags field, and an empty chunk separates the
    manifests from the files.
    """

    deltaheader = _CHANGEGROUPV3_DELTA_HEADER
    deltaheadersize = deltaheader.size
    version = b'03'
    _grouplistcount = 2  # One list of manifests and one list of files

    def _deltaheader(self, headertuple, prevnode):
        # All six fields, including flags, come straight off the wire.
        return headertuple

    def _unpackmanifests(self, repo, revmap, trp, prog):
        super(cg3unpacker, self)._unpackmanifests(repo, revmap, trp, prog)
        for chunkdata in iter(self.filelogheader, {}):
            # Any section here (before the files) is a directory manifest.
            dirname = chunkdata[b"filename"]
            repo.ui.debug(b"adding %s revisions\n" % dirname)
            deltas = self.deltaiter()
            storage = repo.manifestlog.getstorage(dirname)
            if not storage.addgroup(deltas, revmap, trp):
                raise error.Abort(_(b"received dir revlog group is empty"))
549 553
550 554
class headerlessfixup(object):
    """File-like wrapper that replays an already-consumed prefix.

    *h* is the byte prefix read from *fh* before this wrapper was
    created; reads drain it first, then fall through to *fh*.
    """

    def __init__(self, fh, h):
        self._h = h
        self._fh = fh

    def read(self, n):
        buffered = self._h
        if not buffered:
            return readexactly(self._fh, n)
        d, self._h = buffered[:n], buffered[n:]
        if len(d) < n:
            # Prefix exhausted mid-read; top up from the real stream.
            d += readexactly(self._fh, n - len(d))
        return d
563 567
564 568
def _revisiondeltatochunks(delta, headerfn):
    """Serialize a revisiondelta to changegroup chunks."""

    # The wire protocol carries deltas only. A captured full revision is
    # therefore encoded as a synthetic delta whose header says to rewrite
    # the entire base text.
    if delta.delta is not None:
        prefix = b''
        data = delta.delta
    elif delta.basenode == nullid:
        data = delta.revision
        prefix = mdiff.trivialdiffheader(len(data))
    else:
        data = delta.revision
        prefix = mdiff.replacediffheader(delta.baserevisionsize, len(data))

    meta = headerfn(delta)

    total = len(meta) + len(prefix) + len(data)
    yield chunkheader(total)
    yield meta
    if prefix:
        yield prefix
    yield data
590 594
591 595
592 596 def _sortnodesellipsis(store, nodes, cl, lookup):
593 597 """Sort nodes for changegroup generation."""
594 598 # Ellipses serving mode.
595 599 #
596 600 # In a perfect world, we'd generate better ellipsis-ified graphs
597 601 # for non-changelog revlogs. In practice, we haven't started doing
598 602 # that yet, so the resulting DAGs for the manifestlog and filelogs
599 603 # are actually full of bogus parentage on all the ellipsis
600 604 # nodes. This has the side effect that, while the contents are
601 605 # correct, the individual DAGs might be completely out of whack in
602 606 # a case like 882681bc3166 and its ancestors (back about 10
603 607 # revisions or so) in the main hg repo.
604 608 #
605 609 # The one invariant we *know* holds is that the new (potentially
606 610 # bogus) DAG shape will be valid if we order the nodes in the
607 611 # order that they're introduced in dramatis personae by the
608 612 # changelog, so what we do is we sort the non-changelog histories
609 613 # by the order in which they are used by the changelog.
610 614 key = lambda n: cl.rev(lookup(n))
611 615 return sorted(nodes, key=key)
612 616
613 617
def _resolvenarrowrevisioninfo(
    cl,
    store,
    ischangelog,
    rev,
    linkrev,
    linknode,
    clrevtolocalrev,
    fullclnodes,
    precomputedellipsis,
):
    """Compute adjusted parents and linknode for an ellipsis revision.

    Returns a ``(p1node, p2node, linknode)`` tuple where the parents are
    remapped onto revisions actually present in the ellipsis-ized graph.
    """
    linkparents = precomputedellipsis[linkrev]

    def local(clrev):
        """Turn a changelog revnum into a local revnum.

        The ellipsis dag is stored as revnums on the changelog,
        but when we're producing ellipsis entries for
        non-changelog revlogs, we need to turn those numbers into
        something local. This does that for us, and during the
        changelog sending phase will also expand the stored
        mappings as needed.
        """
        if clrev == nullrev:
            return nullrev

        if ischangelog:
            return clrev

        # Walk the ellipsis-ized changelog breadth-first looking for a
        # change that has been linked from the current revlog.
        #
        # For a flat manifest revlog only a single step should be necessary
        # as all relevant changelog entries are relevant to the flat
        # manifest.
        #
        # For a filelog or tree manifest dirlog however not every changelog
        # entry will have been relevant, so we need to skip some changelog
        # nodes even after ellipsis-izing.
        walk = [clrev]
        while walk:
            p = walk[0]
            walk = walk[1:]
            if p in clrevtolocalrev:
                return clrevtolocalrev[p]
            elif p in fullclnodes:
                walk.extend([pp for pp in cl.parentrevs(p) if pp != nullrev])
            elif p in precomputedellipsis:
                walk.extend(
                    [pp for pp in precomputedellipsis[p] if pp != nullrev]
                )
            else:
                # In this case, we've got an ellipsis with parents
                # outside the current bundle (likely an
                # incremental pull). We "know" that we can use the
                # value of this same revlog at whatever revision
                # is pointed to by linknode. "Know" is in scare
                # quotes because I haven't done enough examination
                # of edge cases to convince myself this is really
                # a fact - it works for all the (admittedly
                # thorough) cases in our testsuite, but I would be
                # somewhat unsurprised to find a case in the wild
                # where this breaks down a bit. That said, I don't
                # know if it would hurt anything.
                for i in pycompat.xrange(rev, 0, -1):
                    if store.linkrev(i) == clrev:
                        return i
                # We failed to resolve a parent for this node, so
                # we crash the changegroup construction.
                raise error.Abort(
                    b'unable to resolve parent while packing %r %r'
                    b' for changeset %r' % (store.indexfile, rev, clrev)
                )

        return nullrev

    if not linkparents or (store.parentrevs(rev) == (nullrev, nullrev)):
        p1, p2 = nullrev, nullrev
    elif len(linkparents) == 1:
        (p1,) = sorted(local(p) for p in linkparents)
        p2 = nullrev
    else:
        p1, p2 = sorted(local(p) for p in linkparents)

    p1node, p2node = store.node(p1), store.node(p2)

    return p1node, p2node, linknode
701 705
702 706
def deltagroup(
    repo,
    store,
    nodes,
    ischangelog,
    lookup,
    forcedeltaparentprev,
    topic=None,
    ellipses=False,
    clrevtolocalrev=None,
    fullclnodes=None,
    precomputedellipsis=None,
):
    """Calculate deltas for a set of revisions.

    Is a generator of ``revisiondelta`` instances.

    If topic is not None, progress detail will be generated using this
    topic name (e.g. changesets, manifests, etc).
    """
    if not nodes:
        return

    cl = repo.changelog

    if ischangelog:
        # `hg log` shows changesets in storage order. To preserve order
        # across clones, send out changesets in storage order.
        nodesorder = b'storage'
    elif ellipses:
        nodes = _sortnodesellipsis(store, nodes, cl, lookup)
        nodesorder = b'nodes'
    else:
        nodesorder = None

    # Perform ellipses filtering and revision massaging. We do this before
    # emitrevisions() because a) filtering out revisions creates less work
    # for emitrevisions() b) dropping revisions would break emitrevisions()'s
    # assumptions about delta choices and we would possibly send a delta
    # referencing a missing base revision.
    #
    # Also, calling lookup() has side-effects with regards to populating
    # data structures. If we don't call lookup() for each node or if we call
    # lookup() after the first pass through each node, things can break -
    # possibly intermittently depending on the python hash seed! For that
    # reason, we store a mapping of all linknodes during the initial node
    # pass rather than use lookup() on the output side.
    if ellipses:
        filtered = []
        adjustedparents = {}
        linknodes = {}

        for node in nodes:
            rev = store.rev(node)
            linknode = lookup(node)
            linkrev = cl.rev(linknode)
            clrevtolocalrev[linkrev] = rev

            # If linknode is in fullclnodes, it means the corresponding
            # changeset was a full changeset and is being sent unaltered.
            if linknode in fullclnodes:
                linknodes[node] = linknode

            # If the corresponding changeset wasn't in the set computed
            # as relevant to us, it should be dropped outright.
            elif linkrev not in precomputedellipsis:
                continue

            else:
                # We could probably do this later and avoid the dict
                # holding state. But it likely doesn't matter.
                p1node, p2node, linknode = _resolvenarrowrevisioninfo(
                    cl,
                    store,
                    ischangelog,
                    rev,
                    linkrev,
                    linknode,
                    clrevtolocalrev,
                    fullclnodes,
                    precomputedellipsis,
                )

                adjustedparents[node] = (p1node, p2node)
                linknodes[node] = linknode

            filtered.append(node)

        nodes = filtered

    # We expect the first pass to be fast, so we only engage the progress
    # meter for constructing the revision deltas.
    progress = None
    if topic is not None:
        progress = repo.ui.makeprogress(
            topic, unit=_(b'chunks'), total=len(nodes)
        )

    configtarget = repo.ui.config(b'devel', b'bundle.delta')
    if configtarget not in (b'', b'p1', b'full'):
        msg = _("""config "devel.bundle.delta" as unknown value: %s""")
        repo.ui.warn(msg % configtarget)

    # Map the config/caller intent onto an emitrevisions() delta mode.
    deltamode = repository.CG_DELTAMODE_STD
    if forcedeltaparentprev:
        deltamode = repository.CG_DELTAMODE_PREV
    elif configtarget == b'p1':
        deltamode = repository.CG_DELTAMODE_P1
    elif configtarget == b'full':
        deltamode = repository.CG_DELTAMODE_FULL

    revisions = store.emitrevisions(
        nodes,
        nodesorder=nodesorder,
        revisiondata=True,
        assumehaveparentrevisions=not ellipses,
        deltamode=deltamode,
    )

    for i, revision in enumerate(revisions):
        if progress:
            progress.update(i + 1)

        if ellipses:
            linknode = linknodes[revision.node]

            if revision.node in adjustedparents:
                p1node, p2node = adjustedparents[revision.node]
                revision.p1node = p1node
                revision.p2node = p2node
                revision.flags |= repository.REVISION_FLAG_ELLIPSIS

        else:
            linknode = lookup(revision.node)

        revision.linknode = linknode
        yield revision

    if progress:
        progress.complete()
843 847
844 848
845 849 class cgpacker(object):
846 850 def __init__(
847 851 self,
848 852 repo,
849 853 oldmatcher,
850 854 matcher,
851 855 version,
852 856 builddeltaheader,
853 857 manifestsend,
854 858 forcedeltaparentprev=False,
855 859 bundlecaps=None,
856 860 ellipses=False,
857 861 shallow=False,
858 862 ellipsisroots=None,
859 863 fullnodes=None,
860 864 ):
861 865 """Given a source repo, construct a bundler.
862 866
863 867 oldmatcher is a matcher that matches on files the client already has.
864 868 These will not be included in the changegroup.
865 869
866 870 matcher is a matcher that matches on files to include in the
867 871 changegroup. Used to facilitate sparse changegroups.
868 872
869 873 forcedeltaparentprev indicates whether delta parents must be against
870 874 the previous revision in a delta group. This should only be used for
871 875 compatibility with changegroup version 1.
872 876
873 877 builddeltaheader is a callable that constructs the header for a group
874 878 delta.
875 879
876 880 manifestsend is a chunk to send after manifests have been fully emitted.
877 881
878 882 ellipses indicates whether ellipsis serving mode is enabled.
879 883
880 884 bundlecaps is optional and can be used to specify the set of
881 885 capabilities which can be used to build the bundle. While bundlecaps is
882 886 unused in core Mercurial, extensions rely on this feature to communicate
883 887 capabilities to customize the changegroup packer.
884 888
885 889 shallow indicates whether shallow data might be sent. The packer may
886 890 need to pack file contents not introduced by the changes being packed.
887 891
888 892 fullnodes is the set of changelog nodes which should not be ellipsis
889 893 nodes. We store this rather than the set of nodes that should be
890 894 ellipsis because for very large histories we expect this to be
891 895 significantly smaller.
892 896 """
893 897 assert oldmatcher
894 898 assert matcher
895 899 self._oldmatcher = oldmatcher
896 900 self._matcher = matcher
897 901
898 902 self.version = version
899 903 self._forcedeltaparentprev = forcedeltaparentprev
900 904 self._builddeltaheader = builddeltaheader
901 905 self._manifestsend = manifestsend
902 906 self._ellipses = ellipses
903 907
904 908 # Set of capabilities we can use to build the bundle.
905 909 if bundlecaps is None:
906 910 bundlecaps = set()
907 911 self._bundlecaps = bundlecaps
908 912 self._isshallow = shallow
909 913 self._fullclnodes = fullnodes
910 914
911 915 # Maps ellipsis revs to their roots at the changelog level.
912 916 self._precomputedellipsis = ellipsisroots
913 917
914 918 self._repo = repo
915 919
916 920 if self._repo.ui.verbose and not self._repo.ui.debugflag:
917 921 self._verbosenote = self._repo.ui.note
918 922 else:
919 923 self._verbosenote = lambda s: None
920 924
    def generate(
        self, commonrevs, clnodes, fastpathlinkrev, source, changelog=True
    ):
        """Yield a sequence of changegroup byte chunks.

        If changelog is False, changelog data won't be added to changegroup.

        ``clnodes`` is the list of changelog nodes to send and ``commonrevs``
        is used to filter out manifest/file revisions the client is assumed
        to already have.  The emitted stream is: changelog group, manifest
        group(s), then one group per changed file, each group terminated by
        an empty chunk.
        """

        repo = self._repo
        cl = repo.changelog

        self._verbosenote(_(b'uncompressed size of bundle content:\n'))
        size = 0

        clstate, deltas = self._generatechangelog(
            cl, clnodes, generate=changelog
        )
        for delta in deltas:
            for chunk in _revisiondeltatochunks(delta, self._builddeltaheader):
                size += len(chunk)
                yield chunk

        # An empty chunk terminates the changelog group.
        close = closechunk()
        size += len(close)
        yield closechunk()

        self._verbosenote(_(b'%8.i (changelog)\n') % size)

        # clstate is only fully populated once the changelog delta stream
        # above has been consumed (lookupcl side effects).
        clrevorder = clstate[b'clrevorder']
        manifests = clstate[b'manifests']
        changedfiles = clstate[b'changedfiles']

        # We need to make sure that the linkrev in the changegroup refers to
        # the first changeset that introduced the manifest or file revision.
        # The fastpath is usually safer than the slowpath, because the filelogs
        # are walked in revlog order.
        #
        # When taking the slowpath when the manifest revlog uses generaldelta,
        # the manifest may be walked in the "wrong" order. Without 'clrevorder',
        # we would get an incorrect linkrev (see fix in cc0ff93d0c0c).
        #
        # When taking the fastpath, we are only vulnerable to reordering
        # of the changelog itself. The changelog never uses generaldelta and is
        # never reordered. To handle this case, we simply take the slowpath,
        # which already has the 'clrevorder' logic. This was also fixed in
        # cc0ff93d0c0c.

        # Treemanifests don't work correctly with fastpathlinkrev
        # either, because we don't discover which directory nodes to
        # send along with files. This could probably be fixed.
        fastpathlinkrev = fastpathlinkrev and not scmutil.istreemanifest(repo)

        fnodes = {}  # needed file nodes

        size = 0
        it = self.generatemanifests(
            commonrevs,
            clrevorder,
            fastpathlinkrev,
            manifests,
            fnodes,
            source,
            clstate[b'clrevtomanifestrev'],
        )

        for tree, deltas in it:
            if tree:
                # Only changegroup version 03 knows how to transfer
                # subdirectory (tree) manifests.
                assert self.version == b'03'
                chunk = _fileheader(tree)
                size += len(chunk)
                yield chunk

            for delta in deltas:
                chunks = _revisiondeltatochunks(delta, self._builddeltaheader)
                for chunk in chunks:
                    size += len(chunk)
                    yield chunk

            close = closechunk()
            size += len(close)
            yield close

        self._verbosenote(_(b'%8.i (manifests)\n') % size)
        yield self._manifestsend

        mfdicts = None
        if self._ellipses and self._isshallow:
            # Shallow + ellipsis serving needs the full manifest contents to
            # find file nodes not introduced by the sent manifests.
            mfdicts = [
                (self._repo.manifestlog[n].read(), lr)
                for (n, lr) in pycompat.iteritems(manifests)
            ]

        manifests.clear()
        clrevs = {cl.rev(x) for x in clnodes}

        it = self.generatefiles(
            changedfiles,
            commonrevs,
            source,
            mfdicts,
            fastpathlinkrev,
            fnodes,
            clrevs,
        )

        for path, deltas in it:
            h = _fileheader(path)
            # Size is reported per file, so reset the accumulator here.
            size = len(h)
            yield h

            for delta in deltas:
                chunks = _revisiondeltatochunks(delta, self._builddeltaheader)
                for chunk in chunks:
                    size += len(chunk)
                    yield chunk

            close = closechunk()
            size += len(close)
            yield close

            self._verbosenote(_(b'%8.i %s\n') % (size, path))

        # Final empty chunk marks the end of the whole changegroup.
        yield closechunk()

        if clnodes:
            repo.hook(b'outgoing', node=hex(clnodes[0]), source=source)
1046 1050
    def _generatechangelog(self, cl, nodes, generate=True):
        """Generate data for changelog chunks.

        Returns a 2-tuple of a dict containing state and an iterable of
        byte chunks. The state will not be fully populated until the
        chunk stream has been fully consumed.

        if generate is False, the state will be fully populated and no chunk
        stream will be yielded
        """
        clrevorder = {}
        manifests = {}
        mfl = self._repo.manifestlog
        changedfiles = set()
        clrevtomanifestrev = {}

        # Shared mutable state; filled in either eagerly (generate=False)
        # or lazily via the lookupcl callback below.
        state = {
            b'clrevorder': clrevorder,
            b'manifests': manifests,
            b'changedfiles': changedfiles,
            b'clrevtomanifestrev': clrevtomanifestrev,
        }

        if not (generate or self._ellipses):
            # No chunk stream requested: populate the state eagerly and
            # return an empty iterable of chunks.
            # sort the nodes in storage order
            nodes = sorted(nodes, key=cl.rev)
            for node in nodes:
                c = cl.changelogrevision(node)
                clrevorder[node] = len(clrevorder)
                # record the first changeset introducing this manifest version
                manifests.setdefault(c.manifest, node)
                # Record a complete list of potentially-changed files in
                # this manifest.
                changedfiles.update(c.files)

            return state, ()

        # Callback for the changelog, used to collect changed files and
        # manifest nodes.
        # Returns the linkrev node (identity in the changelog case).
        def lookupcl(x):
            c = cl.changelogrevision(x)
            clrevorder[x] = len(clrevorder)

            if self._ellipses:
                # Only update manifests if x is going to be sent. Otherwise we
                # end up with bogus linkrevs specified for manifests and
                # we skip some manifest nodes that we should otherwise
                # have sent.
                if (
                    x in self._fullclnodes
                    or cl.rev(x) in self._precomputedellipsis
                ):

                    manifestnode = c.manifest
                    # Record the first changeset introducing this manifest
                    # version.
                    manifests.setdefault(manifestnode, x)
                    # Set this narrow-specific dict so we have the lowest
                    # manifest revnum to look up for this cl revnum. (Part of
                    # mapping changelog ellipsis parents to manifest ellipsis
                    # parents)
                    clrevtomanifestrev.setdefault(
                        cl.rev(x), mfl.rev(manifestnode)
                    )
                # We can't trust the changed files list in the changeset if the
                # client requested a shallow clone.
                if self._isshallow:
                    changedfiles.update(mfl[c.manifest].read().keys())
                else:
                    changedfiles.update(c.files)
            else:
                # record the first changeset introducing this manifest version
                manifests.setdefault(c.manifest, x)
                # Record a complete list of potentially-changed files in
                # this manifest.
                changedfiles.update(c.files)

            return x

        gen = deltagroup(
            self._repo,
            cl,
            nodes,
            True,
            lookupcl,
            self._forcedeltaparentprev,
            ellipses=self._ellipses,
            topic=_(b'changesets'),
            clrevtolocalrev={},
            fullclnodes=self._fullclnodes,
            precomputedellipsis=self._precomputedellipsis,
        )

        return state, gen
1142 1146
    def generatemanifests(
        self,
        commonrevs,
        clrevorder,
        fastpathlinkrev,
        manifests,
        fnodes,
        source,
        clrevtolocalrev,
    ):
        """Returns an iterator of changegroup chunks containing manifests.

        `source` is unused here, but is used by extensions like remotefilelog
        to change what is sent based on pulls vs pushes, etc.

        Yields (tree, deltas) pairs, starting with the root manifest
        (tree == b'') and then any subdirectory manifests discovered while
        walking it.  As a side effect, `fnodes` is populated with the file
        nodes to send later (see lookupmflinknode).
        """
        repo = self._repo
        mfl = repo.manifestlog
        # Work queue of tree path -> {manifest node: linkrev node}; new
        # subtrees are added while the root entries are processed.
        tmfnodes = {b'': manifests}

        # Callback for the manifest, used to collect linkrevs for filelog
        # revisions.
        # Returns the linkrev node (collected in lookupcl).
        def makelookupmflinknode(tree, nodes):
            if fastpathlinkrev:
                assert not tree
                return (
                    manifests.__getitem__
                ) # pytype: disable=unsupported-operands

            def lookupmflinknode(x):
                """Callback for looking up the linknode for manifests.

                Returns the linkrev node for the specified manifest.

                SIDE EFFECT:

                1) fclnodes gets populated with the list of relevant
                file nodes if we're not using fastpathlinkrev
                2) When treemanifests are in use, collects treemanifest nodes
                to send

                Note that this means manifests must be completely sent to
                the client before you can trust the list of files and
                treemanifests to send.
                """
                clnode = nodes[x]
                mdata = mfl.get(tree, x).readfast(shallow=True)
                for p, n, fl in mdata.iterentries():
                    if fl == b't':  # subdirectory manifest
                        subtree = tree + p + b'/'
                        tmfclnodes = tmfnodes.setdefault(subtree, {})
                        tmfclnode = tmfclnodes.setdefault(n, clnode)
                        if clrevorder[clnode] < clrevorder[tmfclnode]:
                            tmfclnodes[n] = clnode
                    else:
                        f = tree + p
                        fclnodes = fnodes.setdefault(f, {})
                        fclnode = fclnodes.setdefault(n, clnode)
                        if clrevorder[clnode] < clrevorder[fclnode]:
                            fclnodes[n] = clnode
                return clnode

            return lookupmflinknode

        while tmfnodes:
            tree, nodes = tmfnodes.popitem()

            should_visit = self._matcher.visitdir(tree[:-1])
            if tree and not should_visit:
                continue

            store = mfl.getstorage(tree)

            if not should_visit:
                # No nodes to send because this directory is out of
                # the client's view of the repository (probably
                # because of narrow clones). Do this even for the root
                # directory (tree=='')
                prunednodes = []
            else:
                # Avoid sending any manifest nodes we can prove the
                # client already has by checking linkrevs. See the
                # related comment in generatefiles().
                prunednodes = self._prunemanifests(store, nodes, commonrevs)

            if tree and not prunednodes:
                continue

            lookupfn = makelookupmflinknode(tree, nodes)

            deltas = deltagroup(
                self._repo,
                store,
                prunednodes,
                False,
                lookupfn,
                self._forcedeltaparentprev,
                ellipses=self._ellipses,
                topic=_(b'manifests'),
                clrevtolocalrev=clrevtolocalrev,
                fullclnodes=self._fullclnodes,
                precomputedellipsis=self._precomputedellipsis,
            )

            if not self._oldmatcher.visitdir(store.tree[:-1]):
                yield tree, deltas
            else:
                # 'deltas' is a generator and we need to consume it even if
                # we are not going to send it because a side-effect is that
                # it updates tmfnodes (via lookupfn)
                for d in deltas:
                    pass
                if not tree:
                    yield tree, []
1257 1261
1258 1262 def _prunemanifests(self, store, nodes, commonrevs):
1259 1263 if not self._ellipses:
1260 1264 # In non-ellipses case and large repositories, it is better to
1261 1265 # prevent calling of store.rev and store.linkrev on a lot of
1262 1266 # nodes as compared to sending some extra data
1263 1267 return nodes.copy()
1264 1268 # This is split out as a separate method to allow filtering
1265 1269 # commonrevs in extension code.
1266 1270 #
1267 1271 # TODO(augie): this shouldn't be required, instead we should
1268 1272 # make filtering of revisions to send delegated to the store
1269 1273 # layer.
1270 1274 frev, flr = store.rev, store.linkrev
1271 1275 return [n for n in nodes if flr(frev(n)) not in commonrevs]
1272 1276
    # The 'source' parameter is useful for extensions
    def generatefiles(
        self,
        changedfiles,
        commonrevs,
        source,
        mfdicts,
        fastpathlinkrev,
        fnodes,
        clrevs,
    ):
        """Yield (path, deltas) pairs for the filelogs of ``changedfiles``.

        Files the client already fully has (per ``self._oldmatcher``) or
        that fall outside ``self._matcher`` are skipped, as are individual
        file nodes whose linkrevs are in ``commonrevs``.  ``fnodes`` is the
        file-node mapping populated while generating manifests; ``mfdicts``
        is only set for shallow + ellipsis serving.
        """
        changedfiles = [
            f
            for f in changedfiles
            if self._matcher(f) and not self._oldmatcher(f)
        ]

        if not fastpathlinkrev:

            def normallinknodes(unused, fname):
                return fnodes.get(fname, {})

        else:
            cln = self._repo.changelog.node

            def normallinknodes(store, fname):
                # Walk the filelog directly and keep only revisions whose
                # linkrev is among the changesets being sent.
                flinkrev = store.linkrev
                fnode = store.node
                revs = ((r, flinkrev(r)) for r in store)
                return {fnode(r): cln(lr) for r, lr in revs if lr in clrevs}

        clrevtolocalrev = {}

        if self._isshallow:
            # In a shallow clone, the linknodes callback needs to also include
            # those file nodes that are in the manifests we sent but weren't
            # introduced by those manifests.
            commonctxs = [self._repo[c] for c in commonrevs]
            clrev = self._repo.changelog.rev

            def linknodes(flog, fname):
                for c in commonctxs:
                    try:
                        fnode = c.filenode(fname)
                        clrevtolocalrev[c.rev()] = flog.rev(fnode)
                    except error.ManifestLookupError:
                        pass
                links = normallinknodes(flog, fname)
                if len(links) != len(mfdicts):
                    for mf, lr in mfdicts:
                        fnode = mf.get(fname, None)
                        if fnode in links:
                            links[fnode] = min(links[fnode], lr, key=clrev)
                        elif fnode:
                            links[fnode] = lr
                return links

        else:
            linknodes = normallinknodes

        repo = self._repo
        progress = repo.ui.makeprogress(
            _(b'files'), unit=_(b'files'), total=len(changedfiles)
        )
        for i, fname in enumerate(sorted(changedfiles)):
            filerevlog = repo.file(fname)
            if not filerevlog:
                raise error.Abort(
                    _(b"empty or missing file data for %s") % fname
                )

            clrevtolocalrev.clear()

            linkrevnodes = linknodes(filerevlog, fname)
            # Lookup for filenodes, we collected the linkrev nodes above in the
            # fastpath case and with lookupmf in the slowpath case.
            def lookupfilelog(x):
                return linkrevnodes[x]

            frev, flr = filerevlog.rev, filerevlog.linkrev
            # Skip sending any filenode we know the client already
            # has. This avoids over-sending files relatively
            # inexpensively, so it's not a problem if we under-filter
            # here.
            filenodes = [
                n for n in linkrevnodes if flr(frev(n)) not in commonrevs
            ]

            if not filenodes:
                continue

            progress.update(i + 1, item=fname)

            deltas = deltagroup(
                self._repo,
                filerevlog,
                filenodes,
                False,
                lookupfilelog,
                self._forcedeltaparentprev,
                ellipses=self._ellipses,
                clrevtolocalrev=clrevtolocalrev,
                fullclnodes=self._fullclnodes,
                precomputedellipsis=self._precomputedellipsis,
            )

            yield fname, deltas

        progress.complete()
1382 1386
1383 1387
def _makecg1packer(
    repo,
    oldmatcher,
    matcher,
    bundlecaps,
    ellipses=False,
    shallow=False,
    ellipsisroots=None,
    fullnodes=None,
):
    """Construct a changegroup packer for wire format version '01'."""

    def builddeltaheader(d):
        # cg1 headers carry no explicit delta base node, so deltas must be
        # against the previous revision (forcedeltaparentprev below).
        return _CHANGEGROUPV1_DELTA_HEADER.pack(
            d.node, d.p1node, d.p2node, d.linknode
        )

    return cgpacker(
        repo,
        oldmatcher,
        matcher,
        b'01',
        builddeltaheader=builddeltaheader,
        manifestsend=b'',
        forcedeltaparentprev=True,
        bundlecaps=bundlecaps,
        ellipses=ellipses,
        shallow=shallow,
        ellipsisroots=ellipsisroots,
        fullnodes=fullnodes,
    )
1412 1416
1413 1417
def _makecg2packer(
    repo,
    oldmatcher,
    matcher,
    bundlecaps,
    ellipses=False,
    shallow=False,
    ellipsisroots=None,
    fullnodes=None,
):
    """Construct a changegroup packer for wire format version '02'."""

    def builddeltaheader(d):
        # cg2 adds an explicit delta base node to the header.
        return _CHANGEGROUPV2_DELTA_HEADER.pack(
            d.node, d.p1node, d.p2node, d.basenode, d.linknode
        )

    return cgpacker(
        repo,
        oldmatcher,
        matcher,
        b'02',
        builddeltaheader=builddeltaheader,
        manifestsend=b'',
        bundlecaps=bundlecaps,
        ellipses=ellipses,
        shallow=shallow,
        ellipsisroots=ellipsisroots,
        fullnodes=fullnodes,
    )
1441 1445
1442 1446
def _makecg3packer(
    repo,
    oldmatcher,
    matcher,
    bundlecaps,
    ellipses=False,
    shallow=False,
    ellipsisroots=None,
    fullnodes=None,
):
    """Construct a changegroup packer for wire format version '03'."""

    def builddeltaheader(d):
        # cg3 extends cg2 with a flags field in the header.
        return _CHANGEGROUPV3_DELTA_HEADER.pack(
            d.node, d.p1node, d.p2node, d.basenode, d.linknode, d.flags
        )

    return cgpacker(
        repo,
        oldmatcher,
        matcher,
        b'03',
        builddeltaheader=builddeltaheader,
        manifestsend=closechunk(),
        bundlecaps=bundlecaps,
        ellipses=ellipses,
        shallow=shallow,
        ellipsisroots=ellipsisroots,
        fullnodes=fullnodes,
    )
1470 1474
1471 1475
# Maps a changegroup version identifier to a (packer factory, unpacker
# class) pair for that wire format version.
_packermap = {
    b'01': (_makecg1packer, cg1unpacker),
    # cg2 adds support for exchanging generaldelta
    b'02': (_makecg2packer, cg2unpacker),
    # cg3 adds support for exchanging revlog flags and treemanifests
    b'03': (_makecg3packer, cg3unpacker),
}
1479 1483
1480 1484
def allsupportedversions(repo):
    """Return the set of changegroup versions this repo supports at all."""
    versions = set(_packermap.keys())
    needv03 = False
    if (
        repo.ui.configbool(b'experimental', b'changegroup3')
        or repo.ui.configbool(b'experimental', b'treemanifest')
        or scmutil.istreemanifest(repo)
    ):
        # we keep version 03 because we need it to exchange treemanifest data
        #
        # we also keep versions 01 and 02, because it is possible for a repo
        # to contain both normal and tree manifests at the same time. so using
        # an older version to pull data is viable
        #
        # (or even to push subset of history)
        needv03 = True
    if b'exp-sidedata-flag' in repo.requirements:
        needv03 = True
        # don't attempt to use 01/02 until we do sidedata cleaning
        versions.discard(b'01')
        versions.discard(b'02')
    if not needv03:
        versions.discard(b'03')
    return versions
1505 1509
1506 1510
# Changegroup versions that can be applied to the repo
def supportedincomingversions(repo):
    """Return the set of changegroup versions this repo can receive."""
    return allsupportedversions(repo)
1510 1514
1511 1515
# Changegroup versions that can be created from the repo
def supportedoutgoingversions(repo):
    """Return the set of changegroup versions this repo can produce.

    Starts from all supported versions and removes 01/02 for repo features
    (tree manifests, narrow, LFS) that those formats cannot represent.
    """
    versions = allsupportedversions(repo)
    if scmutil.istreemanifest(repo):
        # Versions 01 and 02 support only flat manifests and it's just too
        # expensive to convert between the flat manifest and tree manifest on
        # the fly. Since tree manifests are hashed differently, all of history
        # would have to be converted. Instead, we simply don't even pretend to
        # support versions 01 and 02.
        versions.discard(b'01')
        versions.discard(b'02')
    if requirements.NARROW_REQUIREMENT in repo.requirements:
        # Versions 01 and 02 don't support revlog flags, and we need to
        # support that for stripping and unbundling to work.
        versions.discard(b'01')
        versions.discard(b'02')
    if LFS_REQUIREMENT in repo.requirements:
        # Versions 01 and 02 don't support revlog flags, and we need to
        # mark LFS entries with REVIDX_EXTSTORED.
        versions.discard(b'01')
        versions.discard(b'02')

    return versions
1535 1539
1536 1540
def localversion(repo):
    """Return the best changegroup version for purely local bundles.

    Local bundles are those meant to be used in place, such as the ones
    created by strip and shelve, and temporary bundles.
    """
    versions = supportedoutgoingversions(repo)
    return max(versions)
1541 1545
1542 1546
def safeversion(repo):
    """Return the smallest changegroup version all clients of this repo
    can be assumed to support.

    For example, every hg version that supports generaldelta also supports
    changegroup 02, so 01 can be dropped in that case.
    """
    candidates = supportedoutgoingversions(repo)
    if b'generaldelta' in repo.requirements:
        candidates.discard(b'01')
    assert candidates
    return min(candidates)
1552 1556
1553 1557
def getbundler(
    version,
    repo,
    bundlecaps=None,
    oldmatcher=None,
    matcher=None,
    ellipses=False,
    shallow=False,
    ellipsisroots=None,
    fullnodes=None,
):
    """Instantiate the changegroup packer for ``version``.

    ``matcher`` selects the files to include (defaults to everything) and
    ``oldmatcher`` matches files the client already has (defaults to
    nothing).  Raises ProgrammingError if a sparse matcher is used with
    version 01, and Abort if ellipsis serving is requested with a version
    older than 03.
    """
    assert version in supportedoutgoingversions(repo)

    if matcher is None:
        matcher = matchmod.always()
    if oldmatcher is None:
        oldmatcher = matchmod.never()

    if version == b'01' and not matcher.always():
        raise error.ProgrammingError(
            b'version 01 changegroups do not support sparse file matchers'
        )

    if ellipses and version in (b'01', b'02'):
        raise error.Abort(
            _(
                b'ellipsis nodes require at least cg3 on client and server, '
                b'but negotiated version %s'
            )
            % version
        )

    # Requested files could include files not in the local store. So
    # filter those out.
    matcher = repo.narrowmatch(matcher)

    fn = _packermap[version][0]
    return fn(
        repo,
        oldmatcher,
        matcher,
        bundlecaps,
        ellipses=ellipses,
        shallow=shallow,
        ellipsisroots=ellipsisroots,
        fullnodes=fullnodes,
    )
1601 1605
1602 1606
def getunbundler(version, fh, alg, extras=None):
    """Instantiate the changegroup unpacker for ``version`` over ``fh``."""
    unpacker = _packermap[version][1]
    return unpacker(fh, alg, extras=extras)
1605 1609
1606 1610
def _changegroupinfo(repo, nodes, source):
    """Report how many changesets are bundled; list them when debugging."""
    ui = repo.ui
    if ui.verbose or source == b'bundle':
        ui.status(_(b"%d changesets found\n") % len(nodes))
    if ui.debugflag:
        ui.debug(b"list of changesets:\n")
        for node in nodes:
            ui.debug(b"%s\n" % hex(node))
1614 1618
1615 1619
def makechangegroup(
    repo, outgoing, version, source, fastpath=False, bundlecaps=None
):
    """Build a changegroup for ``outgoing`` and wrap it in an unbundler."""
    stream = makestream(
        repo,
        outgoing,
        version,
        source,
        fastpath=fastpath,
        bundlecaps=bundlecaps,
    )
    buffered = util.chunkbuffer(stream)
    extras = {b'clcount': len(outgoing.missing)}
    return getunbundler(version, buffered, None, extras)
1633 1637
1634 1638
def makestream(
    repo,
    outgoing,
    version,
    source,
    fastpath=False,
    bundlecaps=None,
    matcher=None,
):
    """Return a generator of changegroup chunks covering ``outgoing``."""
    bundler = getbundler(version, repo, bundlecaps=bundlecaps, matcher=matcher)

    repo = repo.unfiltered()
    # Take the fast path when explicitly told to, or when all (unfiltered)
    # heads were requested -- then we know every linkrev will be pulled by
    # the client anyway.
    requestedheads = outgoing.ancestorsof
    requestedheads.sort()
    uselinkrevfastpath = fastpath or (
        repo.filtername is None and requestedheads == sorted(repo.heads())
    )

    repo.hook(b'preoutgoing', throw=True, source=source)
    _changegroupinfo(repo, outgoing.missing, source)
    return bundler.generate(
        outgoing.common, outgoing.missing, uselinkrevfastpath, source
    )
1661 1665
1662 1666
def _addchangegroupfiles(repo, source, revmap, trp, expectedfiles, needfiles):
    """Apply the per-file revlog groups of an incoming changegroup.

    ``source`` is the unbundler, positioned at the file section; ``revmap``
    and ``trp`` are passed through to each filelog's ``addgroup``.
    ``needfiles`` maps filename -> set of nodes that must arrive; entries
    are checked off as revisions are added, and anything still missing at
    the end aborts with a 'run hg verify' hint.

    Returns a (revisions, files) tuple of how much was added.
    """
    revisions = 0
    files = 0
    progress = repo.ui.makeprogress(
        _(b'files'), unit=_(b'files'), total=expectedfiles
    )
    # filelogheader() returns {} at the end of the file section.
    for chunkdata in iter(source.filelogheader, {}):
        files += 1
        f = chunkdata[b"filename"]
        repo.ui.debug(b"adding %s revisions\n" % f)
        progress.increment()
        fl = repo.file(f)
        o = len(fl)  # revision count before the group is applied
        try:
            deltas = source.deltaiter()
            if not fl.addgroup(deltas, revmap, trp):
                raise error.Abort(_(b"received file revlog group is empty"))
        except error.CensoredBaseError as e:
            raise error.Abort(_(b"received delta base is censored: %s") % e)
        revisions += len(fl) - o
        if f in needfiles:
            needs = needfiles[f]
            for new in pycompat.xrange(o, len(fl)):
                n = fl.node(new)
                if n in needs:
                    needs.remove(n)
                else:
                    raise error.Abort(_(b"received spurious file revlog entry"))
            if not needs:
                del needfiles[f]
    progress.complete()

    # Anything left in needfiles was expected but never received; confirm
    # it is truly absent from the local store before aborting.
    for f, needs in pycompat.iteritems(needfiles):
        fl = repo.file(f)
        for n in needs:
            try:
                fl.rev(n)
            except error.LookupError:
                raise error.Abort(
                    _(b'missing file data for %s:%s - run hg verify')
                    % (f, hex(n))
                )

    return revisions, files
General Comments 0
You need to be logged in to leave comments. Login now