##// END OF EJS Templates
changegroup: convert a warning message to bytes...
Matt Harbison -
r47513:26d1ddc3 stable
parent child Browse files
Show More
@@ -1,1703 +1,1703 b''
1 1 # changegroup.py - Mercurial changegroup manipulation functions
2 2 #
3 3 # Copyright 2006 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import os
11 11 import struct
12 12 import weakref
13 13
14 14 from .i18n import _
15 15 from .node import (
16 16 hex,
17 17 nullid,
18 18 nullrev,
19 19 short,
20 20 )
21 21 from .pycompat import open
22 22
23 23 from . import (
24 24 error,
25 25 match as matchmod,
26 26 mdiff,
27 27 phases,
28 28 pycompat,
29 29 requirements,
30 30 scmutil,
31 31 util,
32 32 )
33 33
34 34 from .interfaces import repository
35 35
# Per-revision delta header layouts for each changegroup wire-format version.
# cg1: node, p1, p2, changeset link (4 x 20-byte nodes).
_CHANGEGROUPV1_DELTA_HEADER = struct.Struct(b"20s20s20s20s")
# cg2 adds an explicit delta-base node (generaldelta support).
_CHANGEGROUPV2_DELTA_HEADER = struct.Struct(b"20s20s20s20s20s")
# cg3 additionally carries 16 bits of revlog flags (big-endian).
_CHANGEGROUPV3_DELTA_HEADER = struct.Struct(b">20s20s20s20s20sH")

# Repository requirement string advertised when LFS blobs may be exchanged.
LFS_REQUIREMENT = b'lfs'

# Local alias; raises if the stream ends before the requested byte count.
readexactly = util.readexactly
43 43
44 44
def getchunk(stream):
    """Return the next chunk from ``stream`` as a byte string.

    A chunk is a big-endian 32-bit length (which counts its own four
    bytes) followed by the payload. Returns ``b""`` at the end-of-group
    marker (length 0) and aborts on any other length <= 4.
    """
    lengthdata = readexactly(stream, 4)
    length = struct.unpack(b">l", lengthdata)[0]
    if length > 4:
        # Payload length excludes the 4-byte length word itself.
        return readexactly(stream, length - 4)
    if length:
        raise error.Abort(_(b"invalid chunk length %d") % length)
    return b""
54 54
55 55
def chunkheader(length):
    """Return the wire header for a chunk carrying ``length`` payload bytes.

    The encoded length is big-endian and includes the four header bytes.
    """
    return struct.pack(b">l", 4 + length)
59 59
60 60
def closechunk():
    """Return the header of a zero-length chunk.

    An empty chunk serves as the terminator for a changegroup section.
    """
    return struct.pack(b'>l', 0)
64 64
65 65
66 66 def _fileheader(path):
67 67 """Obtain a changegroup chunk header for a named path."""
68 68 return chunkheader(len(path)) + path
69 69
70 70
def writechunks(ui, chunks, filename, vfs=None):
    """Write chunks to a file and return its filename.

    The stream is assumed to be a bundle file.
    Existing files will not be overwritten.
    If no filename is specified, a temporary file is created.
    """
    fh = None
    cleanup = None
    try:
        if not filename:
            fd, filename = pycompat.mkstemp(prefix=b"hg-bundle-", suffix=b".hg")
            fh = os.fdopen(fd, "wb")
            # The temp file must be removed if writing fails below.
            cleanup = filename
        elif vfs:
            fh = vfs.open(filename, b"wb")
        else:
            # Use a much larger buffer than the platform default
            # (commonly 4k on Linux) since bundles tend to be big.
            fh = open(filename, b"wb", 131072)
        for chunk in chunks:
            fh.write(chunk)
        # Success: keep the file.
        cleanup = None
        return filename
    finally:
        if fh is not None:
            fh.close()
        if cleanup is not None:
            if filename and vfs:
                vfs.unlink(cleanup)
            else:
                os.unlink(cleanup)
104 104
105 105
class cg1unpacker(object):
    """Unpacker for cg1 changegroup streams.

    A changegroup unpacker handles the framing of the revision data in
    the wire format. Most consumers will want to use the apply()
    method to add the changes from the changegroup to a repository.

    If you're forwarding a changegroup unmodified to another consumer,
    use getchunks(), which returns an iterator of changegroup
    chunks. This is mostly useful for cases where you need to know the
    data stream has ended by observing the end of the changegroup.

    deltachunk() is useful only if you're applying delta data. Most
    consumers should prefer apply() instead.

    A few other public methods exist. Those are used only for
    bundlerepo and some debug commands - their use is discouraged.
    """

    deltaheader = _CHANGEGROUPV1_DELTA_HEADER
    deltaheadersize = deltaheader.size
    version = b'01'
    _grouplistcount = 1  # One list of files after the manifests

    def __init__(self, fh, alg, extras=None):
        # ``fh`` is the (possibly compressed) input stream, ``alg`` the
        # bundle compression identifier (None is treated as b'UN', i.e.
        # uncompressed).
        if alg is None:
            alg = b'UN'
        if alg not in util.compengines.supportedbundletypes:
            raise error.Abort(_(b'unknown stream compression type: %s') % alg)
        if alg == b'BZ':
            # Headerless variant: the 'BZ' magic was already consumed by
            # the caller, so use the engine that re-synthesizes it.
            alg = b'_truncatedBZ'

        compengine = util.compengines.forbundletype(alg)
        self._stream = compengine.decompressorreader(fh)
        self._type = alg
        self.extras = extras or {}
        # Optional per-chunk progress callback, set by consumers.
        self.callback = None

    # These methods (compressed, read, seek, tell) all appear to only
    # be used by bundlerepo, but it's a little hard to tell.
    def compressed(self):
        return self._type is not None and self._type != b'UN'

    def read(self, l):
        return self._stream.read(l)

    def seek(self, pos):
        return self._stream.seek(pos)

    def tell(self):
        return self._stream.tell()

    def close(self):
        return self._stream.close()

    def _chunklength(self):
        # Read the 4-byte chunk length word and return the payload size
        # (0 for a terminating empty chunk); aborts on invalid lengths.
        d = readexactly(self._stream, 4)
        l = struct.unpack(b">l", d)[0]
        if l <= 4:
            if l:
                raise error.Abort(_(b"invalid chunk length %d") % l)
            return 0
        if self.callback:
            self.callback()
        return l - 4

    def changelogheader(self):
        """v10 does not have a changelog header chunk"""
        return {}

    def manifestheader(self):
        """v10 does not have a manifest header chunk"""
        return {}

    def filelogheader(self):
        """return the header of the filelogs chunk, v10 only has the filename"""
        l = self._chunklength()
        if not l:
            return {}
        fname = readexactly(self._stream, l)
        return {b'filename': fname}

    def _deltaheader(self, headertuple, prevnode):
        # cg1 headers carry no explicit delta base: the base is the
        # previous node in the stream, or p1 for the first delta.
        node, p1, p2, cs = headertuple
        if prevnode is None:
            deltabase = p1
        else:
            deltabase = prevnode
        flags = 0
        return node, p1, p2, deltabase, cs, flags

    def deltachunk(self, prevnode):
        # Read one delta chunk; returns {} at the end of a group,
        # otherwise a (node, p1, p2, cs, deltabase, delta, flags) tuple.
        l = self._chunklength()
        if not l:
            return {}
        headerdata = readexactly(self._stream, self.deltaheadersize)
        header = self.deltaheader.unpack(headerdata)
        delta = readexactly(self._stream, l - self.deltaheadersize)
        node, p1, p2, deltabase, cs, flags = self._deltaheader(header, prevnode)
        return (node, p1, p2, cs, deltabase, delta, flags)

    def getchunks(self):
        """returns all the chunks contains in the bundle

        Used when you need to forward the binary stream to a file or another
        network API. To do so, it parse the changegroup data, otherwise it will
        block in case of sshrepo because it don't know the end of the stream.
        """
        # For changegroup 1 and 2, we expect 3 parts: changelog, manifestlog,
        # and a list of filelogs. For changegroup 3, we expect 4 parts:
        # changelog, manifestlog, a list of tree manifestlogs, and a list of
        # filelogs.
        #
        # Changelog and manifestlog parts are terminated with empty chunks. The
        # tree and file parts are a list of entry sections. Each entry section
        # is a series of chunks terminating in an empty chunk. The list of these
        # entry sections is terminated in yet another empty chunk, so we know
        # we've reached the end of the tree/file list when we reach an empty
        # chunk that was proceeded by no non-empty chunks.

        parts = 0
        while parts < 2 + self._grouplistcount:
            noentries = True
            while True:
                chunk = getchunk(self)
                if not chunk:
                    # The first two empty chunks represent the end of the
                    # changelog and the manifestlog portions. The remaining
                    # empty chunks represent either A) the end of individual
                    # tree or file entries in the file list, or B) the end of
                    # the entire list. It's the end of the entire list if there
                    # were no entries (i.e. noentries is True).
                    if parts < 2:
                        parts += 1
                    elif noentries:
                        parts += 1
                    break
                noentries = False
                yield chunkheader(len(chunk))
                pos = 0
                while pos < len(chunk):
                    # Re-emit large chunks in 1 MiB slices.
                    next = pos + 2 ** 20
                    yield chunk[pos:next]
                    pos = next
            yield closechunk()

    def _unpackmanifests(self, repo, revmap, trp, prog):
        # Apply the manifest group to local storage; ``prog`` tracks
        # per-chunk progress via the callback hook.
        self.callback = prog.increment
        # no need to check for empty manifest group here:
        # if the result of the merge of 1 and 2 is the same in 3 and 4,
        # no new manifest will be created and the manifest group will
        # be empty during the pull
        self.manifestheader()
        deltas = self.deltaiter()
        repo.manifestlog.getstorage(b'').addgroup(deltas, revmap, trp)
        prog.complete()
        self.callback = None

    def apply(
        self,
        repo,
        tr,
        srctype,
        url,
        targetphase=phases.draft,
        expectedtotal=None,
    ):
        """Add the changegroup returned by source.read() to this repo.
        srctype is a string like 'push', 'pull', or 'unbundle'. url is
        the URL of the repo where this changegroup is coming from.

        Return an integer summarizing the change to this repo:
        - nothing changed or no source: 0
        - more heads than before: 1+added heads (2..n)
        - fewer heads than before: -1-removed heads (-2..-n)
        - number of heads stays the same: 1
        """
        repo = repo.unfiltered()

        def csmap(x):
            repo.ui.debug(b"add changeset %s\n" % short(x))
            return len(cl)

        def revmap(x):
            return cl.rev(x)

        try:
            # The transaction may already carry source information. In this
            # case we use the top level data. We overwrite the argument
            # because we need to use the top level value (if they exist)
            # in this function.
            srctype = tr.hookargs.setdefault(b'source', srctype)
            tr.hookargs.setdefault(b'url', url)
            repo.hook(
                b'prechangegroup', throw=True, **pycompat.strkwargs(tr.hookargs)
            )

            # write changelog data to temp files so concurrent readers
            # will not see an inconsistent view
            cl = repo.changelog
            cl.delayupdate(tr)
            oldheads = set(cl.heads())

            trp = weakref.proxy(tr)
            # pull off the changeset group
            repo.ui.status(_(b"adding changesets\n"))
            clstart = len(cl)
            progress = repo.ui.makeprogress(
                _(b'changesets'), unit=_(b'chunks'), total=expectedtotal
            )
            self.callback = progress.increment

            efilesset = set()
            cgnodes = []

            def ondupchangelog(cl, node):
                # Track nodes we already had so their phases can still be
                # advanced below.
                if cl.rev(node) < clstart:
                    cgnodes.append(node)

            def onchangelog(cl, node):
                # Collect the set of files touched, used for progress
                # accounting when applying filelogs.
                efilesset.update(cl.readfiles(node))

            self.changelogheader()
            deltas = self.deltaiter()
            if not cl.addgroup(
                deltas,
                csmap,
                trp,
                addrevisioncb=onchangelog,
                duplicaterevisioncb=ondupchangelog,
            ):
                repo.ui.develwarn(
                    b'applied empty changelog from changegroup',
                    config=b'warn-empty-changegroup',
                )
            efiles = len(efilesset)
            clend = len(cl)
            changesets = clend - clstart
            progress.complete()
            del deltas
            # TODO Python 2.7 removal
            # del efilesset
            efilesset = None
            self.callback = None

            # pull off the manifest group
            repo.ui.status(_(b"adding manifests\n"))
            # We know that we'll never have more manifests than we had
            # changesets.
            progress = repo.ui.makeprogress(
                _(b'manifests'), unit=_(b'chunks'), total=changesets
            )
            self._unpackmanifests(repo, revmap, trp, progress)

            needfiles = {}
            if repo.ui.configbool(b'server', b'validate'):
                cl = repo.changelog
                ml = repo.manifestlog
                # validate incoming csets have their manifests
                for cset in pycompat.xrange(clstart, clend):
                    mfnode = cl.changelogrevision(cset).manifest
                    mfest = ml[mfnode].readdelta()
                    # store file nodes we must see
                    for f, n in pycompat.iteritems(mfest):
                        needfiles.setdefault(f, set()).add(n)

            # process the files
            repo.ui.status(_(b"adding file changes\n"))
            newrevs, newfiles = _addchangegroupfiles(
                repo, self, revmap, trp, efiles, needfiles
            )

            # making sure the value exists
            tr.changes.setdefault(b'changegroup-count-changesets', 0)
            tr.changes.setdefault(b'changegroup-count-revisions', 0)
            tr.changes.setdefault(b'changegroup-count-files', 0)
            tr.changes.setdefault(b'changegroup-count-heads', 0)

            # some code use bundle operation for internal purpose. They usually
            # set `ui.quiet` to do this outside of user sight. Size the report
            # of such operation now happens at the end of the transaction, that
            # ui.quiet has not direct effect on the output.
            #
            # To preserve this intend use an inelegant hack, we fail to report
            # the change if `quiet` is set. We should probably move to
            # something better, but this is a good first step to allow the "end
            # of transaction report" to pass tests.
            if not repo.ui.quiet:
                tr.changes[b'changegroup-count-changesets'] += changesets
                tr.changes[b'changegroup-count-revisions'] += newrevs
                tr.changes[b'changegroup-count-files'] += newfiles

            deltaheads = 0
            if oldheads:
                heads = cl.heads()
                deltaheads += len(heads) - len(oldheads)
                for h in heads:
                    # Heads that close a branch don't count as "added".
                    if h not in oldheads and repo[h].closesbranch():
                        deltaheads -= 1

            # see previous comment about checking ui.quiet
            if not repo.ui.quiet:
                tr.changes[b'changegroup-count-heads'] += deltaheads
            repo.invalidatevolatilesets()

            if changesets > 0:
                if b'node' not in tr.hookargs:
                    tr.hookargs[b'node'] = hex(cl.node(clstart))
                    tr.hookargs[b'node_last'] = hex(cl.node(clend - 1))
                    hookargs = dict(tr.hookargs)
                else:
                    hookargs = dict(tr.hookargs)
                    hookargs[b'node'] = hex(cl.node(clstart))
                    hookargs[b'node_last'] = hex(cl.node(clend - 1))
                repo.hook(
                    b'pretxnchangegroup',
                    throw=True,
                    **pycompat.strkwargs(hookargs)
                )

            added = pycompat.xrange(clstart, clend)
            phaseall = None
            if srctype in (b'push', b'serve'):
                # Old servers can not push the boundary themselves.
                # New servers won't push the boundary if changeset already
                # exists locally as secret
                #
                # We should not use added here but the list of all change in
                # the bundle
                if repo.publishing():
                    targetphase = phaseall = phases.public
                else:
                    # closer target phase computation

                    # Those changesets have been pushed from the
                    # outside, their phases are going to be pushed
                    # alongside. Therefor `targetphase` is
                    # ignored.
                    targetphase = phaseall = phases.draft
            if added:
                phases.registernew(repo, tr, targetphase, added)
            if phaseall is not None:
                phases.advanceboundary(repo, tr, phaseall, cgnodes, revs=added)
                cgnodes = []

            if changesets > 0:

                def runhooks(unused_success):
                    # These hooks run when the lock releases, not when the
                    # transaction closes. So it's possible for the changelog
                    # to have changed since we last saw it.
                    if clstart >= len(repo):
                        return

                    repo.hook(b"changegroup", **pycompat.strkwargs(hookargs))

                    for rev in added:
                        args = hookargs.copy()
                        args[b'node'] = hex(cl.node(rev))
                        del args[b'node_last']
                        repo.hook(b"incoming", **pycompat.strkwargs(args))

                    newheads = [h for h in repo.heads() if h not in oldheads]
                    repo.ui.log(
                        b"incoming",
                        b"%d incoming changes - new heads: %s\n",
                        len(added),
                        b', '.join([hex(c[:6]) for c in newheads]),
                    )

                tr.addpostclose(
                    b'changegroup-runhooks-%020i' % clstart,
                    lambda tr: repo._afterlock(runhooks),
                )
        finally:
            repo.ui.flush()
        # never return 0 here:
        if deltaheads < 0:
            ret = deltaheads - 1
        else:
            ret = deltaheads + 1
        return ret

    def deltaiter(self):
        """
        returns an iterator of the deltas in this changegroup

        Useful for passing to the underlying storage system to be stored.
        """
        chain = None
        for chunkdata in iter(lambda: self.deltachunk(chain), {}):
            # Chunkdata: (node, p1, p2, cs, deltabase, delta, flags)
            yield chunkdata
            chain = chunkdata[0]
500 500
501 501
class cg2unpacker(cg1unpacker):
    """Unpacker for version 02 changegroup streams.

    The cg2 wire format extends cg1 with generaldelta support: every
    delta header names its base revision explicitly instead of implying
    it from stream order. All other behavior matches cg1.
    """

    deltaheader = _CHANGEGROUPV2_DELTA_HEADER
    deltaheadersize = deltaheader.size
    version = b'02'

    def _deltaheader(self, headertuple, prevnode):
        # The base is spelled out in the header, so ``prevnode`` is
        # irrelevant; cg2 carries no revlog flags either.
        node, p1, p2, deltabase, cs = headertuple
        return node, p1, p2, deltabase, cs, 0
518 518
519 519
class cg3unpacker(cg2unpacker):
    """Unpacker for version 03 changegroup streams.

    cg3 appends 16 bits of revlog flags to the delta header and adds a
    group of tree manifest revlogs (one entry section per directory)
    between the flat manifests and the filelogs.
    """

    deltaheader = _CHANGEGROUPV3_DELTA_HEADER
    deltaheadersize = deltaheader.size
    version = b'03'
    _grouplistcount = 2  # One list of manifests and one list of files

    def _deltaheader(self, headertuple, prevnode):
        # (node, p1, p2, deltabase, cs, flags) straight off the wire.
        return headertuple

    def _unpackmanifests(self, repo, revmap, trp, prog):
        # The root manifest group is handled exactly as in cg2.
        super(cg3unpacker, self)._unpackmanifests(repo, revmap, trp, prog)
        # Any remaining entry sections are tree manifests, keyed by
        # directory name; the list ends with an empty chunk.
        for chunkdata in iter(self.filelogheader, {}):
            d = chunkdata[b"filename"]
            repo.ui.debug(b"adding %s revisions\n" % d)
            deltas = self.deltaiter()
            if not repo.manifestlog.getstorage(d).addgroup(deltas, revmap, trp):
                raise error.Abort(_(b"received dir revlog group is empty"))
546 546
547 547
class headerlessfixup(object):
    """File-like wrapper that replays already-consumed header bytes.

    ``h`` holds bytes that were peeked off ``fh`` before the payload
    type was identified; reads drain that buffer first and then fall
    through to the underlying stream.
    """

    def __init__(self, fh, h):
        self._h = h
        self._fh = fh

    def read(self, n):
        buffered = self._h
        if not buffered:
            return readexactly(self._fh, n)
        d, self._h = buffered[:n], buffered[n:]
        if len(d) < n:
            # Buffer exhausted mid-read; top up from the real stream.
            d += readexactly(self._fh, n - len(d))
        return d
560 560
561 561
def _revisiondeltatochunks(delta, headerfn):
    """Serialize a revisiondelta to changegroup chunks.

    ``headerfn`` renders the version-specific delta header for
    ``delta``. Yields the chunk length header, the delta header, an
    optional synthetic diff prefix, and the payload.
    """
    # The wire format only carries deltas. A captured full revision is
    # therefore dressed up as a delta whose prefix instructs the
    # receiver to replace the base content wholesale.
    if delta.delta is not None:
        prefix = b''
        data = delta.delta
    elif delta.basenode == nullid:
        data = delta.revision
        prefix = mdiff.trivialdiffheader(len(data))
    else:
        data = delta.revision
        prefix = mdiff.replacediffheader(delta.baserevisionsize, len(data))

    meta = headerfn(delta)
    total = len(meta) + len(prefix) + len(data)

    yield chunkheader(total)
    yield meta
    if prefix:
        yield prefix
    yield data
587 587
588 588
589 589 def _sortnodesellipsis(store, nodes, cl, lookup):
590 590 """Sort nodes for changegroup generation."""
591 591 # Ellipses serving mode.
592 592 #
593 593 # In a perfect world, we'd generate better ellipsis-ified graphs
594 594 # for non-changelog revlogs. In practice, we haven't started doing
595 595 # that yet, so the resulting DAGs for the manifestlog and filelogs
596 596 # are actually full of bogus parentage on all the ellipsis
597 597 # nodes. This has the side effect that, while the contents are
598 598 # correct, the individual DAGs might be completely out of whack in
599 599 # a case like 882681bc3166 and its ancestors (back about 10
600 600 # revisions or so) in the main hg repo.
601 601 #
602 602 # The one invariant we *know* holds is that the new (potentially
603 603 # bogus) DAG shape will be valid if we order the nodes in the
604 604 # order that they're introduced in dramatis personae by the
605 605 # changelog, so what we do is we sort the non-changelog histories
606 606 # by the order in which they are used by the changelog.
607 607 key = lambda n: cl.rev(lookup(n))
608 608 return sorted(nodes, key=key)
609 609
610 610
def _resolvenarrowrevisioninfo(
    cl,
    store,
    ischangelog,
    rev,
    linkrev,
    linknode,
    clrevtolocalrev,
    fullclnodes,
    precomputedellipsis,
):
    # Compute adjusted (p1node, p2node, linknode) for an ellipsis
    # revision: parents come from the precomputed ellipsis DAG at the
    # changelog level and are translated into this store's revisions.
    linkparents = precomputedellipsis[linkrev]

    def local(clrev):
        """Turn a changelog revnum into a local revnum.

        The ellipsis dag is stored as revnums on the changelog,
        but when we're producing ellipsis entries for
        non-changelog revlogs, we need to turn those numbers into
        something local. This does that for us, and during the
        changelog sending phase will also expand the stored
        mappings as needed.
        """
        if clrev == nullrev:
            return nullrev

        if ischangelog:
            return clrev

        # Walk the ellipsis-ized changelog breadth-first looking for a
        # change that has been linked from the current revlog.
        #
        # For a flat manifest revlog only a single step should be necessary
        # as all relevant changelog entries are relevant to the flat
        # manifest.
        #
        # For a filelog or tree manifest dirlog however not every changelog
        # entry will have been relevant, so we need to skip some changelog
        # nodes even after ellipsis-izing.
        walk = [clrev]
        while walk:
            p = walk[0]
            walk = walk[1:]
            if p in clrevtolocalrev:
                return clrevtolocalrev[p]
            elif p in fullclnodes:
                walk.extend([pp for pp in cl.parentrevs(p) if pp != nullrev])
            elif p in precomputedellipsis:
                walk.extend(
                    [pp for pp in precomputedellipsis[p] if pp != nullrev]
                )
            else:
                # In this case, we've got an ellipsis with parents
                # outside the current bundle (likely an
                # incremental pull). We "know" that we can use the
                # value of this same revlog at whatever revision
                # is pointed to by linknode. "Know" is in scare
                # quotes because I haven't done enough examination
                # of edge cases to convince myself this is really
                # a fact - it works for all the (admittedly
                # thorough) cases in our testsuite, but I would be
                # somewhat unsurprised to find a case in the wild
                # where this breaks down a bit. That said, I don't
                # know if it would hurt anything.
                for i in pycompat.xrange(rev, 0, -1):
                    if store.linkrev(i) == clrev:
                        return i
                # We failed to resolve a parent for this node, so
                # we crash the changegroup construction.
                raise error.Abort(
                    b'unable to resolve parent while packing %r %r'
                    b' for changeset %r' % (store.indexfile, rev, clrev)
                )

        return nullrev

    # NOTE(review): when the stored parents are already null, the
    # ellipsis parents are ignored and nulls are kept.
    if not linkparents or (store.parentrevs(rev) == (nullrev, nullrev)):
        p1, p2 = nullrev, nullrev
    elif len(linkparents) == 1:
        (p1,) = sorted(local(p) for p in linkparents)
        p2 = nullrev
    else:
        p1, p2 = sorted(local(p) for p in linkparents)

    p1node, p2node = store.node(p1), store.node(p2)

    return p1node, p2node, linknode
698 698
699 699
def deltagroup(
    repo,
    store,
    nodes,
    ischangelog,
    lookup,
    forcedeltaparentprev,
    topic=None,
    ellipses=False,
    clrevtolocalrev=None,
    fullclnodes=None,
    precomputedellipsis=None,
):
    """Calculate deltas for a set of revisions.

    Is a generator of ``revisiondelta`` instances.

    If topic is not None, progress detail will be generated using this
    topic name (e.g. changesets, manifests, etc).
    """
    if not nodes:
        return

    cl = repo.changelog

    if ischangelog:
        # `hg log` shows changesets in storage order. To preserve order
        # across clones, send out changesets in storage order.
        nodesorder = b'storage'
    elif ellipses:
        nodes = _sortnodesellipsis(store, nodes, cl, lookup)
        nodesorder = b'nodes'
    else:
        nodesorder = None

    # Perform ellipses filtering and revision massaging. We do this before
    # emitrevisions() because a) filtering out revisions creates less work
    # for emitrevisions() b) dropping revisions would break emitrevisions()'s
    # assumptions about delta choices and we would possibly send a delta
    # referencing a missing base revision.
    #
    # Also, calling lookup() has side-effects with regards to populating
    # data structures. If we don't call lookup() for each node or if we call
    # lookup() after the first pass through each node, things can break -
    # possibly intermittently depending on the python hash seed! For that
    # reason, we store a mapping of all linknodes during the initial node
    # pass rather than use lookup() on the output side.
    if ellipses:
        filtered = []
        adjustedparents = {}
        linknodes = {}

        for node in nodes:
            rev = store.rev(node)
            linknode = lookup(node)
            linkrev = cl.rev(linknode)
            clrevtolocalrev[linkrev] = rev

            # If linknode is in fullclnodes, it means the corresponding
            # changeset was a full changeset and is being sent unaltered.
            if linknode in fullclnodes:
                linknodes[node] = linknode

            # If the corresponding changeset wasn't in the set computed
            # as relevant to us, it should be dropped outright.
            elif linkrev not in precomputedellipsis:
                continue

            else:
                # We could probably do this later and avoid the dict
                # holding state. But it likely doesn't matter.
                p1node, p2node, linknode = _resolvenarrowrevisioninfo(
                    cl,
                    store,
                    ischangelog,
                    rev,
                    linkrev,
                    linknode,
                    clrevtolocalrev,
                    fullclnodes,
                    precomputedellipsis,
                )

                adjustedparents[node] = (p1node, p2node)
                linknodes[node] = linknode

            filtered.append(node)

        nodes = filtered

    # We expect the first pass to be fast, so we only engage the progress
    # meter for constructing the revision deltas.
    progress = None
    if topic is not None:
        progress = repo.ui.makeprogress(
            topic, unit=_(b'chunks'), total=len(nodes)
        )

    configtarget = repo.ui.config(b'devel', b'bundle.delta')
    if configtarget not in (b'', b'p1', b'full'):
        # The message must be a bytes string: ui.warn() operates on
        # bytes, so a unicode literal here would crash on Python 3.
        msg = _(b"""config "devel.bundle.delta" as unknown value: %s""")
        repo.ui.warn(msg % configtarget)

    deltamode = repository.CG_DELTAMODE_STD
    if forcedeltaparentprev:
        deltamode = repository.CG_DELTAMODE_PREV
    elif configtarget == b'p1':
        deltamode = repository.CG_DELTAMODE_P1
    elif configtarget == b'full':
        deltamode = repository.CG_DELTAMODE_FULL

    revisions = store.emitrevisions(
        nodes,
        nodesorder=nodesorder,
        revisiondata=True,
        assumehaveparentrevisions=not ellipses,
        deltamode=deltamode,
    )

    for i, revision in enumerate(revisions):
        if progress:
            progress.update(i + 1)

        if ellipses:
            linknode = linknodes[revision.node]

            if revision.node in adjustedparents:
                p1node, p2node = adjustedparents[revision.node]
                revision.p1node = p1node
                revision.p2node = p2node
                revision.flags |= repository.REVISION_FLAG_ELLIPSIS

        else:
            linknode = lookup(revision.node)

        revision.linknode = linknode
        yield revision

    if progress:
        progress.complete()
840 840
841 841
842 842 class cgpacker(object):
843 843 def __init__(
844 844 self,
845 845 repo,
846 846 oldmatcher,
847 847 matcher,
848 848 version,
849 849 builddeltaheader,
850 850 manifestsend,
851 851 forcedeltaparentprev=False,
852 852 bundlecaps=None,
853 853 ellipses=False,
854 854 shallow=False,
855 855 ellipsisroots=None,
856 856 fullnodes=None,
857 857 ):
858 858 """Given a source repo, construct a bundler.
859 859
860 860 oldmatcher is a matcher that matches on files the client already has.
861 861 These will not be included in the changegroup.
862 862
863 863 matcher is a matcher that matches on files to include in the
864 864 changegroup. Used to facilitate sparse changegroups.
865 865
866 866 forcedeltaparentprev indicates whether delta parents must be against
867 867 the previous revision in a delta group. This should only be used for
868 868 compatibility with changegroup version 1.
869 869
870 870 builddeltaheader is a callable that constructs the header for a group
871 871 delta.
872 872
873 873 manifestsend is a chunk to send after manifests have been fully emitted.
874 874
875 875 ellipses indicates whether ellipsis serving mode is enabled.
876 876
877 877 bundlecaps is optional and can be used to specify the set of
878 878 capabilities which can be used to build the bundle. While bundlecaps is
879 879 unused in core Mercurial, extensions rely on this feature to communicate
880 880 capabilities to customize the changegroup packer.
881 881
882 882 shallow indicates whether shallow data might be sent. The packer may
883 883 need to pack file contents not introduced by the changes being packed.
884 884
885 885 fullnodes is the set of changelog nodes which should not be ellipsis
886 886 nodes. We store this rather than the set of nodes that should be
887 887 ellipsis because for very large histories we expect this to be
888 888 significantly smaller.
889 889 """
890 890 assert oldmatcher
891 891 assert matcher
892 892 self._oldmatcher = oldmatcher
893 893 self._matcher = matcher
894 894
895 895 self.version = version
896 896 self._forcedeltaparentprev = forcedeltaparentprev
897 897 self._builddeltaheader = builddeltaheader
898 898 self._manifestsend = manifestsend
899 899 self._ellipses = ellipses
900 900
901 901 # Set of capabilities we can use to build the bundle.
902 902 if bundlecaps is None:
903 903 bundlecaps = set()
904 904 self._bundlecaps = bundlecaps
905 905 self._isshallow = shallow
906 906 self._fullclnodes = fullnodes
907 907
908 908 # Maps ellipsis revs to their roots at the changelog level.
909 909 self._precomputedellipsis = ellipsisroots
910 910
911 911 self._repo = repo
912 912
913 913 if self._repo.ui.verbose and not self._repo.ui.debugflag:
914 914 self._verbosenote = self._repo.ui.note
915 915 else:
916 916 self._verbosenote = lambda s: None
917 917
    def generate(
        self, commonrevs, clnodes, fastpathlinkrev, source, changelog=True
    ):
        """Yield a sequence of changegroup byte chunks.
        If changelog is False, changelog data won't be added to changegroup

        ``commonrevs`` is the set of revisions the receiving side already
        has; ``clnodes`` is the list of changelog nodes to send.  ``source``
        is an operation identifier forwarded to hooks.  Chunks are emitted
        in wire order: changelog, manifests, then filelogs, each section
        terminated by an empty (close) chunk.
        """

        repo = self._repo
        cl = repo.changelog

        self._verbosenote(_(b'uncompressed size of bundle content:\n'))
        size = 0

        clstate, deltas = self._generatechangelog(
            cl, clnodes, generate=changelog
        )
        for delta in deltas:
            for chunk in _revisiondeltatochunks(delta, self._builddeltaheader):
                size += len(chunk)
                yield chunk

        # A zero-length chunk terminates the changelog section.
        close = closechunk()
        size += len(close)
        yield closechunk()

        self._verbosenote(_(b'%8.i (changelog)\n') % size)

        # NOTE: clstate is only fully populated once the changelog delta
        # stream above has been fully consumed (see _generatechangelog).
        clrevorder = clstate[b'clrevorder']
        manifests = clstate[b'manifests']
        changedfiles = clstate[b'changedfiles']

        # We need to make sure that the linkrev in the changegroup refers to
        # the first changeset that introduced the manifest or file revision.
        # The fastpath is usually safer than the slowpath, because the filelogs
        # are walked in revlog order.
        #
        # When taking the slowpath when the manifest revlog uses generaldelta,
        # the manifest may be walked in the "wrong" order. Without 'clrevorder',
        # we would get an incorrect linkrev (see fix in cc0ff93d0c0c).
        #
        # When taking the fastpath, we are only vulnerable to reordering
        # of the changelog itself. The changelog never uses generaldelta and is
        # never reordered. To handle this case, we simply take the slowpath,
        # which already has the 'clrevorder' logic. This was also fixed in
        # cc0ff93d0c0c.

        # Treemanifests don't work correctly with fastpathlinkrev
        # either, because we don't discover which directory nodes to
        # send along with files. This could probably be fixed.
        fastpathlinkrev = fastpathlinkrev and not scmutil.istreemanifest(repo)

        fnodes = {}  # needed file nodes

        size = 0
        it = self.generatemanifests(
            commonrevs,
            clrevorder,
            fastpathlinkrev,
            manifests,
            fnodes,
            source,
            clstate[b'clrevtomanifestrev'],
        )

        for tree, deltas in it:
            if tree:
                # Only cg3 can transfer subdirectory (tree) manifests; they
                # are announced with a file-style header carrying the path.
                assert self.version == b'03'
                chunk = _fileheader(tree)
                size += len(chunk)
                yield chunk

            for delta in deltas:
                chunks = _revisiondeltatochunks(delta, self._builddeltaheader)
                for chunk in chunks:
                    size += len(chunk)
                    yield chunk

            close = closechunk()
            size += len(close)
            yield close

        self._verbosenote(_(b'%8.i (manifests)\n') % size)
        yield self._manifestsend

        mfdicts = None
        if self._ellipses and self._isshallow:
            # In shallow ellipsis mode, file linknodes may come from
            # manifests we sent but did not introduce; hand the read
            # manifests to generatefiles() for that lookup.
            mfdicts = [
                (self._repo.manifestlog[n].read(), lr)
                for (n, lr) in pycompat.iteritems(manifests)
            ]

        manifests.clear()
        clrevs = {cl.rev(x) for x in clnodes}

        it = self.generatefiles(
            changedfiles,
            commonrevs,
            source,
            mfdicts,
            fastpathlinkrev,
            fnodes,
            clrevs,
        )

        for path, deltas in it:
            h = _fileheader(path)
            size = len(h)
            yield h

            for delta in deltas:
                chunks = _revisiondeltatochunks(delta, self._builddeltaheader)
                for chunk in chunks:
                    size += len(chunk)
                    yield chunk

            close = closechunk()
            size += len(close)
            yield close

            self._verbosenote(_(b'%8.i %s\n') % (size, path))

        # Final empty chunk marks the end of the whole changegroup.
        yield closechunk()

        if clnodes:
            repo.hook(b'outgoing', node=hex(clnodes[0]), source=source)
1043 1043
    def _generatechangelog(self, cl, nodes, generate=True):
        """Generate data for changelog chunks.

        Returns a 2-tuple of a dict containing state and an iterable of
        byte chunks. The state will not be fully populated until the
        chunk stream has been fully consumed.

        if generate is False, the state will be fully populated and no chunk
        stream will be yielded

        The returned state dict carries ``clrevorder`` (node -> emission
        index), ``manifests`` (manifest node -> introducing changelog node),
        ``changedfiles`` (set of touched paths) and ``clrevtomanifestrev``
        (ellipsis-mode changelog rev -> manifest rev mapping).
        """
        clrevorder = {}
        manifests = {}
        mfl = self._repo.manifestlog
        changedfiles = set()
        clrevtomanifestrev = {}

        state = {
            b'clrevorder': clrevorder,
            b'manifests': manifests,
            b'changedfiles': changedfiles,
            b'clrevtomanifestrev': clrevtomanifestrev,
        }

        if not (generate or self._ellipses):
            # No chunks will be emitted, so there is no callback to populate
            # the state lazily; fill it in eagerly here instead.
            # sort the nodes in storage order
            nodes = sorted(nodes, key=cl.rev)
            for node in nodes:
                c = cl.changelogrevision(node)
                clrevorder[node] = len(clrevorder)
                # record the first changeset introducing this manifest version
                manifests.setdefault(c.manifest, node)
                # Record a complete list of potentially-changed files in
                # this manifest.
                changedfiles.update(c.files)

            return state, ()

        # Callback for the changelog, used to collect changed files and
        # manifest nodes.
        # Returns the linkrev node (identity in the changelog case).
        def lookupcl(x):
            c = cl.changelogrevision(x)
            clrevorder[x] = len(clrevorder)

            if self._ellipses:
                # Only update manifests if x is going to be sent. Otherwise we
                # end up with bogus linkrevs specified for manifests and
                # we skip some manifest nodes that we should otherwise
                # have sent.
                if (
                    x in self._fullclnodes
                    or cl.rev(x) in self._precomputedellipsis
                ):

                    manifestnode = c.manifest
                    # Record the first changeset introducing this manifest
                    # version.
                    manifests.setdefault(manifestnode, x)
                    # Set this narrow-specific dict so we have the lowest
                    # manifest revnum to look up for this cl revnum. (Part of
                    # mapping changelog ellipsis parents to manifest ellipsis
                    # parents)
                    clrevtomanifestrev.setdefault(
                        cl.rev(x), mfl.rev(manifestnode)
                    )
                # We can't trust the changed files list in the changeset if the
                # client requested a shallow clone.
                if self._isshallow:
                    changedfiles.update(mfl[c.manifest].read().keys())
                else:
                    changedfiles.update(c.files)
            else:
                # record the first changeset introducing this manifest version
                manifests.setdefault(c.manifest, x)
                # Record a complete list of potentially-changed files in
                # this manifest.
                changedfiles.update(c.files)

            return x

        gen = deltagroup(
            self._repo,
            cl,
            nodes,
            True,
            lookupcl,
            self._forcedeltaparentprev,
            ellipses=self._ellipses,
            topic=_(b'changesets'),
            clrevtolocalrev={},
            fullclnodes=self._fullclnodes,
            precomputedellipsis=self._precomputedellipsis,
        )

        return state, gen
1139 1139
    def generatemanifests(
        self,
        commonrevs,
        clrevorder,
        fastpathlinkrev,
        manifests,
        fnodes,
        source,
        clrevtolocalrev,
    ):
        """Returns an iterator of changegroup chunks containing manifests.

        `source` is unused here, but is used by extensions like remotefilelog to
        change what is sent based in pulls vs pushes, etc.

        Yields ``(tree, deltas)`` pairs, where ``tree`` is b'' for the root
        manifest and a b'/'-suffixed path for subdirectory (tree) manifests.
        As a side effect populates ``fnodes`` with the file nodes that need
        to be sent afterwards.
        """
        repo = self._repo
        mfl = repo.manifestlog
        # Worklist mapping tree name -> {manifest node -> linknode}.  The
        # root manifest seeds it; subdirectory entries are discovered while
        # the root deltas are consumed (see lookupmflinknode below).
        tmfnodes = {b'': manifests}

        # Callback for the manifest, used to collect linkrevs for filelog
        # revisions.
        # Returns the linkrev node (collected in lookupcl).
        def makelookupmflinknode(tree, nodes):
            if fastpathlinkrev:
                assert not tree
                return (
                    manifests.__getitem__
                )  # pytype: disable=unsupported-operands

            def lookupmflinknode(x):
                """Callback for looking up the linknode for manifests.

                Returns the linkrev node for the specified manifest.

                SIDE EFFECT:

                1) fclnodes gets populated with the list of relevant
                file nodes if we're not using fastpathlinkrev
                2) When treemanifests are in use, collects treemanifest nodes
                to send

                Note that this means manifests must be completely sent to
                the client before you can trust the list of files and
                treemanifests to send.
                """
                clnode = nodes[x]
                mdata = mfl.get(tree, x).readfast(shallow=True)
                for p, n, fl in mdata.iterentries():
                    if fl == b't':  # subdirectory manifest
                        subtree = tree + p + b'/'
                        tmfclnodes = tmfnodes.setdefault(subtree, {})
                        tmfclnode = tmfclnodes.setdefault(n, clnode)
                        # Keep the earliest introducing changeset as linknode.
                        if clrevorder[clnode] < clrevorder[tmfclnode]:
                            tmfclnodes[n] = clnode
                    else:
                        f = tree + p
                        fclnodes = fnodes.setdefault(f, {})
                        fclnode = fclnodes.setdefault(n, clnode)
                        if clrevorder[clnode] < clrevorder[fclnode]:
                            fclnodes[n] = clnode
                return clnode

            return lookupmflinknode

        while tmfnodes:
            tree, nodes = tmfnodes.popitem()

            should_visit = self._matcher.visitdir(tree[:-1])
            if tree and not should_visit:
                continue

            store = mfl.getstorage(tree)

            if not should_visit:
                # No nodes to send because this directory is out of
                # the client's view of the repository (probably
                # because of narrow clones). Do this even for the root
                # directory (tree=='')
                prunednodes = []
            else:
                # Avoid sending any manifest nodes we can prove the
                # client already has by checking linkrevs. See the
                # related comment in generatefiles().
                prunednodes = self._prunemanifests(store, nodes, commonrevs)

            if tree and not prunednodes:
                continue

            lookupfn = makelookupmflinknode(tree, nodes)

            deltas = deltagroup(
                self._repo,
                store,
                prunednodes,
                False,
                lookupfn,
                self._forcedeltaparentprev,
                ellipses=self._ellipses,
                topic=_(b'manifests'),
                clrevtolocalrev=clrevtolocalrev,
                fullclnodes=self._fullclnodes,
                precomputedellipsis=self._precomputedellipsis,
            )

            if not self._oldmatcher.visitdir(store.tree[:-1]):
                yield tree, deltas
            else:
                # 'deltas' is a generator and we need to consume it even if
                # we are not going to send it because a side-effect is that
                # it updates tmfnodes (via lookupfn)
                for d in deltas:
                    pass
                if not tree:
                    yield tree, []
1254 1254
1255 1255 def _prunemanifests(self, store, nodes, commonrevs):
1256 1256 if not self._ellipses:
1257 1257 # In non-ellipses case and large repositories, it is better to
1258 1258 # prevent calling of store.rev and store.linkrev on a lot of
1259 1259 # nodes as compared to sending some extra data
1260 1260 return nodes.copy()
1261 1261 # This is split out as a separate method to allow filtering
1262 1262 # commonrevs in extension code.
1263 1263 #
1264 1264 # TODO(augie): this shouldn't be required, instead we should
1265 1265 # make filtering of revisions to send delegated to the store
1266 1266 # layer.
1267 1267 frev, flr = store.rev, store.linkrev
1268 1268 return [n for n in nodes if flr(frev(n)) not in commonrevs]
1269 1269
    # The 'source' parameter is useful for extensions
    def generatefiles(
        self,
        changedfiles,
        commonrevs,
        source,
        mfdicts,
        fastpathlinkrev,
        fnodes,
        clrevs,
    ):
        """Yield ``(path, deltas)`` pairs for the filelog part of the bundle.

        ``changedfiles`` is first narrowed to paths the client wants but
        does not already have.  ``mfdicts`` is only non-None in shallow
        ellipsis mode (see generate()) and supplies extra
        (manifest, linkrev) pairs for linknode resolution.
        """
        changedfiles = [
            f
            for f in changedfiles
            if self._matcher(f) and not self._oldmatcher(f)
        ]

        if not fastpathlinkrev:

            def normallinknodes(unused, fname):
                # Linknodes were collected while sending manifests.
                return fnodes.get(fname, {})

        else:
            cln = self._repo.changelog.node

            def normallinknodes(store, fname):
                # Fast path: derive linknodes straight from filelog linkrevs.
                flinkrev = store.linkrev
                fnode = store.node
                revs = ((r, flinkrev(r)) for r in store)
                return {fnode(r): cln(lr) for r, lr in revs if lr in clrevs}

        clrevtolocalrev = {}

        if self._isshallow:
            # In a shallow clone, the linknodes callback needs to also include
            # those file nodes that are in the manifests we sent but weren't
            # introduced by those manifests.
            commonctxs = [self._repo[c] for c in commonrevs]
            clrev = self._repo.changelog.rev

            def linknodes(flog, fname):
                for c in commonctxs:
                    try:
                        fnode = c.filenode(fname)
                        clrevtolocalrev[c.rev()] = flog.rev(fnode)
                    except error.ManifestLookupError:
                        pass
                links = normallinknodes(flog, fname)
                if len(links) != len(mfdicts):
                    for mf, lr in mfdicts:
                        fnode = mf.get(fname, None)
                        if fnode in links:
                            # Prefer the earliest introducing changeset.
                            links[fnode] = min(links[fnode], lr, key=clrev)
                        elif fnode:
                            links[fnode] = lr
                return links

        else:
            linknodes = normallinknodes

        repo = self._repo
        progress = repo.ui.makeprogress(
            _(b'files'), unit=_(b'files'), total=len(changedfiles)
        )
        for i, fname in enumerate(sorted(changedfiles)):
            filerevlog = repo.file(fname)
            if not filerevlog:
                raise error.Abort(
                    _(b"empty or missing file data for %s") % fname
                )

            # This dict is reused per-file by the delta machinery.
            clrevtolocalrev.clear()

            linkrevnodes = linknodes(filerevlog, fname)
            # Lookup for filenodes, we collected the linkrev nodes above in the
            # fastpath case and with lookupmf in the slowpath case.
            def lookupfilelog(x):
                return linkrevnodes[x]

            frev, flr = filerevlog.rev, filerevlog.linkrev
            # Skip sending any filenode we know the client already
            # has. This avoids over-sending files relatively
            # inexpensively, so it's not a problem if we under-filter
            # here.
            filenodes = [
                n for n in linkrevnodes if flr(frev(n)) not in commonrevs
            ]

            if not filenodes:
                continue

            progress.update(i + 1, item=fname)

            deltas = deltagroup(
                self._repo,
                filerevlog,
                filenodes,
                False,
                lookupfilelog,
                self._forcedeltaparentprev,
                ellipses=self._ellipses,
                clrevtolocalrev=clrevtolocalrev,
                fullclnodes=self._fullclnodes,
                precomputedellipsis=self._precomputedellipsis,
            )

            yield fname, deltas

        progress.complete()
1379 1379
1380 1380
def _makecg1packer(
    repo,
    oldmatcher,
    matcher,
    bundlecaps,
    ellipses=False,
    shallow=False,
    ellipsisroots=None,
    fullnodes=None,
):
    """Construct a changegroup version 01 packer."""

    def builddeltaheader(d):
        # cg1 headers carry no explicit delta base: deltas are always
        # against the previous revision, hence forcedeltaparentprev below.
        return _CHANGEGROUPV1_DELTA_HEADER.pack(
            d.node, d.p1node, d.p2node, d.linknode
        )

    return cgpacker(
        repo,
        oldmatcher,
        matcher,
        b'01',
        builddeltaheader=builddeltaheader,
        manifestsend=b'',
        forcedeltaparentprev=True,
        bundlecaps=bundlecaps,
        ellipses=ellipses,
        shallow=shallow,
        ellipsisroots=ellipsisroots,
        fullnodes=fullnodes,
    )
1409 1409
1410 1410
def _makecg2packer(
    repo,
    oldmatcher,
    matcher,
    bundlecaps,
    ellipses=False,
    shallow=False,
    ellipsisroots=None,
    fullnodes=None,
):
    """Construct a changegroup version 02 packer."""

    def builddeltaheader(d):
        # cg2 adds an explicit delta base node to the header.
        return _CHANGEGROUPV2_DELTA_HEADER.pack(
            d.node, d.p1node, d.p2node, d.basenode, d.linknode
        )

    return cgpacker(
        repo,
        oldmatcher,
        matcher,
        b'02',
        builddeltaheader=builddeltaheader,
        manifestsend=b'',
        bundlecaps=bundlecaps,
        ellipses=ellipses,
        shallow=shallow,
        ellipsisroots=ellipsisroots,
        fullnodes=fullnodes,
    )
1438 1438
1439 1439
def _makecg3packer(
    repo,
    oldmatcher,
    matcher,
    bundlecaps,
    ellipses=False,
    shallow=False,
    ellipsisroots=None,
    fullnodes=None,
):
    """Construct a changegroup version 03 packer."""

    def builddeltaheader(d):
        # cg3 extends the cg2 header with a 16-bit revlog flags field.
        return _CHANGEGROUPV3_DELTA_HEADER.pack(
            d.node, d.p1node, d.p2node, d.basenode, d.linknode, d.flags
        )

    return cgpacker(
        repo,
        oldmatcher,
        matcher,
        b'03',
        builddeltaheader=builddeltaheader,
        # cg3 terminates the manifest section with an extra close chunk
        # after the tree manifests.
        manifestsend=closechunk(),
        bundlecaps=bundlecaps,
        ellipses=ellipses,
        shallow=shallow,
        ellipsisroots=ellipsisroots,
        fullnodes=fullnodes,
    )
1467 1467
1468 1468
# Maps a changegroup wire-format version identifier to a
# (packer factory, unpacker class) pair.
_packermap = {
    b'01': (_makecg1packer, cg1unpacker),
    # cg2 adds support for exchanging generaldelta
    b'02': (_makecg2packer, cg2unpacker),
    # cg3 adds support for exchanging revlog flags and treemanifests
    b'03': (_makecg3packer, cg3unpacker),
}
1476 1476
1477 1477
def allsupportedversions(repo):
    """Return the set of changegroup versions supported by this repo."""
    versions = set(_packermap.keys())
    needv03 = False
    wantstrees = (
        repo.ui.configbool(b'experimental', b'changegroup3')
        or repo.ui.configbool(b'experimental', b'treemanifest')
        or scmutil.istreemanifest(repo)
    )
    if wantstrees:
        # Version 03 is needed to exchange treemanifest data.
        #
        # Versions 01 and 02 are kept as well: a repo may contain both
        # flat and tree manifests at the same time, so pulling data (or
        # even pushing a subset of history) with an older version stays
        # viable.
        needv03 = True
    if b'exp-sidedata-flag' in repo.requirements:
        needv03 = True
        # don't attempt to use 01/02 until we do sidedata cleaning
        versions.discard(b'01')
        versions.discard(b'02')
    if not needv03:
        versions.discard(b'03')
    return versions
1502 1502
1503 1503
# Changegroup versions that can be applied to the repo
def supportedincomingversions(repo):
    """Return the set of changegroup versions ``repo`` can receive."""
    return allsupportedversions(repo)
1507 1507
1508 1508
# Changegroup versions that can be created from the repo
def supportedoutgoingversions(repo):
    """Return the changegroup versions this repo is able to produce.

    Starts from everything the repo supports and drops the flat-only
    versions when a storage feature makes them unrepresentable.
    """
    versions = allsupportedversions(repo)
    dropflat = False
    if scmutil.istreemanifest(repo):
        # Versions 01 and 02 support only flat manifests and it's just too
        # expensive to convert between the flat manifest and tree manifest
        # on the fly. Since tree manifests are hashed differently, all of
        # history would have to be converted. Instead, we simply don't even
        # pretend to support versions 01 and 02.
        dropflat = True
    if requirements.NARROW_REQUIREMENT in repo.requirements:
        # Versions 01 and 02 don't support revlog flags, and we need to
        # support that for stripping and unbundling to work.
        dropflat = True
    if LFS_REQUIREMENT in repo.requirements:
        # Versions 01 and 02 don't support revlog flags, and we need to
        # mark LFS entries with REVIDX_EXTSTORED.
        dropflat = True
    if dropflat:
        versions.discard(b'01')
        versions.discard(b'02')

    return versions
1532 1532
1533 1533
def localversion(repo):
    """Return the best changegroup version for local-only bundles."""
    # Finds the best version to use for bundles that are meant to be used
    # locally, such as those from strip and shelve, and temporary bundles.
    return max(supportedoutgoingversions(repo))
1538 1538
1539 1539
def safeversion(repo):
    """Return the smallest version all likely clients of the repo support.

    For example, every hg version that supports generaldelta also
    supports changegroup 02, so a generaldelta repo can assume 02.
    """
    candidates = supportedoutgoingversions(repo)
    if b'generaldelta' in repo.requirements:
        candidates.discard(b'01')
    assert candidates
    return min(candidates)
1549 1549
1550 1550
def getbundler(
    version,
    repo,
    bundlecaps=None,
    oldmatcher=None,
    matcher=None,
    ellipses=False,
    shallow=False,
    ellipsisroots=None,
    fullnodes=None,
):
    """Return a packer instance for the given changegroup ``version``.

    ``matcher`` defaults to matching everything, ``oldmatcher`` (files the
    client already has) to matching nothing.  Raises ProgrammingError if a
    sparse matcher is used with version 01, and Abort if ellipsis mode is
    requested with a pre-cg3 version.
    """
    assert version in supportedoutgoingversions(repo)

    if matcher is None:
        matcher = matchmod.always()
    if oldmatcher is None:
        oldmatcher = matchmod.never()

    if version == b'01' and not matcher.always():
        raise error.ProgrammingError(
            b'version 01 changegroups do not support sparse file matchers'
        )

    if ellipses and version in (b'01', b'02'):
        raise error.Abort(
            _(
                b'ellipsis nodes require at least cg3 on client and server, '
                b'but negotiated version %s'
            )
            % version
        )

    # Requested files could include files not in the local store. So
    # filter those out.
    matcher = repo.narrowmatch(matcher)

    fn = _packermap[version][0]
    return fn(
        repo,
        oldmatcher,
        matcher,
        bundlecaps,
        ellipses=ellipses,
        shallow=shallow,
        ellipsisroots=ellipsisroots,
        fullnodes=fullnodes,
    )
1598 1598
1599 1599
def getunbundler(version, fh, alg, extras=None):
    """Return an unpacker for changegroup ``version`` reading from ``fh``.

    ``alg`` and ``extras`` are passed through to the unpacker constructor
    unchanged.
    """
    return _packermap[version][1](fh, alg, extras=extras)
1602 1602
1603 1603
1604 1604 def _changegroupinfo(repo, nodes, source):
1605 1605 if repo.ui.verbose or source == b'bundle':
1606 1606 repo.ui.status(_(b"%d changesets found\n") % len(nodes))
1607 1607 if repo.ui.debugflag:
1608 1608 repo.ui.debug(b"list of changesets:\n")
1609 1609 for node in nodes:
1610 1610 repo.ui.debug(b"%s\n" % hex(node))
1611 1611
1612 1612
def makechangegroup(
    repo, outgoing, version, source, fastpath=False, bundlecaps=None
):
    """Generate a changegroup for ``outgoing`` and wrap it in an unbundler."""
    stream = makestream(
        repo,
        outgoing,
        version,
        source,
        fastpath=fastpath,
        bundlecaps=bundlecaps,
    )
    extras = {b'clcount': len(outgoing.missing)}
    return getunbundler(version, util.chunkbuffer(stream), None, extras)
1630 1630
1631 1631
def makestream(
    repo,
    outgoing,
    version,
    source,
    fastpath=False,
    bundlecaps=None,
    matcher=None,
):
    """Return a generator of changegroup chunks for ``outgoing``."""
    bundler = getbundler(version, repo, bundlecaps=bundlecaps, matcher=matcher)

    repo = repo.unfiltered()
    commonrevs = outgoing.common
    csets = outgoing.missing
    heads = outgoing.ancestorsof
    heads.sort()
    # We go through the fast path if we get told to, or if all (unfiltered
    # heads have been requested (since we then know there all linkrevs will
    # be pulled by the client).
    fastpathlinkrev = fastpath or (
        repo.filtername is None and heads == sorted(repo.heads())
    )

    repo.hook(b'preoutgoing', throw=True, source=source)
    _changegroupinfo(repo, csets, source)
    return bundler.generate(commonrevs, csets, fastpathlinkrev, source)
1658 1658
1659 1659
def _addchangegroupfiles(repo, source, revmap, trp, expectedfiles, needfiles):
    """Apply the filelog section of a changegroup to the repository.

    ``source`` is the unbundler being consumed, ``revmap`` maps linkrev
    nodes for addgroup, and ``trp`` is the transaction (proxy) the data is
    written under.  ``needfiles`` maps filenames to the set of filenodes
    the incoming changesets require; every required node must either
    arrive in the stream or already exist locally.

    Returns a ``(revisions, files)`` tuple counting what was added.
    Raises error.Abort on empty groups, censored delta bases, spurious
    entries, or missing file data.
    """
    revisions = 0
    files = 0
    progress = repo.ui.makeprogress(
        _(b'files'), unit=_(b'files'), total=expectedfiles
    )
    # Each iteration consumes one per-file delta group; an empty header
    # ({}) marks the end of the file section of the stream.
    for chunkdata in iter(source.filelogheader, {}):
        files += 1
        f = chunkdata[b"filename"]
        repo.ui.debug(b"adding %s revisions\n" % f)
        progress.increment()
        fl = repo.file(f)
        o = len(fl)
        try:
            deltas = source.deltaiter()
            if not fl.addgroup(deltas, revmap, trp):
                raise error.Abort(_(b"received file revlog group is empty"))
        except error.CensoredBaseError as e:
            raise error.Abort(_(b"received delta base is censored: %s") % e)
        revisions += len(fl) - o
        if f in needfiles:
            needs = needfiles[f]
            # Tick off each newly-added node against what we expected.
            for new in pycompat.xrange(o, len(fl)):
                n = fl.node(new)
                if n in needs:
                    needs.remove(n)
                else:
                    raise error.Abort(_(b"received spurious file revlog entry"))
            if not needs:
                del needfiles[f]
    progress.complete()

    # Anything left in needfiles did not arrive; it must already exist
    # locally or the repository would be left incomplete.
    for f, needs in pycompat.iteritems(needfiles):
        fl = repo.file(f)
        for n in needs:
            try:
                fl.rev(n)
            except error.LookupError:
                raise error.Abort(
                    _(b'missing file data for %s:%s - run hg verify')
                    % (f, hex(n))
                )

    return revisions, files
General Comments 0
You need to be logged in to leave comments. Login now