##// END OF EJS Templates
unbundle: free temporary objects after use...
Joerg Sonnenberger -
r46321:44d84b72 default
parent child Browse files
Show More
@@ -1,1689 +1,1693
1 1 # changegroup.py - Mercurial changegroup manipulation functions
2 2 #
3 3 # Copyright 2006 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import os
11 11 import struct
12 12 import weakref
13 13
14 14 from .i18n import _
15 15 from .node import (
16 16 hex,
17 17 nullid,
18 18 nullrev,
19 19 short,
20 20 )
21 21 from .pycompat import open
22 22
23 23 from . import (
24 24 error,
25 25 match as matchmod,
26 26 mdiff,
27 27 phases,
28 28 pycompat,
29 29 requirements,
30 30 scmutil,
31 31 util,
32 32 )
33 33
34 34 from .interfaces import repository
35 35
# Wire-format structs for the per-revision delta header, one per changegroup
# version.  v1 is four 20-byte nodes (node, p1, p2, linknode); v2 adds an
# explicit delta-base node; v3 additionally appends a 16-bit flags field
# (hence the explicit big-endian ">" marker).
_CHANGEGROUPV1_DELTA_HEADER = struct.Struct(b"20s20s20s20s")
_CHANGEGROUPV2_DELTA_HEADER = struct.Struct(b"20s20s20s20s20s")
_CHANGEGROUPV3_DELTA_HEADER = struct.Struct(b">20s20s20s20s20sH")

# Repository requirement string advertising large-file-storage support.
LFS_REQUIREMENT = b'lfs'

# Local alias: read exactly N bytes from a stream or abort.
readexactly = util.readexactly
43 43
44 44
def getchunk(stream):
    """Read the next changegroup chunk from *stream* and return its payload.

    A chunk on the wire is a big-endian 4-byte signed length (which counts
    itself) followed by the payload.  A length of zero is the terminator
    chunk and yields ``b""``; any other length <= 4 is malformed.
    """
    header = readexactly(stream, 4)
    (length,) = struct.unpack(b">l", header)
    if length > 4:
        return readexactly(stream, length - 4)
    if length:
        raise error.Abort(_(b"invalid chunk length %d") % length)
    return b""
54 54
55 55
def chunkheader(length):
    """Return the 4-byte wire header for a chunk carrying *length* payload bytes.

    The encoded value counts the header itself, hence the extra 4.
    """
    return struct.pack(b">l", 4 + length)
59 59
60 60
def closechunk():
    """Return the terminator: the header of a zero-length chunk."""
    # Equivalent to struct.pack(b">l", 0); spelled out as a constant.
    return b'\x00\x00\x00\x00'
64 64
65 65
def _fileheader(path):
    """Build the changegroup chunk header announcing the named *path*."""
    return b''.join([chunkheader(len(path)), path])
69 69
70 70
def writechunks(ui, chunks, filename, vfs=None):
    """Write *chunks* to a file and return the filename used.

    The stream is assumed to be a bundle file.  Existing files will not be
    overwritten.  When *filename* is falsy, a temporary file is created and
    its generated name is returned instead; the file is removed again if
    writing fails part way through.
    """
    fh = None
    # Path to unlink on failure; cleared once every chunk has been written.
    cleanup = None
    try:
        if not filename:
            fd, filename = pycompat.mkstemp(prefix=b"hg-bundle-", suffix=b".hg")
            fh = os.fdopen(fd, "wb")
            cleanup = filename
        elif vfs:
            fh = vfs.open(filename, b"wb")
        else:
            # Increase default buffer size because default is usually
            # small (4k is common on Linux).
            fh = open(filename, b"wb", 131072)
        for chunk in chunks:
            fh.write(chunk)
        cleanup = None
        return filename
    finally:
        if fh is not None:
            fh.close()
        if cleanup is not None:
            if filename and vfs:
                vfs.unlink(cleanup)
            else:
                os.unlink(cleanup)
104 104
105 105
class cg1unpacker(object):
    """Unpacker for cg1 changegroup streams.

    A changegroup unpacker handles the framing of the revision data in
    the wire format. Most consumers will want to use the apply()
    method to add the changes from the changegroup to a repository.

    If you're forwarding a changegroup unmodified to another consumer,
    use getchunks(), which returns an iterator of changegroup
    chunks. This is mostly useful for cases where you need to know the
    data stream has ended by observing the end of the changegroup.

    deltachunk() is useful only if you're applying delta data. Most
    consumers should prefer apply() instead.

    A few other public methods exist. Those are used only for
    bundlerepo and some debug commands - their use is discouraged.
    """

    # Wire struct describing each delta's header; overridden by cg2/cg3.
    deltaheader = _CHANGEGROUPV1_DELTA_HEADER
    deltaheadersize = deltaheader.size
    version = b'01'
    _grouplistcount = 1  # One list of files after the manifests

    def __init__(self, fh, alg, extras=None):
        """Wrap stream ``fh``, decompressing with bundle compression ``alg``.

        ``alg`` of None is treated as uncompressed (b'UN'); an unknown
        algorithm raises Abort.
        """
        if alg is None:
            alg = b'UN'
        if alg not in util.compengines.supportedbundletypes:
            raise error.Abort(_(b'unknown stream compression type: %s') % alg)
        if alg == b'BZ':
            # NOTE(review): presumably the b'BZ' magic was already consumed
            # during bundle-type detection, so a decompressor variant that
            # does not expect the header is used — confirm against callers.
            alg = b'_truncatedBZ'

        compengine = util.compengines.forbundletype(alg)
        self._stream = compengine.decompressorreader(fh)
        self._type = alg
        self.extras = extras or {}
        # Optional progress callback, invoked once per chunk read.
        self.callback = None

    # These methods (compressed, read, seek, tell) all appear to only
    # be used by bundlerepo, but it's a little hard to tell.
    def compressed(self):
        return self._type is not None and self._type != b'UN'

    def read(self, l):
        return self._stream.read(l)

    def seek(self, pos):
        return self._stream.seek(pos)

    def tell(self):
        return self._stream.tell()

    def close(self):
        return self._stream.close()

    def _chunklength(self):
        """Read a chunk's length header and return the payload size.

        Returns 0 for a terminator chunk; aborts on a malformed length.
        Also fires the progress callback, if any.
        """
        d = readexactly(self._stream, 4)
        l = struct.unpack(b">l", d)[0]
        if l <= 4:
            if l:
                raise error.Abort(_(b"invalid chunk length %d") % l)
            return 0
        if self.callback:
            self.callback()
        return l - 4

    def changelogheader(self):
        """v10 does not have a changelog header chunk"""
        return {}

    def manifestheader(self):
        """v10 does not have a manifest header chunk"""
        return {}

    def filelogheader(self):
        """return the header of the filelogs chunk, v10 only has the filename"""
        l = self._chunklength()
        if not l:
            return {}
        fname = readexactly(self._stream, l)
        return {b'filename': fname}

    def _deltaheader(self, headertuple, prevnode):
        """Decode a cg1 delta header.

        cg1 has no explicit delta base on the wire: the base is the
        previous node in the stream, or p1 for the first delta.
        """
        node, p1, p2, cs = headertuple
        if prevnode is None:
            deltabase = p1
        else:
            deltabase = prevnode
        flags = 0
        return node, p1, p2, deltabase, cs, flags

    def deltachunk(self, prevnode):
        """Read one delta chunk.

        Returns {} at the end of a group, otherwise the tuple
        (node, p1, p2, cs, deltabase, delta, flags).
        """
        l = self._chunklength()
        if not l:
            return {}
        headerdata = readexactly(self._stream, self.deltaheadersize)
        header = self.deltaheader.unpack(headerdata)
        delta = readexactly(self._stream, l - self.deltaheadersize)
        node, p1, p2, deltabase, cs, flags = self._deltaheader(header, prevnode)
        return (node, p1, p2, cs, deltabase, delta, flags)

    def getchunks(self):
        """return all the chunks contained in the bundle

        Used when you need to forward the binary stream to a file or another
        network API. To do so, it parses the changegroup data so that it does
        not block on an sshrepo stream whose end is otherwise unknown.
        """
        # For changegroup 1 and 2, we expect 3 parts: changelog, manifestlog,
        # and a list of filelogs. For changegroup 3, we expect 4 parts:
        # changelog, manifestlog, a list of tree manifestlogs, and a list of
        # filelogs.
        #
        # Changelog and manifestlog parts are terminated with empty chunks. The
        # tree and file parts are a list of entry sections. Each entry section
        # is a series of chunks terminating in an empty chunk. The list of these
        # entry sections is terminated in yet another empty chunk, so we know
        # we've reached the end of the tree/file list when we reach an empty
        # chunk that was proceeded by no non-empty chunks.

        parts = 0
        while parts < 2 + self._grouplistcount:
            noentries = True
            while True:
                chunk = getchunk(self)
                if not chunk:
                    # The first two empty chunks represent the end of the
                    # changelog and the manifestlog portions. The remaining
                    # empty chunks represent either A) the end of individual
                    # tree or file entries in the file list, or B) the end of
                    # the entire list. It's the end of the entire list if there
                    # were no entries (i.e. noentries is True).
                    if parts < 2:
                        parts += 1
                    elif noentries:
                        parts += 1
                    break
                noentries = False
                yield chunkheader(len(chunk))
                pos = 0
                # Re-emit the payload in 1 MiB slices to bound memory use.
                while pos < len(chunk):
                    next = pos + 2 ** 20
                    yield chunk[pos:next]
                    pos = next
            yield closechunk()

    def _unpackmanifests(self, repo, revmap, trp, prog):
        """Consume the manifest group and add it to local manifest storage."""
        self.callback = prog.increment
        # no need to check for empty manifest group here:
        # if the result of the merge of 1 and 2 is the same in 3 and 4,
        # no new manifest will be created and the manifest group will
        # be empty during the pull
        self.manifestheader()
        deltas = self.deltaiter()
        repo.manifestlog.getstorage(b'').addgroup(deltas, revmap, trp)
        prog.complete()
        self.callback = None

    def apply(
        self,
        repo,
        tr,
        srctype,
        url,
        targetphase=phases.draft,
        expectedtotal=None,
    ):
        """Add the changegroup returned by source.read() to this repo.
        srctype is a string like 'push', 'pull', or 'unbundle'. url is
        the URL of the repo where this changegroup is coming from.

        Return an integer summarizing the change to this repo:
        - nothing changed or no source: 0
        - more heads than before: 1+added heads (2..n)
        - fewer heads than before: -1-removed heads (-2..-n)
        - number of heads stays the same: 1
        """
        repo = repo.unfiltered()

        def csmap(x):
            repo.ui.debug(b"add changeset %s\n" % short(x))
            return len(cl)

        def revmap(x):
            return cl.rev(x)

        try:
            # The transaction may already carry source information. In this
            # case we use the top level data. We overwrite the argument
            # because we need to use the top level value (if they exist)
            # in this function.
            srctype = tr.hookargs.setdefault(b'source', srctype)
            tr.hookargs.setdefault(b'url', url)
            repo.hook(
                b'prechangegroup', throw=True, **pycompat.strkwargs(tr.hookargs)
            )

            # write changelog data to temp files so concurrent readers
            # will not see an inconsistent view
            cl = repo.changelog
            cl.delayupdate(tr)
            oldheads = set(cl.heads())

            trp = weakref.proxy(tr)
            # pull off the changeset group
            repo.ui.status(_(b"adding changesets\n"))
            clstart = len(cl)
            progress = repo.ui.makeprogress(
                _(b'changesets'), unit=_(b'chunks'), total=expectedtotal
            )
            self.callback = progress.increment

            # Set of files touched by incoming changesets; its size feeds the
            # file-count estimate used while adding file changes below.
            efilesset = set()

            def onchangelog(cl, node):
                efilesset.update(cl.readfiles(node))

            self.changelogheader()
            deltas = self.deltaiter()
            cgnodes = cl.addgroup(deltas, csmap, trp, addrevisioncb=onchangelog)
            efiles = len(efilesset)

            if not cgnodes:
                repo.ui.develwarn(
                    b'applied empty changelog from changegroup',
                    config=b'warn-empty-changegroup',
                )
            clend = len(cl)
            changesets = clend - clstart
            progress.complete()
            # Free large temporaries before moving on to the manifest/file
            # phases; they are no longer needed and can hold a lot of memory.
            del deltas
            # TODO Python 2.7 removal
            # del efilesset
            efilesset = None
            self.callback = None

            # pull off the manifest group
            repo.ui.status(_(b"adding manifests\n"))
            # We know that we'll never have more manifests than we had
            # changesets.
            progress = repo.ui.makeprogress(
                _(b'manifests'), unit=_(b'chunks'), total=changesets
            )
            self._unpackmanifests(repo, revmap, trp, progress)

            needfiles = {}
            if repo.ui.configbool(b'server', b'validate'):
                cl = repo.changelog
                ml = repo.manifestlog
                # validate incoming csets have their manifests
                for cset in pycompat.xrange(clstart, clend):
                    mfnode = cl.changelogrevision(cset).manifest
                    mfest = ml[mfnode].readdelta()
                    # store file cgnodes we must see
                    for f, n in pycompat.iteritems(mfest):
                        needfiles.setdefault(f, set()).add(n)

            # process the files
            repo.ui.status(_(b"adding file changes\n"))
            newrevs, newfiles = _addchangegroupfiles(
                repo, self, revmap, trp, efiles, needfiles
            )

            # making sure the value exists
            tr.changes.setdefault(b'changegroup-count-changesets', 0)
            tr.changes.setdefault(b'changegroup-count-revisions', 0)
            tr.changes.setdefault(b'changegroup-count-files', 0)
            tr.changes.setdefault(b'changegroup-count-heads', 0)

            # some code use bundle operation for internal purpose. They usually
            # set `ui.quiet` to do this outside of user sight. Size the report
            # of such operation now happens at the end of the transaction, that
            # ui.quiet has not direct effect on the output.
            #
            # To preserve this intend use an inelegant hack, we fail to report
            # the change if `quiet` is set. We should probably move to
            # something better, but this is a good first step to allow the "end
            # of transaction report" to pass tests.
            if not repo.ui.quiet:
                tr.changes[b'changegroup-count-changesets'] += changesets
                tr.changes[b'changegroup-count-revisions'] += newrevs
                tr.changes[b'changegroup-count-files'] += newfiles

            deltaheads = 0
            if oldheads:
                heads = cl.heads()
                deltaheads += len(heads) - len(oldheads)
                for h in heads:
                    # A new head that closes its branch does not count.
                    if h not in oldheads and repo[h].closesbranch():
                        deltaheads -= 1

            # see previous comment about checking ui.quiet
            if not repo.ui.quiet:
                tr.changes[b'changegroup-count-heads'] += deltaheads
            repo.invalidatevolatilesets()

            if changesets > 0:
                if b'node' not in tr.hookargs:
                    tr.hookargs[b'node'] = hex(cl.node(clstart))
                    tr.hookargs[b'node_last'] = hex(cl.node(clend - 1))
                    hookargs = dict(tr.hookargs)
                else:
                    hookargs = dict(tr.hookargs)
                    hookargs[b'node'] = hex(cl.node(clstart))
                    hookargs[b'node_last'] = hex(cl.node(clend - 1))
                repo.hook(
                    b'pretxnchangegroup',
                    throw=True,
                    **pycompat.strkwargs(hookargs)
                )

            added = [cl.node(r) for r in pycompat.xrange(clstart, clend)]
            phaseall = None
            if srctype in (b'push', b'serve'):
                # Old servers can not push the boundary themselves.
                # New servers won't push the boundary if changeset already
                # exists locally as secret
                #
                # We should not use added here but the list of all change in
                # the bundle
                if repo.publishing():
                    targetphase = phaseall = phases.public
                else:
                    # closer target phase computation

                    # Those changesets have been pushed from the
                    # outside, their phases are going to be pushed
                    # alongside. Therefor `targetphase` is
                    # ignored.
                    targetphase = phaseall = phases.draft
            if added:
                phases.registernew(repo, tr, targetphase, added)
            if phaseall is not None:
                phases.advanceboundary(repo, tr, phaseall, cgnodes)

            if changesets > 0:

                def runhooks(unused_success):
                    # These hooks run when the lock releases, not when the
                    # transaction closes. So it's possible for the changelog
                    # to have changed since we last saw it.
                    if clstart >= len(repo):
                        return

                    repo.hook(b"changegroup", **pycompat.strkwargs(hookargs))

                    for n in added:
                        args = hookargs.copy()
                        args[b'node'] = hex(n)
                        del args[b'node_last']
                        repo.hook(b"incoming", **pycompat.strkwargs(args))

                    newheads = [h for h in repo.heads() if h not in oldheads]
                    repo.ui.log(
                        b"incoming",
                        b"%d incoming changes - new heads: %s\n",
                        len(added),
                        b', '.join([hex(c[:6]) for c in newheads]),
                    )

                tr.addpostclose(
                    b'changegroup-runhooks-%020i' % clstart,
                    lambda tr: repo._afterlock(runhooks),
                )
        finally:
            repo.ui.flush()
        # never return 0 here:
        if deltaheads < 0:
            ret = deltaheads - 1
        else:
            ret = deltaheads + 1
        return ret

    def deltaiter(self):
        """
        returns an iterator of the deltas in this changegroup

        Useful for passing to the underlying storage system to be stored.
        """
        chain = None
        for chunkdata in iter(lambda: self.deltachunk(chain), {}):
            # Chunkdata: (node, p1, p2, cs, deltabase, delta, flags)
            yield chunkdata
            chain = chunkdata[0]
486 490
487 491
class cg2unpacker(cg1unpacker):
    """Unpacker for cg2 streams.

    cg2 streams add support for generaldelta: the delta base is named
    explicitly in the wire header instead of being implied by stream
    position.  All other features about the data remain the same.
    """

    deltaheader = _CHANGEGROUPV2_DELTA_HEADER
    deltaheadersize = deltaheader.size
    version = b'02'

    def _deltaheader(self, headertuple, prevnode):
        # The base travels on the wire; cg2 still has no revlog flags.
        node, p1, p2, deltabase, cs = headertuple
        return node, p1, p2, deltabase, cs, 0
504 508
505 509
class cg3unpacker(cg2unpacker):
    """Unpacker for cg3 streams.

    cg3 streams add support for exchanging treemanifests and revlog
    flags: the delta header grows a flags field, and an extra empty
    chunk separates manifests from files.
    """

    deltaheader = _CHANGEGROUPV3_DELTA_HEADER
    deltaheadersize = deltaheader.size
    version = b'03'
    _grouplistcount = 2  # One list of manifests and one list of files

    def _deltaheader(self, headertuple, prevnode):
        # The v3 header already carries everything, flags included.
        node, p1, p2, deltabase, cs, flags = headertuple
        return node, p1, p2, deltabase, cs, flags

    def _unpackmanifests(self, repo, revmap, trp, prog):
        super(cg3unpacker, self)._unpackmanifests(repo, revmap, trp, prog)
        while True:
            chunkdata = self.filelogheader()
            if not chunkdata:
                break
            # If we get here, there are directory manifests in the changegroup
            d = chunkdata[b"filename"]
            repo.ui.debug(b"adding %s revisions\n" % d)
            deltas = self.deltaiter()
            if not repo.manifestlog.getstorage(d).addgroup(deltas, revmap, trp):
                raise error.Abort(_(b"received dir revlog group is empty"))
532 536
533 537
class headerlessfixup(object):
    """Replay already-consumed leading bytes in front of a stream.

    Wraps stream *fh*, serving the buffered prefix *h* first and then
    reading from the underlying stream.
    """

    def __init__(self, fh, h):
        self._h = h
        self._fh = fh

    def read(self, n):
        buffered = self._h
        if not buffered:
            return readexactly(self._fh, n)
        d = buffered[:n]
        self._h = buffered[n:]
        if len(d) < n:
            # Prefix exhausted mid-request; top up from the real stream.
            d += readexactly(self._fh, n - len(d))
        return d
546 550
547 551
def _revisiondeltatochunks(delta, headerfn):
    """Serialize a revisiondelta to changegroup chunks.

    The captured revision delta may be encoded as a delta against a base
    revision or as a full revision.  The changegroup format requires that
    everything on the wire be a delta, so full revisions are prefixed with
    a synthetic diff header meaning "replace everything".
    """
    if delta.delta is not None:
        # Already a delta; ship as-is with no synthetic prefix.
        prefix = b''
        data = delta.delta
    else:
        data = delta.revision
        if delta.basenode == nullid:
            # Full revision against the null base.
            prefix = mdiff.trivialdiffheader(len(data))
        else:
            # Full revision replacing a non-null base of known size.
            prefix = mdiff.replacediffheader(delta.baserevisionsize, len(data))

    meta = headerfn(delta)

    yield chunkheader(len(meta) + len(prefix) + len(data))
    yield meta
    if prefix:
        yield prefix
    yield data
573 577
574 578
575 579 def _sortnodesellipsis(store, nodes, cl, lookup):
576 580 """Sort nodes for changegroup generation."""
577 581 # Ellipses serving mode.
578 582 #
579 583 # In a perfect world, we'd generate better ellipsis-ified graphs
580 584 # for non-changelog revlogs. In practice, we haven't started doing
581 585 # that yet, so the resulting DAGs for the manifestlog and filelogs
582 586 # are actually full of bogus parentage on all the ellipsis
583 587 # nodes. This has the side effect that, while the contents are
584 588 # correct, the individual DAGs might be completely out of whack in
585 589 # a case like 882681bc3166 and its ancestors (back about 10
586 590 # revisions or so) in the main hg repo.
587 591 #
588 592 # The one invariant we *know* holds is that the new (potentially
589 593 # bogus) DAG shape will be valid if we order the nodes in the
590 594 # order that they're introduced in dramatis personae by the
591 595 # changelog, so what we do is we sort the non-changelog histories
592 596 # by the order in which they are used by the changelog.
593 597 key = lambda n: cl.rev(lookup(n))
594 598 return sorted(nodes, key=key)
595 599
596 600
def _resolvenarrowrevisioninfo(
    cl,
    store,
    ischangelog,
    rev,
    linkrev,
    linknode,
    clrevtolocalrev,
    fullclnodes,
    precomputedellipsis,
):
    """Compute (p1node, p2node, linknode) for an ellipsis revision.

    The parents recorded for *rev* come from the precomputed ellipsis
    graph (``precomputedellipsis[linkrev]``, changelog revnums) rather
    than from the store, translated back into revisions of this revlog.
    """
    linkparents = precomputedellipsis[linkrev]

    def local(clrev):
        """Turn a changelog revnum into a local revnum.

        The ellipsis dag is stored as revnums on the changelog,
        but when we're producing ellipsis entries for
        non-changelog revlogs, we need to turn those numbers into
        something local. This does that for us, and during the
        changelog sending phase will also expand the stored
        mappings as needed.
        """
        if clrev == nullrev:
            return nullrev

        if ischangelog:
            return clrev

        # Walk the ellipsis-ized changelog breadth-first looking for a
        # change that has been linked from the current revlog.
        #
        # For a flat manifest revlog only a single step should be necessary
        # as all relevant changelog entries are relevant to the flat
        # manifest.
        #
        # For a filelog or tree manifest dirlog however not every changelog
        # entry will have been relevant, so we need to skip some changelog
        # nodes even after ellipsis-izing.
        walk = [clrev]
        while walk:
            p = walk[0]
            walk = walk[1:]
            if p in clrevtolocalrev:
                return clrevtolocalrev[p]
            elif p in fullclnodes:
                walk.extend([pp for pp in cl.parentrevs(p) if pp != nullrev])
            elif p in precomputedellipsis:
                walk.extend(
                    [pp for pp in precomputedellipsis[p] if pp != nullrev]
                )
            else:
                # In this case, we've got an ellipsis with parents
                # outside the current bundle (likely an
                # incremental pull). We "know" that we can use the
                # value of this same revlog at whatever revision
                # is pointed to by linknode. "Know" is in scare
                # quotes because I haven't done enough examination
                # of edge cases to convince myself this is really
                # a fact - it works for all the (admittedly
                # thorough) cases in our testsuite, but I would be
                # somewhat unsurprised to find a case in the wild
                # where this breaks down a bit. That said, I don't
                # know if it would hurt anything.
                for i in pycompat.xrange(rev, 0, -1):
                    if store.linkrev(i) == clrev:
                        return i
                # We failed to resolve a parent for this node, so
                # we crash the changegroup construction.
                raise error.Abort(
                    b'unable to resolve parent while packing %r %r'
                    b' for changeset %r' % (store.indexfile, rev, clrev)
                )

        return nullrev

    # Degenerate shapes: no recorded parents, or the store itself says the
    # revision is parentless.
    if not linkparents or (store.parentrevs(rev) == (nullrev, nullrev)):
        p1, p2 = nullrev, nullrev
    elif len(linkparents) == 1:
        (p1,) = sorted(local(p) for p in linkparents)
        p2 = nullrev
    else:
        p1, p2 = sorted(local(p) for p in linkparents)

    p1node, p2node = store.node(p1), store.node(p2)

    return p1node, p2node, linknode
684 688
685 689
def deltagroup(
    repo,
    store,
    nodes,
    ischangelog,
    lookup,
    forcedeltaparentprev,
    topic=None,
    ellipses=False,
    clrevtolocalrev=None,
    fullclnodes=None,
    precomputedellipsis=None,
):
    """Calculate deltas for a set of revisions.

    Is a generator of ``revisiondelta`` instances.

    If topic is not None, progress detail will be generated using this
    topic name (e.g. changesets, manifests, etc).
    """
    if not nodes:
        return

    cl = repo.changelog

    if ischangelog:
        # `hg log` shows changesets in storage order. To preserve order
        # across clones, send out changesets in storage order.
        nodesorder = b'storage'
    elif ellipses:
        nodes = _sortnodesellipsis(store, nodes, cl, lookup)
        nodesorder = b'nodes'
    else:
        nodesorder = None

    # Perform ellipses filtering and revision massaging. We do this before
    # emitrevisions() because a) filtering out revisions creates less work
    # for emitrevisions() b) dropping revisions would break emitrevisions()'s
    # assumptions about delta choices and we would possibly send a delta
    # referencing a missing base revision.
    #
    # Also, calling lookup() has side-effects with regards to populating
    # data structures. If we don't call lookup() for each node or if we call
    # lookup() after the first pass through each node, things can break -
    # possibly intermittently depending on the python hash seed! For that
    # reason, we store a mapping of all linknodes during the initial node
    # pass rather than use lookup() on the output side.
    if ellipses:
        filtered = []
        adjustedparents = {}
        linknodes = {}

        for node in nodes:
            rev = store.rev(node)
            linknode = lookup(node)
            linkrev = cl.rev(linknode)
            clrevtolocalrev[linkrev] = rev

            # If linknode is in fullclnodes, it means the corresponding
            # changeset was a full changeset and is being sent unaltered.
            if linknode in fullclnodes:
                linknodes[node] = linknode

            # If the corresponding changeset wasn't in the set computed
            # as relevant to us, it should be dropped outright.
            elif linkrev not in precomputedellipsis:
                continue

            else:
                # We could probably do this later and avoid the dict
                # holding state. But it likely doesn't matter.
                p1node, p2node, linknode = _resolvenarrowrevisioninfo(
                    cl,
                    store,
                    ischangelog,
                    rev,
                    linkrev,
                    linknode,
                    clrevtolocalrev,
                    fullclnodes,
                    precomputedellipsis,
                )

                adjustedparents[node] = (p1node, p2node)
                linknodes[node] = linknode

            filtered.append(node)

        nodes = filtered

    # We expect the first pass to be fast, so we only engage the progress
    # meter for constructing the revision deltas.
    progress = None
    if topic is not None:
        progress = repo.ui.makeprogress(
            topic, unit=_(b'chunks'), total=len(nodes)
        )

    configtarget = repo.ui.config(b'devel', b'bundle.delta')
    if configtarget not in (b'', b'p1', b'full'):
        msg = _("""config "devel.bundle.delta" as unknown value: %s""")
        repo.ui.warn(msg % configtarget)

    # Decide how the storage layer may choose delta bases.
    deltamode = repository.CG_DELTAMODE_STD
    if forcedeltaparentprev:
        deltamode = repository.CG_DELTAMODE_PREV
    elif configtarget == b'p1':
        deltamode = repository.CG_DELTAMODE_P1
    elif configtarget == b'full':
        deltamode = repository.CG_DELTAMODE_FULL

    revisions = store.emitrevisions(
        nodes,
        nodesorder=nodesorder,
        revisiondata=True,
        assumehaveparentrevisions=not ellipses,
        deltamode=deltamode,
    )

    for i, revision in enumerate(revisions):
        if progress:
            progress.update(i + 1)

        if ellipses:
            linknode = linknodes[revision.node]

            # Patch in the parents computed during the first pass and flag
            # the revision as an ellipsis node.
            if revision.node in adjustedparents:
                p1node, p2node = adjustedparents[revision.node]
                revision.p1node = p1node
                revision.p2node = p2node
                revision.flags |= repository.REVISION_FLAG_ELLIPSIS

        else:
            linknode = lookup(revision.node)

        revision.linknode = linknode
        yield revision

    if progress:
        progress.complete()
826 830
827 831
828 832 class cgpacker(object):
829 833 def __init__(
830 834 self,
831 835 repo,
832 836 oldmatcher,
833 837 matcher,
834 838 version,
835 839 builddeltaheader,
836 840 manifestsend,
837 841 forcedeltaparentprev=False,
838 842 bundlecaps=None,
839 843 ellipses=False,
840 844 shallow=False,
841 845 ellipsisroots=None,
842 846 fullnodes=None,
843 847 ):
844 848 """Given a source repo, construct a bundler.
845 849
846 850 oldmatcher is a matcher that matches on files the client already has.
847 851 These will not be included in the changegroup.
848 852
849 853 matcher is a matcher that matches on files to include in the
850 854 changegroup. Used to facilitate sparse changegroups.
851 855
852 856 forcedeltaparentprev indicates whether delta parents must be against
853 857 the previous revision in a delta group. This should only be used for
854 858 compatibility with changegroup version 1.
855 859
856 860 builddeltaheader is a callable that constructs the header for a group
857 861 delta.
858 862
859 863 manifestsend is a chunk to send after manifests have been fully emitted.
860 864
861 865 ellipses indicates whether ellipsis serving mode is enabled.
862 866
863 867 bundlecaps is optional and can be used to specify the set of
864 868 capabilities which can be used to build the bundle. While bundlecaps is
865 869 unused in core Mercurial, extensions rely on this feature to communicate
866 870 capabilities to customize the changegroup packer.
867 871
868 872 shallow indicates whether shallow data might be sent. The packer may
869 873 need to pack file contents not introduced by the changes being packed.
870 874
871 875 fullnodes is the set of changelog nodes which should not be ellipsis
872 876 nodes. We store this rather than the set of nodes that should be
873 877 ellipsis because for very large histories we expect this to be
874 878 significantly smaller.
875 879 """
876 880 assert oldmatcher
877 881 assert matcher
878 882 self._oldmatcher = oldmatcher
879 883 self._matcher = matcher
880 884
881 885 self.version = version
882 886 self._forcedeltaparentprev = forcedeltaparentprev
883 887 self._builddeltaheader = builddeltaheader
884 888 self._manifestsend = manifestsend
885 889 self._ellipses = ellipses
886 890
887 891 # Set of capabilities we can use to build the bundle.
888 892 if bundlecaps is None:
889 893 bundlecaps = set()
890 894 self._bundlecaps = bundlecaps
891 895 self._isshallow = shallow
892 896 self._fullclnodes = fullnodes
893 897
894 898 # Maps ellipsis revs to their roots at the changelog level.
895 899 self._precomputedellipsis = ellipsisroots
896 900
897 901 self._repo = repo
898 902
899 903 if self._repo.ui.verbose and not self._repo.ui.debugflag:
900 904 self._verbosenote = self._repo.ui.note
901 905 else:
902 906 self._verbosenote = lambda s: None
903 907
904 908 def generate(
905 909 self, commonrevs, clnodes, fastpathlinkrev, source, changelog=True
906 910 ):
907 911 """Yield a sequence of changegroup byte chunks.
908 912 If changelog is False, changelog data won't be added to changegroup
909 913 """
910 914
911 915 repo = self._repo
912 916 cl = repo.changelog
913 917
914 918 self._verbosenote(_(b'uncompressed size of bundle content:\n'))
915 919 size = 0
916 920
917 921 clstate, deltas = self._generatechangelog(
918 922 cl, clnodes, generate=changelog
919 923 )
920 924 for delta in deltas:
921 925 for chunk in _revisiondeltatochunks(delta, self._builddeltaheader):
922 926 size += len(chunk)
923 927 yield chunk
924 928
925 929 close = closechunk()
926 930 size += len(close)
927 931 yield closechunk()
928 932
929 933 self._verbosenote(_(b'%8.i (changelog)\n') % size)
930 934
931 935 clrevorder = clstate[b'clrevorder']
932 936 manifests = clstate[b'manifests']
933 937 changedfiles = clstate[b'changedfiles']
934 938
935 939 # We need to make sure that the linkrev in the changegroup refers to
936 940 # the first changeset that introduced the manifest or file revision.
937 941 # The fastpath is usually safer than the slowpath, because the filelogs
938 942 # are walked in revlog order.
939 943 #
940 944 # When taking the slowpath when the manifest revlog uses generaldelta,
941 945 # the manifest may be walked in the "wrong" order. Without 'clrevorder',
942 946 # we would get an incorrect linkrev (see fix in cc0ff93d0c0c).
943 947 #
944 948 # When taking the fastpath, we are only vulnerable to reordering
945 949 # of the changelog itself. The changelog never uses generaldelta and is
946 950 # never reordered. To handle this case, we simply take the slowpath,
947 951 # which already has the 'clrevorder' logic. This was also fixed in
948 952 # cc0ff93d0c0c.
949 953
950 954 # Treemanifests don't work correctly with fastpathlinkrev
951 955 # either, because we don't discover which directory nodes to
952 956 # send along with files. This could probably be fixed.
953 957 fastpathlinkrev = fastpathlinkrev and not scmutil.istreemanifest(repo)
954 958
955 959 fnodes = {} # needed file nodes
956 960
957 961 size = 0
958 962 it = self.generatemanifests(
959 963 commonrevs,
960 964 clrevorder,
961 965 fastpathlinkrev,
962 966 manifests,
963 967 fnodes,
964 968 source,
965 969 clstate[b'clrevtomanifestrev'],
966 970 )
967 971
968 972 for tree, deltas in it:
969 973 if tree:
970 974 assert self.version == b'03'
971 975 chunk = _fileheader(tree)
972 976 size += len(chunk)
973 977 yield chunk
974 978
975 979 for delta in deltas:
976 980 chunks = _revisiondeltatochunks(delta, self._builddeltaheader)
977 981 for chunk in chunks:
978 982 size += len(chunk)
979 983 yield chunk
980 984
981 985 close = closechunk()
982 986 size += len(close)
983 987 yield close
984 988
985 989 self._verbosenote(_(b'%8.i (manifests)\n') % size)
986 990 yield self._manifestsend
987 991
988 992 mfdicts = None
989 993 if self._ellipses and self._isshallow:
990 994 mfdicts = [
991 995 (self._repo.manifestlog[n].read(), lr)
992 996 for (n, lr) in pycompat.iteritems(manifests)
993 997 ]
994 998
995 999 manifests.clear()
996 1000 clrevs = {cl.rev(x) for x in clnodes}
997 1001
998 1002 it = self.generatefiles(
999 1003 changedfiles,
1000 1004 commonrevs,
1001 1005 source,
1002 1006 mfdicts,
1003 1007 fastpathlinkrev,
1004 1008 fnodes,
1005 1009 clrevs,
1006 1010 )
1007 1011
1008 1012 for path, deltas in it:
1009 1013 h = _fileheader(path)
1010 1014 size = len(h)
1011 1015 yield h
1012 1016
1013 1017 for delta in deltas:
1014 1018 chunks = _revisiondeltatochunks(delta, self._builddeltaheader)
1015 1019 for chunk in chunks:
1016 1020 size += len(chunk)
1017 1021 yield chunk
1018 1022
1019 1023 close = closechunk()
1020 1024 size += len(close)
1021 1025 yield close
1022 1026
1023 1027 self._verbosenote(_(b'%8.i %s\n') % (size, path))
1024 1028
1025 1029 yield closechunk()
1026 1030
1027 1031 if clnodes:
1028 1032 repo.hook(b'outgoing', node=hex(clnodes[0]), source=source)
1029 1033
    def _generatechangelog(self, cl, nodes, generate=True):
        """Generate data for changelog chunks.

        Returns a 2-tuple of a dict containing state and an iterable of
        byte chunks. The state will not be fully populated until the
        chunk stream has been fully consumed.

        if generate is False, the state will be fully populated and no chunk
        stream will be yielded
        """
        # node -> position in the emission order; later used to pick the
        # earliest changeset that introduced a manifest/file node.
        clrevorder = {}
        # manifest node -> first changelog node introducing it
        manifests = {}
        mfl = self._repo.manifestlog
        # union of c.files across all emitted changesets
        changedfiles = set()
        # cl rev -> lowest manifest rev (narrow/ellipsis bookkeeping only)
        clrevtomanifestrev = {}

        # The caller holds references to these mutable containers; they are
        # filled in lazily by lookupcl as the delta stream is consumed.
        state = {
            b'clrevorder': clrevorder,
            b'manifests': manifests,
            b'changedfiles': changedfiles,
            b'clrevtomanifestrev': clrevtomanifestrev,
        }

        if not (generate or self._ellipses):
            # No chunk stream wanted: populate the state eagerly instead.
            # sort the nodes in storage order
            nodes = sorted(nodes, key=cl.rev)
            for node in nodes:
                c = cl.changelogrevision(node)
                clrevorder[node] = len(clrevorder)
                # record the first changeset introducing this manifest version
                manifests.setdefault(c.manifest, node)
                # Record a complete list of potentially-changed files in
                # this manifest.
                changedfiles.update(c.files)

            return state, ()

        # Callback for the changelog, used to collect changed files and
        # manifest nodes.
        # Returns the linkrev node (identity in the changelog case).
        def lookupcl(x):
            c = cl.changelogrevision(x)
            clrevorder[x] = len(clrevorder)

            if self._ellipses:
                # Only update manifests if x is going to be sent. Otherwise we
                # end up with bogus linkrevs specified for manifests and
                # we skip some manifest nodes that we should otherwise
                # have sent.
                if (
                    x in self._fullclnodes
                    or cl.rev(x) in self._precomputedellipsis
                ):

                    manifestnode = c.manifest
                    # Record the first changeset introducing this manifest
                    # version.
                    manifests.setdefault(manifestnode, x)
                    # Set this narrow-specific dict so we have the lowest
                    # manifest revnum to look up for this cl revnum. (Part of
                    # mapping changelog ellipsis parents to manifest ellipsis
                    # parents)
                    clrevtomanifestrev.setdefault(
                        cl.rev(x), mfl.rev(manifestnode)
                    )
                # We can't trust the changed files list in the changeset if the
                # client requested a shallow clone.
                if self._isshallow:
                    changedfiles.update(mfl[c.manifest].read().keys())
                else:
                    changedfiles.update(c.files)
            else:
                # record the first changeset introducing this manifest version
                manifests.setdefault(c.manifest, x)
                # Record a complete list of potentially-changed files in
                # this manifest.
                changedfiles.update(c.files)

            return x

        gen = deltagroup(
            self._repo,
            cl,
            nodes,
            True,
            lookupcl,
            self._forcedeltaparentprev,
            ellipses=self._ellipses,
            topic=_(b'changesets'),
            clrevtolocalrev={},
            fullclnodes=self._fullclnodes,
            precomputedellipsis=self._precomputedellipsis,
        )

        return state, gen
1125 1129
    def generatemanifests(
        self,
        commonrevs,
        clrevorder,
        fastpathlinkrev,
        manifests,
        fnodes,
        source,
        clrevtolocalrev,
    ):
        """Returns an iterator of changegroup chunks containing manifests.

        Yields (tree, deltas) pairs, where tree is b'' for the root
        manifest and a b'/'-terminated path for subdirectory manifests.
        As a side effect, populates ``fnodes`` (file path -> {filenode:
        linknode}) for the subsequent file phase.

        `source` is unused here, but is used by extensions like remotefilelog to
        change what is sent based in pulls vs pushes, etc.
        """
        repo = self._repo
        mfl = repo.manifestlog
        # Worklist of tree path -> {manifest node: linknode}; subdirectory
        # entries are discovered while the root deltas are consumed.
        tmfnodes = {b'': manifests}

        # Callback for the manifest, used to collect linkrevs for filelog
        # revisions.
        # Returns the linkrev node (collected in lookupcl).
        def makelookupmflinknode(tree, nodes):
            if fastpathlinkrev:
                assert not tree
                return (
                    manifests.__getitem__
                )  # pytype: disable=unsupported-operands

            def lookupmflinknode(x):
                """Callback for looking up the linknode for manifests.

                Returns the linkrev node for the specified manifest.

                SIDE EFFECT:

                1) fclnodes gets populated with the list of relevant
                file nodes if we're not using fastpathlinkrev
                2) When treemanifests are in use, collects treemanifest nodes
                to send

                Note that this means manifests must be completely sent to
                the client before you can trust the list of files and
                treemanifests to send.
                """
                clnode = nodes[x]
                mdata = mfl.get(tree, x).readfast(shallow=True)
                for p, n, fl in mdata.iterentries():
                    if fl == b't':  # subdirectory manifest
                        subtree = tree + p + b'/'
                        tmfclnodes = tmfnodes.setdefault(subtree, {})
                        tmfclnode = tmfclnodes.setdefault(n, clnode)
                        # Keep the earliest introducing changeset as linknode.
                        if clrevorder[clnode] < clrevorder[tmfclnode]:
                            tmfclnodes[n] = clnode
                    else:
                        f = tree + p
                        fclnodes = fnodes.setdefault(f, {})
                        fclnode = fclnodes.setdefault(n, clnode)
                        if clrevorder[clnode] < clrevorder[fclnode]:
                            fclnodes[n] = clnode
                return clnode

            return lookupmflinknode

        # popitem() both drains the worklist and frees each entry once it
        # has been processed; new trees may be added while iterating.
        while tmfnodes:
            tree, nodes = tmfnodes.popitem()

            should_visit = self._matcher.visitdir(tree[:-1])
            if tree and not should_visit:
                continue

            store = mfl.getstorage(tree)

            if not should_visit:
                # No nodes to send because this directory is out of
                # the client's view of the repository (probably
                # because of narrow clones). Do this even for the root
                # directory (tree=='')
                prunednodes = []
            else:
                # Avoid sending any manifest nodes we can prove the
                # client already has by checking linkrevs. See the
                # related comment in generatefiles().
                prunednodes = self._prunemanifests(store, nodes, commonrevs)

            if tree and not prunednodes:
                continue

            lookupfn = makelookupmflinknode(tree, nodes)

            deltas = deltagroup(
                self._repo,
                store,
                prunednodes,
                False,
                lookupfn,
                self._forcedeltaparentprev,
                ellipses=self._ellipses,
                topic=_(b'manifests'),
                clrevtolocalrev=clrevtolocalrev,
                fullclnodes=self._fullclnodes,
                precomputedellipsis=self._precomputedellipsis,
            )

            if not self._oldmatcher.visitdir(store.tree[:-1]):
                yield tree, deltas
            else:
                # 'deltas' is a generator and we need to consume it even if
                # we are not going to send it because a side-effect is that
                # it updates tmfnodes (via lookupfn)
                for d in deltas:
                    pass
            if not tree:
                # The root tree must always be announced, even when empty,
                # so the consumer emits the section terminator.
                yield tree, []
1240 1244
1241 1245 def _prunemanifests(self, store, nodes, commonrevs):
1242 1246 if not self._ellipses:
1243 1247 # In non-ellipses case and large repositories, it is better to
1244 1248 # prevent calling of store.rev and store.linkrev on a lot of
1245 1249 # nodes as compared to sending some extra data
1246 1250 return nodes.copy()
1247 1251 # This is split out as a separate method to allow filtering
1248 1252 # commonrevs in extension code.
1249 1253 #
1250 1254 # TODO(augie): this shouldn't be required, instead we should
1251 1255 # make filtering of revisions to send delegated to the store
1252 1256 # layer.
1253 1257 frev, flr = store.rev, store.linkrev
1254 1258 return [n for n in nodes if flr(frev(n)) not in commonrevs]
1255 1259
    # The 'source' parameter is useful for extensions
    def generatefiles(
        self,
        changedfiles,
        commonrevs,
        source,
        mfdicts,
        fastpathlinkrev,
        fnodes,
        clrevs,
    ):
        """Yield (path, deltas) pairs for the filelog phase.

        ``changedfiles`` is narrowed to files newly visible to the client
        (matched now but not by the old matcher). ``fnodes`` is the
        path -> {filenode: linknode} mapping populated while the manifest
        phase was consumed.
        """
        changedfiles = [
            f
            for f in changedfiles
            if self._matcher(f) and not self._oldmatcher(f)
        ]

        if not fastpathlinkrev:

            def normallinknodes(unused, fname):
                return fnodes.get(fname, {})

        else:
            cln = self._repo.changelog.node

            # Fastpath: derive linknodes directly from the filelog's own
            # linkrevs, restricted to the changesets being sent.
            def normallinknodes(store, fname):
                flinkrev = store.linkrev
                fnode = store.node
                revs = ((r, flinkrev(r)) for r in store)
                return {fnode(r): cln(lr) for r, lr in revs if lr in clrevs}

        clrevtolocalrev = {}

        if self._isshallow:
            # In a shallow clone, the linknodes callback needs to also include
            # those file nodes that are in the manifests we sent but weren't
            # introduced by those manifests.
            commonctxs = [self._repo[c] for c in commonrevs]
            clrev = self._repo.changelog.rev

            def linknodes(flog, fname):
                for c in commonctxs:
                    try:
                        fnode = c.filenode(fname)
                        clrevtolocalrev[c.rev()] = flog.rev(fnode)
                    except error.ManifestLookupError:
                        pass
                links = normallinknodes(flog, fname)
                if len(links) != len(mfdicts):
                    for mf, lr in mfdicts:
                        fnode = mf.get(fname, None)
                        if fnode in links:
                            links[fnode] = min(links[fnode], lr, key=clrev)
                        elif fnode:
                            links[fnode] = lr
                return links

        else:
            linknodes = normallinknodes

        repo = self._repo
        progress = repo.ui.makeprogress(
            _(b'files'), unit=_(b'files'), total=len(changedfiles)
        )
        for i, fname in enumerate(sorted(changedfiles)):
            filerevlog = repo.file(fname)
            if not filerevlog:
                raise error.Abort(
                    _(b"empty or missing file data for %s") % fname
                )

            clrevtolocalrev.clear()

            linkrevnodes = linknodes(filerevlog, fname)
            # Lookup for filenodes, we collected the linkrev nodes above in the
            # fastpath case and with lookupmf in the slowpath case.
            # NOTE(review): lookupfilelog closes over the loop-local
            # linkrevnodes cell, so each yielded deltas generator must be
            # consumed before the next iteration rebinds it — presumably
            # guaranteed by the caller's in-order consumption; confirm.
            def lookupfilelog(x):
                return linkrevnodes[x]

            frev, flr = filerevlog.rev, filerevlog.linkrev
            # Skip sending any filenode we know the client already
            # has. This avoids over-sending files relatively
            # inexpensively, so it's not a problem if we under-filter
            # here.
            filenodes = [
                n for n in linkrevnodes if flr(frev(n)) not in commonrevs
            ]

            if not filenodes:
                continue

            progress.update(i + 1, item=fname)

            deltas = deltagroup(
                self._repo,
                filerevlog,
                filenodes,
                False,
                lookupfilelog,
                self._forcedeltaparentprev,
                ellipses=self._ellipses,
                clrevtolocalrev=clrevtolocalrev,
                fullclnodes=self._fullclnodes,
                precomputedellipsis=self._precomputedellipsis,
            )

            yield fname, deltas

        progress.complete()
1365 1369
1366 1370
def _makecg1packer(
    repo,
    oldmatcher,
    matcher,
    bundlecaps,
    ellipses=False,
    shallow=False,
    ellipsisroots=None,
    fullnodes=None,
):
    """Construct a changegroup version '01' packer."""

    def builddeltaheader(d):
        # cg1 headers carry node, both parents and linknode; there is no
        # explicit delta-base field, hence forcedeltaparentprev below.
        return _CHANGEGROUPV1_DELTA_HEADER.pack(
            d.node, d.p1node, d.p2node, d.linknode
        )

    return cgpacker(
        repo,
        oldmatcher,
        matcher,
        b'01',
        builddeltaheader=builddeltaheader,
        manifestsend=b'',
        forcedeltaparentprev=True,
        bundlecaps=bundlecaps,
        ellipses=ellipses,
        shallow=shallow,
        ellipsisroots=ellipsisroots,
        fullnodes=fullnodes,
    )
1395 1399
1396 1400
def _makecg2packer(
    repo,
    oldmatcher,
    matcher,
    bundlecaps,
    ellipses=False,
    shallow=False,
    ellipsisroots=None,
    fullnodes=None,
):
    """Construct a changegroup version '02' packer."""

    def builddeltaheader(d):
        # cg2 adds an explicit basenode field (generaldelta support).
        return _CHANGEGROUPV2_DELTA_HEADER.pack(
            d.node, d.p1node, d.p2node, d.basenode, d.linknode
        )

    return cgpacker(
        repo,
        oldmatcher,
        matcher,
        b'02',
        builddeltaheader=builddeltaheader,
        manifestsend=b'',
        bundlecaps=bundlecaps,
        ellipses=ellipses,
        shallow=shallow,
        ellipsisroots=ellipsisroots,
        fullnodes=fullnodes,
    )
1424 1428
1425 1429
def _makecg3packer(
    repo,
    oldmatcher,
    matcher,
    bundlecaps,
    ellipses=False,
    shallow=False,
    ellipsisroots=None,
    fullnodes=None,
):
    """Construct a changegroup version '03' packer."""

    def builddeltaheader(d):
        # cg3 extends cg2 with a flags field (revlog flags, treemanifests).
        return _CHANGEGROUPV3_DELTA_HEADER.pack(
            d.node, d.p1node, d.p2node, d.basenode, d.linknode, d.flags
        )

    return cgpacker(
        repo,
        oldmatcher,
        matcher,
        b'03',
        builddeltaheader=builddeltaheader,
        manifestsend=closechunk(),
        bundlecaps=bundlecaps,
        ellipses=ellipses,
        shallow=shallow,
        ellipsisroots=ellipsisroots,
        fullnodes=fullnodes,
    )
1453 1457
1454 1458
# Maps a changegroup version identifier to a (packer factory, unpacker
# class) pair.
_packermap = {
    b'01': (_makecg1packer, cg1unpacker),
    # cg2 adds support for exchanging generaldelta
    b'02': (_makecg2packer, cg2unpacker),
    # cg3 adds support for exchanging revlog flags and treemanifests
    b'03': (_makecg3packer, cg3unpacker),
}
1462 1466
1463 1467
def allsupportedversions(repo):
    """Return the set of changegroup versions usable with this repo."""
    versions = set(_packermap.keys())

    # Version 03 is needed to exchange treemanifest data.
    #
    # Versions 01 and 02 are kept as well, because a repository may
    # contain both flat and tree manifests at the same time, so using an
    # older version to pull data stays viable (or even to push a subset
    # of history).
    needstree = (
        repo.ui.configbool(b'experimental', b'changegroup3')
        or repo.ui.configbool(b'experimental', b'treemanifest')
        or scmutil.istreemanifest(repo)
    )

    needsidedata = b'exp-sidedata-flag' in repo.requirements
    if needsidedata:
        # don't attempt to use 01/02 until we do sidedata cleaning
        versions.discard(b'01')
        versions.discard(b'02')

    if not (needstree or needsidedata):
        versions.discard(b'03')
    return versions
1488 1492
1489 1493
# Changegroup versions that can be applied to the repo
def supportedincomingversions(repo):
    """Return changegroup versions this repository can unbundle."""
    return allsupportedversions(repo)
1493 1497
1494 1498
# Changegroup versions that can be created from the repo
def supportedoutgoingversions(repo):
    """Return changegroup versions this repository can produce."""
    versions = allsupportedversions(repo)

    # All three conditions below rule out the flat-manifest-only
    # versions 01 and 02.
    dropflat = False
    if scmutil.istreemanifest(repo):
        # Versions 01 and 02 support only flat manifests and it's just too
        # expensive to convert between the flat manifest and tree manifest on
        # the fly. Since tree manifests are hashed differently, all of history
        # would have to be converted. Instead, we simply don't even pretend to
        # support versions 01 and 02.
        dropflat = True
    if requirements.NARROW_REQUIREMENT in repo.requirements:
        # Versions 01 and 02 don't support revlog flags, and we need to
        # support that for stripping and unbundling to work.
        dropflat = True
    if LFS_REQUIREMENT in repo.requirements:
        # Versions 01 and 02 don't support revlog flags, and we need to
        # mark LFS entries with REVIDX_EXTSTORED.
        dropflat = True

    if dropflat:
        versions.discard(b'01')
        versions.discard(b'02')

    return versions
1518 1522
1519 1523
def localversion(repo):
    """Return the best version for locally-consumed bundles.

    Used for bundles that are meant to be used locally, such as those
    from strip and shelve, and temporary bundles.
    """
    return max(supportedoutgoingversions(repo))
1524 1528
1525 1529
def safeversion(repo):
    """Return the smallest version safe to assume clients support.

    For example, all hg versions that support generaldelta also support
    changegroup 02, so 01 can be excluded on generaldelta repos.
    """
    versions = supportedoutgoingversions(repo)
    if b'generaldelta' in repo.requirements:
        versions.discard(b'01')
    assert versions
    return min(versions)
1535 1539
1536 1540
def getbundler(
    version,
    repo,
    bundlecaps=None,
    oldmatcher=None,
    matcher=None,
    ellipses=False,
    shallow=False,
    ellipsisroots=None,
    fullnodes=None,
):
    """Obtain a changegroup packer for the given version.

    Validates that the requested version supports the requested features
    (sparse matchers need >= 02, ellipsis nodes need cg3) before
    delegating to the per-version factory.
    """
    assert version in supportedoutgoingversions(repo)

    matcher = matchmod.always() if matcher is None else matcher
    oldmatcher = matchmod.never() if oldmatcher is None else oldmatcher

    if version == b'01' and not matcher.always():
        raise error.ProgrammingError(
            b'version 01 changegroups do not support sparse file matchers'
        )

    if ellipses and version in (b'01', b'02'):
        raise error.Abort(
            _(
                b'ellipsis nodes require at least cg3 on client and server, '
                b'but negotiated version %s'
            )
            % version
        )

    # Requested files could include files not in the local store. So
    # filter those out.
    matcher = repo.narrowmatch(matcher)

    makepacker = _packermap[version][0]
    return makepacker(
        repo,
        oldmatcher,
        matcher,
        bundlecaps,
        ellipses=ellipses,
        shallow=shallow,
        ellipsisroots=ellipsisroots,
        fullnodes=fullnodes,
    )
1584 1588
1585 1589
def getunbundler(version, fh, alg, extras=None):
    """Obtain an unpacker instance for the given changegroup version."""
    unpackercls = _packermap[version][1]
    return unpackercls(fh, alg, extras=extras)
1588 1592
1589 1593
def _changegroupinfo(repo, nodes, source):
    """Report the changesets being bundled, per verbosity settings."""
    ui = repo.ui
    if ui.verbose or source == b'bundle':
        ui.status(_(b"%d changesets found\n") % len(nodes))
    if ui.debugflag:
        ui.debug(b"list of changesets:\n")
        for node in nodes:
            ui.debug(b"%s\n" % hex(node))
1597 1601
1598 1602
def makechangegroup(
    repo, outgoing, version, source, fastpath=False, bundlecaps=None
):
    """Build a changegroup and wrap it in an unbundler for local use."""
    stream = makestream(
        repo,
        outgoing,
        version,
        source,
        fastpath=fastpath,
        bundlecaps=bundlecaps,
    )
    extras = {b'clcount': len(outgoing.missing)}
    return getunbundler(version, util.chunkbuffer(stream), None, extras)
1616 1620
1617 1621
def makestream(
    repo,
    outgoing,
    version,
    source,
    fastpath=False,
    bundlecaps=None,
    matcher=None,
):
    """Return a changegroup chunk stream for the outgoing changesets."""
    bundler = getbundler(version, repo, bundlecaps=bundlecaps, matcher=matcher)

    repo = repo.unfiltered()
    commonrevs = outgoing.common
    csets = outgoing.missing
    heads = outgoing.ancestorsof
    heads.sort()
    # We go through the fast path if we get told to, or if all (unfiltered
    # heads have been requested (since we then know there all linkrevs will
    # be pulled by the client).
    if fastpath:
        fastpathlinkrev = True
    else:
        fastpathlinkrev = (
            repo.filtername is None and heads == sorted(repo.heads())
        )

    repo.hook(b'preoutgoing', throw=True, source=source)
    _changegroupinfo(repo, csets, source)
    return bundler.generate(commonrevs, csets, fastpathlinkrev, source)
1644 1648
1645 1649
def _addchangegroupfiles(repo, source, revmap, trp, expectedfiles, needfiles):
    """Apply the filelog portion of a changegroup to the repository.

    Reads per-file delta groups from ``source`` and adds them to the
    corresponding filelogs under transaction ``trp``, removing satisfied
    entries from ``needfiles`` as revisions arrive. Returns a
    (revisions added, files touched) tuple. Raises error.Abort on empty
    groups, censored delta bases, spurious entries, or file data still
    missing after the whole stream is consumed.
    """
    revisions = 0
    files = 0
    progress = repo.ui.makeprogress(
        _(b'files'), unit=_(b'files'), total=expectedfiles
    )
    # An empty filelogheader ({}) marks the end of the file section.
    for chunkdata in iter(source.filelogheader, {}):
        files += 1
        f = chunkdata[b"filename"]
        repo.ui.debug(b"adding %s revisions\n" % f)
        progress.increment()
        fl = repo.file(f)
        # Remember the pre-add length so new revisions can be inspected.
        o = len(fl)
        try:
            deltas = source.deltaiter()
            if not fl.addgroup(deltas, revmap, trp):
                raise error.Abort(_(b"received file revlog group is empty"))
        except error.CensoredBaseError as e:
            raise error.Abort(_(b"received delta base is censored: %s") % e)
        revisions += len(fl) - o
        if f in needfiles:
            needs = needfiles[f]
            # Tick off every newly-added node that was expected; anything
            # unexpected is an error.
            for new in pycompat.xrange(o, len(fl)):
                n = fl.node(new)
                if n in needs:
                    needs.remove(n)
                else:
                    raise error.Abort(_(b"received spurious file revlog entry"))
            if not needs:
                del needfiles[f]
    progress.complete()

    # Anything still listed in needfiles was promised by the changesets
    # but never delivered by the stream.
    for f, needs in pycompat.iteritems(needfiles):
        fl = repo.file(f)
        for n in needs:
            try:
                fl.rev(n)
            except error.LookupError:
                raise error.Abort(
                    _(b'missing file data for %s:%s - run hg verify')
                    % (f, hex(n))
                )

    return revisions, files
General Comments 0
You need to be logged in to leave comments. Login now