manifest: use read_delta_new_entries in changegroup validate...
marmoute
r52679:e4954fd3 default
@@ -1,2442 +1,2442 b''
1 1 # changegroup.py - Mercurial changegroup manipulation functions
2 2 #
3 3 # Copyright 2006 Olivia Mackall <olivia@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8
9 9 import os
10 10 import struct
11 11 import weakref
12 12
13 13 from .i18n import _
14 14 from .node import (
15 15 hex,
16 16 nullrev,
17 17 short,
18 18 )
19 19 from .pycompat import open
20 20
21 21 from . import (
22 22 error,
23 23 match as matchmod,
24 24 mdiff,
25 25 phases,
26 26 pycompat,
27 27 requirements,
28 28 scmutil,
29 29 util,
30 30 )
31 31
32 32 from .interfaces import repository
33 33 from .revlogutils import sidedata as sidedatamod
34 34 from .revlogutils import constants as revlog_constants
35 35 from .utils import storageutil
36 36
37 37 _CHANGEGROUPV1_DELTA_HEADER = struct.Struct(b"20s20s20s20s")
38 38 _CHANGEGROUPV2_DELTA_HEADER = struct.Struct(b"20s20s20s20s20s")
39 39 _CHANGEGROUPV3_DELTA_HEADER = struct.Struct(b">20s20s20s20s20sH")
40 40 _CHANGEGROUPV4_DELTA_HEADER = struct.Struct(b">B20s20s20s20s20sH")
41 41
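# Illustration (editor's sketch, not part of changegroup.py): how the delta
# header structs above frame their fields. cg1 packs four raw 20-byte nodes,
# cg2 adds an explicit deltabase node, cg3 appends big-endian revlog flags,
# and cg4 prepends a protocol-flags byte. All values are synthetic.
import struct

_hdr1 = struct.Struct(b"20s20s20s20s")  # cg1: node, p1, p2, cs
assert _hdr1.size == 80
_node, _p1, _p2, _cs = _hdr1.unpack(
    b'\x01' * 20 + b'\x02' * 20 + b'\x03' * 20 + b'\x04' * 20
)
assert struct.Struct(b">20s20s20s20s20sH").size == 102  # cg3 adds flags
assert struct.Struct(b">B20s20s20s20s20sH").size == 103  # cg4 adds a byte
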
42 42 LFS_REQUIREMENT = b'lfs'
43 43
44 44 readexactly = util.readexactly
45 45
46 46
47 47 def getchunk(stream):
48 48 """return the next chunk from stream as a string"""
49 49 d = readexactly(stream, 4)
50 50 l = struct.unpack(b">l", d)[0]
51 51 if l <= 4:
52 52 if l:
53 53 raise error.Abort(_(b"invalid chunk length %d") % l)
54 54 return b""
55 55 return readexactly(stream, l - 4)
56 56
57 57
58 58 def chunkheader(length):
59 59 """return a changegroup chunk header (string)"""
60 60 return struct.pack(b">l", length + 4)
61 61
62 62
63 63 def closechunk():
64 64 """return a changegroup chunk header (string) for a zero-length chunk"""
65 65 return struct.pack(b">l", 0)
66 66
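# Illustration (editor's sketch, not part of changegroup.py): the chunk
# framing used by getchunk()/chunkheader()/closechunk() above. Each chunk is
# a big-endian 4-byte length (which counts itself) followed by the payload;
# a length of 0 is the group terminator.
import io
import struct

def _read(stream, n):  # simplified stand-in for util.readexactly
    d = stream.read(n)
    assert len(d) == n, 'stream ended unexpectedly'
    return d

_payload = b'hello'
_stream = io.BytesIO(
    struct.pack(b">l", len(_payload) + 4) + _payload  # chunkheader + data
    + struct.pack(b">l", 0)                           # closechunk
)
_l = struct.unpack(b">l", _read(_stream, 4))[0]
assert _read(_stream, _l - 4) == _payload
assert struct.unpack(b">l", _read(_stream, 4))[0] == 0  # end of group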
67 67
68 68 def _fileheader(path):
69 69 """Obtain a changegroup chunk header for a named path."""
70 70 return chunkheader(len(path)) + path
71 71
72 72
73 73 def writechunks(ui, chunks, filename, vfs=None):
74 74 """Write chunks to a file and return its filename.
75 75
76 76 The stream is assumed to be a bundle file.
77 77 Existing files will not be overwritten.
78 78 If no filename is specified, a temporary file is created.
79 79 """
80 80 fh = None
81 81 cleanup = None
82 82 try:
83 83 if filename:
84 84 if vfs:
85 85 fh = vfs.open(filename, b"wb")
86 86 else:
87 87 # Increase default buffer size because default is usually
88 88 # small (4k is common on Linux).
89 89 fh = open(filename, b"wb", 131072)
90 90 else:
91 91 fd, filename = pycompat.mkstemp(prefix=b"hg-bundle-", suffix=b".hg")
92 92 fh = os.fdopen(fd, "wb")
93 93 cleanup = filename
94 94 for c in chunks:
95 95 fh.write(c)
96 96 cleanup = None
97 97 return filename
98 98 finally:
99 99 if fh is not None:
100 100 fh.close()
101 101 if cleanup is not None:
102 102 if filename and vfs:
103 103 vfs.unlink(cleanup)
104 104 else:
105 105 os.unlink(cleanup)
106 106
107 107
108 108 def _dbg_ubdl_line(
109 109 ui,
110 110 indent,
111 111 key,
112 112 base_value=None,
113 113 percentage_base=None,
114 114 percentage_key=None,
115 115 ):
116 116 """Print one line of display_unbundle_debug_info output"""
117 117 line = b"DEBUG-UNBUNDLING: "
118 118 line += b' ' * (2 * indent)
119 119 key += b":"
120 120 padding = b''
121 121 if base_value is not None:
122 122 assert len(key) + 1 + (2 * indent) <= _KEY_PART_WIDTH
123 123 line += key.ljust(_KEY_PART_WIDTH - (2 * indent))
124 124 if isinstance(base_value, float):
125 125 line += b"%14.3f seconds" % base_value
126 126 else:
127 127 line += b"%10d" % base_value
128 128 padding = b' '
129 129 else:
130 130 line += key
131 131
132 132 if percentage_base is not None:
133 133 line += padding
134 134 padding = b''
135 135 assert base_value is not None
136 136 percentage = base_value * 100 // percentage_base
137 137 if percentage_key is not None:
138 138 line += b" (%3d%% of %s)" % (
139 139 percentage,
140 140 percentage_key,
141 141 )
142 142 else:
143 143 line += b" (%3d%%)" % percentage
144 144
145 145 line += b'\n'
146 146 ui.write_err(line)
147 147
148 148
149 149 def _sumf(items):
150 150 # Python < 3.8 does not support a `start=0.0` argument to sum,
151 151 # so we have to cheat a bit until we drop support for those versions
152 152 if not items:
153 153 return 0.0
154 154 return sum(items)
155 155
156 156
157 157 def display_unbundle_debug_info(ui, debug_info):
158 158 """display an unbundling report from debug information"""
159 159 cl_info = []
160 160 mn_info = []
161 161 fl_info = []
162 162 _dispatch = [
163 163 (b'CHANGELOG:', cl_info),
164 164 (b'MANIFESTLOG:', mn_info),
165 165 (b'FILELOG:', fl_info),
166 166 ]
167 167 for e in debug_info:
168 168 for prefix, info in _dispatch:
169 169 if e["target-revlog"].startswith(prefix):
170 170 info.append(e)
171 171 break
172 172 else:
173 173 assert False, 'unreachable'
174 174 each_info = [
175 175 (b'changelog', cl_info),
176 176 (b'manifests', mn_info),
177 177 (b'files', fl_info),
178 178 ]
179 179
180 180 # General Revision Counts
181 181 _dbg_ubdl_line(ui, 0, b'revisions', len(debug_info))
182 182 for key, info in each_info:
183 183 if not info:
184 184 continue
185 185 _dbg_ubdl_line(ui, 1, key, len(info), len(debug_info))
186 186
187 187 # General Time spent
188 188 all_durations = [e['duration'] for e in debug_info]
189 189 all_durations.sort()
190 190 total_duration = _sumf(all_durations)
191 191 _dbg_ubdl_line(ui, 0, b'total-time', total_duration)
192 192
193 193 for key, info in each_info:
194 194 if not info:
195 195 continue
196 196 durations = [e['duration'] for e in info]
197 197 durations.sort()
198 198 _dbg_ubdl_line(ui, 1, key, _sumf(durations), total_duration)
199 199
200 200 # Count and cache reuse per delta types
201 201 each_types = {}
202 202 for key, info in each_info:
203 203 each_types[key] = types = {
204 204 b'full': 0,
205 205 b'full-cached': 0,
206 206 b'snapshot': 0,
207 207 b'snapshot-cached': 0,
208 208 b'delta': 0,
209 209 b'delta-cached': 0,
210 210 b'unknown': 0,
211 211 b'unknown-cached': 0,
212 212 }
213 213 for e in info:
214 214 types[e['type']] += 1
215 215 if e['using-cached-base']:
216 216 types[e['type'] + b'-cached'] += 1
217 217
218 218 EXPECTED_TYPES = (b'full', b'snapshot', b'delta', b'unknown')
219 219 if debug_info:
220 220 _dbg_ubdl_line(ui, 0, b'type-count')
221 221 for key, info in each_info:
222 222 if info:
223 223 _dbg_ubdl_line(ui, 1, key)
224 224 t = each_types[key]
225 225 for tn in EXPECTED_TYPES:
226 226 if t[tn]:
227 227 tc = tn + b'-cached'
228 228 _dbg_ubdl_line(ui, 2, tn, t[tn])
229 229 _dbg_ubdl_line(ui, 3, b'cached', t[tc], t[tn])
230 230
231 231 # time perf delta types and reuse
232 232 each_type_time = {}
233 233 for key, info in each_info:
234 234 each_type_time[key] = t = {
235 235 b'full': [],
236 236 b'full-cached': [],
237 237 b'snapshot': [],
238 238 b'snapshot-cached': [],
239 239 b'delta': [],
240 240 b'delta-cached': [],
241 241 b'unknown': [],
242 242 b'unknown-cached': [],
243 243 }
244 244 for e in info:
245 245 t[e['type']].append(e['duration'])
246 246 if e['using-cached-base']:
247 247 t[e['type'] + b'-cached'].append(e['duration'])
248 248 for t_key, value in list(t.items()):
249 249 value.sort()
250 250 t[t_key] = _sumf(value)
251 251
252 252 if debug_info:
253 253 _dbg_ubdl_line(ui, 0, b'type-time')
254 254 for key, info in each_info:
255 255 if info:
256 256 _dbg_ubdl_line(ui, 1, key)
257 257 t = each_type_time[key]
258 258 td = total_duration # to save space on the next lines
259 259 for tn in EXPECTED_TYPES:
260 260 if t[tn]:
261 261 tc = tn + b'-cached'
262 262 _dbg_ubdl_line(ui, 2, tn, t[tn], td, b"total")
263 263 _dbg_ubdl_line(ui, 3, b'cached', t[tc], td, b"total")
264 264
265 265
266 266 class cg1unpacker:
267 267 """Unpacker for cg1 changegroup streams.
268 268
269 269 A changegroup unpacker handles the framing of the revision data in
270 270 the wire format. Most consumers will want to use the apply()
271 271 method to add the changes from the changegroup to a repository.
272 272
273 273 If you're forwarding a changegroup unmodified to another consumer,
274 274 use getchunks(), which returns an iterator of changegroup
275 275 chunks. This is mostly useful for cases where you need to know the
276 276 data stream has ended by observing the end of the changegroup.
277 277
278 278 deltachunk() is useful only if you're applying delta data. Most
279 279 consumers should prefer apply() instead.
280 280
281 281 A few other public methods exist. Those are used only for
282 282 bundlerepo and some debug commands - their use is discouraged.
283 283 """
284 284
285 285 deltaheader = _CHANGEGROUPV1_DELTA_HEADER
286 286 deltaheadersize = deltaheader.size
287 287 version = b'01'
288 288 _grouplistcount = 1 # One list of files after the manifests
289 289
290 290 def __init__(self, fh, alg, extras=None):
291 291 if alg is None:
292 292 alg = b'UN'
293 293 if alg not in util.compengines.supportedbundletypes:
294 294 raise error.Abort(_(b'unknown stream compression type: %s') % alg)
295 295 if alg == b'BZ':
296 296 alg = b'_truncatedBZ'
297 297
298 298 compengine = util.compengines.forbundletype(alg)
299 299 self._stream = compengine.decompressorreader(fh)
300 300 self._type = alg
301 301 self.extras = extras or {}
302 302 self.callback = None
303 303
304 304 # These methods (compressed, read, seek, tell) all appear to only
305 305 # be used by bundlerepo, but it's a little hard to tell.
306 306 def compressed(self):
307 307 return self._type is not None and self._type != b'UN'
308 308
309 309 def read(self, l):
310 310 return self._stream.read(l)
311 311
312 312 def seek(self, pos):
313 313 return self._stream.seek(pos)
314 314
315 315 def tell(self):
316 316 return self._stream.tell()
317 317
318 318 def close(self):
319 319 return self._stream.close()
320 320
321 321 def _chunklength(self):
322 322 d = readexactly(self._stream, 4)
323 323 l = struct.unpack(b">l", d)[0]
324 324 if l <= 4:
325 325 if l:
326 326 raise error.Abort(_(b"invalid chunk length %d") % l)
327 327 return 0
328 328 if self.callback:
329 329 self.callback()
330 330 return l - 4
331 331
332 332 def changelogheader(self):
333 333 """v10 does not have a changelog header chunk"""
334 334 return {}
335 335
336 336 def manifestheader(self):
337 337 """v10 does not have a manifest header chunk"""
338 338 return {}
339 339
340 340 def filelogheader(self):
341 341 """return the header of the filelogs chunk, v10 only has the filename"""
342 342 l = self._chunklength()
343 343 if not l:
344 344 return {}
345 345 fname = readexactly(self._stream, l)
346 346 return {b'filename': fname}
347 347
348 348 def _deltaheader(self, headertuple, prevnode):
349 349 node, p1, p2, cs = headertuple
350 350 if prevnode is None:
351 351 deltabase = p1
352 352 else:
353 353 deltabase = prevnode
354 354 flags = 0
355 355 protocol_flags = 0
356 356 return node, p1, p2, deltabase, cs, flags, protocol_flags
357 357
358 358 def deltachunk(self, prevnode):
359 359 # Chunkdata: (node, p1, p2, cs, deltabase, delta, flags, sidedata, proto_flags)
360 360 l = self._chunklength()
361 361 if not l:
362 362 return {}
363 363 headerdata = readexactly(self._stream, self.deltaheadersize)
364 364 header = self.deltaheader.unpack(headerdata)
365 365 delta = readexactly(self._stream, l - self.deltaheadersize)
366 366 header = self._deltaheader(header, prevnode)
367 367 node, p1, p2, deltabase, cs, flags, protocol_flags = header
368 368 return node, p1, p2, cs, deltabase, delta, flags, {}, protocol_flags
369 369
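# Illustration (editor's sketch; _cg1_deltabase is a hypothetical helper, not
# part of changegroup.py): the implicit delta-base rule of _deltaheader()
# above. The first chunk in a cg1 group deltas against its own p1; each later
# chunk deltas against the node of the chunk preceding it.
def _cg1_deltabase(p1, prevnode):
    return p1 if prevnode is None else prevnode

assert _cg1_deltabase(b'p1-node', None) == b'p1-node'
assert _cg1_deltabase(b'p1-node', b'prev-node') == b'prev-node'
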
370 370 def getchunks(self):
371 371 """returns all the chunks contained in the bundle
372 372
373 373 Used when you need to forward the binary stream to a file or another
374 374 network API. To do so, it parses the changegroup data; otherwise it would
375 375 block on an sshrepo because it doesn't know where the stream ends.
376 376 """
377 377 # For changegroup 1 and 2, we expect 3 parts: changelog, manifestlog,
378 378 # and a list of filelogs. For changegroup 3, we expect 4 parts:
379 379 # changelog, manifestlog, a list of tree manifestlogs, and a list of
380 380 # filelogs.
381 381 #
382 382 # Changelog and manifestlog parts are terminated with empty chunks. The
383 383 # tree and file parts are a list of entry sections. Each entry section
384 384 # is a series of chunks terminating in an empty chunk. The list of these
385 385 # entry sections is terminated in yet another empty chunk, so we know
386 386 # we've reached the end of the tree/file list when we reach an empty
387 387 # chunk that was preceded by no non-empty chunks.
388 388
389 389 parts = 0
390 390 while parts < 2 + self._grouplistcount:
391 391 noentries = True
392 392 while True:
393 393 chunk = getchunk(self)
394 394 if not chunk:
395 395 # The first two empty chunks represent the end of the
396 396 # changelog and the manifestlog portions. The remaining
397 397 # empty chunks represent either A) the end of individual
398 398 # tree or file entries in the file list, or B) the end of
399 399 # the entire list. It's the end of the entire list if there
400 400 # were no entries (i.e. noentries is True).
401 401 if parts < 2:
402 402 parts += 1
403 403 elif noentries:
404 404 parts += 1
405 405 break
406 406 noentries = False
407 407 yield chunkheader(len(chunk))
408 408 pos = 0
409 409 while pos < len(chunk):
410 410 next = pos + 2**20
411 411 yield chunk[pos:next]
412 412 pos = next
413 413 yield closechunk()
414 414
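# Illustration (editor's sketch, not part of changegroup.py): the empty-chunk
# counting done by getchunks() above for a cg1/cg2 stream. Two empty chunks
# close the changelog and manifest parts; the file list then ends with an
# empty chunk that follows no entries.
_chunks = iter([b'clog', b'', b'mfst', b'', b'file-rev', b'', b''])
_parts = 0
while _parts < 2 + 1:  # 2 + _grouplistcount for cg1/cg2
    _noentries = True
    for _chunk in _chunks:
        if not _chunk:
            if _parts < 2 or _noentries:
                _parts += 1
            break
        _noentries = False
assert _parts == 3  # changelog, manifests, and the file list
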
415 415 def _unpackmanifests(
416 416 self,
417 417 repo,
418 418 revmap,
419 419 trp,
420 420 prog,
421 421 addrevisioncb=None,
422 422 debug_info=None,
423 423 delta_base_reuse_policy=None,
424 424 ):
425 425 self.callback = prog.increment
426 426 # no need to check for empty manifest group here:
427 427 # if the result of the merge of 1 and 2 is the same in 3 and 4,
428 428 # no new manifest will be created and the manifest group will
429 429 # be empty during the pull
430 430 self.manifestheader()
431 431 deltas = self.deltaiter()
432 432 storage = repo.manifestlog.getstorage(b'')
433 433 storage.addgroup(
434 434 deltas,
435 435 revmap,
436 436 trp,
437 437 addrevisioncb=addrevisioncb,
438 438 debug_info=debug_info,
439 439 delta_base_reuse_policy=delta_base_reuse_policy,
440 440 )
441 441 prog.complete()
442 442 self.callback = None
443 443
444 444 def apply(
445 445 self,
446 446 repo,
447 447 tr,
448 448 srctype,
449 449 url,
450 450 targetphase=phases.draft,
451 451 expectedtotal=None,
452 452 sidedata_categories=None,
453 453 delta_base_reuse_policy=None,
454 454 ):
455 455 """Add the changegroup returned by source.read() to this repo.
456 456 srctype is a string like 'push', 'pull', or 'unbundle'. url is
457 457 the URL of the repo where this changegroup is coming from.
458 458
459 459 Return an integer summarizing the change to this repo:
460 460 - nothing changed or no source: 0
461 461 - more heads than before: 1+added heads (2..n)
462 462 - fewer heads than before: -1-removed heads (-2..-n)
463 463 - number of heads stays the same: 1
464 464
465 465 `sidedata_categories` is an optional set of the remote's sidedata wanted
466 466 categories.
467 467
468 468 `delta_base_reuse_policy` is an optional argument, when set to a value
469 469 it will control the way the delta contained into the bundle are reused
470 470 when applied in the revlog.
471 471
472 472 See `DELTA_BASE_REUSE_*` entry in mercurial.revlogutils.constants.
473 473 """
474 474 repo = repo.unfiltered()
475 475
476 476 debug_info = None
477 477 if repo.ui.configbool(b'debug', b'unbundling-stats'):
478 478 debug_info = []
479 479
480 480 # Only useful if we're adding sidedata categories. If both peers have
481 481 # the same categories, then we simply don't do anything.
482 482 adding_sidedata = (
483 483 (
484 484 requirements.REVLOGV2_REQUIREMENT in repo.requirements
485 485 or requirements.CHANGELOGV2_REQUIREMENT in repo.requirements
486 486 )
487 487 and self.version == b'04'
488 488 and srctype == b'pull'
489 489 )
490 490 if adding_sidedata:
491 491 sidedata_helpers = sidedatamod.get_sidedata_helpers(
492 492 repo,
493 493 sidedata_categories or set(),
494 494 pull=True,
495 495 )
496 496 else:
497 497 sidedata_helpers = None
498 498
499 499 def csmap(x):
500 500 repo.ui.debug(b"add changeset %s\n" % short(x))
501 501 return len(cl)
502 502
503 503 def revmap(x):
504 504 return cl.rev(x)
505 505
506 506 try:
507 507 # The transaction may already carry source information. In this
508 508 # case we use the top level data. We overwrite the argument
509 509 # because we need to use the top level value (if they exist)
510 510 # in this function.
511 511 srctype = tr.hookargs.setdefault(b'source', srctype)
512 512 tr.hookargs.setdefault(b'url', url)
513 513 repo.hook(
514 514 b'prechangegroup', throw=True, **pycompat.strkwargs(tr.hookargs)
515 515 )
516 516
517 517 # write changelog data to temp files so concurrent readers
518 518 # will not see an inconsistent view
519 519 cl = repo.changelog
520 520 cl.delayupdate(tr)
521 521 oldrevcount = len(cl)
522 522
523 523 trp = weakref.proxy(tr)
524 524 # pull off the changeset group
525 525 repo.ui.status(_(b"adding changesets\n"))
526 526 clstart = len(cl)
527 527 progress = repo.ui.makeprogress(
528 528 _(b'changesets'), unit=_(b'chunks'), total=expectedtotal
529 529 )
530 530 self.callback = progress.increment
531 531
532 532 efilesset = set()
533 533 duprevs = []
534 534
535 535 def ondupchangelog(cl, rev):
536 536 if rev < clstart:
537 537 duprevs.append(rev) # pytype: disable=attribute-error
538 538
539 539 def onchangelog(cl, rev):
540 540 ctx = cl.changelogrevision(rev)
541 541 assert efilesset is not None # help pytype
542 542 efilesset.update(ctx.files)
543 543 repo.register_changeset(rev, ctx)
544 544
545 545 self.changelogheader()
546 546 deltas = self.deltaiter()
547 547 if not cl.addgroup(
548 548 deltas,
549 549 csmap,
550 550 trp,
551 551 alwayscache=True,
552 552 addrevisioncb=onchangelog,
553 553 duplicaterevisioncb=ondupchangelog,
554 554 debug_info=debug_info,
555 555 delta_base_reuse_policy=delta_base_reuse_policy,
556 556 ):
557 557 repo.ui.develwarn(
558 558 b'applied empty changelog from changegroup',
559 559 config=b'warn-empty-changegroup',
560 560 )
561 561 efiles = len(efilesset)
562 562 clend = len(cl)
563 563 changesets = clend - clstart
564 564 progress.complete()
565 565 del deltas
566 566 # TODO Python 2.7 removal
567 567 # del efilesset
568 568 efilesset = None
569 569 self.callback = None
570 570
571 571 # Keep track of the (non-changelog) revlogs we've updated and their
572 572 # range of new revisions for sidedata rewrite.
573 573 # TODO do something more efficient than keeping the reference to
574 574 # the revlogs, especially memory-wise.
575 575 touched_manifests = {}
576 576 touched_filelogs = {}
577 577
578 578 # pull off the manifest group
579 579 repo.ui.status(_(b"adding manifests\n"))
580 580 # We know that we'll never have more manifests than we had
581 581 # changesets.
582 582 progress = repo.ui.makeprogress(
583 583 _(b'manifests'), unit=_(b'chunks'), total=changesets
584 584 )
585 585 on_manifest_rev = None
586 586 if sidedata_helpers:
587 587 if revlog_constants.KIND_MANIFESTLOG in sidedata_helpers[1]:
588 588
589 589 def on_manifest_rev(manifest, rev):
590 590 range = touched_manifests.get(manifest)
591 591 if not range:
592 592 touched_manifests[manifest] = (rev, rev)
593 593 else:
594 594 assert rev == range[1] + 1
595 595 touched_manifests[manifest] = (range[0], rev)
596 596
597 597 self._unpackmanifests(
598 598 repo,
599 599 revmap,
600 600 trp,
601 601 progress,
602 602 addrevisioncb=on_manifest_rev,
603 603 debug_info=debug_info,
604 604 delta_base_reuse_policy=delta_base_reuse_policy,
605 605 )
606 606
607 607 needfiles = {}
608 608 if repo.ui.configbool(b'server', b'validate'):
609 609 cl = repo.changelog
610 610 ml = repo.manifestlog
611 611 # validate incoming csets have their manifests
612 612 for cset in range(clstart, clend):
613 613 mfnode = cl.changelogrevision(cset).manifest
614 mfest = ml[mfnode].readdelta()
614 mfest = ml[mfnode].read_delta_new_entries()
615 615 # store file nodes we must see
616 616 for f, n in mfest.items():
617 617 needfiles.setdefault(f, set()).add(n)
618 618
619 619 on_filelog_rev = None
620 620 if sidedata_helpers:
621 621 if revlog_constants.KIND_FILELOG in sidedata_helpers[1]:
622 622
623 623 def on_filelog_rev(filelog, rev):
624 624 range = touched_filelogs.get(filelog)
625 625 if not range:
626 626 touched_filelogs[filelog] = (rev, rev)
627 627 else:
628 628 assert rev == range[1] + 1
629 629 touched_filelogs[filelog] = (range[0], rev)
630 630
631 631 # process the files
632 632 repo.ui.status(_(b"adding file changes\n"))
633 633 newrevs, newfiles = _addchangegroupfiles(
634 634 repo,
635 635 self,
636 636 revmap,
637 637 trp,
638 638 efiles,
639 639 needfiles,
640 640 addrevisioncb=on_filelog_rev,
641 641 debug_info=debug_info,
642 642 delta_base_reuse_policy=delta_base_reuse_policy,
643 643 )
644 644
645 645 if sidedata_helpers:
646 646 if revlog_constants.KIND_CHANGELOG in sidedata_helpers[1]:
647 647 cl.rewrite_sidedata(
648 648 trp, sidedata_helpers, clstart, clend - 1
649 649 )
650 650 for mf, (startrev, endrev) in touched_manifests.items():
651 651 mf.rewrite_sidedata(trp, sidedata_helpers, startrev, endrev)
652 652 for fl, (startrev, endrev) in touched_filelogs.items():
653 653 fl.rewrite_sidedata(trp, sidedata_helpers, startrev, endrev)
654 654
655 655 # making sure the value exists
656 656 tr.changes.setdefault(b'changegroup-count-changesets', 0)
657 657 tr.changes.setdefault(b'changegroup-count-revisions', 0)
658 658 tr.changes.setdefault(b'changegroup-count-files', 0)
659 659 tr.changes.setdefault(b'changegroup-count-heads', 0)
660 660
661 661 # Some code uses bundle operations for internal purposes. It usually
662 662 # sets `ui.quiet` to do this outside of the user's sight. Since the
663 663 # report of such operations now happens at the end of the transaction,
664 664 # ui.quiet has no direct effect on that output.
665 665 #
666 666 # To preserve this intent we use an inelegant hack: we fail to report
667 667 # the change if `quiet` is set. We should probably move to
668 668 # something better, but this is a good first step to allow the "end
669 669 # of transaction report" to pass tests.
670 670 if not repo.ui.quiet:
671 671 tr.changes[b'changegroup-count-changesets'] += changesets
672 672 tr.changes[b'changegroup-count-revisions'] += newrevs
673 673 tr.changes[b'changegroup-count-files'] += newfiles
674 674
675 675 deltaheads = 0
676 676 newrevcount = len(cl)
677 677 heads_removed, heads_added = cl.diffheads(oldrevcount, newrevcount)
678 678 deltaheads += len(heads_added) - len(heads_removed)
679 679 for h in heads_added:
680 680 if repo[h].closesbranch():
681 681 deltaheads -= 1
682 682
683 683 # see previous comment about checking ui.quiet
684 684 if not repo.ui.quiet:
685 685 tr.changes[b'changegroup-count-heads'] += deltaheads
686 686 repo.invalidatevolatilesets()
687 687
688 688 if changesets > 0:
689 689 if b'node' not in tr.hookargs:
690 690 tr.hookargs[b'node'] = hex(cl.node(clstart))
691 691 tr.hookargs[b'node_last'] = hex(cl.node(clend - 1))
692 692 hookargs = dict(tr.hookargs)
693 693 else:
694 694 hookargs = dict(tr.hookargs)
695 695 hookargs[b'node'] = hex(cl.node(clstart))
696 696 hookargs[b'node_last'] = hex(cl.node(clend - 1))
697 697 repo.hook(
698 698 b'pretxnchangegroup',
699 699 throw=True,
700 700 **pycompat.strkwargs(hookargs)
701 701 )
702 702
703 703 added = range(clstart, clend)
704 704 phaseall = None
705 705 if srctype in (b'push', b'serve'):
706 706 # Old servers cannot push the boundary themselves.
707 707 # New servers won't push the boundary if the changeset already
708 708 # exists locally as secret.
709 709 #
710 710 # We should not use `added` here but the list of all changes in
711 711 # the bundle.
712 712 if repo.publishing():
713 713 targetphase = phaseall = phases.public
714 714 else:
715 715 # closer target phase computation
716 716
717 717 # Those changesets have been pushed from the
718 718 # outside, their phases are going to be pushed
719 719 # alongside. Therefore `targetphase` is
720 720 # ignored.
721 721 targetphase = phaseall = phases.draft
722 722 if added:
723 723 phases.registernew(repo, tr, targetphase, added)
724 724 if phaseall is not None:
725 725 if duprevs:
726 726 duprevs.extend(added)
727 727 else:
728 728 duprevs = added
729 729 phases.advanceboundary(repo, tr, phaseall, [], revs=duprevs)
730 730 duprevs = []
731 731
732 732 if changesets > 0:
733 733
734 734 def runhooks(unused_success):
735 735 # These hooks run when the lock releases, not when the
736 736 # transaction closes. So it's possible for the changelog
737 737 # to have changed since we last saw it.
738 738 if clstart >= len(repo):
739 739 return
740 740
741 741 repo.hook(b"changegroup", **pycompat.strkwargs(hookargs))
742 742
743 743 for rev in added:
744 744 args = hookargs.copy()
745 745 args[b'node'] = hex(cl.node(rev))
746 746 del args[b'node_last']
747 747 repo.hook(b"incoming", **pycompat.strkwargs(args))
748 748
749 749 repo.ui.log(
750 750 b"incoming",
751 751 b"%d incoming changes - new heads: %s\n",
752 752 len(added),
753 753 b', '.join([hex(c[:6]) for c in heads_added]),
754 754 )
755 755
756 756 tr.addpostclose(
757 757 b'changegroup-runhooks-%020i' % clstart,
758 758 lambda tr: repo._afterlock(runhooks),
759 759 )
760 760 if debug_info is not None:
761 761 display_unbundle_debug_info(repo.ui, debug_info)
762 762 finally:
763 763 repo.ui.flush()
764 764 # never return 0 here:
765 765 if deltaheads < 0:
766 766 ret = deltaheads - 1
767 767 else:
768 768 ret = deltaheads + 1
769 769 return ret
770 770
771 771 def deltaiter(self):
772 772 """
773 773 returns an iterator of the deltas in this changegroup
774 774
775 775 Useful for passing to the underlying storage system to be stored.
776 776 """
777 777 chain = None
778 778 for chunkdata in iter(lambda: self.deltachunk(chain), {}):
779 779 # Chunkdata: (node, p1, p2, cs, deltabase, delta, flags, sidedata, proto_flags)
780 780 yield chunkdata[:8]
781 781 chain = chunkdata[0]
782 782
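# Illustration (editor's sketch, not part of changegroup.py): the
# iter(callable, sentinel) idiom used by deltaiter() above. Each chunk's node
# is threaded back in as `chain`, which cg1 needs to resolve its implicit
# delta base.
_data = iter([(b'n1', b'delta1'), (b'n2', b'delta2'), {}])
_chain = None
_seen = []
for _chunk in iter(lambda: next(_data), {}):
    _seen.append((_chain, _chunk[0]))
    _chain = _chunk[0]
assert _seen == [(None, b'n1'), (b'n1', b'n2')]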
783 783
784 784 class cg2unpacker(cg1unpacker):
785 785 """Unpacker for cg2 streams.
786 786
787 787 cg2 streams add support for generaldelta, so the delta header
788 788 format is slightly different. All other features about the data
789 789 remain the same.
790 790 """
791 791
792 792 deltaheader = _CHANGEGROUPV2_DELTA_HEADER
793 793 deltaheadersize = deltaheader.size
794 794 version = b'02'
795 795
796 796 def _deltaheader(self, headertuple, prevnode):
797 797 node, p1, p2, deltabase, cs = headertuple
798 798 flags = 0
799 799 protocol_flags = 0
800 800 return node, p1, p2, deltabase, cs, flags, protocol_flags
801 801
802 802
803 803 class cg3unpacker(cg2unpacker):
804 804 """Unpacker for cg3 streams.
805 805
806 806 cg3 streams add support for exchanging treemanifests and revlog
807 807 flags. It adds the revlog flags to the delta header and an empty chunk
808 808 separating manifests and files.
809 809 """
810 810
811 811 deltaheader = _CHANGEGROUPV3_DELTA_HEADER
812 812 deltaheadersize = deltaheader.size
813 813 version = b'03'
814 814 _grouplistcount = 2 # One list of manifests and one list of files
815 815
816 816 def _deltaheader(self, headertuple, prevnode):
817 817 node, p1, p2, deltabase, cs, flags = headertuple
818 818 protocol_flags = 0
819 819 return node, p1, p2, deltabase, cs, flags, protocol_flags
820 820
821 821 def _unpackmanifests(
822 822 self,
823 823 repo,
824 824 revmap,
825 825 trp,
826 826 prog,
827 827 addrevisioncb=None,
828 828 debug_info=None,
829 829 delta_base_reuse_policy=None,
830 830 ):
831 831 super(cg3unpacker, self)._unpackmanifests(
832 832 repo,
833 833 revmap,
834 834 trp,
835 835 prog,
836 836 addrevisioncb=addrevisioncb,
837 837 debug_info=debug_info,
838 838 delta_base_reuse_policy=delta_base_reuse_policy,
839 839 )
840 840 for chunkdata in iter(self.filelogheader, {}):
841 841 # If we get here, there are directory manifests in the changegroup
842 842 d = chunkdata[b"filename"]
843 843 repo.ui.debug(b"adding %s revisions\n" % d)
844 844 deltas = self.deltaiter()
845 845 if not repo.manifestlog.getstorage(d).addgroup(
846 846 deltas,
847 847 revmap,
848 848 trp,
849 849 addrevisioncb=addrevisioncb,
850 850 debug_info=debug_info,
851 851 delta_base_reuse_policy=delta_base_reuse_policy,
852 852 ):
853 853 raise error.Abort(_(b"received dir revlog group is empty"))
854 854
855 855
856 856 class cg4unpacker(cg3unpacker):
857 857 """Unpacker for cg4 streams.
858 858
859 859 cg4 streams add support for exchanging sidedata.
860 860 """
861 861
862 862 deltaheader = _CHANGEGROUPV4_DELTA_HEADER
863 863 deltaheadersize = deltaheader.size
864 864 version = b'04'
865 865
866 866 def _deltaheader(self, headertuple, prevnode):
867 867 protocol_flags, node, p1, p2, deltabase, cs, flags = headertuple
868 868 return node, p1, p2, deltabase, cs, flags, protocol_flags
869 869
870 870 def deltachunk(self, prevnode):
871 871 res = super(cg4unpacker, self).deltachunk(prevnode)
872 872 if not res:
873 873 return res
874 874
875 875 (
876 876 node,
877 877 p1,
878 878 p2,
879 879 cs,
880 880 deltabase,
881 881 delta,
882 882 flags,
883 883 sidedata,
884 884 protocol_flags,
885 885 ) = res
886 886 assert not sidedata
887 887
888 888 sidedata = {}
889 889 if protocol_flags & storageutil.CG_FLAG_SIDEDATA:
890 890 sidedata_raw = getchunk(self._stream)
891 891 sidedata = sidedatamod.deserialize_sidedata(sidedata_raw)
892 892
893 893 return (
894 894 node,
895 895 p1,
896 896 p2,
897 897 cs,
898 898 deltabase,
899 899 delta,
900 900 flags,
901 901 sidedata,
902 902 protocol_flags,
903 903 )
904 904
905 905
906 906 class headerlessfixup:
907 907 def __init__(self, fh, h):
908 908 self._h = h
909 909 self._fh = fh
910 910
911 911 def read(self, n):
912 912 if self._h:
913 913 d, self._h = self._h[:n], self._h[n:]
914 914 if len(d) < n:
915 915 d += readexactly(self._fh, n - len(d))
916 916 return d
917 917 return readexactly(self._fh, n)
918 918
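# Illustration (editor's sketch, not part of changegroup.py): what
# headerlessfixup is for. After magic bytes are read off a stream to sniff
# its type, the wrapper replays them before the remaining data.
import io

_fh = io.BytesIO(b'HG20-rest-of-stream')
_magic = _fh.read(4)  # consumed while probing the header
_fixed = headerlessfixup(_fh, _magic)
assert _fixed.read(6) == b'HG20-r'  # 4 replayed bytes, then 2 fresh ones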
919 919
920 920 def _revisiondeltatochunks(repo, delta, headerfn):
921 921 """Serialize a revisiondelta to changegroup chunks."""
922 922
923 923 # The captured revision delta may be encoded as a delta against
924 924 # a base revision or as a full revision. The changegroup format
925 925 # requires that everything on the wire be deltas. So for full
926 926 # revisions, we need to invent a header that says to rewrite
927 927 # data.
928 928
929 929 if delta.delta is not None:
930 930 prefix, data = b'', delta.delta
931 931 elif delta.basenode == repo.nullid:
932 932 data = delta.revision
933 933 prefix = mdiff.trivialdiffheader(len(data))
934 934 else:
935 935 data = delta.revision
936 936 prefix = mdiff.replacediffheader(delta.baserevisionsize, len(data))
937 937
938 938 meta = headerfn(delta)
939 939
940 940 yield chunkheader(len(meta) + len(prefix) + len(data))
941 941 yield meta
942 942 if prefix:
943 943 yield prefix
944 944 yield data
945 945
946 946 if delta.protocol_flags & storageutil.CG_FLAG_SIDEDATA:
947 947 # Need a separate chunk for sidedata to be able to differentiate
948 948 # "raw delta" length and sidedata length
949 949 sidedata = delta.sidedata
950 950 yield chunkheader(len(sidedata))
951 951 yield sidedata
952 952
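# Illustration (editor's sketch; assumes mdiff's binary hunk encoding of
# ">lll" start/end/length headers followed by data): how a full revision is
# shipped as a "delta". A trivialdiffheader-style hunk inserts the whole text
# at offset 0 of an empty base; a replacediffheader-style hunk replaces the
# entire base.
import struct

def _apply(base, delta):
    # minimal patcher for (start, end, length)-framed hunks
    out, last, pos = [], 0, 0
    while pos < len(delta):
        start, end, l = struct.unpack(b">lll", delta[pos:pos + 12])
        out.append(base[last:start])
        out.append(delta[pos + 12:pos + 12 + l])
        last, pos = end, pos + 12 + l
    out.append(base[last:])
    return b''.join(out)

_text = b'full revision text'
_trivial = struct.pack(b">lll", 0, 0, len(_text))  # against an empty base
_replace = struct.pack(b">lll", 0, 8, len(_text))  # replace an 8-byte base
assert _apply(b'', _trivial + _text) == _text
assert _apply(b'old base', _replace + _text) == _text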
953 953
954 954 def _sortnodesellipsis(store, nodes, cl, lookup):
955 955 """Sort nodes for changegroup generation."""
956 956 # Ellipses serving mode.
957 957 #
958 958 # In a perfect world, we'd generate better ellipsis-ified graphs
959 959 # for non-changelog revlogs. In practice, we haven't started doing
960 960 # that yet, so the resulting DAGs for the manifestlog and filelogs
961 961 # are actually full of bogus parentage on all the ellipsis
962 962 # nodes. This has the side effect that, while the contents are
963 963 # correct, the individual DAGs might be completely out of whack in
964 964 # a case like 882681bc3166 and its ancestors (back about 10
965 965 # revisions or so) in the main hg repo.
966 966 #
967 967 # The one invariant we *know* holds is that the new (potentially
968 968 # bogus) DAG shape will be valid if we order the nodes in the
969 969 # order that they're introduced in dramatis personae by the
970 970 # changelog, so what we do is we sort the non-changelog histories
971 971 # by the order in which they are used by the changelog.
972 972 key = lambda n: cl.rev(lookup(n))
973 973 return sorted(nodes, key=key)
974 974
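# Illustration (editor's sketch with synthetic revisions): the sort key used
# above. Nodes are ordered by the changelog revision of their linkrev node,
# i.e. by when the changelog first introduces them.
_cl_rev = {b'link-a': 7, b'link-b': 2}
_lookup = {b'a': b'link-a', b'b': b'link-b'}.get
assert sorted([b'a', b'b'], key=lambda n: _cl_rev[_lookup(n)]) == [b'b', b'a']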
975 975
976 976 def _resolvenarrowrevisioninfo(
977 977 cl,
978 978 store,
979 979 ischangelog,
980 980 rev,
981 981 linkrev,
982 982 linknode,
983 983 clrevtolocalrev,
984 984 fullclnodes,
985 985 precomputedellipsis,
986 986 ):
987 987 linkparents = precomputedellipsis[linkrev]
988 988
989 989 def local(clrev):
990 990 """Turn a changelog revnum into a local revnum.
991 991
992 992 The ellipsis dag is stored as revnums on the changelog,
993 993 but when we're producing ellipsis entries for
994 994 non-changelog revlogs, we need to turn those numbers into
995 995 something local. This does that for us, and during the
996 996 changelog sending phase will also expand the stored
997 997 mappings as needed.
998 998 """
999 999 if clrev == nullrev:
1000 1000 return nullrev
1001 1001
1002 1002 if ischangelog:
1003 1003 return clrev
1004 1004
1005 1005 # Walk the ellipsis-ized changelog breadth-first looking for a
1006 1006 # change that has been linked from the current revlog.
1007 1007 #
1008 1008 # For a flat manifest revlog only a single step should be necessary
1009 1009 # as all relevant changelog entries are relevant to the flat
1010 1010 # manifest.
1011 1011 #
1012 1012 # For a filelog or tree manifest dirlog however not every changelog
1013 1013 # entry will have been relevant, so we need to skip some changelog
1014 1014 # nodes even after ellipsis-izing.
1015 1015 walk = [clrev]
1016 1016 while walk:
1017 1017 p = walk[0]
1018 1018 walk = walk[1:]
1019 1019 if p in clrevtolocalrev:
1020 1020 return clrevtolocalrev[p]
1021 1021 elif p in fullclnodes:
1022 1022 walk.extend([pp for pp in cl.parentrevs(p) if pp != nullrev])
1023 1023 elif p in precomputedellipsis:
1024 1024 walk.extend(
1025 1025 [pp for pp in precomputedellipsis[p] if pp != nullrev]
1026 1026 )
1027 1027 else:
1028 1028 # In this case, we've got an ellipsis with parents
1029 1029 # outside the current bundle (likely an
1030 1030 # incremental pull). We "know" that we can use the
1031 1031 # value of this same revlog at whatever revision
1032 1032 # is pointed to by linknode. "Know" is in scare
1033 1033 # quotes because I haven't done enough examination
1034 1034 # of edge cases to convince myself this is really
1035 1035 # a fact - it works for all the (admittedly
1036 1036 # thorough) cases in our testsuite, but I would be
1037 1037 # somewhat unsurprised to find a case in the wild
1038 1038 # where this breaks down a bit. That said, I don't
1039 1039 # know if it would hurt anything.
1040 1040 for i in range(rev, 0, -1):
1041 1041 if store.linkrev(i) == clrev:
1042 1042 return i
1043 1043 # We failed to resolve a parent for this node, so
1044 1044 # we crash the changegroup construction.
1045 1045 if hasattr(store, 'target'):
1046 1046 target = store.display_id
1047 1047 else:
1048 1048 # some revlog not actually a revlog
1049 1049 target = store._revlog.display_id
1050 1050
1051 1051 raise error.Abort(
1052 1052 b"unable to resolve parent while packing '%s' %r"
1053 1053 b' for changeset %r' % (target, rev, clrev)
1054 1054 )
1055 1055
1056 1056 return nullrev
1057 1057
1058 1058 if not linkparents or (store.parentrevs(rev) == (nullrev, nullrev)):
1059 1059 p1, p2 = nullrev, nullrev
1060 1060 elif len(linkparents) == 1:
1061 1061 (p1,) = sorted(local(p) for p in linkparents)
1062 1062 p2 = nullrev
1063 1063 else:
1064 1064 p1, p2 = sorted(local(p) for p in linkparents)
1065 1065
1066 1066 p1node, p2node = store.node(p1), store.node(p2)
1067 1067
1068 1068 return p1node, p2node, linknode
1069 1069
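# Illustration (editor's sketch with a synthetic ellipsis DAG): the
# breadth-first walk local() performs above, climbing changelog revnums until
# one is known to the current revlog.
_clrevtolocalrev = {3: 0}                # changelog rev -> local rev
_precomputedellipsis = {9: [7], 7: [3]}  # ellipsis parent map
_walk = [9]
_found = None
while _walk:
    _p = _walk.pop(0)
    if _p in _clrevtolocalrev:
        _found = _clrevtolocalrev[_p]
        break
    _walk.extend(pp for pp in _precomputedellipsis.get(_p, []) if pp != -1)
assert _found == 0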
1070 1070
1071 1071 def deltagroup(
1072 1072 repo,
1073 1073 store,
1074 1074 nodes,
1075 1075 ischangelog,
1076 1076 lookup,
1077 1077 forcedeltaparentprev,
1078 1078 topic=None,
1079 1079 ellipses=False,
1080 1080 clrevtolocalrev=None,
1081 1081 fullclnodes=None,
1082 1082 precomputedellipsis=None,
1083 1083 sidedata_helpers=None,
1084 1084 debug_info=None,
1085 1085 ):
1086 1086 """Calculate deltas for a set of revisions.
1087 1087
1088 1088 Is a generator of ``revisiondelta`` instances.
1089 1089
1090 1090 If topic is not None, progress detail will be generated using this
1091 1091 topic name (e.g. changesets, manifests, etc).
1092 1092
1093 1093 See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
1094 1094 `sidedata_helpers`.
1095 1095 """
1096 1096 if not nodes:
1097 1097 return
1098 1098
1099 1099 cl = repo.changelog
1100 1100
1101 1101 if ischangelog:
1102 1102 # `hg log` shows changesets in storage order. To preserve order
1103 1103 # across clones, send out changesets in storage order.
1104 1104 nodesorder = b'storage'
1105 1105 elif ellipses:
1106 1106 nodes = _sortnodesellipsis(store, nodes, cl, lookup)
1107 1107 nodesorder = b'nodes'
1108 1108 else:
1109 1109 nodesorder = None
1110 1110
1111 1111 # Perform ellipses filtering and revision massaging. We do this before
1112 1112 # emitrevisions() because a) filtering out revisions creates less work
1113 1113 # for emitrevisions() b) dropping revisions would break emitrevisions()'s
1114 1114 # assumptions about delta choices and we would possibly send a delta
1115 1115 # referencing a missing base revision.
1116 1116 #
1117 1117 # Also, calling lookup() has side-effects with regards to populating
1118 1118 # data structures. If we don't call lookup() for each node or if we call
1119 1119 # lookup() after the first pass through each node, things can break -
1120 1120 # possibly intermittently depending on the python hash seed! For that
1121 1121 # reason, we store a mapping of all linknodes during the initial node
1122 1122 # pass rather than use lookup() on the output side.
1123 1123 if ellipses:
1124 1124 filtered = []
1125 1125 adjustedparents = {}
1126 1126 linknodes = {}
1127 1127
1128 1128 for node in nodes:
1129 1129 rev = store.rev(node)
1130 1130 linknode = lookup(node)
1131 1131 linkrev = cl.rev(linknode)
1132 1132 clrevtolocalrev[linkrev] = rev
1133 1133
1134 1134 # If linknode is in fullclnodes, it means the corresponding
1135 1135 # changeset was a full changeset and is being sent unaltered.
1136 1136 if linknode in fullclnodes:
1137 1137 linknodes[node] = linknode
1138 1138
1139 1139 # If the corresponding changeset wasn't in the set computed
1140 1140 # as relevant to us, it should be dropped outright.
1141 1141 elif linkrev not in precomputedellipsis:
1142 1142 continue
1143 1143
1144 1144 else:
1145 1145 # We could probably do this later and avoid the dict
1146 1146 # holding state. But it likely doesn't matter.
1147 1147 p1node, p2node, linknode = _resolvenarrowrevisioninfo(
1148 1148 cl,
1149 1149 store,
1150 1150 ischangelog,
1151 1151 rev,
1152 1152 linkrev,
1153 1153 linknode,
1154 1154 clrevtolocalrev,
1155 1155 fullclnodes,
1156 1156 precomputedellipsis,
1157 1157 )
1158 1158
1159 1159 adjustedparents[node] = (p1node, p2node)
1160 1160 linknodes[node] = linknode
1161 1161
1162 1162 filtered.append(node)
1163 1163
1164 1164 nodes = filtered
1165 1165
1166 1166 # We expect the first pass to be fast, so we only engage the progress
1167 1167 # meter for constructing the revision deltas.
1168 1168 progress = None
1169 1169 if topic is not None:
1170 1170 progress = repo.ui.makeprogress(
1171 1171 topic, unit=_(b'chunks'), total=len(nodes)
1172 1172 )
1173 1173
1174 1174 configtarget = repo.ui.config(b'devel', b'bundle.delta')
1175 1175 if configtarget not in (b'', b'p1', b'full'):
1176 1176 msg = _(b"""config "devel.bundle.delta" has unknown value: %s""")
1177 1177 repo.ui.warn(msg % configtarget)
1178 1178
1179 1179 deltamode = repository.CG_DELTAMODE_STD
1180 1180 if forcedeltaparentprev:
1181 1181 deltamode = repository.CG_DELTAMODE_PREV
1182 1182 elif configtarget == b'p1':
1183 1183 deltamode = repository.CG_DELTAMODE_P1
1184 1184 elif configtarget == b'full':
1185 1185 deltamode = repository.CG_DELTAMODE_FULL
1186 1186
1187 1187 revisions = store.emitrevisions(
1188 1188 nodes,
1189 1189 nodesorder=nodesorder,
1190 1190 revisiondata=True,
1191 1191 assumehaveparentrevisions=not ellipses,
1192 1192 deltamode=deltamode,
1193 1193 sidedata_helpers=sidedata_helpers,
1194 1194 debug_info=debug_info,
1195 1195 )
1196 1196
1197 1197 for i, revision in enumerate(revisions):
1198 1198 if progress:
1199 1199 progress.update(i + 1)
1200 1200
1201 1201 if ellipses:
1202 1202 linknode = linknodes[revision.node]
1203 1203
1204 1204 if revision.node in adjustedparents:
1205 1205 p1node, p2node = adjustedparents[revision.node]
1206 1206 revision.p1node = p1node
1207 1207 revision.p2node = p2node
1208 1208 revision.flags |= repository.REVISION_FLAG_ELLIPSIS
1209 1209
1210 1210 else:
1211 1211 linknode = lookup(revision.node)
1212 1212
1213 1213 revision.linknode = linknode
1214 1214 yield revision
1215 1215
1216 1216 if progress:
1217 1217 progress.complete()
1218 1218
1219 1219
1220 1220 def make_debug_info():
1221 1221 """build a "new" debug_info dictionary
1222 1222
1223 1223 That dictionary can be used to gather information about the bundling process
1224 1224 """
1225 1225 return {
1226 1226 'revision-total': 0,
1227 1227 'revision-changelog': 0,
1228 1228 'revision-manifest': 0,
1229 1229 'revision-files': 0,
1230 1230 'file-count': 0,
1231 1231 'merge-total': 0,
1232 1232 'available-delta': 0,
1233 1233 'available-full': 0,
1234 1234 'delta-against-prev': 0,
1235 1235 'delta-full': 0,
1236 1236 'delta-against-p1': 0,
1237 1237 'denied-delta-candeltafn': 0,
1238 1238 'denied-base-not-available': 0,
1239 1239 'reused-storage-delta': 0,
1240 1240 'computed-delta': 0,
1241 1241 }
1242 1242
1243 1243
1244 1244 def merge_debug_info(base, other):
1245 1245 """merge the debug information from <other> into <base>
1246 1246
1247 1247 This function can be used to gather lower level information into higher level ones.
1248 1248 """
1249 1249 for key in (
1250 1250 'revision-total',
1251 1251 'revision-changelog',
1252 1252 'revision-manifest',
1253 1253 'revision-files',
1254 1254 'merge-total',
1255 1255 'available-delta',
1256 1256 'available-full',
1257 1257 'delta-against-prev',
1258 1258 'delta-full',
1259 1259 'delta-against-p1',
1260 1260 'denied-delta-candeltafn',
1261 1261 'denied-base-not-available',
1262 1262 'reused-storage-delta',
1263 1263 'computed-delta',
1264 1264 ):
1265 1265 base[key] += other[key]
1266 1266
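# Illustration (editor's sketch, not part of changegroup.py): how the two
# helpers above combine per-revlog statistics into a bundle-wide total.
_total = make_debug_info()
_cl_stats = make_debug_info()
_cl_stats['revision-total'] += 3
_cl_stats['revision-changelog'] += 3
merge_debug_info(_total, _cl_stats)
assert _total['revision-total'] == 3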
1267 1267
1268 1268 _KEY_PART_WIDTH = 17
1269 1269
1270 1270
1271 1271 def _dbg_bdl_line(
1272 1272 ui,
1273 1273 indent,
1274 1274 key,
1275 1275 base_value=None,
1276 1276 percentage_base=None,
1277 1277 percentage_key=None,
1278 1278 percentage_ref=None,
1279 1279 extra=None,
1280 1280 ):
1281 1281 """Print one line of display_bundling_debug_info output"""
1282 1282 line = b"DEBUG-BUNDLING: "
1283 1283 line += b' ' * (2 * indent)
1284 1284 key += b":"
1285 1285 if base_value is not None:
1286 1286 assert len(key) + 1 + (2 * indent) <= _KEY_PART_WIDTH
1287 1287 line += key.ljust(_KEY_PART_WIDTH - (2 * indent))
1288 1288 line += b"%10d" % base_value
1289 1289 else:
1290 1290 line += key
1291 1291
1292 1292 if percentage_base is not None:
1293 1293 assert base_value is not None
1294 1294 percentage = base_value * 100 // percentage_base
1295 1295 if percentage_key is not None:
1296 1296 line += b" (%d%% of %s %d)" % (
1297 1297 percentage,
1298 1298 percentage_key,
1299 1299 percentage_ref,
1300 1300 )
1301 1301 else:
1302 1302 line += b" (%d%%)" % percentage
1303 1303
1304 1304 if extra:
1305 1305 line += b" "
1306 1306 line += extra
1307 1307
1308 1308 line += b'\n'
1309 1309 ui.write_err(line)
1310 1310
1311 1311
1312 1312 def display_bundling_debug_info(
1313 1313 ui,
1314 1314 debug_info,
1315 1315 cl_debug_info,
1316 1316 mn_debug_info,
1317 1317 fl_debug_info,
1318 1318 ):
1319 1319 """display the debug information gathered during bundling through `ui`"""
1320 1320 d = debug_info
1321 1321 c = cl_debug_info
1322 1322 m = mn_debug_info
1323 1323 f = fl_debug_info
1324 1324 all_info = [
1325 1325 (b"changelog", b"cl", c),
1326 1326 (b"manifests", b"mn", m),
1327 1327 (b"files", b"fl", f),
1328 1328 ]
1329 1329 _dbg_bdl_line(ui, 0, b'revisions', d['revision-total'])
1330 1330 _dbg_bdl_line(ui, 1, b'changelog', d['revision-changelog'])
1331 1331 _dbg_bdl_line(ui, 1, b'manifest', d['revision-manifest'])
1332 1332 extra = b'(for %d revlogs)' % d['file-count']
1333 1333 _dbg_bdl_line(ui, 1, b'files', d['revision-files'], extra=extra)
1334 1334 if d['merge-total']:
1335 1335 _dbg_bdl_line(ui, 1, b'merge', d['merge-total'], d['revision-total'])
1336 1336 for k, __, v in all_info:
1337 1337 if v['merge-total']:
1338 1338 _dbg_bdl_line(ui, 2, k, v['merge-total'], v['revision-total'])
1339 1339
1340 1340 _dbg_bdl_line(ui, 0, b'deltas')
1341 1341 _dbg_bdl_line(
1342 1342 ui,
1343 1343 1,
1344 1344 b'from-storage',
1345 1345 d['reused-storage-delta'],
1346 1346 percentage_base=d['available-delta'],
1347 1347 percentage_key=b"available",
1348 1348 percentage_ref=d['available-delta'],
1349 1349 )
1350 1350
1351 1351 if d['denied-delta-candeltafn']:
1352 1352 _dbg_bdl_line(ui, 2, b'denied-fn', d['denied-delta-candeltafn'])
1353 1353 for __, k, v in all_info:
1354 1354 if v['denied-delta-candeltafn']:
1355 1355 _dbg_bdl_line(ui, 3, k, v['denied-delta-candeltafn'])
1356 1356
1357 1357 if d['denied-base-not-available']:
1358 1358 _dbg_bdl_line(ui, 2, b'denied-nb', d['denied-base-not-available'])
1359 1359 for k, __, v in all_info:
1360 1360 if v['denied-base-not-available']:
1361 1361 _dbg_bdl_line(ui, 3, k, v['denied-base-not-available'])
1362 1362
1363 1363 if d['computed-delta']:
1364 1364 _dbg_bdl_line(ui, 1, b'computed', d['computed-delta'])
1365 1365
1366 1366 if d['available-full']:
1367 1367 _dbg_bdl_line(
1368 1368 ui,
1369 1369 2,
1370 1370 b'full',
1371 1371 d['delta-full'],
1372 1372 percentage_base=d['available-full'],
1373 1373 percentage_key=b"native",
1374 1374 percentage_ref=d['available-full'],
1375 1375 )
1376 1376 for k, __, v in all_info:
1377 1377 if v['available-full']:
1378 1378 _dbg_bdl_line(
1379 1379 ui,
1380 1380 3,
1381 1381 k,
1382 1382 v['delta-full'],
1383 1383 percentage_base=v['available-full'],
1384 1384 percentage_key=b"native",
1385 1385 percentage_ref=v['available-full'],
1386 1386 )
1387 1387
1388 1388 if d['delta-against-prev']:
1389 1389 _dbg_bdl_line(ui, 2, b'previous', d['delta-against-prev'])
1390 1390 for k, __, v in all_info:
1391 1391 if v['delta-against-prev']:
1392 1392 _dbg_bdl_line(ui, 3, k, v['delta-against-prev'])
1393 1393
1394 1394 if d['delta-against-p1']:
1395 1395 _dbg_bdl_line(ui, 2, b'parent-1', d['delta-against-p1'])
1396 1396 for k, __, v in all_info:
1397 1397 if v['delta-against-p1']:
1398 1398 _dbg_bdl_line(ui, 3, k, v['delta-against-p1'])
1399 1399
1400 1400
1401 1401 class cgpacker:
1402 1402 def __init__(
1403 1403 self,
1404 1404 repo,
1405 1405 oldmatcher,
1406 1406 matcher,
1407 1407 version,
1408 1408 builddeltaheader,
1409 1409 manifestsend,
1410 1410 forcedeltaparentprev=False,
1411 1411 bundlecaps=None,
1412 1412 ellipses=False,
1413 1413 shallow=False,
1414 1414 ellipsisroots=None,
1415 1415 fullnodes=None,
1416 1416 remote_sidedata=None,
1417 1417 ):
1418 1418 """Given a source repo, construct a bundler.
1419 1419
1420 1420 oldmatcher is a matcher that matches on files the client already has.
1421 1421 These will not be included in the changegroup.
1422 1422
1423 1423 matcher is a matcher that matches on files to include in the
1424 1424 changegroup. Used to facilitate sparse changegroups.
1425 1425
1426 1426 forcedeltaparentprev indicates whether delta parents must be against
1427 1427 the previous revision in a delta group. This should only be used for
1428 1428 compatibility with changegroup version 1.
1429 1429
1430 1430 builddeltaheader is a callable that constructs the header for a group
1431 1431 delta.
1432 1432
1433 1433 manifestsend is a chunk to send after manifests have been fully emitted.
1434 1434
1435 1435 ellipses indicates whether ellipsis serving mode is enabled.
1436 1436
1437 1437 bundlecaps is optional and can be used to specify the set of
1438 1438 capabilities which can be used to build the bundle. While bundlecaps is
1439 1439 unused in core Mercurial, extensions rely on this feature to communicate
1440 1440 capabilities to customize the changegroup packer.
1441 1441
1442 1442 shallow indicates whether shallow data might be sent. The packer may
1443 1443 need to pack file contents not introduced by the changes being packed.
1444 1444
1445 1445 fullnodes is the set of changelog nodes which should not be ellipsis
1446 1446 nodes. We store this rather than the set of nodes that should be
1447 1447 ellipsis because for very large histories we expect this to be
1448 1448 significantly smaller.
1449 1449
1450 1450 remote_sidedata is the set of sidedata categories wanted by the remote.
1451 1451 """
1452 1452 assert oldmatcher
1453 1453 assert matcher
1454 1454 self._oldmatcher = oldmatcher
1455 1455 self._matcher = matcher
1456 1456
1457 1457 self.version = version
1458 1458 self._forcedeltaparentprev = forcedeltaparentprev
1459 1459 self._builddeltaheader = builddeltaheader
1460 1460 self._manifestsend = manifestsend
1461 1461 self._ellipses = ellipses
1462 1462
1463 1463 # Set of capabilities we can use to build the bundle.
1464 1464 if bundlecaps is None:
1465 1465 bundlecaps = set()
1466 1466 self._bundlecaps = bundlecaps
1467 1467 if remote_sidedata is None:
1468 1468 remote_sidedata = set()
1469 1469 self._remote_sidedata = remote_sidedata
1470 1470 self._isshallow = shallow
1471 1471 self._fullclnodes = fullnodes
1472 1472
1473 1473 # Maps ellipsis revs to their roots at the changelog level.
1474 1474 self._precomputedellipsis = ellipsisroots
1475 1475
1476 1476 self._repo = repo
1477 1477
1478 1478 if self._repo.ui.verbose and not self._repo.ui.debugflag:
1479 1479 self._verbosenote = self._repo.ui.note
1480 1480 else:
1481 1481 self._verbosenote = lambda s: None
1482 1482
1483 1483 def generate(
1484 1484 self,
1485 1485 commonrevs,
1486 1486 clnodes,
1487 1487 fastpathlinkrev,
1488 1488 source,
1489 1489 changelog=True,
1490 1490 ):
1491 1491 """Yield a sequence of changegroup byte chunks.
1492 1492 If changelog is False, changelog data won't be added to changegroup
1493 1493 """
1494 1494
1495 1495 debug_info = None
1496 1496 repo = self._repo
1497 1497 if repo.ui.configbool(b'debug', b'bundling-stats'):
1498 1498 debug_info = make_debug_info()
1499 1499 cl = repo.changelog
1500 1500
1501 1501 self._verbosenote(_(b'uncompressed size of bundle content:\n'))
1502 1502 size = 0
1503 1503
1504 1504 sidedata_helpers = None
1505 1505 if self.version == b'04':
1506 1506 remote_sidedata = self._remote_sidedata
1507 1507 if source == b'strip':
1508 1508 # We're our own remote when stripping, get the no-op helpers
1509 1509 # TODO a better approach would be for the strip bundle to
1510 1510 # correctly advertise its sidedata categories directly.
1511 1511 remote_sidedata = repo._wanted_sidedata
1512 1512 sidedata_helpers = sidedatamod.get_sidedata_helpers(
1513 1513 repo,
1514 1514 remote_sidedata,
1515 1515 )
1516 1516
1517 1517 cl_debug_info = None
1518 1518 if debug_info is not None:
1519 1519 cl_debug_info = make_debug_info()
1520 1520 clstate, deltas = self._generatechangelog(
1521 1521 cl,
1522 1522 clnodes,
1523 1523 generate=changelog,
1524 1524 sidedata_helpers=sidedata_helpers,
1525 1525 debug_info=cl_debug_info,
1526 1526 )
1527 1527 for delta in deltas:
1528 1528 for chunk in _revisiondeltatochunks(
1529 1529 self._repo, delta, self._builddeltaheader
1530 1530 ):
1531 1531 size += len(chunk)
1532 1532 yield chunk
1533 1533
1534 1534 close = closechunk()
1535 1535 size += len(close)
1536 1536 yield close
1537 1537 if debug_info is not None:
1538 1538 merge_debug_info(debug_info, cl_debug_info)
1539 1539 debug_info['revision-changelog'] = cl_debug_info['revision-total']
1540 1540
1541 1541 self._verbosenote(_(b'%8.i (changelog)\n') % size)
1542 1542
1543 1543 clrevorder = clstate[b'clrevorder']
1544 1544 manifests = clstate[b'manifests']
1545 1545 changedfiles = clstate[b'changedfiles']
1546 1546
1547 1547 if debug_info is not None:
1548 1548 debug_info['file-count'] = len(changedfiles)
1549 1549
1550 1550 # We need to make sure that the linkrev in the changegroup refers to
1551 1551 # the first changeset that introduced the manifest or file revision.
1552 1552 # The fastpath is usually safer than the slowpath, because the filelogs
1553 1553 # are walked in revlog order.
1554 1554 #
1555 1555 # When taking the slowpath when the manifest revlog uses generaldelta,
1556 1556 # the manifest may be walked in the "wrong" order. Without 'clrevorder',
1557 1557 # we would get an incorrect linkrev (see fix in cc0ff93d0c0c).
1558 1558 #
1559 1559 # When taking the fastpath, we are only vulnerable to reordering
1560 1560 # of the changelog itself. The changelog never uses generaldelta and is
1561 1561 # never reordered. To handle this case, we simply take the slowpath,
1562 1562 # which already has the 'clrevorder' logic. This was also fixed in
1563 1563 # cc0ff93d0c0c.
1564 1564
1565 1565 # Treemanifests don't work correctly with fastpathlinkrev
1566 1566 # either, because we don't discover which directory nodes to
1567 1567 # send along with files. This could probably be fixed.
1568 1568 fastpathlinkrev = fastpathlinkrev and not scmutil.istreemanifest(repo)
1569 1569
1570 1570 fnodes = {} # needed file nodes
1571 1571
1572 1572 size = 0
1573 1573 mn_debug_info = None
1574 1574 if debug_info is not None:
1575 1575 mn_debug_info = make_debug_info()
1576 1576 it = self.generatemanifests(
1577 1577 commonrevs,
1578 1578 clrevorder,
1579 1579 fastpathlinkrev,
1580 1580 manifests,
1581 1581 fnodes,
1582 1582 source,
1583 1583 clstate[b'clrevtomanifestrev'],
1584 1584 sidedata_helpers=sidedata_helpers,
1585 1585 debug_info=mn_debug_info,
1586 1586 )
1587 1587
1588 1588 for tree, deltas in it:
1589 1589 if tree:
1590 1590 assert self.version in (b'03', b'04')
1591 1591 chunk = _fileheader(tree)
1592 1592 size += len(chunk)
1593 1593 yield chunk
1594 1594
1595 1595 for delta in deltas:
1596 1596 chunks = _revisiondeltatochunks(
1597 1597 self._repo, delta, self._builddeltaheader
1598 1598 )
1599 1599 for chunk in chunks:
1600 1600 size += len(chunk)
1601 1601 yield chunk
1602 1602
1603 1603 close = closechunk()
1604 1604 size += len(close)
1605 1605 yield close
1606 1606 if debug_info is not None:
1607 1607 merge_debug_info(debug_info, mn_debug_info)
1608 1608 debug_info['revision-manifest'] = mn_debug_info['revision-total']
1609 1609
1610 1610 self._verbosenote(_(b'%8.i (manifests)\n') % size)
1611 1611 yield self._manifestsend
1612 1612
        mfdicts = None
        if self._ellipses and self._isshallow:
            mfdicts = [
                (repo.manifestlog[n].read(), lr)
                for (n, lr) in pycompat.iteritems(manifests)
            ]

        manifests.clear()
        clrevs = {cl.rev(x) for x in clnodes}

        fl_debug_info = None
        if debug_info is not None:
            fl_debug_info = make_debug_info()
        it = self.generatefiles(
            changedfiles,
            commonrevs,
            source,
            mfdicts,
            fastpathlinkrev,
            fnodes,
            clrevs,
            sidedata_helpers=sidedata_helpers,
            debug_info=fl_debug_info,
        )

        for path, deltas in it:
            h = _fileheader(path)
            size = len(h)
            yield h

            for delta in deltas:
                chunks = _revisiondeltatochunks(
                    self._repo, delta, self._builddeltaheader
                )
                for chunk in chunks:
                    size += len(chunk)
                    yield chunk

            close = closechunk()
            size += len(close)
            yield close

            self._verbosenote(_(b'%8.i %s\n') % (size, path))

        yield closechunk()
        if debug_info is not None:
            merge_debug_info(debug_info, fl_debug_info)
            debug_info['revision-files'] = fl_debug_info['revision-total']

        if debug_info is not None:
            display_bundling_debug_info(
                repo.ui,
                debug_info,
                cl_debug_info,
                mn_debug_info,
                fl_debug_info,
            )

        if clnodes:
            repo.hook(b'outgoing', node=hex(clnodes[0]), source=source)

    def _generatechangelog(
        self,
        cl,
        nodes,
        generate=True,
        sidedata_helpers=None,
        debug_info=None,
    ):
        """Generate data for changelog chunks.

        Returns a 2-tuple of a dict containing state and an iterable of
        byte chunks. The state will not be fully populated until the
        chunk stream has been fully consumed.

        If generate is False, the state will be fully populated and no
        chunk stream will be yielded.

        See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
        `sidedata_helpers`.
        """
        clrevorder = {}
        manifests = {}
        mfl = self._repo.manifestlog
        changedfiles = set()
        clrevtomanifestrev = {}

        state = {
            b'clrevorder': clrevorder,
            b'manifests': manifests,
            b'changedfiles': changedfiles,
            b'clrevtomanifestrev': clrevtomanifestrev,
        }

        if not (generate or self._ellipses):
            # sort the nodes in storage order
            nodes = sorted(nodes, key=cl.rev)
            for node in nodes:
                c = cl.changelogrevision(node)
                clrevorder[node] = len(clrevorder)
                # record the first changeset introducing this manifest version
                manifests.setdefault(c.manifest, node)
                # Record a complete list of potentially-changed files in
                # this manifest.
                changedfiles.update(c.files)

            return state, ()

        # Callback for the changelog, used to collect changed files and
        # manifest nodes.
        # Returns the linkrev node (identity in the changelog case).
        def lookupcl(x):
            c = cl.changelogrevision(x)
            clrevorder[x] = len(clrevorder)

            if self._ellipses:
                # Only update manifests if x is going to be sent. Otherwise we
                # end up with bogus linkrevs specified for manifests and
                # we skip some manifest nodes that we should otherwise
                # have sent.
                if (
                    x in self._fullclnodes
                    or cl.rev(x) in self._precomputedellipsis
                ):
                    manifestnode = c.manifest
                    # Record the first changeset introducing this manifest
                    # version.
                    manifests.setdefault(manifestnode, x)
                    # Set this narrow-specific dict so we have the lowest
                    # manifest revnum to look up for this cl revnum. (Part of
                    # mapping changelog ellipsis parents to manifest ellipsis
                    # parents)
                    clrevtomanifestrev.setdefault(
                        cl.rev(x), mfl.rev(manifestnode)
                    )
                # We can't trust the changed files list in the changeset if the
                # client requested a shallow clone.
                if self._isshallow:
                    changedfiles.update(mfl[c.manifest].read().keys())
                else:
                    changedfiles.update(c.files)
            else:
                # record the first changeset introducing this manifest version
                manifests.setdefault(c.manifest, x)
                # Record a complete list of potentially-changed files in
                # this manifest.
                changedfiles.update(c.files)

            return x

        gen = deltagroup(
            self._repo,
            cl,
            nodes,
            True,
            lookupcl,
            self._forcedeltaparentprev,
            ellipses=self._ellipses,
            topic=_(b'changesets'),
            clrevtolocalrev={},
            fullclnodes=self._fullclnodes,
            precomputedellipsis=self._precomputedellipsis,
            sidedata_helpers=sidedata_helpers,
            debug_info=debug_info,
        )

        return state, gen
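    # Consumption sketch for _generatechangelog (illustrative; 'packer' is
    # a hypothetical cgpacker instance). The delta stream must be drained
    # before the returned state can be trusted:
    #
    #     clstate, deltas = packer._generatechangelog(cl, nodes)
    #     for delta in deltas:
    #         pass  # normally converted via _revisiondeltatochunks
    #     manifests = clstate[b'manifests']  # now fully populated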

    def generatemanifests(
        self,
        commonrevs,
        clrevorder,
        fastpathlinkrev,
        manifests,
        fnodes,
        source,
        clrevtolocalrev,
        sidedata_helpers=None,
        debug_info=None,
    ):
        """Returns an iterator of changegroup chunks containing manifests.

        `source` is unused here, but is used by extensions like remotefilelog
        to change what is sent based on pulls vs pushes, etc.

        See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
        `sidedata_helpers`.
        """
        repo = self._repo
        mfl = repo.manifestlog
        tmfnodes = {b'': manifests}

        # Callback for the manifest, used to collect linkrevs for filelog
        # revisions.
        # Returns the linkrev node (collected in lookupcl).
        def makelookupmflinknode(tree, nodes):
            if fastpathlinkrev:
                assert not tree

                # pytype: disable=unsupported-operands
                return manifests.__getitem__
                # pytype: enable=unsupported-operands

            def lookupmflinknode(x):
                """Callback for looking up the linknode for manifests.

                Returns the linkrev node for the specified manifest.

                SIDE EFFECT:

                1) fclnodes gets populated with the list of relevant
                   file nodes if we're not using fastpathlinkrev
                2) When treemanifests are in use, collects treemanifest nodes
                   to send

                Note that this means manifests must be completely sent to
                the client before you can trust the list of files and
                treemanifests to send.
                """
                clnode = nodes[x]
                mctx = mfl.get(tree, x)
                mdata = mctx.read_delta_parents(shallow=True, exact=False)
                for p, n, fl in mdata.iterentries():
                    if fl == b't':  # subdirectory manifest
                        subtree = tree + p + b'/'
                        tmfclnodes = tmfnodes.setdefault(subtree, {})
                        tmfclnode = tmfclnodes.setdefault(n, clnode)
                        if clrevorder[clnode] < clrevorder[tmfclnode]:
                            tmfclnodes[n] = clnode
                    else:
                        f = tree + p
                        fclnodes = fnodes.setdefault(f, {})
                        fclnode = fclnodes.setdefault(n, clnode)
                        if clrevorder[clnode] < clrevorder[fclnode]:
                            fclnodes[n] = clnode
                return clnode

            return lookupmflinknode

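        # Shape of the accumulators fed by lookupmflinknode (descriptive
        # sketch with hypothetical values, not from the original comments):
        #
        #     tmfnodes = {b'': {...}, b'dir/': {manifestnode: clnode}}
        #     fnodes   = {b'dir/file.txt': {filenode: clnode}}
        #
        # In both mappings the recorded clnode is the changeset with the
        # lowest clrevorder known to introduce that node.
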
        while tmfnodes:
            tree, nodes = tmfnodes.popitem()

            should_visit = self._matcher.visitdir(tree[:-1])
            if tree and not should_visit:
                continue

            store = mfl.getstorage(tree)

            if not should_visit:
                # No nodes to send because this directory is out of
                # the client's view of the repository (probably
                # because of narrow clones). Do this even for the root
                # directory (tree=='')
                prunednodes = []
            else:
                # Avoid sending any manifest nodes we can prove the
                # client already has by checking linkrevs. See the
                # related comment in generatefiles().
                prunednodes = self._prunemanifests(store, nodes, commonrevs)

            if tree and not prunednodes:
                continue

            lookupfn = makelookupmflinknode(tree, nodes)

            deltas = deltagroup(
                self._repo,
                store,
                prunednodes,
                False,
                lookupfn,
                self._forcedeltaparentprev,
                ellipses=self._ellipses,
                topic=_(b'manifests'),
                clrevtolocalrev=clrevtolocalrev,
                fullclnodes=self._fullclnodes,
                precomputedellipsis=self._precomputedellipsis,
                sidedata_helpers=sidedata_helpers,
                debug_info=debug_info,
            )

            if not self._oldmatcher.visitdir(store.tree[:-1]):
                yield tree, deltas
            else:
                # 'deltas' is a generator and we need to consume it even if
                # we are not going to send it, because a side effect is that
                # it updates tmfnodes (via lookupfn)
                for d in deltas:
                    pass
            if not tree:
                yield tree, []

    def _prunemanifests(self, store, nodes, commonrevs):
        if not self._ellipses:
            # In the non-ellipses case, and in large repositories, it is
            # cheaper to send some extra data than to call store.rev and
            # store.linkrev on a lot of nodes.
            return nodes.copy()
        # This is split out as a separate method to allow filtering
        # commonrevs in extension code.
        #
        # TODO(augie): this shouldn't be required, instead we should
        # make filtering of revisions to send delegated to the store
        # layer.
        frev, flr = store.rev, store.linkrev
        return [n for n in nodes if flr(frev(n)) not in commonrevs]

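    # Pruning sketch (hypothetical values, illustration only): with store
    # linkrevs {n1: 5, n2: 12} and commonrevs == {5}, the ellipses path
    # above keeps only n2; the non-ellipses path returns both nodes
    # unchanged, trading some extra data on the wire for fewer rev/linkrev
    # lookups.
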
    # The 'source' parameter is useful for extensions
    def generatefiles(
        self,
        changedfiles,
        commonrevs,
        source,
        mfdicts,
        fastpathlinkrev,
        fnodes,
        clrevs,
        sidedata_helpers=None,
        debug_info=None,
    ):
        changedfiles = [
            f
            for f in changedfiles
            if self._matcher(f) and not self._oldmatcher(f)
        ]

        if not fastpathlinkrev:

            def normallinknodes(unused, fname):
                return fnodes.get(fname, {})

        else:
            cln = self._repo.changelog.node

            def normallinknodes(store, fname):
                flinkrev = store.linkrev
                fnode = store.node
                revs = ((r, flinkrev(r)) for r in store)
                return {fnode(r): cln(lr) for r, lr in revs if lr in clrevs}

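        # In the fastpath case, normallinknodes maps each filenode to the
        # changelog node of its linkrev, restricted to outgoing changesets,
        # e.g. (hypothetical values):
        #
        #     {filenode_a: clnode_for_rev5, filenode_b: clnode_for_rev12}
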
        clrevtolocalrev = {}

        if self._isshallow:
            # In a shallow clone, the linknodes callback needs to also include
            # those file nodes that are in the manifests we sent but weren't
            # introduced by those manifests.
            commonctxs = [self._repo[c] for c in commonrevs]
            clrev = self._repo.changelog.rev

            def linknodes(flog, fname):
                for c in commonctxs:
                    try:
                        fnode = c.filenode(fname)
                        clrevtolocalrev[c.rev()] = flog.rev(fnode)
                    except error.ManifestLookupError:
                        pass
                links = normallinknodes(flog, fname)
                if len(links) != len(mfdicts):
                    for mf, lr in mfdicts:
                        fnode = mf.get(fname, None)
                        if fnode in links:
                            links[fnode] = min(links[fnode], lr, key=clrev)
                        elif fnode:
                            links[fnode] = lr
                return links

        else:
            linknodes = normallinknodes

        repo = self._repo
        progress = repo.ui.makeprogress(
            _(b'files'), unit=_(b'files'), total=len(changedfiles)
        )
        for i, fname in enumerate(sorted(changedfiles)):
            filerevlog = repo.file(fname)
            if not filerevlog:
                raise error.Abort(
                    _(b"empty or missing file data for %s") % fname
                )

            clrevtolocalrev.clear()

            linkrevnodes = linknodes(filerevlog, fname)

            # Lookup for filenodes, we collected the linkrev nodes above in the
            # fastpath case and with lookupmf in the slowpath case.
            def lookupfilelog(x):
                return linkrevnodes[x]

            frev, flr = filerevlog.rev, filerevlog.linkrev
            # Skip sending any filenode we know the client already
            # has. This avoids over-sending files relatively
            # inexpensively, so it's not a problem if we under-filter
            # here.
            filenodes = [
                n for n in linkrevnodes if flr(frev(n)) not in commonrevs
            ]

            if not filenodes:
                continue

            progress.update(i + 1, item=fname)

            deltas = deltagroup(
                self._repo,
                filerevlog,
                filenodes,
                False,
                lookupfilelog,
                self._forcedeltaparentprev,
                ellipses=self._ellipses,
                clrevtolocalrev=clrevtolocalrev,
                fullclnodes=self._fullclnodes,
                precomputedellipsis=self._precomputedellipsis,
                sidedata_helpers=sidedata_helpers,
                debug_info=debug_info,
            )

            yield fname, deltas

        progress.complete()


def _makecg1packer(
    repo,
    oldmatcher,
    matcher,
    bundlecaps,
    ellipses=False,
    shallow=False,
    ellipsisroots=None,
    fullnodes=None,
    remote_sidedata=None,
):
    builddeltaheader = lambda d: _CHANGEGROUPV1_DELTA_HEADER.pack(
        d.node, d.p1node, d.p2node, d.linknode
    )

    return cgpacker(
        repo,
        oldmatcher,
        matcher,
        b'01',
        builddeltaheader=builddeltaheader,
        manifestsend=b'',
        forcedeltaparentprev=True,
        bundlecaps=bundlecaps,
        ellipses=ellipses,
        shallow=shallow,
        ellipsisroots=ellipsisroots,
        fullnodes=fullnodes,
    )


def _makecg2packer(
    repo,
    oldmatcher,
    matcher,
    bundlecaps,
    ellipses=False,
    shallow=False,
    ellipsisroots=None,
    fullnodes=None,
    remote_sidedata=None,
):
    builddeltaheader = lambda d: _CHANGEGROUPV2_DELTA_HEADER.pack(
        d.node, d.p1node, d.p2node, d.basenode, d.linknode
    )

    return cgpacker(
        repo,
        oldmatcher,
        matcher,
        b'02',
        builddeltaheader=builddeltaheader,
        manifestsend=b'',
        bundlecaps=bundlecaps,
        ellipses=ellipses,
        shallow=shallow,
        ellipsisroots=ellipsisroots,
        fullnodes=fullnodes,
    )


def _makecg3packer(
    repo,
    oldmatcher,
    matcher,
    bundlecaps,
    ellipses=False,
    shallow=False,
    ellipsisroots=None,
    fullnodes=None,
    remote_sidedata=None,
):
    builddeltaheader = lambda d: _CHANGEGROUPV3_DELTA_HEADER.pack(
        d.node, d.p1node, d.p2node, d.basenode, d.linknode, d.flags
    )

    return cgpacker(
        repo,
        oldmatcher,
        matcher,
        b'03',
        builddeltaheader=builddeltaheader,
        manifestsend=closechunk(),
        bundlecaps=bundlecaps,
        ellipses=ellipses,
        shallow=shallow,
        ellipsisroots=ellipsisroots,
        fullnodes=fullnodes,
    )


def _makecg4packer(
    repo,
    oldmatcher,
    matcher,
    bundlecaps,
    ellipses=False,
    shallow=False,
    ellipsisroots=None,
    fullnodes=None,
    remote_sidedata=None,
):
    # Sidedata is in a separate chunk from the delta to differentiate
    # "raw delta" and sidedata.
    def builddeltaheader(d):
        return _CHANGEGROUPV4_DELTA_HEADER.pack(
            d.protocol_flags,
            d.node,
            d.p1node,
            d.p2node,
            d.basenode,
            d.linknode,
            d.flags,
        )

    return cgpacker(
        repo,
        oldmatcher,
        matcher,
        b'04',
        builddeltaheader=builddeltaheader,
        manifestsend=closechunk(),
        bundlecaps=bundlecaps,
        ellipses=ellipses,
        shallow=shallow,
        ellipsisroots=ellipsisroots,
        fullnodes=fullnodes,
        remote_sidedata=remote_sidedata,
    )


_packermap = {
    b'01': (_makecg1packer, cg1unpacker),
    # cg2 adds support for exchanging generaldelta
    b'02': (_makecg2packer, cg2unpacker),
    # cg3 adds support for exchanging revlog flags and treemanifests
    b'03': (_makecg3packer, cg3unpacker),
    # cg4 adds support for exchanging sidedata
    b'04': (_makecg4packer, cg4unpacker),
}
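# Version dispatch sketch (descriptive addition, not from the original
# comments): each entry pairs a packer factory with an unpacker class, e.g.
#
#     packer_fn = _packermap[b'02'][0]     # _makecg2packer
#     unpacker_cls = _packermap[b'02'][1]  # cg2unpacker
#
# getbundler() and getunbundler() below are thin wrappers over this lookup.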


def allsupportedversions(repo):
    versions = set(_packermap.keys())
    needv03 = False
    if (
        repo.ui.configbool(b'experimental', b'changegroup3')
        or repo.ui.configbool(b'experimental', b'treemanifest')
        or scmutil.istreemanifest(repo)
    ):
        # we keep version 03 because we need it to exchange treemanifest data
        #
        # we also keep versions 01 and 02, because it is possible for a repo
        # to contain both normal and tree manifests at the same time, so
        # pulling data with an older version remains viable
        #
        # (or even pushing a subset of history)
        needv03 = True
    if not needv03:
        versions.discard(b'03')
    want_v4 = (
        repo.ui.configbool(b'experimental', b'changegroup4')
        or requirements.REVLOGV2_REQUIREMENT in repo.requirements
        or requirements.CHANGELOGV2_REQUIREMENT in repo.requirements
    )
    if not want_v4:
        versions.discard(b'04')
    return versions


# Changegroup versions that can be applied to the repo
def supportedincomingversions(repo):
    return allsupportedversions(repo)


# Changegroup versions that can be created from the repo
def supportedoutgoingversions(repo):
    versions = allsupportedversions(repo)
    if scmutil.istreemanifest(repo):
        # Versions 01 and 02 support only flat manifests and it's just too
        # expensive to convert between the flat manifest and tree manifest on
        # the fly. Since tree manifests are hashed differently, all of history
        # would have to be converted. Instead, we simply don't even pretend to
        # support versions 01 and 02.
        versions.discard(b'01')
        versions.discard(b'02')
    if requirements.NARROW_REQUIREMENT in repo.requirements:
        # Versions 01 and 02 don't support revlog flags, and we need to
        # support that for stripping and unbundling to work.
        versions.discard(b'01')
        versions.discard(b'02')
    if LFS_REQUIREMENT in repo.requirements:
        # Versions 01 and 02 don't support revlog flags, and we need to
        # mark LFS entries with REVIDX_EXTSTORED.
        versions.discard(b'01')
        versions.discard(b'02')

    return versions


def localversion(repo):
    # Finds the best version to use for bundles that are meant to be used
    # locally, such as those from strip and shelve, and temporary bundles.
    return max(supportedoutgoingversions(repo))


def safeversion(repo):
    # Finds the smallest version that it's safe to assume clients of the repo
    # will support. For example, all hg versions that support generaldelta also
    # support changegroup 02.
    versions = supportedoutgoingversions(repo)
    if requirements.GENERALDELTA_REQUIREMENT in repo.requirements:
        versions.discard(b'01')
    assert versions
    return min(versions)

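# Worked example (hypothetical repository, descriptive only): on a repo
# whose requirements include generaldelta and treemanifest, versions 01 and
# 02 are discarded above, so supportedoutgoingversions() is {b'03'} (plus
# b'04' when enabled), and safeversion() returns b'03'.
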

def getbundler(
    version,
    repo,
    bundlecaps=None,
    oldmatcher=None,
    matcher=None,
    ellipses=False,
    shallow=False,
    ellipsisroots=None,
    fullnodes=None,
    remote_sidedata=None,
):
    assert version in supportedoutgoingversions(repo)

    if matcher is None:
        matcher = matchmod.always()
    if oldmatcher is None:
        oldmatcher = matchmod.never()

    if version == b'01' and not matcher.always():
        raise error.ProgrammingError(
            b'version 01 changegroups do not support sparse file matchers'
        )

    if ellipses and version in (b'01', b'02'):
        raise error.Abort(
            _(
                b'ellipsis nodes require at least cg3 on client and server, '
                b'but negotiated version %s'
            )
            % version
        )

    # Requested files could include files not in the local store. So
    # filter those out.
    matcher = repo.narrowmatch(matcher)

    fn = _packermap[version][0]
    return fn(
        repo,
        oldmatcher,
        matcher,
        bundlecaps,
        ellipses=ellipses,
        shallow=shallow,
        ellipsisroots=ellipsisroots,
        fullnodes=fullnodes,
        remote_sidedata=remote_sidedata,
    )


def getunbundler(version, fh, alg, extras=None):
    return _packermap[version][1](fh, alg, extras=extras)


def _changegroupinfo(repo, nodes, source):
    if repo.ui.verbose or source == b'bundle':
        repo.ui.status(_(b"%d changesets found\n") % len(nodes))
        if repo.ui.debugflag:
            repo.ui.debug(b"list of changesets:\n")
            for node in nodes:
                repo.ui.debug(b"%s\n" % hex(node))


def makechangegroup(
    repo,
    outgoing,
    version,
    source,
    fastpath=False,
    bundlecaps=None,
):
    cgstream = makestream(
        repo,
        outgoing,
        version,
        source,
        fastpath=fastpath,
        bundlecaps=bundlecaps,
    )
    return getunbundler(
        version,
        util.chunkbuffer(cgstream),
        None,
        {b'clcount': len(outgoing.missing)},
    )

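# Usage sketch (illustrative; 'outgoing' is a hypothetical discovery result
# exposing .missing, .common and .ancestorsof):
#
#     cg = makechangegroup(repo, outgoing, b'02', b'strip')
#
# The result is an unpacker wrapping the freshly generated stream, ready to
# be applied to a repository or written out as a bundle.
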

def makestream(
    repo,
    outgoing,
    version,
    source,
    fastpath=False,
    bundlecaps=None,
    matcher=None,
    remote_sidedata=None,
):
    bundler = getbundler(
        version,
        repo,
        bundlecaps=bundlecaps,
        matcher=matcher,
        remote_sidedata=remote_sidedata,
    )

    repo = repo.unfiltered()
    commonrevs = outgoing.common
    csets = outgoing.missing
    heads = outgoing.ancestorsof
    # We go through the fast path if we get told to, or if all (unfiltered)
    # heads have been requested (since we then know that all linkrevs will
    # be pulled by the client).
    heads.sort()
    fastpathlinkrev = fastpath or (
        repo.filtername is None and heads == sorted(repo.heads())
    )

    repo.hook(b'preoutgoing', throw=True, source=source)
    _changegroupinfo(repo, csets, source)
    return bundler.generate(
        commonrevs,
        csets,
        fastpathlinkrev,
        source,
    )


def _addchangegroupfiles(
    repo,
    source,
    revmap,
    trp,
    expectedfiles,
    needfiles,
    addrevisioncb=None,
    debug_info=None,
    delta_base_reuse_policy=None,
):
    revisions = 0
    files = 0
    progress = repo.ui.makeprogress(
        _(b'files'), unit=_(b'files'), total=expectedfiles
    )
    for chunkdata in iter(source.filelogheader, {}):
        files += 1
        f = chunkdata[b"filename"]
        repo.ui.debug(b"adding %s revisions\n" % f)
        progress.increment()
        fl = repo.file(f)
        o = len(fl)
        try:
            deltas = source.deltaiter()
            added = fl.addgroup(
                deltas,
                revmap,
                trp,
                addrevisioncb=addrevisioncb,
                debug_info=debug_info,
                delta_base_reuse_policy=delta_base_reuse_policy,
            )
            if not added:
                raise error.Abort(_(b"received file revlog group is empty"))
        except error.CensoredBaseError as e:
            raise error.Abort(_(b"received delta base is censored: %s") % e)
        revisions += len(fl) - o
        if f in needfiles:
            needs = needfiles[f]
            for new in range(o, len(fl)):
                n = fl.node(new)
                if n in needs:
                    needs.remove(n)
                else:
                    raise error.Abort(_(b"received spurious file revlog entry"))
            if not needs:
                del needfiles[f]
    progress.complete()

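    # Anything still listed in needfiles was promised by the manifests but
    # never showed up in the file section of the changegroup, e.g.
    # (hypothetical) needfiles == {b'a.txt': {node1}}; the loop below only
    # tolerates that when node1 already exists in the local filelog.
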
    for f, needs in needfiles.items():
        fl = repo.file(f)
        for n in needs:
            try:
                fl.rev(n)
            except error.LookupError:
                raise error.Abort(
                    _(b'missing file data for %s:%s - run hg verify')
                    % (f, hex(n))
                )

    return revisions, files