revlog: move _getsegmentforrevs on the internal object...
marmoute -
r51980:df50a159 default
@@ -1,4626 +1,4633 b''
1 1 # perf.py - performance test routines
2 2 '''helper extension to measure performance
3 3
4 4 Configurations
5 5 ==============
6 6
7 7 ``perf``
8 8 --------
9 9
10 10 ``all-timing``
11 11 When set, additional statistics will be reported for each benchmark: best,
12 12 worst, median, and average. If not set, only the best timing is reported
13 13 (default: off).
14 14
15 15 ``presleep``
16 16 number of seconds to wait before any group of runs (default: 1)
17 17
18 18 ``pre-run``
19 19 number of runs to perform before starting measurement.
20 20
21 21 ``profile-benchmark``
22 22 Enable profiling for the benchmarked section.
23 23 (Only the first iteration is profiled)
24 24
25 25 ``run-limits``
26 26 Control the number of runs each benchmark will perform. The option value
27 27 should be a list of `<time>-<numberofrun>` pairs. After each run the
28 28 conditions are considered in order with the following logic:
29 29
30 30 If the benchmark has been running for <time> seconds and we have performed
31 31 <numberofrun> iterations, stop the benchmark.
32 32
33 33 The default value is: `3.0-100, 10.0-3`
34 34
35 35 ``stub``
36 36 When set, benchmarks will only be run once, useful for testing
37 37 (default: off)
38 38 '''
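# Example configuration (illustrative only; the option names come from the
# help text above, the values are arbitrary):
#
#   [perf]
#   all-timing = yes
#   presleep = 1
#   pre-run = 2
#   run-limits = 5.0-50, 15.0-5
#   stub = no
#
# A `run-limits` entry `<time>-<numberofrun>` stops the benchmark once it has
# been running for <time> seconds and at least <numberofrun> iterations have
# been performed.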
39 39
40 40 # "historical portability" policy of perf.py:
41 41 #
42 42 # We have to do:
43 43 # - make perf.py "loadable" with as wide a range of Mercurial versions as possible
44 44 # This doesn't mean that perf commands work correctly with that Mercurial.
45 45 # BTW, perf.py itself has been available since 1.1 (or eb240755386d).
46 46 # - make historical perf commands work correctly with as wide a range of
47 47 #   Mercurial versions as possible
48 48 #
49 49 # We have to do, if possible with reasonable cost:
50 50 # - make recent perf commands for historical features work correctly
51 51 # with early Mercurial
52 52 #
53 53 # We don't have to do:
54 54 # - make perf commands for recent features work correctly with early
55 55 # Mercurial
56 56
57 57 import contextlib
58 58 import functools
59 59 import gc
60 60 import os
61 61 import random
62 62 import shutil
63 63 import struct
64 64 import sys
65 65 import tempfile
66 66 import threading
67 67 import time
68 68
69 69 import mercurial.revlog
70 70 from mercurial import (
71 71 changegroup,
72 72 cmdutil,
73 73 commands,
74 74 copies,
75 75 error,
76 76 extensions,
77 77 hg,
78 78 mdiff,
79 79 merge,
80 80 util,
81 81 )
82 82
83 83 # for "historical portability":
84 84 # try to import modules separately (in dict order), and ignore
85 85 # failure, because these aren't available with early Mercurial
86 86 try:
87 87 from mercurial import branchmap # since 2.5 (or bcee63733aad)
88 88 except ImportError:
89 89 pass
90 90 try:
91 91 from mercurial import obsolete # since 2.3 (or ad0d6c2b3279)
92 92 except ImportError:
93 93 pass
94 94 try:
95 95 from mercurial import registrar # since 3.7 (or 37d50250b696)
96 96
97 97 dir(registrar) # forcibly load it
98 98 except ImportError:
99 99 registrar = None
100 100 try:
101 101 from mercurial import repoview # since 2.5 (or 3a6ddacb7198)
102 102 except ImportError:
103 103 pass
104 104 try:
105 105 from mercurial.utils import repoviewutil # since 5.0
106 106 except ImportError:
107 107 repoviewutil = None
108 108 try:
109 109 from mercurial import scmutil # since 1.9 (or 8b252e826c68)
110 110 except ImportError:
111 111 pass
112 112 try:
113 113 from mercurial import setdiscovery # since 1.9 (or cb98fed52495)
114 114 except ImportError:
115 115 pass
116 116
117 117 try:
118 118 from mercurial import profiling
119 119 except ImportError:
120 120 profiling = None
121 121
122 122 try:
123 123 from mercurial.revlogutils import constants as revlog_constants
124 124
125 125 perf_rl_kind = (revlog_constants.KIND_OTHER, b'created-by-perf')
126 126
127 127 def revlog(opener, *args, **kwargs):
128 128 return mercurial.revlog.revlog(opener, perf_rl_kind, *args, **kwargs)
129 129
130 130
131 131 except (ImportError, AttributeError):
132 132 perf_rl_kind = None
133 133
134 134 def revlog(opener, *args, **kwargs):
135 135 return mercurial.revlog.revlog(opener, *args, **kwargs)
136 136
137 137
138 138 def identity(a):
139 139 return a
140 140
141 141
142 142 try:
143 143 from mercurial import pycompat
144 144
145 145 getargspec = pycompat.getargspec # added to module after 4.5
146 146 _byteskwargs = pycompat.byteskwargs # since 4.1 (or fbc3f73dc802)
147 147 _sysstr = pycompat.sysstr # since 4.0 (or 2219f4f82ede)
148 148 _bytestr = pycompat.bytestr # since 4.2 (or b70407bd84d5)
149 149 _xrange = pycompat.xrange # since 4.8 (or 7eba8f83129b)
150 150 fsencode = pycompat.fsencode # since 3.9 (or f4a5e0e86a7e)
151 151 if pycompat.ispy3:
152 152 _maxint = sys.maxsize # per py3 docs for replacing maxint
153 153 else:
154 154 _maxint = sys.maxint
155 155 except (NameError, ImportError, AttributeError):
156 156 import inspect
157 157
158 158 getargspec = inspect.getargspec
159 159 _byteskwargs = identity
160 160 _bytestr = str
161 161 fsencode = identity # no py3 support
162 162 _maxint = sys.maxint # no py3 support
163 163 _sysstr = lambda x: x # no py3 support
164 164 _xrange = xrange
165 165
166 166 try:
167 167 # 4.7+
168 168 queue = pycompat.queue.Queue
169 169 except (NameError, AttributeError, ImportError):
170 170 # <4.7.
171 171 try:
172 172 queue = pycompat.queue
173 173 except (NameError, AttributeError, ImportError):
174 174 import Queue as queue
175 175
176 176 try:
177 177 from mercurial import logcmdutil
178 178
179 179 makelogtemplater = logcmdutil.maketemplater
180 180 except (AttributeError, ImportError):
181 181 try:
182 182 makelogtemplater = cmdutil.makelogtemplater
183 183 except (AttributeError, ImportError):
184 184 makelogtemplater = None
185 185
186 186 # for "historical portability":
187 187 # define util.safehasattr forcibly, because util.safehasattr has been
188 188 # available since 1.9.3 (or 94b200a11cf7)
189 189 _undefined = object()
190 190
191 191
192 192 def safehasattr(thing, attr):
193 193 return getattr(thing, _sysstr(attr), _undefined) is not _undefined
194 194
195 195
196 196 setattr(util, 'safehasattr', safehasattr)
197 197
198 198 # for "historical portability":
199 199 # define util.timer forcibly, because util.timer has been available
200 200 # since ae5d60bb70c9
201 201 if safehasattr(time, 'perf_counter'):
202 202 util.timer = time.perf_counter
203 203 elif os.name == b'nt':
204 204 util.timer = time.clock
205 205 else:
206 206 util.timer = time.time
207 207
208 208 # for "historical portability":
209 209 # use locally defined empty option list, if formatteropts isn't
210 210 # available, because commands.formatteropts has been available since
211 211 # 3.2 (or 7a7eed5176a4), even though formatting itself has been
212 212 # available since 2.2 (or ae5f92e154d3)
213 213 formatteropts = getattr(
214 214 cmdutil, "formatteropts", getattr(commands, "formatteropts", [])
215 215 )
216 216
217 217 # for "historical portability":
218 218 # use locally defined option list, if debugrevlogopts isn't available,
219 219 # because commands.debugrevlogopts has been available since 3.7 (or
220 220 # 5606f7d0d063), even though cmdutil.openrevlog() has been available
221 221 # since 1.9 (or a79fea6b3e77).
222 222 revlogopts = getattr(
223 223 cmdutil,
224 224 "debugrevlogopts",
225 225 getattr(
226 226 commands,
227 227 "debugrevlogopts",
228 228 [
229 229 (b'c', b'changelog', False, b'open changelog'),
230 230 (b'm', b'manifest', False, b'open manifest'),
231 231 (b'', b'dir', False, b'open directory manifest'),
232 232 ],
233 233 ),
234 234 )
235 235
236 236 cmdtable = {}
237 237
238 238
239 239 # for "historical portability":
240 240 # define parsealiases locally, because cmdutil.parsealiases has been
241 241 # available since 1.5 (or 6252852b4332)
242 242 def parsealiases(cmd):
243 243 return cmd.split(b"|")
244 244
245 245
246 246 if safehasattr(registrar, 'command'):
247 247 command = registrar.command(cmdtable)
248 248 elif safehasattr(cmdutil, 'command'):
249 249 command = cmdutil.command(cmdtable)
250 250 if 'norepo' not in getargspec(command).args:
251 251 # for "historical portability":
252 252 # wrap original cmdutil.command, because "norepo" option has
253 253 # been available since 3.1 (or 75a96326cecb)
254 254 _command = command
255 255
256 256 def command(name, options=(), synopsis=None, norepo=False):
257 257 if norepo:
258 258 commands.norepo += b' %s' % b' '.join(parsealiases(name))
259 259 return _command(name, list(options), synopsis)
260 260
261 261
262 262 else:
263 263 # for "historical portability":
264 264 # define "@command" annotation locally, because cmdutil.command
265 265 # has been available since 1.9 (or 2daa5179e73f)
266 266 def command(name, options=(), synopsis=None, norepo=False):
267 267 def decorator(func):
268 268 if synopsis:
269 269 cmdtable[name] = func, list(options), synopsis
270 270 else:
271 271 cmdtable[name] = func, list(options)
272 272 if norepo:
273 273 commands.norepo += b' %s' % b' '.join(parsealiases(name))
274 274 return func
275 275
276 276 return decorator
277 277
278 278
279 279 try:
280 280 import mercurial.registrar
281 281 import mercurial.configitems
282 282
283 283 configtable = {}
284 284 configitem = mercurial.registrar.configitem(configtable)
285 285 configitem(
286 286 b'perf',
287 287 b'presleep',
288 288 default=mercurial.configitems.dynamicdefault,
289 289 experimental=True,
290 290 )
291 291 configitem(
292 292 b'perf',
293 293 b'stub',
294 294 default=mercurial.configitems.dynamicdefault,
295 295 experimental=True,
296 296 )
297 297 configitem(
298 298 b'perf',
299 299 b'parentscount',
300 300 default=mercurial.configitems.dynamicdefault,
301 301 experimental=True,
302 302 )
303 303 configitem(
304 304 b'perf',
305 305 b'all-timing',
306 306 default=mercurial.configitems.dynamicdefault,
307 307 experimental=True,
308 308 )
309 309 configitem(
310 310 b'perf',
311 311 b'pre-run',
312 312 default=mercurial.configitems.dynamicdefault,
313 313 )
314 314 configitem(
315 315 b'perf',
316 316 b'profile-benchmark',
317 317 default=mercurial.configitems.dynamicdefault,
318 318 )
319 319 configitem(
320 320 b'perf',
321 321 b'run-limits',
322 322 default=mercurial.configitems.dynamicdefault,
323 323 experimental=True,
324 324 )
325 325 except (ImportError, AttributeError):
326 326 pass
327 327 except TypeError:
328 328 # compatibility fix for a11fd395e83f
329 329 # hg version: 5.2
330 330 configitem(
331 331 b'perf',
332 332 b'presleep',
333 333 default=mercurial.configitems.dynamicdefault,
334 334 )
335 335 configitem(
336 336 b'perf',
337 337 b'stub',
338 338 default=mercurial.configitems.dynamicdefault,
339 339 )
340 340 configitem(
341 341 b'perf',
342 342 b'parentscount',
343 343 default=mercurial.configitems.dynamicdefault,
344 344 )
345 345 configitem(
346 346 b'perf',
347 347 b'all-timing',
348 348 default=mercurial.configitems.dynamicdefault,
349 349 )
350 350 configitem(
351 351 b'perf',
352 352 b'pre-run',
353 353 default=mercurial.configitems.dynamicdefault,
354 354 )
355 355 configitem(
356 356 b'perf',
357 357 b'profile-benchmark',
358 358 default=mercurial.configitems.dynamicdefault,
359 359 )
360 360 configitem(
361 361 b'perf',
362 362 b'run-limits',
363 363 default=mercurial.configitems.dynamicdefault,
364 364 )
365 365
366 366
367 367 def getlen(ui):
368 368 if ui.configbool(b"perf", b"stub", False):
369 369 return lambda x: 1
370 370 return len
371 371
372 372
373 373 class noop:
374 374 """dummy context manager"""
375 375
376 376 def __enter__(self):
377 377 pass
378 378
379 379 def __exit__(self, *args):
380 380 pass
381 381
382 382
383 383 NOOPCTX = noop()
384 384
385 385
386 386 def gettimer(ui, opts=None):
387 387 """return a timer function and formatter: (timer, formatter)
388 388
389 389 This function exists to gather the creation of formatter in a single
390 390 place instead of duplicating it in all performance commands."""
391 391
392 392 # enforce an idle period before execution to counteract power management
393 393 # experimental config: perf.presleep
394 394 time.sleep(getint(ui, b"perf", b"presleep", 1))
395 395
396 396 if opts is None:
397 397 opts = {}
398 398 # redirect all to stderr unless buffer api is in use
399 399 if not ui._buffers:
400 400 ui = ui.copy()
401 401 uifout = safeattrsetter(ui, b'fout', ignoremissing=True)
402 402 if uifout:
403 403 # for "historical portability":
404 404 # ui.fout/ferr have been available since 1.9 (or 4e1ccd4c2b6d)
405 405 uifout.set(ui.ferr)
406 406
407 407 # get a formatter
408 408 uiformatter = getattr(ui, 'formatter', None)
409 409 if uiformatter:
410 410 fm = uiformatter(b'perf', opts)
411 411 else:
412 412 # for "historical portability":
413 413 # define formatter locally, because ui.formatter has been
414 414 # available since 2.2 (or ae5f92e154d3)
415 415 from mercurial import node
416 416
417 417 class defaultformatter:
418 418 """Minimized composition of baseformatter and plainformatter"""
419 419
420 420 def __init__(self, ui, topic, opts):
421 421 self._ui = ui
422 422 if ui.debugflag:
423 423 self.hexfunc = node.hex
424 424 else:
425 425 self.hexfunc = node.short
426 426
427 427 def __nonzero__(self):
428 428 return False
429 429
430 430 __bool__ = __nonzero__
431 431
432 432 def startitem(self):
433 433 pass
434 434
435 435 def data(self, **data):
436 436 pass
437 437
438 438 def write(self, fields, deftext, *fielddata, **opts):
439 439 self._ui.write(deftext % fielddata, **opts)
440 440
441 441 def condwrite(self, cond, fields, deftext, *fielddata, **opts):
442 442 if cond:
443 443 self._ui.write(deftext % fielddata, **opts)
444 444
445 445 def plain(self, text, **opts):
446 446 self._ui.write(text, **opts)
447 447
448 448 def end(self):
449 449 pass
450 450
451 451 fm = defaultformatter(ui, b'perf', opts)
452 452
453 453 # stub function, runs code only once instead of in a loop
454 454 # experimental config: perf.stub
455 455 if ui.configbool(b"perf", b"stub", False):
456 456 return functools.partial(stub_timer, fm), fm
457 457
458 458 # experimental config: perf.all-timing
459 459 displayall = ui.configbool(b"perf", b"all-timing", True)
460 460
461 461 # experimental config: perf.run-limits
462 462 limitspec = ui.configlist(b"perf", b"run-limits", [])
463 463 limits = []
464 464 for item in limitspec:
465 465 parts = item.split(b'-', 1)
466 466 if len(parts) < 2:
467 467 ui.warn((b'malformatted run limit entry, missing "-": %s\n' % item))
468 468 continue
469 469 try:
470 470 time_limit = float(_sysstr(parts[0]))
471 471 except ValueError as e:
472 472 ui.warn(
473 473 (
474 474 b'malformatted run limit entry, %s: %s\n'
475 475 % (_bytestr(e), item)
476 476 )
477 477 )
478 478 continue
479 479 try:
480 480 run_limit = int(_sysstr(parts[1]))
481 481 except ValueError as e:
482 482 ui.warn(
483 483 (
484 484 b'malformatted run limit entry, %s: %s\n'
485 485 % (_bytestr(e), item)
486 486 )
487 487 )
488 488 continue
489 489 limits.append((time_limit, run_limit))
490 490 if not limits:
491 491 limits = DEFAULTLIMITS
492 492
493 493 profiler = None
494 494 if profiling is not None:
495 495 if ui.configbool(b"perf", b"profile-benchmark", False):
496 496 profiler = profiling.profile(ui)
497 497
498 498 prerun = getint(ui, b"perf", b"pre-run", 0)
499 499 t = functools.partial(
500 500 _timer,
501 501 fm,
502 502 displayall=displayall,
503 503 limits=limits,
504 504 prerun=prerun,
505 505 profiler=profiler,
506 506 )
507 507 return t, fm
508 508
509 509
510 510 def stub_timer(fm, func, setup=None, title=None):
511 511 if setup is not None:
512 512 setup()
513 513 func()
514 514
515 515
516 516 @contextlib.contextmanager
517 517 def timeone():
518 518 r = []
519 519 ostart = os.times()
520 520 cstart = util.timer()
521 521 yield r
522 522 cstop = util.timer()
523 523 ostop = os.times()
524 524 a, b = ostart, ostop
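# a single (wall-clock, user-cpu, system-cpu) triple is recorded below;
# os.times() indices 0 and 1 hold the user and system CPU times.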
525 525 r.append((cstop - cstart, b[0] - a[0], b[1] - a[1]))
526 526
527 527
528 528 # list of stop conditions (elapsed time, minimal run count)
529 529 DEFAULTLIMITS = (
530 530 (3.0, 100),
531 531 (10.0, 3),
532 532 )
533 533
534 534
535 535 @contextlib.contextmanager
536 536 def noop_context():
537 537 yield
538 538
539 539
540 540 def _timer(
541 541 fm,
542 542 func,
543 543 setup=None,
544 544 context=noop_context,
545 545 title=None,
546 546 displayall=False,
547 547 limits=DEFAULTLIMITS,
548 548 prerun=0,
549 549 profiler=None,
550 550 ):
551 551 gc.collect()
552 552 results = []
553 553 begin = util.timer()
554 554 count = 0
555 555 if profiler is None:
556 556 profiler = NOOPCTX
557 557 for i in range(prerun):
558 558 if setup is not None:
559 559 setup()
560 560 with context():
561 561 func()
562 562 keepgoing = True
563 563 while keepgoing:
564 564 if setup is not None:
565 565 setup()
566 566 with context():
567 567 with profiler:
568 568 with timeone() as item:
569 569 r = func()
570 570 profiler = NOOPCTX
571 571 count += 1
572 572 results.append(item[0])
573 573 cstop = util.timer()
574 574 # Look for a stop condition.
575 575 elapsed = cstop - begin
576 576 for t, mincount in limits:
577 577 if elapsed >= t and count >= mincount:
578 578 keepgoing = False
579 579 break
580 580
581 581 formatone(fm, results, title=title, result=r, displayall=displayall)
582 582
583 583
584 584 def formatone(fm, timings, title=None, result=None, displayall=False):
585 585 count = len(timings)
586 586
587 587 fm.startitem()
588 588
589 589 if title:
590 590 fm.write(b'title', b'! %s\n', title)
591 591 if result:
592 592 fm.write(b'result', b'! result: %s\n', result)
593 593
594 594 def display(role, entry):
595 595 prefix = b''
596 596 if role != b'best':
597 597 prefix = b'%s.' % role
598 598 fm.plain(b'!')
599 599 fm.write(prefix + b'wall', b' wall %f', entry[0])
600 600 fm.write(prefix + b'comb', b' comb %f', entry[1] + entry[2])
601 601 fm.write(prefix + b'user', b' user %f', entry[1])
602 602 fm.write(prefix + b'sys', b' sys %f', entry[2])
603 603 fm.write(prefix + b'count', b' (%s of %%d)' % role, count)
604 604 fm.plain(b'\n')
605 605
606 606 timings.sort()
607 607 min_val = timings[0]
608 608 display(b'best', min_val)
609 609 if displayall:
610 610 max_val = timings[-1]
611 611 display(b'max', max_val)
612 612 avg = tuple([sum(x) / count for x in zip(*timings)])
613 613 display(b'avg', avg)
614 614 median = timings[len(timings) // 2]
615 615 display(b'median', median)
616 616
617 617
618 618 # utilities for historical portability
619 619
620 620
621 621 def getint(ui, section, name, default):
622 622 # for "historical portability":
623 623 # ui.configint has been available since 1.9 (or fa2b596db182)
624 624 v = ui.config(section, name, None)
625 625 if v is None:
626 626 return default
627 627 try:
628 628 return int(v)
629 629 except ValueError:
630 630 raise error.ConfigError(
631 631 b"%s.%s is not an integer ('%s')" % (section, name, v)
632 632 )
633 633
634 634
635 635 def safeattrsetter(obj, name, ignoremissing=False):
636 636 """Ensure that 'obj' has 'name' attribute before subsequent setattr
637 637
638 638 This function is aborted, if 'obj' doesn't have 'name' attribute
639 639 at runtime. This avoids overlooking removal of an attribute, which
640 640 breaks assumption of performance measurement, in the future.
641 641
642 642 This function returns the object to (1) assign a new value, and
643 643 (2) restore an original value to the attribute.
644 644
645 645 If 'ignoremissing' is true, missing 'name' attribute doesn't cause
646 646 abortion, and this function returns None. This is useful to
647 647 examine an attribute, which isn't ensured in all Mercurial
648 648 versions.
649 649 """
650 650 if not util.safehasattr(obj, name):
651 651 if ignoremissing:
652 652 return None
653 653 raise error.Abort(
654 654 (
655 655 b"missing attribute %s of %s might break assumption"
656 656 b" of performance measurement"
657 657 )
658 658 % (name, obj)
659 659 )
660 660
661 661 origvalue = getattr(obj, _sysstr(name))
662 662
663 663 class attrutil:
664 664 def set(self, newvalue):
665 665 setattr(obj, _sysstr(name), newvalue)
666 666
667 667 def restore(self):
668 668 setattr(obj, _sysstr(name), origvalue)
669 669
670 670 return attrutil()
671 671
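# Illustrative use of safeattrsetter (mirrors how gettimer() redirects output
# above; shown here only as an example, not part of the extension's logic):
#
#   uifout = safeattrsetter(ui, b'fout', ignoremissing=True)
#   if uifout:
#       uifout.set(ui.ferr)   # temporarily point ui.fout at stderr
#       ...
#       uifout.restore()      # put the original attribute value back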
672 672
673 673 # utilities to examine each internal API changes
674 674
675 675
676 676 def getbranchmapsubsettable():
677 677 # for "historical portability":
678 678 # subsettable is defined in:
679 679 # - branchmap since 2.9 (or 175c6fd8cacc)
680 680 # - repoview since 2.5 (or 59a9f18d4587)
681 681 # - repoviewutil since 5.0
682 682 for mod in (branchmap, repoview, repoviewutil):
683 683 subsettable = getattr(mod, 'subsettable', None)
684 684 if subsettable:
685 685 return subsettable
686 686
687 687 # bisecting in bcee63733aad::59a9f18d4587 can reach here (both
688 688 # branchmap and repoview modules exist, but subsettable attribute
689 689 # doesn't)
690 690 raise error.Abort(
691 691 b"perfbranchmap not available with this Mercurial",
692 692 hint=b"use 2.5 or later",
693 693 )
694 694
695 695
696 696 def getsvfs(repo):
697 697 """Return appropriate object to access files under .hg/store"""
698 698 # for "historical portability":
699 699 # repo.svfs has been available since 2.3 (or 7034365089bf)
700 700 svfs = getattr(repo, 'svfs', None)
701 701 if svfs:
702 702 return svfs
703 703 else:
704 704 return getattr(repo, 'sopener')
705 705
706 706
707 707 def getvfs(repo):
708 708 """Return appropriate object to access files under .hg"""
709 709 # for "historical portability":
710 710 # repo.vfs has been available since 2.3 (or 7034365089bf)
711 711 vfs = getattr(repo, 'vfs', None)
712 712 if vfs:
713 713 return vfs
714 714 else:
715 715 return getattr(repo, 'opener')
716 716
717 717
718 718 def repocleartagscachefunc(repo):
719 719 """Return the function to clear tags cache according to repo internal API"""
720 720 if util.safehasattr(repo, b'_tagscache'): # since 2.0 (or 9dca7653b525)
721 721 # in this case, setattr(repo, '_tagscache', None) or so isn't
722 722 # the correct way to clear the tags cache, because existing code paths
723 723 # expect _tagscache to be a structured object.
724 724 def clearcache():
725 725 # _tagscache has been filteredpropertycache since 2.5 (or
726 726 # 98c867ac1330), and delattr() can't work in such case
727 727 if '_tagscache' in vars(repo):
728 728 del repo.__dict__['_tagscache']
729 729
730 730 return clearcache
731 731
732 732 repotags = safeattrsetter(repo, b'_tags', ignoremissing=True)
733 733 if repotags: # since 1.4 (or 5614a628d173)
734 734 return lambda: repotags.set(None)
735 735
736 736 repotagscache = safeattrsetter(repo, b'tagscache', ignoremissing=True)
737 737 if repotagscache: # since 0.6 (or d7df759d0e97)
738 738 return lambda: repotagscache.set(None)
739 739
740 740 # Mercurial earlier than 0.6 (or d7df759d0e97) logically reaches
741 741 # this point, but it isn't so problematic, because:
742 742 # - repo.tags of such Mercurial isn't "callable", and repo.tags()
743 743 # in perftags() causes failure soon
744 744 # - perf.py itself has been available since 1.1 (or eb240755386d)
745 745 raise error.Abort(b"tags API of this hg command is unknown")
746 746
747 747
748 748 # utilities to clear cache
749 749
750 750
751 751 def clearfilecache(obj, attrname):
752 752 unfiltered = getattr(obj, 'unfiltered', None)
753 753 if unfiltered is not None:
754 754 obj = obj.unfiltered()
755 755 if attrname in vars(obj):
756 756 delattr(obj, attrname)
757 757 obj._filecache.pop(attrname, None)
758 758
759 759
760 760 def clearchangelog(repo):
761 761 if repo is not repo.unfiltered():
762 762 object.__setattr__(repo, '_clcachekey', None)
763 763 object.__setattr__(repo, '_clcache', None)
764 764 clearfilecache(repo.unfiltered(), 'changelog')
765 765
766 766
767 767 # perf commands
768 768
769 769
770 770 @command(b'perf::walk|perfwalk', formatteropts)
771 771 def perfwalk(ui, repo, *pats, **opts):
772 772 opts = _byteskwargs(opts)
773 773 timer, fm = gettimer(ui, opts)
774 774 m = scmutil.match(repo[None], pats, {})
775 775 timer(
776 776 lambda: len(
777 777 list(
778 778 repo.dirstate.walk(m, subrepos=[], unknown=True, ignored=False)
779 779 )
780 780 )
781 781 )
782 782 fm.end()
783 783
784 784
785 785 @command(b'perf::annotate|perfannotate', formatteropts)
786 786 def perfannotate(ui, repo, f, **opts):
787 787 opts = _byteskwargs(opts)
788 788 timer, fm = gettimer(ui, opts)
789 789 fc = repo[b'.'][f]
790 790 timer(lambda: len(fc.annotate(True)))
791 791 fm.end()
792 792
793 793
794 794 @command(
795 795 b'perf::status|perfstatus',
796 796 [
797 797 (b'u', b'unknown', False, b'ask status to look for unknown files'),
798 798 (b'', b'dirstate', False, b'benchmark the internal dirstate call'),
799 799 ]
800 800 + formatteropts,
801 801 )
802 802 def perfstatus(ui, repo, **opts):
803 803 """benchmark the performance of a single status call
804 804
805 805 The repository data are preserved between calls.
806 806 
807 807 By default, only the status of tracked files is requested. If
808 808 `--unknown` is passed, "unknown" files are also included.
809 809 """
810 810 opts = _byteskwargs(opts)
811 811 # m = match.always(repo.root, repo.getcwd())
812 812 # timer(lambda: sum(map(len, repo.dirstate.status(m, [], False, False,
813 813 # False))))
814 814 timer, fm = gettimer(ui, opts)
815 815 if opts[b'dirstate']:
816 816 dirstate = repo.dirstate
817 817 m = scmutil.matchall(repo)
818 818 unknown = opts[b'unknown']
819 819
820 820 def status_dirstate():
821 821 s = dirstate.status(
822 822 m, subrepos=[], ignored=False, clean=False, unknown=unknown
823 823 )
824 824 sum(map(bool, s))
825 825
826 826 if util.safehasattr(dirstate, 'running_status'):
827 827 with dirstate.running_status(repo):
828 828 timer(status_dirstate)
829 829 dirstate.invalidate()
830 830 else:
831 831 timer(status_dirstate)
832 832 else:
833 833 timer(lambda: sum(map(len, repo.status(unknown=opts[b'unknown']))))
834 834 fm.end()
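# Illustrative invocations of perf::status (flags as declared in the command
# table above; only one timing mode is exercised per run):
#
#   $ hg perf::status
#   $ hg perf::status --unknown
#   $ hg perf::status --dirstate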
835 835
836 836
837 837 @command(b'perf::addremove|perfaddremove', formatteropts)
838 838 def perfaddremove(ui, repo, **opts):
839 839 opts = _byteskwargs(opts)
840 840 timer, fm = gettimer(ui, opts)
841 841 try:
842 842 oldquiet = repo.ui.quiet
843 843 repo.ui.quiet = True
844 844 matcher = scmutil.match(repo[None])
845 845 opts[b'dry_run'] = True
846 846 if 'uipathfn' in getargspec(scmutil.addremove).args:
847 847 uipathfn = scmutil.getuipathfn(repo)
848 848 timer(lambda: scmutil.addremove(repo, matcher, b"", uipathfn, opts))
849 849 else:
850 850 timer(lambda: scmutil.addremove(repo, matcher, b"", opts))
851 851 finally:
852 852 repo.ui.quiet = oldquiet
853 853 fm.end()
854 854
855 855
856 856 def clearcaches(cl):
857 857 # behave somewhat consistently across internal API changes
858 858 if util.safehasattr(cl, b'clearcaches'):
859 859 cl.clearcaches()
860 860 elif util.safehasattr(cl, b'_nodecache'):
861 861 # <= hg-5.2
862 862 from mercurial.node import nullid, nullrev
863 863
864 864 cl._nodecache = {nullid: nullrev}
865 865 cl._nodepos = None
866 866
867 867
868 868 @command(b'perf::heads|perfheads', formatteropts)
869 869 def perfheads(ui, repo, **opts):
870 870 """benchmark the computation of a changelog heads"""
871 871 opts = _byteskwargs(opts)
872 872 timer, fm = gettimer(ui, opts)
873 873 cl = repo.changelog
874 874
875 875 def s():
876 876 clearcaches(cl)
877 877
878 878 def d():
879 879 len(cl.headrevs())
880 880
881 881 timer(d, setup=s)
882 882 fm.end()
883 883
884 884
885 885 def _default_clear_on_disk_tags_cache(repo):
886 886 from mercurial import tags
887 887
888 888 repo.cachevfs.tryunlink(tags._filename(repo))
889 889
890 890
891 891 def _default_clear_on_disk_tags_fnodes_cache(repo):
892 892 from mercurial import tags
893 893
894 894 repo.cachevfs.tryunlink(tags._fnodescachefile)
895 895
896 896
897 897 def _default_forget_fnodes(repo, revs):
898 898 """function used by the perf extension to prune some entries from the
899 899 fnodes cache"""
900 900 from mercurial import tags
901 901
902 902 missing_1 = b'\xff' * 4
903 903 missing_2 = b'\xff' * 20
904 904 cache = tags.hgtagsfnodescache(repo.unfiltered())
905 905 for r in revs:
906 906 cache._writeentry(r * tags._fnodesrecsize, missing_1, missing_2)
907 907 cache.write()
908 908
909 909
910 910 @command(
911 911 b'perf::tags|perftags',
912 912 formatteropts
913 913 + [
914 914 (b'', b'clear-revlogs', False, b'refresh changelog and manifest'),
915 915 (
916 916 b'',
917 917 b'clear-on-disk-cache',
918 918 False,
919 919 b'clear on disk tags cache (DESTRUCTIVE)',
920 920 ),
921 921 (
922 922 b'',
923 923 b'clear-fnode-cache-all',
924 924 False,
925 925 b'clear on disk file node cache (DESTRUCTIVE),',
926 926 ),
927 927 (
928 928 b'',
929 929 b'clear-fnode-cache-rev',
930 930 [],
931 931 b'clear on disk file node cache (DESTRUCTIVE),',
932 932 b'REVS',
933 933 ),
934 934 (
935 935 b'',
936 936 b'update-last',
937 937 b'',
938 938 b'simulate an update over the last N revisions (DESTRUCTIVE),',
939 939 b'N',
940 940 ),
941 941 ],
942 942 )
943 943 def perftags(ui, repo, **opts):
944 944 """Benchmark tags retrieval in various situation
945 945
946 946 The option marked as (DESTRUCTIVE) will alter the on-disk cache, possibly
947 947 altering performance after the command was run. However, it does not
948 948 destroy any stored data.
949 949 """
950 950 from mercurial import tags
951 951
952 952 opts = _byteskwargs(opts)
953 953 timer, fm = gettimer(ui, opts)
954 954 repocleartagscache = repocleartagscachefunc(repo)
955 955 clearrevlogs = opts[b'clear_revlogs']
956 956 clear_disk = opts[b'clear_on_disk_cache']
957 957 clear_fnode = opts[b'clear_fnode_cache_all']
958 958
959 959 clear_fnode_revs = opts[b'clear_fnode_cache_rev']
960 960 update_last_str = opts[b'update_last']
961 961 update_last = None
962 962 if update_last_str:
963 963 try:
964 964 update_last = int(update_last_str)
965 965 except ValueError:
966 966 msg = b'could not parse value for update-last: "%s"'
967 967 msg %= update_last_str
968 968 hint = b'value should be an integer'
969 969 raise error.Abort(msg, hint=hint)
970 970
971 971 clear_disk_fn = getattr(
972 972 tags,
973 973 "clear_cache_on_disk",
974 974 _default_clear_on_disk_tags_cache,
975 975 )
976 976 clear_fnodes_fn = getattr(
977 977 tags,
978 978 "clear_cache_fnodes",
979 979 _default_clear_on_disk_tags_fnodes_cache,
980 980 )
981 981 clear_fnodes_rev_fn = getattr(
982 982 tags,
983 983 "forget_fnodes",
984 984 _default_forget_fnodes,
985 985 )
986 986
987 987 clear_revs = []
988 988 if clear_fnode_revs:
989 989 clear_revs.extend(scmutil.revrange(repo, clear_fnode_revs))
990 990
991 991 if update_last:
992 992 revset = b'last(all(), %d)' % update_last
993 993 last_revs = repo.unfiltered().revs(revset)
994 994 clear_revs.extend(last_revs)
995 995
996 996 from mercurial import repoview
997 997
998 998 rev_filter = {(b'experimental', b'extra-filter-revs'): revset}
999 999 with repo.ui.configoverride(rev_filter, source=b"perf"):
1000 1000 filter_id = repoview.extrafilter(repo.ui)
1001 1001
1002 1002 filter_name = b'%s%%%s' % (repo.filtername, filter_id)
1003 1003 pre_repo = repo.filtered(filter_name)
1004 1004 pre_repo.tags() # warm the cache
1005 1005 old_tags_path = repo.cachevfs.join(tags._filename(pre_repo))
1006 1006 new_tags_path = repo.cachevfs.join(tags._filename(repo))
1007 1007
1008 1008 clear_revs = sorted(set(clear_revs))
1009 1009
1010 1010 def s():
1011 1011 if update_last:
1012 1012 util.copyfile(old_tags_path, new_tags_path)
1013 1013 if clearrevlogs:
1014 1014 clearchangelog(repo)
1015 1015 clearfilecache(repo.unfiltered(), 'manifest')
1016 1016 if clear_disk:
1017 1017 clear_disk_fn(repo)
1018 1018 if clear_fnode:
1019 1019 clear_fnodes_fn(repo)
1020 1020 elif clear_revs:
1021 1021 clear_fnodes_rev_fn(repo, clear_revs)
1022 1022 repocleartagscache()
1023 1023
1024 1024 def t():
1025 1025 len(repo.tags())
1026 1026
1027 1027 timer(t, setup=s)
1028 1028 fm.end()
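# Illustrative invocations of perf::tags (the DESTRUCTIVE flags alter on-disk
# caches as described in the docstring; the revset and count are arbitrary):
#
#   $ hg perf::tags --clear-revlogs
#   $ hg perf::tags --clear-fnode-cache-rev '-100:'
#   $ hg perf::tags --update-last 100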
1029 1029
1030 1030
1031 1031 @command(b'perf::ancestors|perfancestors', formatteropts)
1032 1032 def perfancestors(ui, repo, **opts):
1033 1033 opts = _byteskwargs(opts)
1034 1034 timer, fm = gettimer(ui, opts)
1035 1035 heads = repo.changelog.headrevs()
1036 1036
1037 1037 def d():
1038 1038 for a in repo.changelog.ancestors(heads):
1039 1039 pass
1040 1040
1041 1041 timer(d)
1042 1042 fm.end()
1043 1043
1044 1044
1045 1045 @command(b'perf::ancestorset|perfancestorset', formatteropts)
1046 1046 def perfancestorset(ui, repo, revset, **opts):
1047 1047 opts = _byteskwargs(opts)
1048 1048 timer, fm = gettimer(ui, opts)
1049 1049 revs = repo.revs(revset)
1050 1050 heads = repo.changelog.headrevs()
1051 1051
1052 1052 def d():
1053 1053 s = repo.changelog.ancestors(heads)
1054 1054 for rev in revs:
1055 1055 rev in s
1056 1056
1057 1057 timer(d)
1058 1058 fm.end()
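# Illustrative invocation of perf::ancestorset (REVSET is any revset; each of
# its revisions is tested for membership in the ancestors of the heads):
#
#   $ hg perf::ancestorset 'all()'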
1059 1059
1060 1060
1061 1061 @command(
1062 1062 b'perf::delta-find',
1063 1063 revlogopts + formatteropts,
1064 1064 b'-c|-m|FILE REV',
1065 1065 )
1066 1066 def perf_delta_find(ui, repo, arg_1, arg_2=None, **opts):
1067 1067 """benchmark the process of finding a valid delta for a revlog revision
1068 1068
1069 1069 When a revlog receives a new revision (e.g. from a commit, or from an
1070 1070 incoming bundle), it searches for a suitable delta-base to produce a delta.
1071 1071 This perf command measures how much time we spend in this process. It
1072 1072 operates on an already stored revision.
1073 1073
1074 1074 See `hg help debug-delta-find` for another related command.
1075 1075 """
1076 1076 from mercurial import revlogutils
1077 1077 import mercurial.revlogutils.deltas as deltautil
1078 1078
1079 1079 opts = _byteskwargs(opts)
1080 1080 if arg_2 is None:
1081 1081 file_ = None
1082 1082 rev = arg_1
1083 1083 else:
1084 1084 file_ = arg_1
1085 1085 rev = arg_2
1086 1086
1087 1087 repo = repo.unfiltered()
1088 1088
1089 1089 timer, fm = gettimer(ui, opts)
1090 1090
1091 1091 rev = int(rev)
1092 1092
1093 1093 revlog = cmdutil.openrevlog(repo, b'perf::delta-find', file_, opts)
1094 1094
1095 1095 deltacomputer = deltautil.deltacomputer(revlog)
1096 1096
1097 1097 node = revlog.node(rev)
1098 1098 p1r, p2r = revlog.parentrevs(rev)
1099 1099 p1 = revlog.node(p1r)
1100 1100 p2 = revlog.node(p2r)
1101 1101 full_text = revlog.revision(rev)
1102 1102 textlen = len(full_text)
1103 1103 cachedelta = None
1104 1104 flags = revlog.flags(rev)
1105 1105
1106 1106 revinfo = revlogutils.revisioninfo(
1107 1107 node,
1108 1108 p1,
1109 1109 p2,
1110 1110 [full_text], # btext
1111 1111 textlen,
1112 1112 cachedelta,
1113 1113 flags,
1114 1114 )
1115 1115
1116 1116 # Note: we should probably purge the potential caches (like the full
1117 1117 # manifest cache) between runs.
1118 1118 def find_one():
1119 1119 with revlog._datafp() as fh:
1120 1120 deltacomputer.finddeltainfo(revinfo, fh, target_rev=rev)
1121 1121
1122 1122 timer(find_one)
1123 1123 fm.end()
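# Illustrative invocations of perf::delta-find (per the `-c|-m|FILE REV`
# synopsis; the revision numbers are arbitrary and must exist in the revlog):
#
#   $ hg perf::delta-find -m 4242
#   $ hg perf::delta-find path/to/file 12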
1124 1124
1125 1125
1126 1126 @command(b'perf::discovery|perfdiscovery', formatteropts, b'PATH')
1127 1127 def perfdiscovery(ui, repo, path, **opts):
1128 1128 """benchmark discovery between local repo and the peer at given path"""
1129 1129 repos = [repo, None]
1130 1130 timer, fm = gettimer(ui, opts)
1131 1131
1132 1132 try:
1133 1133 from mercurial.utils.urlutil import get_unique_pull_path_obj
1134 1134
1135 1135 path = get_unique_pull_path_obj(b'perfdiscovery', ui, path)
1136 1136 except ImportError:
1137 1137 try:
1138 1138 from mercurial.utils.urlutil import get_unique_pull_path
1139 1139
1140 1140 path = get_unique_pull_path(b'perfdiscovery', repo, ui, path)[0]
1141 1141 except ImportError:
1142 1142 path = ui.expandpath(path)
1143 1143
1144 1144 def s():
1145 1145 repos[1] = hg.peer(ui, opts, path)
1146 1146
1147 1147 def d():
1148 1148 setdiscovery.findcommonheads(ui, *repos)
1149 1149
1150 1150 timer(d, setup=s)
1151 1151 fm.end()
1152 1152
1153 1153
1154 1154 @command(
1155 1155 b'perf::bookmarks|perfbookmarks',
1156 1156 formatteropts
1157 1157 + [
1158 1158 (b'', b'clear-revlogs', False, b'refresh changelog and manifest'),
1159 1159 ],
1160 1160 )
1161 1161 def perfbookmarks(ui, repo, **opts):
1162 1162 """benchmark parsing bookmarks from disk to memory"""
1163 1163 opts = _byteskwargs(opts)
1164 1164 timer, fm = gettimer(ui, opts)
1165 1165
1166 1166 clearrevlogs = opts[b'clear_revlogs']
1167 1167
1168 1168 def s():
1169 1169 if clearrevlogs:
1170 1170 clearchangelog(repo)
1171 1171 clearfilecache(repo, b'_bookmarks')
1172 1172
1173 1173 def d():
1174 1174 repo._bookmarks
1175 1175
1176 1176 timer(d, setup=s)
1177 1177 fm.end()
1178 1178
1179 1179
1180 1180 @command(
1181 1181 b'perf::bundle',
1182 1182 [
1183 1183 (
1184 1184 b'r',
1185 1185 b'rev',
1186 1186 [],
1187 1187 b'changesets to bundle',
1188 1188 b'REV',
1189 1189 ),
1190 1190 (
1191 1191 b't',
1192 1192 b'type',
1193 1193 b'none',
1194 1194 b'bundlespec to use (see `hg help bundlespec`)',
1195 1195 b'TYPE',
1196 1196 ),
1197 1197 ]
1198 1198 + formatteropts,
1199 1199 b'REVS',
1200 1200 )
1201 1201 def perfbundle(ui, repo, *revs, **opts):
1202 1202 """benchmark the creation of a bundle from a repository
1203 1203
1204 1204 For now, this only supports "none" compression.
1205 1205 """
1206 1206 try:
1207 1207 from mercurial import bundlecaches
1208 1208
1209 1209 parsebundlespec = bundlecaches.parsebundlespec
1210 1210 except ImportError:
1211 1211 from mercurial import exchange
1212 1212
1213 1213 parsebundlespec = exchange.parsebundlespec
1214 1214
1215 1215 from mercurial import discovery
1216 1216 from mercurial import bundle2
1217 1217
1218 1218 opts = _byteskwargs(opts)
1219 1219 timer, fm = gettimer(ui, opts)
1220 1220
1221 1221 cl = repo.changelog
1222 1222 revs = list(revs)
1223 1223 revs.extend(opts.get(b'rev', ()))
1224 1224 revs = scmutil.revrange(repo, revs)
1225 1225 if not revs:
1226 1226 raise error.Abort(b"not revision specified")
1227 1227 # make it a consistent set (ie: without topological gaps)
1228 1228 old_len = len(revs)
1229 1229 revs = list(repo.revs(b"%ld::%ld", revs, revs))
1230 1230 if old_len != len(revs):
1231 1231 new_count = len(revs) - old_len
1232 1232 msg = b"add %d new revisions to make it a consistent set\n"
1233 1233 ui.write_err(msg % new_count)
1234 1234
1235 1235 targets = [cl.node(r) for r in repo.revs(b"heads(::%ld)", revs)]
1236 1236 bases = [cl.node(r) for r in repo.revs(b"heads(::%ld - %ld)", revs, revs)]
1237 1237 outgoing = discovery.outgoing(repo, bases, targets)
1238 1238
1239 1239 bundle_spec = opts.get(b'type')
1240 1240
1241 1241 bundle_spec = parsebundlespec(repo, bundle_spec, strict=False)
1242 1242
1243 1243 cgversion = bundle_spec.params.get(b"cg.version")
1244 1244 if cgversion is None:
1245 1245 if bundle_spec.version == b'v1':
1246 1246 cgversion = b'01'
1247 1247 if bundle_spec.version == b'v2':
1248 1248 cgversion = b'02'
1249 1249 if cgversion not in changegroup.supportedoutgoingversions(repo):
1250 1250 err = b"repository does not support bundle version %s"
1251 1251 raise error.Abort(err % cgversion)
1252 1252
1253 1253 if cgversion == b'01': # bundle1
1254 1254 bversion = b'HG10' + bundle_spec.wirecompression
1255 1255 bcompression = None
1256 1256 elif cgversion in (b'02', b'03'):
1257 1257 bversion = b'HG20'
1258 1258 bcompression = bundle_spec.wirecompression
1259 1259 else:
1260 1260 err = b'perf::bundle: unexpected changegroup version %s'
1261 1261 raise error.ProgrammingError(err % cgversion)
1262 1262
1263 1263 if bcompression is None:
1264 1264 bcompression = b'UN'
1265 1265
1266 1266 if bcompression != b'UN':
1267 1267 err = b'perf::bundle: compression currently unsupported: %s'
1268 1268 raise error.ProgrammingError(err % bcompression)
1269 1269
1270 1270 def do_bundle():
1271 1271 bundle2.writenewbundle(
1272 1272 ui,
1273 1273 repo,
1274 1274 b'perf::bundle',
1275 1275 os.devnull,
1276 1276 bversion,
1277 1277 outgoing,
1278 1278 bundle_spec.params,
1279 1279 )
1280 1280
1281 1281 timer(do_bundle)
1282 1282 fm.end()
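# Illustrative invocation of perf::bundle (only "none" compression is
# supported, per the docstring above; the revset is an arbitrary example):
#
#   $ hg perf::bundle --type none-v2 -r 'tip~100::'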
1283 1283
1284 1284
1285 1285 @command(b'perf::bundleread|perfbundleread', formatteropts, b'BUNDLE')
1286 1286 def perfbundleread(ui, repo, bundlepath, **opts):
1287 1287 """Benchmark reading of bundle files.
1288 1288
1289 1289 This command is meant to isolate the I/O part of bundle reading as
1290 1290 much as possible.
1291 1291 """
1292 1292 from mercurial import (
1293 1293 bundle2,
1294 1294 exchange,
1295 1295 streamclone,
1296 1296 )
1297 1297
1298 1298 opts = _byteskwargs(opts)
1299 1299
1300 1300 def makebench(fn):
1301 1301 def run():
1302 1302 with open(bundlepath, b'rb') as fh:
1303 1303 bundle = exchange.readbundle(ui, fh, bundlepath)
1304 1304 fn(bundle)
1305 1305
1306 1306 return run
1307 1307
1308 1308 def makereadnbytes(size):
1309 1309 def run():
1310 1310 with open(bundlepath, b'rb') as fh:
1311 1311 bundle = exchange.readbundle(ui, fh, bundlepath)
1312 1312 while bundle.read(size):
1313 1313 pass
1314 1314
1315 1315 return run
1316 1316
1317 1317 def makestdioread(size):
1318 1318 def run():
1319 1319 with open(bundlepath, b'rb') as fh:
1320 1320 while fh.read(size):
1321 1321 pass
1322 1322
1323 1323 return run
1324 1324
1325 1325 # bundle1
1326 1326
1327 1327 def deltaiter(bundle):
1328 1328 for delta in bundle.deltaiter():
1329 1329 pass
1330 1330
1331 1331 def iterchunks(bundle):
1332 1332 for chunk in bundle.getchunks():
1333 1333 pass
1334 1334
1335 1335 # bundle2
1336 1336
1337 1337 def forwardchunks(bundle):
1338 1338 for chunk in bundle._forwardchunks():
1339 1339 pass
1340 1340
1341 1341 def iterparts(bundle):
1342 1342 for part in bundle.iterparts():
1343 1343 pass
1344 1344
1345 1345 def iterpartsseekable(bundle):
1346 1346 for part in bundle.iterparts(seekable=True):
1347 1347 pass
1348 1348
1349 1349 def seek(bundle):
1350 1350 for part in bundle.iterparts(seekable=True):
1351 1351 part.seek(0, os.SEEK_END)
1352 1352
1353 1353 def makepartreadnbytes(size):
1354 1354 def run():
1355 1355 with open(bundlepath, b'rb') as fh:
1356 1356 bundle = exchange.readbundle(ui, fh, bundlepath)
1357 1357 for part in bundle.iterparts():
1358 1358 while part.read(size):
1359 1359 pass
1360 1360
1361 1361 return run
1362 1362
1363 1363 benches = [
1364 1364 (makestdioread(8192), b'read(8k)'),
1365 1365 (makestdioread(16384), b'read(16k)'),
1366 1366 (makestdioread(32768), b'read(32k)'),
1367 1367 (makestdioread(131072), b'read(128k)'),
1368 1368 ]
1369 1369
1370 1370 with open(bundlepath, b'rb') as fh:
1371 1371 bundle = exchange.readbundle(ui, fh, bundlepath)
1372 1372
1373 1373 if isinstance(bundle, changegroup.cg1unpacker):
1374 1374 benches.extend(
1375 1375 [
1376 1376 (makebench(deltaiter), b'cg1 deltaiter()'),
1377 1377 (makebench(iterchunks), b'cg1 getchunks()'),
1378 1378 (makereadnbytes(8192), b'cg1 read(8k)'),
1379 1379 (makereadnbytes(16384), b'cg1 read(16k)'),
1380 1380 (makereadnbytes(32768), b'cg1 read(32k)'),
1381 1381 (makereadnbytes(131072), b'cg1 read(128k)'),
1382 1382 ]
1383 1383 )
1384 1384 elif isinstance(bundle, bundle2.unbundle20):
1385 1385 benches.extend(
1386 1386 [
1387 1387 (makebench(forwardchunks), b'bundle2 forwardchunks()'),
1388 1388 (makebench(iterparts), b'bundle2 iterparts()'),
1389 1389 (
1390 1390 makebench(iterpartsseekable),
1391 1391 b'bundle2 iterparts() seekable',
1392 1392 ),
1393 1393 (makebench(seek), b'bundle2 part seek()'),
1394 1394 (makepartreadnbytes(8192), b'bundle2 part read(8k)'),
1395 1395 (makepartreadnbytes(16384), b'bundle2 part read(16k)'),
1396 1396 (makepartreadnbytes(32768), b'bundle2 part read(32k)'),
1397 1397 (makepartreadnbytes(131072), b'bundle2 part read(128k)'),
1398 1398 ]
1399 1399 )
1400 1400 elif isinstance(bundle, streamclone.streamcloneapplier):
1401 1401 raise error.Abort(b'stream clone bundles not supported')
1402 1402 else:
1403 1403 raise error.Abort(b'unhandled bundle type: %s' % type(bundle))
1404 1404
1405 1405 for fn, title in benches:
1406 1406 timer, fm = gettimer(ui, opts)
1407 1407 timer(fn, title=title)
1408 1408 fm.end()
1409 1409
1410 1410
1411 1411 @command(
1412 1412 b'perf::changegroupchangelog|perfchangegroupchangelog',
1413 1413 formatteropts
1414 1414 + [
1415 1415 (b'', b'cgversion', b'02', b'changegroup version'),
1416 1416 (b'r', b'rev', b'', b'revisions to add to changegroup'),
1417 1417 ],
1418 1418 )
1419 1419 def perfchangegroupchangelog(ui, repo, cgversion=b'02', rev=None, **opts):
1420 1420 """Benchmark producing a changelog group for a changegroup.
1421 1421
1422 1422 This measures the time spent processing the changelog during a
1423 1423 bundle operation. This occurs during `hg bundle` and on a server
1424 1424 processing a `getbundle` wire protocol request (which handles clones
1425 1425 and pulls).
1426 1426
1427 1427 By default, all revisions are added to the changegroup.
1428 1428 """
1429 1429 opts = _byteskwargs(opts)
1430 1430 cl = repo.changelog
1431 1431 nodes = [cl.lookup(r) for r in repo.revs(rev or b'all()')]
1432 1432 bundler = changegroup.getbundler(cgversion, repo)
1433 1433
1434 1434 def d():
1435 1435 state, chunks = bundler._generatechangelog(cl, nodes)
1436 1436 for chunk in chunks:
1437 1437 pass
1438 1438
1439 1439 timer, fm = gettimer(ui, opts)
1440 1440
1441 1441 # Terminal printing can interfere with timing. So disable it.
1442 1442 with ui.configoverride({(b'progress', b'disable'): True}):
1443 1443 timer(d)
1444 1444
1445 1445 fm.end()
1446 1446
1447 1447
1448 1448 @command(b'perf::dirs|perfdirs', formatteropts)
1449 1449 def perfdirs(ui, repo, **opts):
1450 1450 opts = _byteskwargs(opts)
1451 1451 timer, fm = gettimer(ui, opts)
1452 1452 dirstate = repo.dirstate
1453 1453 b'a' in dirstate
1454 1454
1455 1455 def d():
1456 1456 dirstate.hasdir(b'a')
1457 1457 try:
1458 1458 del dirstate._map._dirs
1459 1459 except AttributeError:
1460 1460 pass
1461 1461
1462 1462 timer(d)
1463 1463 fm.end()
1464 1464
1465 1465
1466 1466 @command(
1467 1467 b'perf::dirstate|perfdirstate',
1468 1468 [
1469 1469 (
1470 1470 b'',
1471 1471 b'iteration',
1472 1472 None,
1473 1473 b'benchmark a full iteration for the dirstate',
1474 1474 ),
1475 1475 (
1476 1476 b'',
1477 1477 b'contains',
1478 1478 None,
1479 1479 b'benchmark a large amount of `nf in dirstate` calls',
1480 1480 ),
1481 1481 ]
1482 1482 + formatteropts,
1483 1483 )
1484 1484 def perfdirstate(ui, repo, **opts):
1485 1485 """benchmap the time of various distate operations
1486 1486
1487 1487 By default benchmark the time necessary to load a dirstate from scratch.
1488 1488 The dirstate is loaded to the point were a "contains" request can be
1489 1489 answered.
1490 1490 """
1491 1491 opts = _byteskwargs(opts)
1492 1492 timer, fm = gettimer(ui, opts)
1493 1493 b"a" in repo.dirstate
1494 1494
1495 1495 if opts[b'iteration'] and opts[b'contains']:
1496 1496 msg = b'only specify one of --iteration or --contains'
1497 1497 raise error.Abort(msg)
1498 1498
1499 1499 if opts[b'iteration']:
1500 1500 setup = None
1501 1501 dirstate = repo.dirstate
1502 1502
1503 1503 def d():
1504 1504 for f in dirstate:
1505 1505 pass
1506 1506
1507 1507 elif opts[b'contains']:
1508 1508 setup = None
1509 1509 dirstate = repo.dirstate
1510 1510 allfiles = list(dirstate)
1511 1511 # also add file paths that will be "missing" from the dirstate
1512 1512 allfiles.extend([f[::-1] for f in allfiles])
1513 1513
1514 1514 def d():
1515 1515 for f in allfiles:
1516 1516 f in dirstate
1517 1517
1518 1518 else:
1519 1519
1520 1520 def setup():
1521 1521 repo.dirstate.invalidate()
1522 1522
1523 1523 def d():
1524 1524 b"a" in repo.dirstate
1525 1525
1526 1526 timer(d, setup=setup)
1527 1527 fm.end()
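# Illustrative invocations of perf::dirstate (one mode at a time, since
# --iteration and --contains are mutually exclusive):
#
#   $ hg perf::dirstate               # load from scratch (default)
#   $ hg perf::dirstate --iteration   # iterate over every tracked file
#   $ hg perf::dirstate --contains    # many `f in dirstate` lookups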
1528 1528
1529 1529
1530 1530 @command(b'perf::dirstatedirs|perfdirstatedirs', formatteropts)
1531 1531 def perfdirstatedirs(ui, repo, **opts):
1532 1532 """benchmap a 'dirstate.hasdir' call from an empty `dirs` cache"""
1533 1533 opts = _byteskwargs(opts)
1534 1534 timer, fm = gettimer(ui, opts)
1535 1535 repo.dirstate.hasdir(b"a")
1536 1536
1537 1537 def setup():
1538 1538 try:
1539 1539 del repo.dirstate._map._dirs
1540 1540 except AttributeError:
1541 1541 pass
1542 1542
1543 1543 def d():
1544 1544 repo.dirstate.hasdir(b"a")
1545 1545
1546 1546 timer(d, setup=setup)
1547 1547 fm.end()
1548 1548
1549 1549
1550 1550 @command(b'perf::dirstatefoldmap|perfdirstatefoldmap', formatteropts)
1551 1551 def perfdirstatefoldmap(ui, repo, **opts):
1552 1552 """benchmap a `dirstate._map.filefoldmap.get()` request
1553 1553
1554 1554 The dirstate filefoldmap cache is dropped between every request.
1555 1555 """
1556 1556 opts = _byteskwargs(opts)
1557 1557 timer, fm = gettimer(ui, opts)
1558 1558 dirstate = repo.dirstate
1559 1559 dirstate._map.filefoldmap.get(b'a')
1560 1560
1561 1561 def setup():
1562 1562 del dirstate._map.filefoldmap
1563 1563
1564 1564 def d():
1565 1565 dirstate._map.filefoldmap.get(b'a')
1566 1566
1567 1567 timer(d, setup=setup)
1568 1568 fm.end()
1569 1569
1570 1570
1571 1571 @command(b'perf::dirfoldmap|perfdirfoldmap', formatteropts)
1572 1572 def perfdirfoldmap(ui, repo, **opts):
1573 1573 """benchmap a `dirstate._map.dirfoldmap.get()` request
1574 1574
1575 1575 The dirstate dirfoldmap cache is dropped between every request.
1576 1576 """
1577 1577 opts = _byteskwargs(opts)
1578 1578 timer, fm = gettimer(ui, opts)
1579 1579 dirstate = repo.dirstate
1580 1580 dirstate._map.dirfoldmap.get(b'a')
1581 1581
1582 1582 def setup():
1583 1583 del dirstate._map.dirfoldmap
1584 1584 try:
1585 1585 del dirstate._map._dirs
1586 1586 except AttributeError:
1587 1587 pass
1588 1588
1589 1589 def d():
1590 1590 dirstate._map.dirfoldmap.get(b'a')
1591 1591
1592 1592 timer(d, setup=setup)
1593 1593 fm.end()
1594 1594
1595 1595
1596 1596 @command(b'perf::dirstatewrite|perfdirstatewrite', formatteropts)
1597 1597 def perfdirstatewrite(ui, repo, **opts):
1598 1598 """benchmap the time it take to write a dirstate on disk"""
1599 1599 opts = _byteskwargs(opts)
1600 1600 timer, fm = gettimer(ui, opts)
1601 1601 ds = repo.dirstate
1602 1602 b"a" in ds
1603 1603
1604 1604 def setup():
1605 1605 ds._dirty = True
1606 1606
1607 1607 def d():
1608 1608 ds.write(repo.currenttransaction())
1609 1609
1610 1610 with repo.wlock():
1611 1611 timer(d, setup=setup)
1612 1612 fm.end()
1613 1613
1614 1614
1615 1615 def _getmergerevs(repo, opts):
1616 1616 """parse command argument to return rev involved in merge
1617 1617
1618 1618 input: options dictionnary with `rev`, `from` and `bse`
1619 1619 output: (localctx, otherctx, basectx)
1620 1620 """
1621 1621 if opts[b'from']:
1622 1622 fromrev = scmutil.revsingle(repo, opts[b'from'])
1623 1623 wctx = repo[fromrev]
1624 1624 else:
1625 1625 wctx = repo[None]
1626 1626 # we don't want working dir files to be stat'd in the benchmark, so
1627 1627 # prime that cache
1628 1628 wctx.dirty()
1629 1629 rctx = scmutil.revsingle(repo, opts[b'rev'], opts[b'rev'])
1630 1630 if opts[b'base']:
1631 1631 fromrev = scmutil.revsingle(repo, opts[b'base'])
1632 1632 ancestor = repo[fromrev]
1633 1633 else:
1634 1634 ancestor = wctx.ancestor(rctx)
1635 1635 return (wctx, rctx, ancestor)
1636 1636
1637 1637
1638 1638 @command(
1639 1639 b'perf::mergecalculate|perfmergecalculate',
1640 1640 [
1641 1641 (b'r', b'rev', b'.', b'rev to merge against'),
1642 1642 (b'', b'from', b'', b'rev to merge from'),
1643 1643 (b'', b'base', b'', b'the revision to use as base'),
1644 1644 ]
1645 1645 + formatteropts,
1646 1646 )
1647 1647 def perfmergecalculate(ui, repo, **opts):
1648 1648 opts = _byteskwargs(opts)
1649 1649 timer, fm = gettimer(ui, opts)
1650 1650
1651 1651 wctx, rctx, ancestor = _getmergerevs(repo, opts)
1652 1652
1653 1653 def d():
1654 1654 # acceptremote is True because we don't want prompts in the middle of
1655 1655 # our benchmark
1656 1656 merge.calculateupdates(
1657 1657 repo,
1658 1658 wctx,
1659 1659 rctx,
1660 1660 [ancestor],
1661 1661 branchmerge=False,
1662 1662 force=False,
1663 1663 acceptremote=True,
1664 1664 followcopies=True,
1665 1665 )
1666 1666
1667 1667 timer(d)
1668 1668 fm.end()
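# Illustrative invocations of perf::mergecalculate (flags as declared above;
# the revisions are arbitrary examples):
#
#   $ hg perf::mergecalculate -r default
#   $ hg perf::mergecalculate --from 'tip~10' -r tip --base 'tip~20'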
1669 1669
1670 1670
1671 1671 @command(
1672 1672 b'perf::mergecopies|perfmergecopies',
1673 1673 [
1674 1674 (b'r', b'rev', b'.', b'rev to merge against'),
1675 1675 (b'', b'from', b'', b'rev to merge from'),
1676 1676 (b'', b'base', b'', b'the revision to use as base'),
1677 1677 ]
1678 1678 + formatteropts,
1679 1679 )
1680 1680 def perfmergecopies(ui, repo, **opts):
1681 1681 """measure runtime of `copies.mergecopies`"""
1682 1682 opts = _byteskwargs(opts)
1683 1683 timer, fm = gettimer(ui, opts)
1684 1684 wctx, rctx, ancestor = _getmergerevs(repo, opts)
1685 1685
1686 1686 def d():
1687 1687 # acceptremote is True because we don't want prompts in the middle of
1688 1688 # our benchmark
1689 1689 copies.mergecopies(repo, wctx, rctx, ancestor)
1690 1690
1691 1691 timer(d)
1692 1692 fm.end()
1693 1693
1694 1694
1695 1695 @command(b'perf::pathcopies|perfpathcopies', [], b"REV REV")
1696 1696 def perfpathcopies(ui, repo, rev1, rev2, **opts):
1697 1697 """benchmark the copy tracing logic"""
1698 1698 opts = _byteskwargs(opts)
1699 1699 timer, fm = gettimer(ui, opts)
1700 1700 ctx1 = scmutil.revsingle(repo, rev1, rev1)
1701 1701 ctx2 = scmutil.revsingle(repo, rev2, rev2)
1702 1702
1703 1703 def d():
1704 1704 copies.pathcopies(ctx1, ctx2)
1705 1705
1706 1706 timer(d)
1707 1707 fm.end()
1708 1708
1709 1709
1710 1710 @command(
1711 1711 b'perf::phases|perfphases',
1712 1712 [
1713 1713 (b'', b'full', False, b'include file reading time too'),
1714 1714 ],
1715 1715 b"",
1716 1716 )
1717 1717 def perfphases(ui, repo, **opts):
1718 1718 """benchmark phasesets computation"""
1719 1719 opts = _byteskwargs(opts)
1720 1720 timer, fm = gettimer(ui, opts)
1721 1721 _phases = repo._phasecache
1722 1722 full = opts.get(b'full')
1723 1723
1724 1724 def d():
1725 1725 phases = _phases
1726 1726 if full:
1727 1727 clearfilecache(repo, b'_phasecache')
1728 1728 phases = repo._phasecache
1729 1729 phases.invalidate()
1730 1730 phases.loadphaserevs(repo)
1731 1731
1732 1732 timer(d)
1733 1733 fm.end()
1734 1734
1735 1735
1736 1736 @command(b'perf::phasesremote|perfphasesremote', [], b"[DEST]")
1737 1737 def perfphasesremote(ui, repo, dest=None, **opts):
1738 1738 """benchmark time needed to analyse phases of the remote server"""
1739 1739 from mercurial.node import bin
1740 1740 from mercurial import (
1741 1741 exchange,
1742 1742 hg,
1743 1743 phases,
1744 1744 )
1745 1745
1746 1746 opts = _byteskwargs(opts)
1747 1747 timer, fm = gettimer(ui, opts)
1748 1748
1749 1749 path = ui.getpath(dest, default=(b'default-push', b'default'))
1750 1750 if not path:
1751 1751 raise error.Abort(
1752 1752 b'default repository not configured!',
1753 1753 hint=b"see 'hg help config.paths'",
1754 1754 )
1755 1755 if util.safehasattr(path, 'main_path'):
1756 1756 path = path.get_push_variant()
1757 1757 dest = path.loc
1758 1758 else:
1759 1759 dest = path.pushloc or path.loc
1760 1760 ui.statusnoi18n(b'analysing phase of %s\n' % util.hidepassword(dest))
1761 1761 other = hg.peer(repo, opts, dest)
1762 1762
1763 1763 # easier to perform discovery through the operation
1764 1764 op = exchange.pushoperation(repo, other)
1765 1765 exchange._pushdiscoverychangeset(op)
1766 1766
1767 1767 remotesubset = op.fallbackheads
1768 1768
1769 1769 with other.commandexecutor() as e:
1770 1770 remotephases = e.callcommand(
1771 1771 b'listkeys', {b'namespace': b'phases'}
1772 1772 ).result()
1773 1773 del other
1774 1774 publishing = remotephases.get(b'publishing', False)
1775 1775 if publishing:
1776 1776 ui.statusnoi18n(b'publishing: yes\n')
1777 1777 else:
1778 1778 ui.statusnoi18n(b'publishing: no\n')
1779 1779
1780 1780 has_node = getattr(repo.changelog.index, 'has_node', None)
1781 1781 if has_node is None:
1782 1782 has_node = repo.changelog.nodemap.__contains__
1783 1783 nonpublishroots = 0
1784 1784 for nhex, phase in remotephases.iteritems():
1785 1785 if nhex == b'publishing': # ignore data related to publish option
1786 1786 continue
1787 1787 node = bin(nhex)
1788 1788 if has_node(node) and int(phase):
1789 1789 nonpublishroots += 1
1790 1790 ui.statusnoi18n(b'number of roots: %d\n' % len(remotephases))
1791 1791 ui.statusnoi18n(b'number of known non public roots: %d\n' % nonpublishroots)
1792 1792
1793 1793 def d():
1794 1794 phases.remotephasessummary(repo, remotesubset, remotephases)
1795 1795
1796 1796 timer(d)
1797 1797 fm.end()
1798 1798
1799 1799
1800 1800 @command(
1801 1801 b'perf::manifest|perfmanifest',
1802 1802 [
1803 1803 (b'm', b'manifest-rev', False, b'Look up a manifest node revision'),
1804 1804 (b'', b'clear-disk', False, b'clear on-disk caches too'),
1805 1805 ]
1806 1806 + formatteropts,
1807 1807 b'REV|NODE',
1808 1808 )
1809 1809 def perfmanifest(ui, repo, rev, manifest_rev=False, clear_disk=False, **opts):
1810 1810 """benchmark the time to read a manifest from disk and return a usable
1811 1811 dict-like object
1812 1812
1813 1813 Manifest caches are cleared before retrieval."""
1814 1814 opts = _byteskwargs(opts)
1815 1815 timer, fm = gettimer(ui, opts)
1816 1816 if not manifest_rev:
1817 1817 ctx = scmutil.revsingle(repo, rev, rev)
1818 1818 t = ctx.manifestnode()
1819 1819 else:
1820 1820 from mercurial.node import bin
1821 1821
1822 1822 if len(rev) == 40:
1823 1823 t = bin(rev)
1824 1824 else:
1825 1825 try:
1826 1826 rev = int(rev)
1827 1827
1828 1828 if util.safehasattr(repo.manifestlog, b'getstorage'):
1829 1829 t = repo.manifestlog.getstorage(b'').node(rev)
1830 1830 else:
1831 1831 t = repo.manifestlog._revlog.lookup(rev)
1832 1832 except ValueError:
1833 1833 raise error.Abort(
1834 1834 b'manifest revision must be integer or full node'
1835 1835 )
1836 1836
1837 1837 def d():
1838 1838 repo.manifestlog.clearcaches(clear_persisted_data=clear_disk)
1839 1839 repo.manifestlog[t].read()
1840 1840
1841 1841 timer(d)
1842 1842 fm.end()
1843 1843
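# Illustrative `perf::manifest` invocations (revision values are placeholders):
#
#   $ hg perf::manifest tip
#   $ hg perf::manifest --clear-disk tip   # also drop persisted on-disk caches
#   $ hg perf::manifest -m 0               # argument is a manifest revision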
1844 1844
1845 1845 @command(b'perf::changeset|perfchangeset', formatteropts)
1846 1846 def perfchangeset(ui, repo, rev, **opts):
1847 1847 opts = _byteskwargs(opts)
1848 1848 timer, fm = gettimer(ui, opts)
1849 1849 n = scmutil.revsingle(repo, rev).node()
1850 1850
1851 1851 def d():
1852 1852 repo.changelog.read(n)
1853 1853 # repo.changelog._cache = None
1854 1854
1855 1855 timer(d)
1856 1856 fm.end()
1857 1857
1858 1858
1859 1859 @command(b'perf::ignore|perfignore', formatteropts)
1860 1860 def perfignore(ui, repo, **opts):
1861 1861 """benchmark operation related to computing ignore"""
1862 1862 opts = _byteskwargs(opts)
1863 1863 timer, fm = gettimer(ui, opts)
1864 1864 dirstate = repo.dirstate
1865 1865
1866 1866 def setupone():
1867 1867 dirstate.invalidate()
1868 1868 clearfilecache(dirstate, b'_ignore')
1869 1869
1870 1870 def runone():
1871 1871 dirstate._ignore
1872 1872
1873 1873 timer(runone, setup=setupone, title=b"load")
1874 1874 fm.end()
1875 1875
1876 1876
1877 1877 @command(
1878 1878 b'perf::index|perfindex',
1879 1879 [
1880 1880 (b'', b'rev', [], b'revision to be looked up (default tip)'),
1881 1881 (b'', b'no-lookup', None, b'do not revision lookup post creation'),
1882 1882 ]
1883 1883 + formatteropts,
1884 1884 )
1885 1885 def perfindex(ui, repo, **opts):
1886 1886 """benchmark index creation time followed by a lookup
1887 1887
1888 1888 The default is to look `tip` up. Depending on the index implementation,
1889 1889 the revision looked up can matter. For example, an implementation
1890 1890 scanning the index will have a faster lookup time for `--rev tip` than for
1891 1891 `--rev 0`. The number of revisions looked up and their order can also
1892 1892 matter.
1893 1893
1894 1894 Examples of useful sets to test:
1895 1895
1896 1896 * tip
1897 1897 * 0
1898 1898 * -10:
1899 1899 * :10
1900 1900 * -10: + :10
1901 1901 * :10: + -10:
1902 1902 * -10000:
1903 1903 * -10000: + 0
1904 1904
1905 1905 It is not currently possible to check for lookup of a missing node. For
1906 1906 deeper lookup benchmarking, check out the `perfnodemap` command."""
1907 1907 import mercurial.revlog
1908 1908
1909 1909 opts = _byteskwargs(opts)
1910 1910 timer, fm = gettimer(ui, opts)
1911 1911 mercurial.revlog._prereadsize = 2 ** 24 # disable lazy parser in old hg
1912 1912 if opts[b'no_lookup']:
1913 1913 if opts[b'rev']:
1914 1914 raise error.Abort('--no-lookup and --rev are mutually exclusive')
1915 1915 nodes = []
1916 1916 elif not opts[b'rev']:
1917 1917 nodes = [repo[b"tip"].node()]
1918 1918 else:
1919 1919 revs = scmutil.revrange(repo, opts[b'rev'])
1920 1920 cl = repo.changelog
1921 1921 nodes = [cl.node(r) for r in revs]
1922 1922
1923 1923 unfi = repo.unfiltered()
1924 1924 # find the filecache func directly
1925 1925 # This avoids polluting the benchmark with the filecache logic
1926 1926 makecl = unfi.__class__.changelog.func
1927 1927
1928 1928 def setup():
1929 1929 # probably not necessary, but for good measure
1930 1930 clearchangelog(unfi)
1931 1931
1932 1932 def d():
1933 1933 cl = makecl(unfi)
1934 1934 for n in nodes:
1935 1935 cl.rev(n)
1936 1936
1937 1937 timer(d, setup=setup)
1938 1938 fm.end()
1939 1939
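# Illustrative `perf::index` invocations, based on the sets suggested in the
# docstring above (revset values are examples):
#
#   $ hg perf::index                      # create the index, then look up tip
#   $ hg perf::index --rev 0 --rev '-10:'
#   $ hg perf::index --no-lookup          # index creation time only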
1940 1940
1941 1941 @command(
1942 1942 b'perf::nodemap|perfnodemap',
1943 1943 [
1944 1944 (b'', b'rev', [], b'revision to be looked up (default tip)'),
1945 1945 (b'', b'clear-caches', True, b'clear revlog cache between calls'),
1946 1946 ]
1947 1947 + formatteropts,
1948 1948 )
1949 1949 def perfnodemap(ui, repo, **opts):
1950 1950 """benchmark the time necessary to look up revisions from a cold nodemap
1951 1951
1952 1952 Depending on the implementation, the amount and order of revisions we look
1953 1953 up can vary. Examples of useful sets to test:
1954 1954 * tip
1955 1955 * 0
1956 1956 * -10:
1957 1957 * :10
1958 1958 * -10: + :10
1959 1959 * :10: + -10:
1960 1960 * -10000:
1961 1961 * -10000: + 0
1962 1962
1963 1963 The command currently focuses on valid binary lookup. Benchmarking for
1964 1964 hexlookup, prefix lookup and missing lookup would also be valuable.
1965 1965 """
1966 1966 import mercurial.revlog
1967 1967
1968 1968 opts = _byteskwargs(opts)
1969 1969 timer, fm = gettimer(ui, opts)
1970 1970 mercurial.revlog._prereadsize = 2 ** 24 # disable lazy parser in old hg
1971 1971
1972 1972 unfi = repo.unfiltered()
1973 1973 clearcaches = opts[b'clear_caches']
1974 1974 # find the filecache func directly
1975 1975 # This avoids polluting the benchmark with the filecache logic
1976 1976 makecl = unfi.__class__.changelog.func
1977 1977 if not opts[b'rev']:
1978 1978 raise error.Abort(b'use --rev to specify revisions to look up')
1979 1979 revs = scmutil.revrange(repo, opts[b'rev'])
1980 1980 cl = repo.changelog
1981 1981 nodes = [cl.node(r) for r in revs]
1982 1982
1983 1983 # use a list to pass reference to a nodemap from one closure to the next
1984 1984 nodeget = [None]
1985 1985
1986 1986 def setnodeget():
1987 1987 # probably not necessary, but for good measure
1988 1988 clearchangelog(unfi)
1989 1989 cl = makecl(unfi)
1990 1990 if util.safehasattr(cl.index, 'get_rev'):
1991 1991 nodeget[0] = cl.index.get_rev
1992 1992 else:
1993 1993 nodeget[0] = cl.nodemap.get
1994 1994
1995 1995 def d():
1996 1996 get = nodeget[0]
1997 1997 for n in nodes:
1998 1998 get(n)
1999 1999
2000 2000 setup = None
2001 2001 if clearcaches:
2002 2002
2003 2003 def setup():
2004 2004 setnodeget()
2005 2005
2006 2006 else:
2007 2007 setnodeget()
2008 2008 d() # prewarm the data structure
2009 2009 timer(d, setup=setup)
2010 2010 fm.end()
2011 2011
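# Illustrative `perf::nodemap` invocations (revset values are examples):
#
#   $ hg perf::nodemap --rev tip --rev 0
#   $ hg perf::nodemap --rev '-10000:'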
2012 2012
2013 2013 @command(b'perf::startup|perfstartup', formatteropts)
2014 2014 def perfstartup(ui, repo, **opts):
2015 2015 opts = _byteskwargs(opts)
2016 2016 timer, fm = gettimer(ui, opts)
2017 2017
2018 2018 def d():
2019 2019 if os.name != 'nt':
2020 2020 os.system(
2021 2021 b"HGRCPATH= %s version -q > /dev/null" % fsencode(sys.argv[0])
2022 2022 )
2023 2023 else:
2024 2024 os.environ['HGRCPATH'] = r' '
2025 2025 os.system("%s version -q > NUL" % sys.argv[0])
2026 2026
2027 2027 timer(d)
2028 2028 fm.end()
2029 2029
2030 2030
2031 2031 def _find_stream_generator(version):
2032 2032 """find the proper generator function for this stream version"""
2033 2033 import mercurial.streamclone
2034 2034
2035 2035 available = {}
2036 2036
2037 2037 # try to fetch a v1 generator
2038 2038 generatev1 = getattr(mercurial.streamclone, "generatev1", None)
2039 2039 if generatev1 is not None:
2040 2040
2041 2041 def generate(repo):
2042 2042 entries, bytes, data = generatev1(repo)
2043 2043 return data
2044 2044
2045 2045 available[b'v1'] = generate
2046 2046 # try to fetch a v2 generator
2047 2047 generatev2 = getattr(mercurial.streamclone, "generatev2", None)
2048 2048 if generatev2 is not None:
2049 2049
2050 2050 def generate(repo):
2051 2051 entries, bytes, data = generatev2(repo, None, None, True)
2052 2052 return data
2053 2053
2054 2054 available[b'v2'] = generate
2055 2055 # try to fetch a v3 generator
2056 2056 generatev3 = getattr(mercurial.streamclone, "generatev3", None)
2057 2057 if generatev3 is not None:
2058 2058
2059 2059 def generate(repo):
2060 2060 entries, bytes, data = generatev3(repo, None, None, True)
2061 2061 return data
2062 2062
2063 2063 available[b'v3-exp'] = generate
2064 2064
2065 2065 # resolve the request
2066 2066 if version == b"latest":
2067 2067 # latest is the highest non-experimental version
2068 2068 latest_key = max(v for v in available if b'-exp' not in v)
2069 2069 return available[latest_key]
2070 2070 elif version in available:
2071 2071 return available[version]
2072 2072 else:
2073 2073 msg = b"unknown or unavailable version: %s"
2074 2074 msg %= version
2075 2075 hint = b"available versions: %s"
2076 2076 hint %= b', '.join(sorted(available))
2077 2077 raise error.Abort(msg, hint=hint)
2078 2078
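# Worked example of the resolution above (a sketch, assuming all three
# generators can be imported): `available` then holds the keys b'v1', b'v2'
# and b'v3-exp'. Requesting b"latest" skips any key containing b'-exp' and
# returns the b'v2' generator, while the experimental v3 stream must be
# requested explicitly as b"v3-exp".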
2079 2079
2080 2080 @command(
2081 2081 b'perf::stream-locked-section',
2082 2082 [
2083 2083 (
2084 2084 b'',
2085 2085 b'stream-version',
2086 2086 b'latest',
2087 2087 b'stream version to use ("v1", "v2", "v3" or "latest", the default)',
2088 2088 ),
2089 2089 ]
2090 2090 + formatteropts,
2091 2091 )
2092 2092 def perf_stream_clone_scan(ui, repo, stream_version, **opts):
2093 2093 """benchmark the initial, repo-locked, section of a stream-clone"""
2094 2094
2095 2095 opts = _byteskwargs(opts)
2096 2096 timer, fm = gettimer(ui, opts)
2097 2097
2098 2098 # deletion of the generator may trigger some cleanup that we do not want to
2099 2099 # measure
2100 2100 result_holder = [None]
2101 2101
2102 2102 def setupone():
2103 2103 result_holder[0] = None
2104 2104
2105 2105 generate = _find_stream_generator(stream_version)
2106 2106
2107 2107 def runone():
2108 2108 # the lock is held for the duration the initialisation
2109 2109 # the lock is held for the duration of the initialisation
2110 2110
2111 2111 timer(runone, setup=setupone, title=b"load")
2112 2112 fm.end()
2113 2113
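# Illustrative `perf::stream-locked-section` invocations:
#
#   $ hg perf::stream-locked-section                      # "latest" version
#   $ hg perf::stream-locked-section --stream-version v2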
2114 2114
2115 2115 @command(
2116 2116 b'perf::stream-generate',
2117 2117 [
2118 2118 (
2119 2119 b'',
2120 2120 b'stream-version',
2121 2121 b'latest',
2122 2122 b'stream version to use ("v1", "v2" or "latest", the default)',
2123 2123 ),
2124 2124 ]
2125 2125 + formatteropts,
2126 2126 )
2127 2127 def perf_stream_clone_generate(ui, repo, stream_version, **opts):
2128 2128 """benchmark the full generation of a stream clone"""
2129 2129
2130 2130 opts = _byteskwargs(opts)
2131 2131 timer, fm = gettimer(ui, opts)
2132 2132
2133 2133 # deletion of the generator may trigger some cleanup that we do not want to
2134 2134 # measure
2135 2135
2136 2136 generate = _find_stream_generator(stream_version)
2137 2137
2138 2138 def runone():
2139 2139 # the lock is held for the duration of the initialisation
2140 2140 for chunk in generate(repo):
2141 2141 pass
2142 2142
2143 2143 timer(runone, title=b"generate")
2144 2144 fm.end()
2145 2145
2146 2146
2147 2147 @command(
2148 2148 b'perf::stream-consume',
2149 2149 formatteropts,
2150 2150 )
2151 2151 def perf_stream_clone_consume(ui, repo, filename, **opts):
2152 2152 """benchmark the full application of a stream clone
2153 2153
2154 2154 This includes the creation of the repository
2155 2155 """
2156 2156 # try except to appease check code
2157 2157 msg = b"mercurial too old, missing necessary module: %s"
2158 2158 try:
2159 2159 from mercurial import bundle2
2160 2160 except ImportError as exc:
2161 2161 msg %= _bytestr(exc)
2162 2162 raise error.Abort(msg)
2163 2163 try:
2164 2164 from mercurial import exchange
2165 2165 except ImportError as exc:
2166 2166 msg %= _bytestr(exc)
2167 2167 raise error.Abort(msg)
2168 2168 try:
2169 2169 from mercurial import hg
2170 2170 except ImportError as exc:
2171 2171 msg %= _bytestr(exc)
2172 2172 raise error.Abort(msg)
2173 2173 try:
2174 2174 from mercurial import localrepo
2175 2175 except ImportError as exc:
2176 2176 msg %= _bytestr(exc)
2177 2177 raise error.Abort(msg)
2178 2178
2179 2179 opts = _byteskwargs(opts)
2180 2180 timer, fm = gettimer(ui, opts)
2181 2181
2182 2182 # deletion of the generator may trigger some cleanup that we do not want to
2183 2183 # measure
2184 2184 if not (os.path.isfile(filename) and os.access(filename, os.R_OK)):
2185 2185 raise error.Abort("not a readable file: %s" % filename)
2186 2186
2187 2187 run_variables = [None, None]
2188 2188
2189 2189 @contextlib.contextmanager
2190 2190 def context():
2191 2191 with open(filename, mode='rb') as bundle:
2192 2192 with tempfile.TemporaryDirectory() as tmp_dir:
2193 2193 tmp_dir = fsencode(tmp_dir)
2194 2194 run_variables[0] = bundle
2195 2195 run_variables[1] = tmp_dir
2196 2196 yield
2197 2197 run_variables[0] = None
2198 2198 run_variables[1] = None
2199 2199
2200 2200 def runone():
2201 2201 bundle = run_variables[0]
2202 2202 tmp_dir = run_variables[1]
2203 2203 # only pass ui when no srcrepo
2204 2204 localrepo.createrepository(
2205 2205 repo.ui, tmp_dir, requirements=repo.requirements
2206 2206 )
2207 2207 target = hg.repository(repo.ui, tmp_dir)
2208 2208 gen = exchange.readbundle(target.ui, bundle, bundle.name)
2209 2209 # stream v1
2210 2210 if util.safehasattr(gen, 'apply'):
2211 2211 gen.apply(target)
2212 2212 else:
2213 2213 with target.transaction(b"perf::stream-consume") as tr:
2214 2214 bundle2.applybundle(
2215 2215 target,
2216 2216 gen,
2217 2217 tr,
2218 2218 source=b'unbundle',
2219 2219 url=filename,
2220 2220 )
2221 2221
2222 2222 timer(runone, context=context, title=b"consume")
2223 2223 fm.end()
2224 2224
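# Illustrative `perf::stream-consume` invocation ('stream.hg' is a placeholder
# for a previously generated stream bundle):
#
#   $ hg perf::stream-consume stream.hg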
2225 2225
2226 2226 @command(b'perf::parents|perfparents', formatteropts)
2227 2227 def perfparents(ui, repo, **opts):
2228 2228 """benchmark the time necessary to fetch one changeset's parents.
2229 2229
2230 2230 The fetch is done using the `node identifier`, traversing all object layers
2231 2231 from the repository object. The first N revisions will be used for this
2232 2232 benchmark. N is controlled by the ``perf.parentscount`` config option
2233 2233 (default: 1000).
2234 2234 """
2235 2235 opts = _byteskwargs(opts)
2236 2236 timer, fm = gettimer(ui, opts)
2237 2237 # control the number of commits perfparents iterates over
2238 2238 # experimental config: perf.parentscount
2239 2239 count = getint(ui, b"perf", b"parentscount", 1000)
2240 2240 if len(repo.changelog) < count:
2241 2241 raise error.Abort(b"repo needs %d commits for this test" % count)
2242 2242 repo = repo.unfiltered()
2243 2243 nl = [repo.changelog.node(i) for i in _xrange(count)]
2244 2244
2245 2245 def d():
2246 2246 for n in nl:
2247 2247 repo.changelog.parents(n)
2248 2248
2249 2249 timer(d)
2250 2250 fm.end()
2251 2251
2252 2252
2253 2253 @command(b'perf::ctxfiles|perfctxfiles', formatteropts)
2254 2254 def perfctxfiles(ui, repo, x, **opts):
2255 2255 opts = _byteskwargs(opts)
2256 2256 x = int(x)
2257 2257 timer, fm = gettimer(ui, opts)
2258 2258
2259 2259 def d():
2260 2260 len(repo[x].files())
2261 2261
2262 2262 timer(d)
2263 2263 fm.end()
2264 2264
2265 2265
2266 2266 @command(b'perf::rawfiles|perfrawfiles', formatteropts)
2267 2267 def perfrawfiles(ui, repo, x, **opts):
2268 2268 opts = _byteskwargs(opts)
2269 2269 x = int(x)
2270 2270 timer, fm = gettimer(ui, opts)
2271 2271 cl = repo.changelog
2272 2272
2273 2273 def d():
2274 2274 len(cl.read(x)[3])
2275 2275
2276 2276 timer(d)
2277 2277 fm.end()
2278 2278
2279 2279
2280 2280 @command(b'perf::lookup|perflookup', formatteropts)
2281 2281 def perflookup(ui, repo, rev, **opts):
2282 2282 opts = _byteskwargs(opts)
2283 2283 timer, fm = gettimer(ui, opts)
2284 2284 timer(lambda: len(repo.lookup(rev)))
2285 2285 fm.end()
2286 2286
2287 2287
2288 2288 @command(
2289 2289 b'perf::linelogedits|perflinelogedits',
2290 2290 [
2291 2291 (b'n', b'edits', 10000, b'number of edits'),
2292 2292 (b'', b'max-hunk-lines', 10, b'max lines in a hunk'),
2293 2293 ],
2294 2294 norepo=True,
2295 2295 )
2296 2296 def perflinelogedits(ui, **opts):
2297 2297 from mercurial import linelog
2298 2298
2299 2299 opts = _byteskwargs(opts)
2300 2300
2301 2301 edits = opts[b'edits']
2302 2302 maxhunklines = opts[b'max_hunk_lines']
2303 2303
2304 2304 maxb1 = 100000
2305 2305 random.seed(0)
2306 2306 randint = random.randint
2307 2307 currentlines = 0
2308 2308 arglist = []
2309 2309 for rev in _xrange(edits):
2310 2310 a1 = randint(0, currentlines)
2311 2311 a2 = randint(a1, min(currentlines, a1 + maxhunklines))
2312 2312 b1 = randint(0, maxb1)
2313 2313 b2 = randint(b1, b1 + maxhunklines)
2314 2314 currentlines += (b2 - b1) - (a2 - a1)
2315 2315 arglist.append((rev, a1, a2, b1, b2))
2316 2316
2317 2317 def d():
2318 2318 ll = linelog.linelog()
2319 2319 for args in arglist:
2320 2320 ll.replacelines(*args)
2321 2321
2322 2322 timer, fm = gettimer(ui, opts)
2323 2323 timer(d)
2324 2324 fm.end()
2325 2325
2326 2326
2327 2327 @command(b'perf::revrange|perfrevrange', formatteropts)
2328 2328 def perfrevrange(ui, repo, *specs, **opts):
2329 2329 opts = _byteskwargs(opts)
2330 2330 timer, fm = gettimer(ui, opts)
2331 2331 revrange = scmutil.revrange
2332 2332 timer(lambda: len(revrange(repo, specs)))
2333 2333 fm.end()
2334 2334
2335 2335
2336 2336 @command(b'perf::nodelookup|perfnodelookup', formatteropts)
2337 2337 def perfnodelookup(ui, repo, rev, **opts):
2338 2338 opts = _byteskwargs(opts)
2339 2339 timer, fm = gettimer(ui, opts)
2340 2340 import mercurial.revlog
2341 2341
2342 2342 mercurial.revlog._prereadsize = 2 ** 24 # disable lazy parser in old hg
2343 2343 n = scmutil.revsingle(repo, rev).node()
2344 2344
2345 2345 try:
2346 2346 cl = revlog(getsvfs(repo), radix=b"00changelog")
2347 2347 except TypeError:
2348 2348 cl = revlog(getsvfs(repo), indexfile=b"00changelog.i")
2349 2349
2350 2350 def d():
2351 2351 cl.rev(n)
2352 2352 clearcaches(cl)
2353 2353
2354 2354 timer(d)
2355 2355 fm.end()
2356 2356
2357 2357
2358 2358 @command(
2359 2359 b'perf::log|perflog',
2360 2360 [(b'', b'rename', False, b'ask log to follow renames')] + formatteropts,
2361 2361 )
2362 2362 def perflog(ui, repo, rev=None, **opts):
2363 2363 opts = _byteskwargs(opts)
2364 2364 if rev is None:
2365 2365 rev = []
2366 2366 timer, fm = gettimer(ui, opts)
2367 2367 ui.pushbuffer()
2368 2368 timer(
2369 2369 lambda: commands.log(
2370 2370 ui, repo, rev=rev, date=b'', user=b'', copies=opts.get(b'rename')
2371 2371 )
2372 2372 )
2373 2373 ui.popbuffer()
2374 2374 fm.end()
2375 2375
2376 2376
2377 2377 @command(b'perf::moonwalk|perfmoonwalk', formatteropts)
2378 2378 def perfmoonwalk(ui, repo, **opts):
2379 2379 """benchmark walking the changelog backwards
2380 2380
2381 2381 This also loads the changelog data for each revision in the changelog.
2382 2382 """
2383 2383 opts = _byteskwargs(opts)
2384 2384 timer, fm = gettimer(ui, opts)
2385 2385
2386 2386 def moonwalk():
2387 2387 for i in repo.changelog.revs(start=(len(repo) - 1), stop=-1):
2388 2388 ctx = repo[i]
2389 2389 ctx.branch() # read changelog data (in addition to the index)
2390 2390
2391 2391 timer(moonwalk)
2392 2392 fm.end()
2393 2393
2394 2394
2395 2395 @command(
2396 2396 b'perf::templating|perftemplating',
2397 2397 [
2398 2398 (b'r', b'rev', [], b'revisions to run the template on'),
2399 2399 ]
2400 2400 + formatteropts,
2401 2401 )
2402 2402 def perftemplating(ui, repo, testedtemplate=None, **opts):
2403 2403 """test the rendering time of a given template"""
2404 2404 if makelogtemplater is None:
2405 2405 raise error.Abort(
2406 2406 b"perftemplating not available with this Mercurial",
2407 2407 hint=b"use 4.3 or later",
2408 2408 )
2409 2409
2410 2410 opts = _byteskwargs(opts)
2411 2411
2412 2412 nullui = ui.copy()
2413 2413 nullui.fout = open(os.devnull, 'wb')
2414 2414 nullui.disablepager()
2415 2415 revs = opts.get(b'rev')
2416 2416 if not revs:
2417 2417 revs = [b'all()']
2418 2418 revs = list(scmutil.revrange(repo, revs))
2419 2419
2420 2420 defaulttemplate = (
2421 2421 b'{date|shortdate} [{rev}:{node|short}]'
2422 2422 b' {author|person}: {desc|firstline}\n'
2423 2423 )
2424 2424 if testedtemplate is None:
2425 2425 testedtemplate = defaulttemplate
2426 2426 displayer = makelogtemplater(nullui, repo, testedtemplate)
2427 2427
2428 2428 def format():
2429 2429 for r in revs:
2430 2430 ctx = repo[r]
2431 2431 displayer.show(ctx)
2432 2432 displayer.flush(ctx)
2433 2433
2434 2434 timer, fm = gettimer(ui, opts)
2435 2435 timer(format)
2436 2436 fm.end()
2437 2437
2438 2438
2439 2439 def _displaystats(ui, opts, entries, data):
2440 2440 # use a second formatter because the data are quite different, not sure
2441 2441 # how it flies with the templater.
2442 2442 fm = ui.formatter(b'perf-stats', opts)
2443 2443 for key, title in entries:
2444 2444 values = data[key]
2445 2445 nbvalues = len(values)
2446 2446 values.sort()
2447 2447 stats = {
2448 2448 'key': key,
2449 2449 'title': title,
2450 2450 'nbitems': len(values),
2451 2451 'min': values[0][0],
2452 2452 '10%': values[(nbvalues * 10) // 100][0],
2453 2453 '25%': values[(nbvalues * 25) // 100][0],
2454 2454 '50%': values[(nbvalues * 50) // 100][0],
2455 2455 '75%': values[(nbvalues * 75) // 100][0],
2456 2456 '80%': values[(nbvalues * 80) // 100][0],
2457 2457 '85%': values[(nbvalues * 85) // 100][0],
2458 2458 '90%': values[(nbvalues * 90) // 100][0],
2459 2459 '95%': values[(nbvalues * 95) // 100][0],
2460 2460 '99%': values[(nbvalues * 99) // 100][0],
2461 2461 'max': values[-1][0],
2462 2462 }
2463 2463 fm.startitem()
2464 2464 fm.data(**stats)
2465 2465 # make node pretty for the human output
2466 2466 fm.plain('### %s (%d items)\n' % (title, len(values)))
2467 2467 lines = [
2468 2468 'min',
2469 2469 '10%',
2470 2470 '25%',
2471 2471 '50%',
2472 2472 '75%',
2473 2473 '80%',
2474 2474 '85%',
2475 2475 '90%',
2476 2476 '95%',
2477 2477 '99%',
2478 2478 'max',
2479 2479 ]
2480 2480 for l in lines:
2481 2481 fm.plain('%s: %s\n' % (l, stats[l]))
2482 2482 fm.end()
2483 2483
2484 2484
2485 2485 @command(
2486 2486 b'perf::helper-mergecopies|perfhelper-mergecopies',
2487 2487 formatteropts
2488 2488 + [
2489 2489 (b'r', b'revs', [], b'restrict search to these revisions'),
2490 2490 (b'', b'timing', False, b'provides extra data (costly)'),
2491 2491 (b'', b'stats', False, b'provides statistics about the measured data'),
2492 2492 ],
2493 2493 )
2494 2494 def perfhelpermergecopies(ui, repo, revs=[], **opts):
2495 2495 """find statistics about potential parameters for `perfmergecopies`
2496 2496
2497 2497 This command finds (base, p1, p2) triplets relevant for copytracing
2498 2498 benchmarking in the context of a merge. It reports values for some of the
2499 2499 parameters that impact merge copy tracing time during merge.
2500 2500
2501 2501 If `--timing` is set, rename detection is run and the associated timing
2502 2502 will be reported. The extra details come at the cost of slower command
2503 2503 execution.
2504 2504
2505 2505 Since rename detection is only run once, other factors might easily
2506 2506 affect the precision of the timing. However it should give a good
2507 2507 approximation of which revision triplets are very costly.
2508 2508 """
2509 2509 opts = _byteskwargs(opts)
2510 2510 fm = ui.formatter(b'perf', opts)
2511 2511 dotiming = opts[b'timing']
2512 2512 dostats = opts[b'stats']
2513 2513
2514 2514 output_template = [
2515 2515 ("base", "%(base)12s"),
2516 2516 ("p1", "%(p1.node)12s"),
2517 2517 ("p2", "%(p2.node)12s"),
2518 2518 ("p1.nb-revs", "%(p1.nbrevs)12d"),
2519 2519 ("p1.nb-files", "%(p1.nbmissingfiles)12d"),
2520 2520 ("p1.renames", "%(p1.renamedfiles)12d"),
2521 2521 ("p1.time", "%(p1.time)12.3f"),
2522 2522 ("p2.nb-revs", "%(p2.nbrevs)12d"),
2523 2523 ("p2.nb-files", "%(p2.nbmissingfiles)12d"),
2524 2524 ("p2.renames", "%(p2.renamedfiles)12d"),
2525 2525 ("p2.time", "%(p2.time)12.3f"),
2526 2526 ("renames", "%(nbrenamedfiles)12d"),
2527 2527 ("total.time", "%(time)12.3f"),
2528 2528 ]
2529 2529 if not dotiming:
2530 2530 output_template = [
2531 2531 i
2532 2532 for i in output_template
2533 2533 if not ('time' in i[0] or 'renames' in i[0])
2534 2534 ]
2535 2535 header_names = [h for (h, v) in output_template]
2536 2536 output = ' '.join([v for (h, v) in output_template]) + '\n'
2537 2537 header = ' '.join(['%12s'] * len(header_names)) + '\n'
2538 2538 fm.plain(header % tuple(header_names))
2539 2539
2540 2540 if not revs:
2541 2541 revs = ['all()']
2542 2542 revs = scmutil.revrange(repo, revs)
2543 2543
2544 2544 if dostats:
2545 2545 alldata = {
2546 2546 'nbrevs': [],
2547 2547 'nbmissingfiles': [],
2548 2548 }
2549 2549 if dotiming:
2550 2550 alldata['parentnbrenames'] = []
2551 2551 alldata['totalnbrenames'] = []
2552 2552 alldata['parenttime'] = []
2553 2553 alldata['totaltime'] = []
2554 2554
2555 2555 roi = repo.revs('merge() and %ld', revs)
2556 2556 for r in roi:
2557 2557 ctx = repo[r]
2558 2558 p1 = ctx.p1()
2559 2559 p2 = ctx.p2()
2560 2560 bases = repo.changelog._commonancestorsheads(p1.rev(), p2.rev())
2561 2561 for b in bases:
2562 2562 b = repo[b]
2563 2563 p1missing = copies._computeforwardmissing(b, p1)
2564 2564 p2missing = copies._computeforwardmissing(b, p2)
2565 2565 data = {
2566 2566 b'base': b.hex(),
2567 2567 b'p1.node': p1.hex(),
2568 2568 b'p1.nbrevs': len(repo.revs('only(%d, %d)', p1.rev(), b.rev())),
2569 2569 b'p1.nbmissingfiles': len(p1missing),
2570 2570 b'p2.node': p2.hex(),
2571 2571 b'p2.nbrevs': len(repo.revs('only(%d, %d)', p2.rev(), b.rev())),
2572 2572 b'p2.nbmissingfiles': len(p2missing),
2573 2573 }
2574 2574 if dostats:
2575 2575 if p1missing:
2576 2576 alldata['nbrevs'].append(
2577 2577 (data['p1.nbrevs'], b.hex(), p1.hex())
2578 2578 )
2579 2579 alldata['nbmissingfiles'].append(
2580 2580 (data['p1.nbmissingfiles'], b.hex(), p1.hex())
2581 2581 )
2582 2582 if p2missing:
2583 2583 alldata['nbrevs'].append(
2584 2584 (data['p2.nbrevs'], b.hex(), p2.hex())
2585 2585 )
2586 2586 alldata['nbmissingfiles'].append(
2587 2587 (data['p2.nbmissingfiles'], b.hex(), p2.hex())
2588 2588 )
2589 2589 if dotiming:
2590 2590 begin = util.timer()
2591 2591 mergedata = copies.mergecopies(repo, p1, p2, b)
2592 2592 end = util.timer()
2593 2593 # not very stable timing since we did only one run
2594 2594 data['time'] = end - begin
2595 2595 # mergedata contains five dicts: "copy", "movewithdir",
2596 2596 # "diverge", "renamedelete" and "dirmove".
2597 2597 # The first 4 are about renamed files, so let's count those.
2598 2598 renames = len(mergedata[0])
2599 2599 renames += len(mergedata[1])
2600 2600 renames += len(mergedata[2])
2601 2601 renames += len(mergedata[3])
2602 2602 data['nbrenamedfiles'] = renames
2603 2603 begin = util.timer()
2604 2604 p1renames = copies.pathcopies(b, p1)
2605 2605 end = util.timer()
2606 2606 data['p1.time'] = end - begin
2607 2607 begin = util.timer()
2608 2608 p2renames = copies.pathcopies(b, p2)
2609 2609 end = util.timer()
2610 2610 data['p2.time'] = end - begin
2611 2611 data['p1.renamedfiles'] = len(p1renames)
2612 2612 data['p2.renamedfiles'] = len(p2renames)
2613 2613
2614 2614 if dostats:
2615 2615 if p1missing:
2616 2616 alldata['parentnbrenames'].append(
2617 2617 (data['p1.renamedfiles'], b.hex(), p1.hex())
2618 2618 )
2619 2619 alldata['parenttime'].append(
2620 2620 (data['p1.time'], b.hex(), p1.hex())
2621 2621 )
2622 2622 if p2missing:
2623 2623 alldata['parentnbrenames'].append(
2624 2624 (data['p2.renamedfiles'], b.hex(), p2.hex())
2625 2625 )
2626 2626 alldata['parenttime'].append(
2627 2627 (data['p2.time'], b.hex(), p2.hex())
2628 2628 )
2629 2629 if p1missing or p2missing:
2630 2630 alldata['totalnbrenames'].append(
2631 2631 (
2632 2632 data['nbrenamedfiles'],
2633 2633 b.hex(),
2634 2634 p1.hex(),
2635 2635 p2.hex(),
2636 2636 )
2637 2637 )
2638 2638 alldata['totaltime'].append(
2639 2639 (data['time'], b.hex(), p1.hex(), p2.hex())
2640 2640 )
2641 2641 fm.startitem()
2642 2642 fm.data(**data)
2643 2643 # make node pretty for the human output
2644 2644 out = data.copy()
2645 2645 out['base'] = fm.hexfunc(b.node())
2646 2646 out['p1.node'] = fm.hexfunc(p1.node())
2647 2647 out['p2.node'] = fm.hexfunc(p2.node())
2648 2648 fm.plain(output % out)
2649 2649
2650 2650 fm.end()
2651 2651 if dostats:
2652 2652 # use a second formatter because the data are quite different, not sure
2653 2653 # how it flies with the templater.
2654 2654 entries = [
2655 2655 ('nbrevs', 'number of revisions covered'),
2656 2656 ('nbmissingfiles', 'number of missing files at head'),
2657 2657 ]
2658 2658 if dotiming:
2659 2659 entries.append(
2660 2660 ('parentnbrenames', 'rename from one parent to base')
2661 2661 )
2662 2662 entries.append(('totalnbrenames', 'total number of renames'))
2663 2663 entries.append(('parenttime', 'time for one parent'))
2664 2664 entries.append(('totaltime', 'time for both parents'))
2665 2665 _displaystats(ui, opts, entries, alldata)
2666 2666
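# Illustrative `perf::helper-mergecopies` invocations (the revset is an
# example value):
#
#   $ hg perf::helper-mergecopies --stats
#   $ hg perf::helper-mergecopies -r 'last(merge(), 10)' --timing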
2667 2667
2668 2668 @command(
2669 2669 b'perf::helper-pathcopies|perfhelper-pathcopies',
2670 2670 formatteropts
2671 2671 + [
2672 2672 (b'r', b'revs', [], b'restrict search to these revisions'),
2673 2673 (b'', b'timing', False, b'provides extra data (costly)'),
2674 2674 (b'', b'stats', False, b'provides statistics about the measured data'),
2675 2675 ],
2676 2676 )
2677 2677 def perfhelperpathcopies(ui, repo, revs=[], **opts):
2678 2678 """find statistics about potential parameters for `perftracecopies`
2679 2679
2680 2680 This command finds source-destination pairs relevant for copytracing testing.
2681 2681 It reports values for some of the parameters that impact copy tracing time.
2682 2682
2683 2683 If `--timing` is set, rename detection is run and the associated timing
2684 2684 will be reported. The extra details come at the cost of a slower command
2685 2685 execution.
2686 2686
2687 2687 Since the rename detection is only run once, other factors might easily
2688 2688 affect the precision of the timing. However it should give a good
2689 2689 approximation of which revision pairs are very costly.
2690 2690 """
2691 2691 opts = _byteskwargs(opts)
2692 2692 fm = ui.formatter(b'perf', opts)
2693 2693 dotiming = opts[b'timing']
2694 2694 dostats = opts[b'stats']
2695 2695
2696 2696 if dotiming:
2697 2697 header = '%12s %12s %12s %12s %12s %12s\n'
2698 2698 output = (
2699 2699 "%(source)12s %(destination)12s "
2700 2700 "%(nbrevs)12d %(nbmissingfiles)12d "
2701 2701 "%(nbrenamedfiles)12d %(time)18.5f\n"
2702 2702 )
2703 2703 header_names = (
2704 2704 "source",
2705 2705 "destination",
2706 2706 "nb-revs",
2707 2707 "nb-files",
2708 2708 "nb-renames",
2709 2709 "time",
2710 2710 )
2711 2711 fm.plain(header % header_names)
2712 2712 else:
2713 2713 header = '%12s %12s %12s %12s\n'
2714 2714 output = (
2715 2715 "%(source)12s %(destination)12s "
2716 2716 "%(nbrevs)12d %(nbmissingfiles)12d\n"
2717 2717 )
2718 2718 fm.plain(header % ("source", "destination", "nb-revs", "nb-files"))
2719 2719
2720 2720 if not revs:
2721 2721 revs = ['all()']
2722 2722 revs = scmutil.revrange(repo, revs)
2723 2723
2724 2724 if dostats:
2725 2725 alldata = {
2726 2726 'nbrevs': [],
2727 2727 'nbmissingfiles': [],
2728 2728 }
2729 2729 if dotiming:
2730 2730 alldata['nbrenames'] = []
2731 2731 alldata['time'] = []
2732 2732
2733 2733 roi = repo.revs('merge() and %ld', revs)
2734 2734 for r in roi:
2735 2735 ctx = repo[r]
2736 2736 p1 = ctx.p1().rev()
2737 2737 p2 = ctx.p2().rev()
2738 2738 bases = repo.changelog._commonancestorsheads(p1, p2)
2739 2739 for p in (p1, p2):
2740 2740 for b in bases:
2741 2741 base = repo[b]
2742 2742 parent = repo[p]
2743 2743 missing = copies._computeforwardmissing(base, parent)
2744 2744 if not missing:
2745 2745 continue
2746 2746 data = {
2747 2747 b'source': base.hex(),
2748 2748 b'destination': parent.hex(),
2749 2749 b'nbrevs': len(repo.revs('only(%d, %d)', p, b)),
2750 2750 b'nbmissingfiles': len(missing),
2751 2751 }
2752 2752 if dostats:
2753 2753 alldata['nbrevs'].append(
2754 2754 (
2755 2755 data['nbrevs'],
2756 2756 base.hex(),
2757 2757 parent.hex(),
2758 2758 )
2759 2759 )
2760 2760 alldata['nbmissingfiles'].append(
2761 2761 (
2762 2762 data['nbmissingfiles'],
2763 2763 base.hex(),
2764 2764 parent.hex(),
2765 2765 )
2766 2766 )
2767 2767 if dotiming:
2768 2768 begin = util.timer()
2769 2769 renames = copies.pathcopies(base, parent)
2770 2770 end = util.timer()
2771 2771 # not very stable timing since we did only one run
2772 2772 data['time'] = end - begin
2773 2773 data['nbrenamedfiles'] = len(renames)
2774 2774 if dostats:
2775 2775 alldata['time'].append(
2776 2776 (
2777 2777 data['time'],
2778 2778 base.hex(),
2779 2779 parent.hex(),
2780 2780 )
2781 2781 )
2782 2782 alldata['nbrenames'].append(
2783 2783 (
2784 2784 data['nbrenamedfiles'],
2785 2785 base.hex(),
2786 2786 parent.hex(),
2787 2787 )
2788 2788 )
2789 2789 fm.startitem()
2790 2790 fm.data(**data)
2791 2791 out = data.copy()
2792 2792 out['source'] = fm.hexfunc(base.node())
2793 2793 out['destination'] = fm.hexfunc(parent.node())
2794 2794 fm.plain(output % out)
2795 2795
2796 2796 fm.end()
2797 2797 if dostats:
2798 2798 entries = [
2799 2799 ('nbrevs', 'number of revisions covered'),
2800 2800 ('nbmissingfiles', 'number of missing files at head'),
2801 2801 ]
2802 2802 if dotiming:
2803 2803 entries.append(('nbrenames', 'renamed files'))
2804 2804 entries.append(('time', 'time'))
2805 2805 _displaystats(ui, opts, entries, alldata)
2806 2806
2807 2807
2808 2808 @command(b'perf::cca|perfcca', formatteropts)
2809 2809 def perfcca(ui, repo, **opts):
2810 2810 opts = _byteskwargs(opts)
2811 2811 timer, fm = gettimer(ui, opts)
2812 2812 timer(lambda: scmutil.casecollisionauditor(ui, False, repo.dirstate))
2813 2813 fm.end()
2814 2814
2815 2815
2816 2816 @command(b'perf::fncacheload|perffncacheload', formatteropts)
2817 2817 def perffncacheload(ui, repo, **opts):
2818 2818 opts = _byteskwargs(opts)
2819 2819 timer, fm = gettimer(ui, opts)
2820 2820 s = repo.store
2821 2821
2822 2822 def d():
2823 2823 s.fncache._load()
2824 2824
2825 2825 timer(d)
2826 2826 fm.end()
2827 2827
2828 2828
2829 2829 @command(b'perf::fncachewrite|perffncachewrite', formatteropts)
2830 2830 def perffncachewrite(ui, repo, **opts):
2831 2831 opts = _byteskwargs(opts)
2832 2832 timer, fm = gettimer(ui, opts)
2833 2833 s = repo.store
2834 2834 lock = repo.lock()
2835 2835 s.fncache._load()
2836 2836 tr = repo.transaction(b'perffncachewrite')
2837 2837 tr.addbackup(b'fncache')
2838 2838
2839 2839 def d():
2840 2840 s.fncache._dirty = True
2841 2841 s.fncache.write(tr)
2842 2842
2843 2843 timer(d)
2844 2844 tr.close()
2845 2845 lock.release()
2846 2846 fm.end()
2847 2847
2848 2848
2849 2849 @command(b'perf::fncacheencode|perffncacheencode', formatteropts)
2850 2850 def perffncacheencode(ui, repo, **opts):
2851 2851 opts = _byteskwargs(opts)
2852 2852 timer, fm = gettimer(ui, opts)
2853 2853 s = repo.store
2854 2854 s.fncache._load()
2855 2855
2856 2856 def d():
2857 2857 for p in s.fncache.entries:
2858 2858 s.encode(p)
2859 2859
2860 2860 timer(d)
2861 2861 fm.end()
2862 2862
2863 2863
2864 2864 def _bdiffworker(q, blocks, xdiff, ready, done):
2865 2865 while not done.is_set():
2866 2866 pair = q.get()
2867 2867 while pair is not None:
2868 2868 if xdiff:
2869 2869 mdiff.bdiff.xdiffblocks(*pair)
2870 2870 elif blocks:
2871 2871 mdiff.bdiff.blocks(*pair)
2872 2872 else:
2873 2873 mdiff.textdiff(*pair)
2874 2874 q.task_done()
2875 2875 pair = q.get()
2876 2876 q.task_done() # for the None one
2877 2877 with ready:
2878 2878 ready.wait()
2879 2879
2880 2880
2881 2881 def _manifestrevision(repo, mnode):
2882 2882 ml = repo.manifestlog
2883 2883
2884 2884 if util.safehasattr(ml, b'getstorage'):
2885 2885 store = ml.getstorage(b'')
2886 2886 else:
2887 2887 store = ml._revlog
2888 2888
2889 2889 return store.revision(mnode)
2890 2890
2891 2891
2892 2892 @command(
2893 2893 b'perf::bdiff|perfbdiff',
2894 2894 revlogopts
2895 2895 + formatteropts
2896 2896 + [
2897 2897 (
2898 2898 b'',
2899 2899 b'count',
2900 2900 1,
2901 2901 b'number of revisions to test (when using --startrev)',
2902 2902 ),
2903 2903 (b'', b'alldata', False, b'test bdiffs for all associated revisions'),
2904 2904 (b'', b'threads', 0, b'number of thread to use (disable with 0)'),
2905 2905 (b'', b'blocks', False, b'test computing diffs into blocks'),
2906 2906 (b'', b'xdiff', False, b'use xdiff algorithm'),
2907 2907 ],
2908 2908 b'-c|-m|FILE REV',
2909 2909 )
2910 2910 def perfbdiff(ui, repo, file_, rev=None, count=None, threads=0, **opts):
2911 2911 """benchmark a bdiff between revisions
2912 2912
2913 2913 By default, benchmark a bdiff between its delta parent and itself.
2914 2914
2915 2915 With ``--count``, benchmark bdiffs between delta parents and self for N
2916 2916 revisions starting at the specified revision.
2917 2917
2918 2918 With ``--alldata``, assume the requested revision is a changeset and
2919 2919 measure bdiffs for all changes related to that changeset (manifest
2920 2920 and filelogs).
2921 2921 """
2922 2922 opts = _byteskwargs(opts)
2923 2923
2924 2924 if opts[b'xdiff'] and not opts[b'blocks']:
2925 2925 raise error.CommandError(b'perfbdiff', b'--xdiff requires --blocks')
2926 2926
2927 2927 if opts[b'alldata']:
2928 2928 opts[b'changelog'] = True
2929 2929
2930 2930 if opts.get(b'changelog') or opts.get(b'manifest'):
2931 2931 file_, rev = None, file_
2932 2932 elif rev is None:
2933 2933 raise error.CommandError(b'perfbdiff', b'invalid arguments')
2934 2934
2935 2935 blocks = opts[b'blocks']
2936 2936 xdiff = opts[b'xdiff']
2937 2937 textpairs = []
2938 2938
2939 2939 r = cmdutil.openrevlog(repo, b'perfbdiff', file_, opts)
2940 2940
2941 2941 startrev = r.rev(r.lookup(rev))
2942 2942 for rev in range(startrev, min(startrev + count, len(r) - 1)):
2943 2943 if opts[b'alldata']:
2944 2944 # Load revisions associated with changeset.
2945 2945 ctx = repo[rev]
2946 2946 mtext = _manifestrevision(repo, ctx.manifestnode())
2947 2947 for pctx in ctx.parents():
2948 2948 pman = _manifestrevision(repo, pctx.manifestnode())
2949 2949 textpairs.append((pman, mtext))
2950 2950
2951 2951 # Load filelog revisions by iterating manifest delta.
2952 2952 man = ctx.manifest()
2953 2953 pman = ctx.p1().manifest()
2954 2954 for filename, change in pman.diff(man).items():
2955 2955 fctx = repo.file(filename)
2956 2956 f1 = fctx.revision(change[0][0] or -1)
2957 2957 f2 = fctx.revision(change[1][0] or -1)
2958 2958 textpairs.append((f1, f2))
2959 2959 else:
2960 2960 dp = r.deltaparent(rev)
2961 2961 textpairs.append((r.revision(dp), r.revision(rev)))
2962 2962
2963 2963 withthreads = threads > 0
2964 2964 if not withthreads:
2965 2965
2966 2966 def d():
2967 2967 for pair in textpairs:
2968 2968 if xdiff:
2969 2969 mdiff.bdiff.xdiffblocks(*pair)
2970 2970 elif blocks:
2971 2971 mdiff.bdiff.blocks(*pair)
2972 2972 else:
2973 2973 mdiff.textdiff(*pair)
2974 2974
2975 2975 else:
2976 2976 q = queue()
2977 2977 for i in _xrange(threads):
2978 2978 q.put(None)
2979 2979 ready = threading.Condition()
2980 2980 done = threading.Event()
2981 2981 for i in _xrange(threads):
2982 2982 threading.Thread(
2983 2983 target=_bdiffworker, args=(q, blocks, xdiff, ready, done)
2984 2984 ).start()
2985 2985 q.join()
2986 2986
2987 2987 def d():
2988 2988 for pair in textpairs:
2989 2989 q.put(pair)
2990 2990 for i in _xrange(threads):
2991 2991 q.put(None)
2992 2992 with ready:
2993 2993 ready.notify_all()
2994 2994 q.join()
2995 2995
2996 2996 timer, fm = gettimer(ui, opts)
2997 2997 timer(d)
2998 2998 fm.end()
2999 2999
3000 3000 if withthreads:
3001 3001 done.set()
3002 3002 for i in _xrange(threads):
3003 3003 q.put(None)
3004 3004 with ready:
3005 3005 ready.notify_all()
3006 3006
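# Illustrative `perf::bdiff` invocations (revision values are placeholders):
#
#   $ hg perf::bdiff -c 100 --count 50             # changelog deltas from rev 100
#   $ hg perf::bdiff -c 100 --alldata --threads 4
#   $ hg perf::bdiff -m 0 --blocks --xdiff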
3007 3007
3008 3008 @command(
3009 3009 b'perf::unbundle',
3010 3010 formatteropts,
3011 3011 b'BUNDLE_FILE',
3012 3012 )
3013 3013 def perf_unbundle(ui, repo, fname, **opts):
3014 3014 """benchmark application of a bundle in a repository.
3015 3015
3016 3016 This does not include the final transaction processing"""
3017 3017
3018 3018 from mercurial import exchange
3019 3019 from mercurial import bundle2
3020 3020 from mercurial import transaction
3021 3021
3022 3022 opts = _byteskwargs(opts)
3023 3023
3024 3024 ### some compatibility hotfix
3025 3025 #
3026 3026 # the data attribute is dropped in 63edc384d3b7, a changeset introducing a
3027 3027 # critical regression that breaks transaction rollback for files that are
3028 3028 # de-inlined.
3029 3029 method = transaction.transaction._addentry
3030 3030 pre_63edc384d3b7 = "data" in getargspec(method).args
3031 3031 # the `detailed_exit_code` attribute is introduced in 33c0c25d0b0f
3032 3032 # a changeset that is a close descendant of 18415fc918a1, the changeset
3033 3033 # that concludes the fix run for the bug introduced in 63edc384d3b7.
3034 3034 args = getargspec(error.Abort.__init__).args
3035 3035 post_18415fc918a1 = "detailed_exit_code" in args
3036 3036
3037 3037 old_max_inline = None
3038 3038 try:
3039 3039 if not (pre_63edc384d3b7 or post_18415fc918a1):
3040 3040 # disable inlining
3041 3041 old_max_inline = mercurial.revlog._maxinline
3042 3042 # large enough to never happen
3043 3043 mercurial.revlog._maxinline = 2 ** 50
3044 3044
3045 3045 with repo.lock():
3046 3046 bundle = [None, None]
3047 3047 orig_quiet = repo.ui.quiet
3048 3048 try:
3049 3049 repo.ui.quiet = True
3050 3050 with open(fname, mode="rb") as f:
3051 3051
3052 3052 def noop_report(*args, **kwargs):
3053 3053 pass
3054 3054
3055 3055 def setup():
3056 3056 gen, tr = bundle
3057 3057 if tr is not None:
3058 3058 tr.abort()
3059 3059 bundle[:] = [None, None]
3060 3060 f.seek(0)
3061 3061 bundle[0] = exchange.readbundle(ui, f, fname)
3062 3062 bundle[1] = repo.transaction(b'perf::unbundle')
3063 3063 # silence the transaction
3064 3064 bundle[1]._report = noop_report
3065 3065
3066 3066 def apply():
3067 3067 gen, tr = bundle
3068 3068 bundle2.applybundle(
3069 3069 repo,
3070 3070 gen,
3071 3071 tr,
3072 3072 source=b'perf::unbundle',
3073 3073 url=fname,
3074 3074 )
3075 3075
3076 3076 timer, fm = gettimer(ui, opts)
3077 3077 timer(apply, setup=setup)
3078 3078 fm.end()
3079 3079 finally:
3080 3080 repo.ui.quiet = orig_quiet  # restore the original verbosity
3081 3081 gen, tr = bundle
3082 3082 if tr is not None:
3083 3083 tr.abort()
3084 3084 finally:
3085 3085 if old_max_inline is not None:
3086 3086 mercurial.revlog._maxinline = old_max_inline
3087 3087
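# Illustrative `perf::unbundle` invocation ('changesets.hg' is a placeholder
# for a bundle created beforehand, e.g. with `hg bundle`):
#
#   $ hg perf::unbundle changesets.hg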
3088 3088
3089 3089 @command(
3090 3090 b'perf::unidiff|perfunidiff',
3091 3091 revlogopts
3092 3092 + formatteropts
3093 3093 + [
3094 3094 (
3095 3095 b'',
3096 3096 b'count',
3097 3097 1,
3098 3098 b'number of revisions to test (when using --startrev)',
3099 3099 ),
3100 3100 (b'', b'alldata', False, b'test unidiffs for all associated revisions'),
3101 3101 ],
3102 3102 b'-c|-m|FILE REV',
3103 3103 )
3104 3104 def perfunidiff(ui, repo, file_, rev=None, count=None, **opts):
3105 3105 """benchmark a unified diff between revisions
3106 3106
3107 3107 This doesn't include any copy tracing - it's just a unified diff
3108 3108 of the texts.
3109 3109
3110 3110 By default, benchmark a diff between its delta parent and itself.
3111 3111
3112 3112 With ``--count``, benchmark diffs between delta parents and self for N
3113 3113 revisions starting at the specified revision.
3114 3114
3115 3115 With ``--alldata``, assume the requested revision is a changeset and
3116 3116 measure diffs for all changes related to that changeset (manifest
3117 3117 and filelogs).
3118 3118 """
3119 3119 opts = _byteskwargs(opts)
3120 3120 if opts[b'alldata']:
3121 3121 opts[b'changelog'] = True
3122 3122
3123 3123 if opts.get(b'changelog') or opts.get(b'manifest'):
3124 3124 file_, rev = None, file_
3125 3125 elif rev is None:
3126 3126 raise error.CommandError(b'perfunidiff', b'invalid arguments')
3127 3127
3128 3128 textpairs = []
3129 3129
3130 3130 r = cmdutil.openrevlog(repo, b'perfunidiff', file_, opts)
3131 3131
3132 3132 startrev = r.rev(r.lookup(rev))
3133 3133 for rev in range(startrev, min(startrev + count, len(r) - 1)):
3134 3134 if opts[b'alldata']:
3135 3135 # Load revisions associated with changeset.
3136 3136 ctx = repo[rev]
3137 3137 mtext = _manifestrevision(repo, ctx.manifestnode())
3138 3138 for pctx in ctx.parents():
3139 3139 pman = _manifestrevision(repo, pctx.manifestnode())
3140 3140 textpairs.append((pman, mtext))
3141 3141
3142 3142 # Load filelog revisions by iterating manifest delta.
3143 3143 man = ctx.manifest()
3144 3144 pman = ctx.p1().manifest()
3145 3145 for filename, change in pman.diff(man).items():
3146 3146 fctx = repo.file(filename)
3147 3147 f1 = fctx.revision(change[0][0] or -1)
3148 3148 f2 = fctx.revision(change[1][0] or -1)
3149 3149 textpairs.append((f1, f2))
3150 3150 else:
3151 3151 dp = r.deltaparent(rev)
3152 3152 textpairs.append((r.revision(dp), r.revision(rev)))
3153 3153
3154 3154 def d():
3155 3155 for left, right in textpairs:
3156 3156 # The date strings don't matter, so we pass empty strings.
3157 3157 headerlines, hunks = mdiff.unidiff(
3158 3158 left, b'', right, b'', b'left', b'right', binary=False
3159 3159 )
3160 3160 # consume iterators in roughly the way patch.py does
3161 3161 b'\n'.join(headerlines)
3162 3162 b''.join(sum((list(hlines) for hrange, hlines in hunks), []))
3163 3163
3164 3164 timer, fm = gettimer(ui, opts)
3165 3165 timer(d)
3166 3166 fm.end()
3167 3167
3168 3168
3169 3169 @command(b'perf::diffwd|perfdiffwd', formatteropts)
3170 3170 def perfdiffwd(ui, repo, **opts):
3171 3171 """Profile diff of working directory changes"""
3172 3172 opts = _byteskwargs(opts)
3173 3173 timer, fm = gettimer(ui, opts)
3174 3174 options = {
3175 3175 'w': 'ignore_all_space',
3176 3176 'b': 'ignore_space_change',
3177 3177 'B': 'ignore_blank_lines',
3178 3178 }
3179 3179
3180 3180 for diffopt in ('', 'w', 'b', 'B', 'wB'):
3181 3181 opts = {options[c]: b'1' for c in diffopt}
3182 3182
3183 3183 def d():
3184 3184 ui.pushbuffer()
3185 3185 commands.diff(ui, repo, **opts)
3186 3186 ui.popbuffer()
3187 3187
3188 3188 diffopt = diffopt.encode('ascii')
3189 3189 title = b'diffopts: %s' % (diffopt and (b'-' + diffopt) or b'none')
3190 3190 timer(d, title=title)
3191 3191 fm.end()
3192 3192
3193 3193
3194 3194 @command(
3195 3195 b'perf::revlogindex|perfrevlogindex',
3196 3196 revlogopts + formatteropts,
3197 3197 b'-c|-m|FILE',
3198 3198 )
3199 3199 def perfrevlogindex(ui, repo, file_=None, **opts):
3200 3200 """Benchmark operations against a revlog index.
3201 3201
3202 3202 This tests constructing a revlog instance, reading index data,
3203 3203 parsing index data, and performing various operations related to
3204 3204 index data.
3205 3205 """
3206 3206
3207 3207 opts = _byteskwargs(opts)
3208 3208
3209 3209 rl = cmdutil.openrevlog(repo, b'perfrevlogindex', file_, opts)
3210 3210
3211 3211 opener = getattr(rl, 'opener') # trick linter
3212 3212 # compat with hg <= 5.8
3213 3213 radix = getattr(rl, 'radix', None)
3214 3214 indexfile = getattr(rl, '_indexfile', None)
3215 3215 if indexfile is None:
3216 3216 # compatibility with <= hg-5.8
3217 3217 indexfile = getattr(rl, 'indexfile')
3218 3218 data = opener.read(indexfile)
3219 3219
3220 3220 header = struct.unpack(b'>I', data[0:4])[0]
3221 3221 version = header & 0xFFFF
3222 3222 if version == 1:
3223 3223 inline = header & (1 << 16)
3224 3224 else:
3225 3225 raise error.Abort(b'unsupported revlog version: %d' % version)
3226 3226
3227 3227 parse_index_v1 = getattr(mercurial.revlog, 'parse_index_v1', None)
3228 3228 if parse_index_v1 is None:
3229 3229 parse_index_v1 = mercurial.revlog.revlogio().parseindex
3230 3230
3231 3231 rllen = len(rl)
3232 3232
3233 3233 node0 = rl.node(0)
3234 3234 node25 = rl.node(rllen // 4)
3235 3235 node50 = rl.node(rllen // 2)
3236 3236 node75 = rl.node(rllen // 4 * 3)
3237 3237 node100 = rl.node(rllen - 1)
3238 3238
3239 3239 allrevs = range(rllen)
3240 3240 allrevsrev = list(reversed(allrevs))
3241 3241 allnodes = [rl.node(rev) for rev in range(rllen)]
3242 3242 allnodesrev = list(reversed(allnodes))
3243 3243
3244 3244 def constructor():
3245 3245 if radix is not None:
3246 3246 revlog(opener, radix=radix)
3247 3247 else:
3248 3248 # hg <= 5.8
3249 3249 revlog(opener, indexfile=indexfile)
3250 3250
3251 3251 def read():
3252 3252 with opener(indexfile) as fh:
3253 3253 fh.read()
3254 3254
3255 3255 def parseindex():
3256 3256 parse_index_v1(data, inline)
3257 3257
3258 3258 def getentry(revornode):
3259 3259 index = parse_index_v1(data, inline)[0]
3260 3260 index[revornode]
3261 3261
3262 3262 def getentries(revs, count=1):
3263 3263 index = parse_index_v1(data, inline)[0]
3264 3264
3265 3265 for i in range(count):
3266 3266 for rev in revs:
3267 3267 index[rev]
3268 3268
3269 3269 def resolvenode(node):
3270 3270 index = parse_index_v1(data, inline)[0]
3271 3271 rev = getattr(index, 'rev', None)
3272 3272 if rev is None:
3273 3273 nodemap = getattr(parse_index_v1(data, inline)[0], 'nodemap', None)
3274 3274 # This only works for the C code.
3275 3275 if nodemap is None:
3276 3276 return
3277 3277 rev = nodemap.__getitem__
3278 3278
3279 3279 try:
3280 3280 rev(node)
3281 3281 except error.RevlogError:
3282 3282 pass
3283 3283
3284 3284 def resolvenodes(nodes, count=1):
3285 3285 index = parse_index_v1(data, inline)[0]
3286 3286 rev = getattr(index, 'rev', None)
3287 3287 if rev is None:
3288 3288 nodemap = getattr(parse_index_v1(data, inline)[0], 'nodemap', None)
3289 3289 # This only works for the C code.
3290 3290 if nodemap is None:
3291 3291 return
3292 3292 rev = nodemap.__getitem__
3293 3293
3294 3294 for i in range(count):
3295 3295 for node in nodes:
3296 3296 try:
3297 3297 rev(node)
3298 3298 except error.RevlogError:
3299 3299 pass
3300 3300
3301 3301 benches = [
3302 3302 (constructor, b'revlog constructor'),
3303 3303 (read, b'read'),
3304 3304 (parseindex, b'create index object'),
3305 3305 (lambda: getentry(0), b'retrieve index entry for rev 0'),
3306 3306 (lambda: resolvenode(b'a' * 20), b'look up missing node'),
3307 3307 (lambda: resolvenode(node0), b'look up node at rev 0'),
3308 3308 (lambda: resolvenode(node25), b'look up node at 1/4 len'),
3309 3309 (lambda: resolvenode(node50), b'look up node at 1/2 len'),
3310 3310 (lambda: resolvenode(node75), b'look up node at 3/4 len'),
3311 3311 (lambda: resolvenode(node100), b'look up node at tip'),
3312 3312 # 2x variation is to measure caching impact.
3313 3313 (lambda: resolvenodes(allnodes), b'look up all nodes (forward)'),
3314 3314 (lambda: resolvenodes(allnodes, 2), b'look up all nodes 2x (forward)'),
3315 3315 (lambda: resolvenodes(allnodesrev), b'look up all nodes (reverse)'),
3316 3316 (
3317 3317 lambda: resolvenodes(allnodesrev, 2),
3318 3318 b'look up all nodes 2x (reverse)',
3319 3319 ),
3320 3320 (lambda: getentries(allrevs), b'retrieve all index entries (forward)'),
3321 3321 (
3322 3322 lambda: getentries(allrevs, 2),
3323 3323 b'retrieve all index entries 2x (forward)',
3324 3324 ),
3325 3325 (
3326 3326 lambda: getentries(allrevsrev),
3327 3327 b'retrieve all index entries (reverse)',
3328 3328 ),
3329 3329 (
3330 3330 lambda: getentries(allrevsrev, 2),
3331 3331 b'retrieve all index entries 2x (reverse)',
3332 3332 ),
3333 3333 ]
3334 3334
3335 3335 for fn, title in benches:
3336 3336 timer, fm = gettimer(ui, opts)
3337 3337 timer(fn, title=title)
3338 3338 fm.end()
3339 3339
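# Illustrative `perf::revlogindex` invocations (the file path is a placeholder
# for a tracked file):
#
#   $ hg perf::revlogindex -c            # changelog index
#   $ hg perf::revlogindex -m            # manifest index
#   $ hg perf::revlogindex path/to/file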
3340 3340
3341 3341 @command(
3342 3342 b'perf::revlogrevisions|perfrevlogrevisions',
3343 3343 revlogopts
3344 3344 + formatteropts
3345 3345 + [
3346 3346 (b'd', b'dist', 100, b'distance between the revisions'),
3347 3347 (b's', b'startrev', 0, b'revision to start reading at'),
3348 3348 (b'', b'reverse', False, b'read in reverse'),
3349 3349 ],
3350 3350 b'-c|-m|FILE',
3351 3351 )
3352 3352 def perfrevlogrevisions(
3353 3353 ui, repo, file_=None, startrev=0, reverse=False, **opts
3354 3354 ):
3355 3355 """Benchmark reading a series of revisions from a revlog.
3356 3356
3357 3357 By default, we read every ``-d/--dist`` revision from 0 to tip of
3358 3358 the specified revlog.
3359 3359
3360 3360 The start revision can be defined via ``-s/--startrev``.
3361 3361 """
3362 3362 opts = _byteskwargs(opts)
3363 3363
3364 3364 rl = cmdutil.openrevlog(repo, b'perfrevlogrevisions', file_, opts)
3365 3365 rllen = getlen(ui)(rl)
3366 3366
3367 3367 if startrev < 0:
3368 3368 startrev = rllen + startrev
3369 3369
3370 3370 def d():
3371 3371 rl.clearcaches()
3372 3372
3373 3373 beginrev = startrev
3374 3374 endrev = rllen
3375 3375 dist = opts[b'dist']
3376 3376
3377 3377 if reverse:
3378 3378 beginrev, endrev = endrev - 1, beginrev - 1
3379 3379 dist = -1 * dist
3380 3380
3381 3381 for x in _xrange(beginrev, endrev, dist):
3382 3382 # Old revisions don't support passing int.
3383 3383 n = rl.node(x)
3384 3384 rl.revision(n)
3385 3385
3386 3386 timer, fm = gettimer(ui, opts)
3387 3387 timer(d)
3388 3388 fm.end()
3389 3389
3390 3390
3391 3391 @command(
3392 3392 b'perf::revlogwrite|perfrevlogwrite',
3393 3393 revlogopts
3394 3394 + formatteropts
3395 3395 + [
3396 3396 (b's', b'startrev', 1000, b'revision to start writing at'),
3397 3397 (b'', b'stoprev', -1, b'last revision to write'),
3398 3398 (b'', b'count', 3, b'number of passes to perform'),
3399 3399 (b'', b'details', False, b'print timing for every revisions tested'),
3400 3400 (b'', b'source', b'full', b'the kind of data feed in the revlog'),
3401 3401 (b'', b'lazydeltabase', True, b'try the provided delta first'),
3402 3402 (b'', b'clear-caches', True, b'clear revlog cache between calls'),
3403 3403 ],
3404 3404 b'-c|-m|FILE',
3405 3405 )
3406 3406 def perfrevlogwrite(ui, repo, file_=None, startrev=1000, stoprev=-1, **opts):
3407 3407 """Benchmark writing a series of revisions to a revlog.
3408 3408
3409 3409 Possible source values are:
3410 3410 * `full`: add from a full text (default).
3411 3411 * `parent-1`: add from a delta to the first parent
3412 3412 * `parent-2`: add from a delta to the second parent if it exists
3413 3413 (use a delta from the first parent otherwise)
3414 3414 * `parent-smallest`: add from the smallest delta (either p1 or p2)
3415 3415 * `storage`: add from the existing precomputed deltas
3416 3416
3417 3417 Note: This performance command measures performance in a custom way. As a
3418 3418 result some of the global configuration of the 'perf' command does not
3419 3419 apply to it:
3420 3420
3421 3421 * ``pre-run``: disabled
3422 3422
3423 3423 * ``profile-benchmark``: disabled
3424 3424
3425 3425 * ``run-limits``: disabled, use --count instead
3426 3426 """
3427 3427 opts = _byteskwargs(opts)
3428 3428
3429 3429 rl = cmdutil.openrevlog(repo, b'perfrevlogwrite', file_, opts)
3430 3430 rllen = getlen(ui)(rl)
3431 3431 if startrev < 0:
3432 3432 startrev = rllen + startrev
3433 3433 if stoprev < 0:
3434 3434 stoprev = rllen + stoprev
3435 3435
3436 3436 lazydeltabase = opts['lazydeltabase']
3437 3437 source = opts['source']
3438 3438 clearcaches = opts['clear_caches']
3439 3439 validsource = (
3440 3440 b'full',
3441 3441 b'parent-1',
3442 3442 b'parent-2',
3443 3443 b'parent-smallest',
3444 3444 b'storage',
3445 3445 )
3446 3446 if source not in validsource:
3447 3447 raise error.Abort('invalid source type: %s' % source)
3448 3448
3449 3449 ### actually gather results
3450 3450 count = opts['count']
3451 3451 if count <= 0:
3452 3452 raise error.Abort('invalid run count: %d' % count)
3453 3453 allresults = []
3454 3454 for c in range(count):
3455 3455 timing = _timeonewrite(
3456 3456 ui,
3457 3457 rl,
3458 3458 source,
3459 3459 startrev,
3460 3460 stoprev,
3461 3461 c + 1,
3462 3462 lazydeltabase=lazydeltabase,
3463 3463 clearcaches=clearcaches,
3464 3464 )
3465 3465 allresults.append(timing)
3466 3466
3467 3467 ### consolidate the results in a single list
3468 3468 results = []
3469 3469 for idx, (rev, t) in enumerate(allresults[0]):
3470 3470 ts = [t]
3471 3471 for other in allresults[1:]:
3472 3472 orev, ot = other[idx]
3473 3473 assert orev == rev
3474 3474 ts.append(ot)
3475 3475 results.append((rev, ts))
3476 3476 resultcount = len(results)
3477 3477
3478 3478 ### Compute and display relevant statistics
3479 3479
3480 3480 # get a formatter
3481 3481 fm = ui.formatter(b'perf', opts)
3482 3482 displayall = ui.configbool(b"perf", b"all-timing", True)
3483 3483
3484 3484 # print individual details if requested
3485 3485 if opts['details']:
3486 3486 for idx, item in enumerate(results, 1):
3487 3487 rev, data = item
3488 3488 title = 'revisions #%d of %d, rev %d' % (idx, resultcount, rev)
3489 3489 formatone(fm, data, title=title, displayall=displayall)
3490 3490
3491 3491 # sorts results by median time
3492 3492 results.sort(key=lambda x: sorted(x[1])[len(x[1]) // 2])
3493 3493 # list of (name, index) to display
3494 3494 relevants = [
3495 3495 ("min", 0),
3496 3496 ("10%", resultcount * 10 // 100),
3497 3497 ("25%", resultcount * 25 // 100),
3498 3498 ("50%", resultcount * 50 // 100),
3499 3499 ("75%", resultcount * 75 // 100),
3500 3500 ("90%", resultcount * 90 // 100),
3501 3501 ("95%", resultcount * 95 // 100),
3502 3502 ("99%", resultcount * 99 // 100),
3503 3503 ("99.9%", resultcount * 999 // 1000),
3504 3504 ("99.99%", resultcount * 9999 // 10000),
3505 3505 ("99.999%", resultcount * 99999 // 100000),
3506 3506 ("max", -1),
3507 3507 ]
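# Worked example (not from the source): with resultcount == 1000 the index
# expressions above select entries 0, 100, 250, 500, 750, 900, 950, 990,
# 999, 999, 999 and -1 (the slowest run) of the median-sorted results.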
3508 3508 if not ui.quiet:
3509 3509 for name, idx in relevants:
3510 3510 data = results[idx]
3511 3511 title = '%s of %d, rev %d' % (name, resultcount, data[0])
3512 3512 formatone(fm, data[1], title=title, displayall=displayall)
3513 3513
3514 3514 # XXX summing that many floats will not be very precise, we ignore this fact
3515 3515 # for now
3516 3516 totaltime = []
3517 3517 for item in allresults:
3518 3518 totaltime.append(
3519 3519 (
3520 3520 sum(x[1][0] for x in item),
3521 3521 sum(x[1][1] for x in item),
3522 3522 sum(x[1][2] for x in item),
3523 3523 )
3524 3524 )
3525 3525 formatone(
3526 3526 fm,
3527 3527 totaltime,
3528 3528 title="total time (%d revs)" % resultcount,
3529 3529 displayall=displayall,
3530 3530 )
3531 3531 fm.end()
3532 3532
3533 3533
3534 3534 class _faketr:
3535 3535 def add(s, x, y, z=None):
3536 3536 return None
3537 3537
3538 3538
3539 3539 def _timeonewrite(
3540 3540 ui,
3541 3541 orig,
3542 3542 source,
3543 3543 startrev,
3544 3544 stoprev,
3545 3545 runidx=None,
3546 3546 lazydeltabase=True,
3547 3547 clearcaches=True,
3548 3548 ):
3549 3549 timings = []
3550 3550 tr = _faketr()
3551 3551 with _temprevlog(ui, orig, startrev) as dest:
3552 3552 if hasattr(dest, "delta_config"):
3553 3553 dest.delta_config.lazy_delta_base = lazydeltabase
3554 3554 else:
3555 3555 dest._lazydeltabase = lazydeltabase
3556 3556 revs = list(orig.revs(startrev, stoprev))
3557 3557 total = len(revs)
3558 3558 topic = 'adding'
3559 3559 if runidx is not None:
3560 3560 topic += ' (run #%d)' % runidx
3561 3561 # Support both old and new progress API
3562 3562 if util.safehasattr(ui, 'makeprogress'):
3563 3563 progress = ui.makeprogress(topic, unit='revs', total=total)
3564 3564
3565 3565 def updateprogress(pos):
3566 3566 progress.update(pos)
3567 3567
3568 3568 def completeprogress():
3569 3569 progress.complete()
3570 3570
3571 3571 else:
3572 3572
3573 3573 def updateprogress(pos):
3574 3574 ui.progress(topic, pos, unit='revs', total=total)
3575 3575
3576 3576 def completeprogress():
3577 3577 ui.progress(topic, None, unit='revs', total=total)
3578 3578
3579 3579 for idx, rev in enumerate(revs):
3580 3580 updateprogress(idx)
3581 3581 addargs, addkwargs = _getrevisionseed(orig, rev, tr, source)
3582 3582 if clearcaches:
3583 3583 dest.index.clearcaches()
3584 3584 dest.clearcaches()
3585 3585 with timeone() as r:
3586 3586 dest.addrawrevision(*addargs, **addkwargs)
3587 3587 timings.append((rev, r[0]))
3588 3588 updateprogress(total)
3589 3589 completeprogress()
3590 3590 return timings
3591 3591
3592 3592
3593 3593 def _getrevisionseed(orig, rev, tr, source):
3594 3594 from mercurial.node import nullid
3595 3595
3596 3596 linkrev = orig.linkrev(rev)
3597 3597 node = orig.node(rev)
3598 3598 p1, p2 = orig.parents(node)
3599 3599 flags = orig.flags(rev)
3600 3600 cachedelta = None
3601 3601 text = None
3602 3602
3603 3603 if source == b'full':
3604 3604 text = orig.revision(rev)
3605 3605 elif source == b'parent-1':
3606 3606 baserev = orig.rev(p1)
3607 3607 cachedelta = (baserev, orig.revdiff(p1, rev))
3608 3608 elif source == b'parent-2':
3609 3609 parent = p2
3610 3610 if p2 == nullid:
3611 3611 parent = p1
3612 3612 baserev = orig.rev(parent)
3613 3613 cachedelta = (baserev, orig.revdiff(parent, rev))
3614 3614 elif source == b'parent-smallest':
3615 3615 p1diff = orig.revdiff(p1, rev)
3616 3616 parent = p1
3617 3617 diff = p1diff
3618 3618 if p2 != nullid:
3619 3619 p2diff = orig.revdiff(p2, rev)
3620 3620 if len(p1diff) > len(p2diff):
3621 3621 parent = p2
3622 3622 diff = p2diff
3623 3623 baserev = orig.rev(parent)
3624 3624 cachedelta = (baserev, diff)
3625 3625 elif source == b'storage':
3626 3626 baserev = orig.deltaparent(rev)
3627 3627 cachedelta = (baserev, orig.revdiff(orig.node(baserev), rev))
3628 3628
3629 3629 return (
3630 3630 (text, tr, linkrev, p1, p2),
3631 3631 {'node': node, 'flags': flags, 'cachedelta': cachedelta},
3632 3632 )
3633 3633
3634 3634
3635 3635 @contextlib.contextmanager
3636 3636 def _temprevlog(ui, orig, truncaterev):
3637 3637 from mercurial import vfs as vfsmod
3638 3638
3639 3639 if orig._inline:
3640 3640 raise error.Abort('not supporting inline revlog (yet)')
3641 3641 revlogkwargs = {}
3642 3642 k = 'upperboundcomp'
3643 3643 if util.safehasattr(orig, k):
3644 3644 revlogkwargs[k] = getattr(orig, k)
3645 3645
3646 3646 indexfile = getattr(orig, '_indexfile', None)
3647 3647 if indexfile is None:
3648 3648 # compatibility with <= hg-5.8
3649 3649 indexfile = getattr(orig, 'indexfile')
3650 3650 origindexpath = orig.opener.join(indexfile)
3651 3651
3652 3652 datafile = getattr(orig, '_datafile', getattr(orig, 'datafile'))
3653 3653 origdatapath = orig.opener.join(datafile)
3654 3654 radix = b'revlog'
3655 3655 indexname = b'revlog.i'
3656 3656 dataname = b'revlog.d'
3657 3657
3658 3658 tmpdir = tempfile.mkdtemp(prefix='tmp-hgperf-')
3659 3659 try:
3660 3660 # copy the data file in a temporary directory
3661 3661 ui.debug('copying data in %s\n' % tmpdir)
3662 3662 destindexpath = os.path.join(tmpdir, 'revlog.i')
3663 3663 destdatapath = os.path.join(tmpdir, 'revlog.d')
3664 3664 shutil.copyfile(origindexpath, destindexpath)
3665 3665 shutil.copyfile(origdatapath, destdatapath)
3666 3666
3667 3667 # remove the data we want to add again
3668 3668 ui.debug('truncating data to be rewritten\n')
3669 3669 with open(destindexpath, 'ab') as index:
3670 3670 index.seek(0)
3671 3671 index.truncate(truncaterev * orig._io.size)
3672 3672 with open(destdatapath, 'ab') as data:
3673 3673 data.seek(0)
3674 3674 data.truncate(orig.start(truncaterev))
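# (index entries are fixed-size, so revision N's entry starts at
#  N * entry-size; orig.start(N) is the byte offset of N's data chunk)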
3675 3675
3676 3676 # instantiate a new revlog from the temporary copy
3677 3677 ui.debug('instantiating revlog from the truncated copy\n')
3678 3678 vfs = vfsmod.vfs(tmpdir)
3679 3679 vfs.options = getattr(orig.opener, 'options', None)
3680 3680
3681 3681 try:
3682 3682 dest = revlog(vfs, radix=radix, **revlogkwargs)
3683 3683 except TypeError:
3684 3684 dest = revlog(
3685 3685 vfs, indexfile=indexname, datafile=dataname, **revlogkwargs
3686 3686 )
3687 3687 if dest._inline:
3688 3688 raise error.Abort('not supporting inline revlog (yet)')
3689 3689 # make sure internals are initialized
3690 3690 dest.revision(len(dest) - 1)
3691 3691 yield dest
3692 3692 del dest, vfs
3693 3693 finally:
3694 3694 shutil.rmtree(tmpdir, True)
3695 3695
3696 3696
3697 3697 @command(
3698 3698 b'perf::revlogchunks|perfrevlogchunks',
3699 3699 revlogopts
3700 3700 + formatteropts
3701 3701 + [
3702 3702 (b'e', b'engines', b'', b'compression engines to use'),
3703 3703 (b's', b'startrev', 0, b'revision to start at'),
3704 3704 ],
3705 3705 b'-c|-m|FILE',
3706 3706 )
3707 3707 def perfrevlogchunks(ui, repo, file_=None, engines=None, startrev=0, **opts):
3708 3708 """Benchmark operations on revlog chunks.
3709 3709
3710 3710 Logically, each revlog is a collection of fulltext revisions. However,
3711 3711 stored within each revlog are "chunks" of possibly compressed data. This
3712 3712 data needs to be read and decompressed or compressed and written.
3713 3713
3714 3714 This command measures the time it takes to read+decompress and recompress
3715 3715 chunks in a revlog. It effectively isolates I/O and compression performance.
3716 3716 For measurements of higher-level operations like resolving revisions,
3717 3717 see ``perfrevlogrevisions`` and ``perfrevlogrevision``.
3718 3718 """
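# Conceptual sketch of the single-revision cycle the benchmarks below time
# (these are private revlog helpers used further down; exact names vary
# between Mercurial versions):
#
#   offset, data = segmentforrevs(rev, rev)  # raw, possibly compressed segment
#   chunk = rl._chunk(rev)                   # same read plus decompression
#   rl.compress(chunk)                       # recompress with the active engine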
3719 3719 opts = _byteskwargs(opts)
3720 3720
3721 3721 rl = cmdutil.openrevlog(repo, b'perfrevlogchunks', file_, opts)
3722 3722
3723 # _chunkraw was renamed to _getsegmentforrevs.
3723 # - _chunkraw was renamed to _getsegmentforrevs
3724 # - _getsegmentforrevs was moved to the inner object
3725 try:
3726 segmentforrevs = rl._inner.get_segment_for_revs
3727 except AttributeError:
3724 3728 try:
3725 3729 segmentforrevs = rl._getsegmentforrevs
3726 3730 except AttributeError:
3727 3731 segmentforrevs = rl._chunkraw
3728 3732
3729 3733 # Verify engines argument.
3730 3734 if engines:
3731 3735 engines = {e.strip() for e in engines.split(b',')}
3732 3736 for engine in engines:
3733 3737 try:
3734 3738 util.compressionengines[engine]
3735 3739 except KeyError:
3736 3740 raise error.Abort(b'unknown compression engine: %s' % engine)
3737 3741 else:
3738 3742 engines = []
3739 3743 for e in util.compengines:
3740 3744 engine = util.compengines[e]
3741 3745 try:
3742 3746 if engine.available():
3743 3747 engine.revlogcompressor().compress(b'dummy')
3744 3748 engines.append(e)
3745 3749 except NotImplementedError:
3746 3750 pass
3747 3751
3748 3752 revs = list(rl.revs(startrev, len(rl) - 1))
3749 3753
3750 3754 @contextlib.contextmanager
3751 3755 def reading(rl):
3752 3756 if getattr(rl, 'reading', None) is not None:
3753 3757 with rl.reading():
3754 3758 yield None
3755 3759 elif rl._inline:
3756 3760 indexfile = getattr(rl, '_indexfile', None)
3757 3761 if indexfile is None:
3758 3762 # compatibility with <= hg-5.8
3759 3763 indexfile = getattr(rl, 'indexfile')
3760 3764 yield getsvfs(repo)(indexfile)
3761 3765 else:
3762 3766 datafile = getattr(rl, '_datafile', getattr(rl, 'datafile'))
3763 3767 yield getsvfs(repo)(datafile)
3764 3768
3765 3769 if getattr(rl, 'reading', None) is not None:
3766 3770
3767 3771 @contextlib.contextmanager
3768 3772 def lazy_reading(rl):
3769 3773 with rl.reading():
3770 3774 yield
3771 3775
3772 3776 else:
3773 3777
3774 3778 @contextlib.contextmanager
3775 3779 def lazy_reading(rl):
3776 3780 yield
3777 3781
3778 3782 def doread():
3779 3783 rl.clearcaches()
3780 3784 for rev in revs:
3781 3785 with lazy_reading(rl):
3782 3786 segmentforrevs(rev, rev)
3783 3787
3784 3788 def doreadcachedfh():
3785 3789 rl.clearcaches()
3786 3790 with reading(rl) as fh:
3787 3791 if fh is not None:
3788 3792 for rev in revs:
3789 3793 segmentforrevs(rev, rev, df=fh)
3790 3794 else:
3791 3795 for rev in revs:
3792 3796 segmentforrevs(rev, rev)
3793 3797
3794 3798 def doreadbatch():
3795 3799 rl.clearcaches()
3796 3800 with lazy_reading(rl):
3797 3801 segmentforrevs(revs[0], revs[-1])
3798 3802
3799 3803 def doreadbatchcachedfh():
3800 3804 rl.clearcaches()
3801 3805 with reading(rl) as fh:
3802 3806 if fh is not None:
3803 3807 segmentforrevs(revs[0], revs[-1], df=fh)
3804 3808 else:
3805 3809 segmentforrevs(revs[0], revs[-1])
3806 3810
3807 3811 def dochunk():
3808 3812 rl.clearcaches()
3809 3813 with reading(rl) as fh:
3810 3814 if fh is not None:
3811 3815 for rev in revs:
3812 3816 rl._chunk(rev, df=fh)
3813 3817 else:
3814 3818 for rev in revs:
3815 3819 rl._chunk(rev)
3816 3820
3817 3821 chunks = [None]
3818 3822
3819 3823 def dochunkbatch():
3820 3824 rl.clearcaches()
3821 3825 with reading(rl) as fh:
3822 3826 if fh is not None:
3823 3827 # Save chunks as a side-effect.
3824 3828 chunks[0] = rl._chunks(revs, df=fh)
3825 3829 else:
3826 3830 # Save chunks as a side-effect.
3827 3831 chunks[0] = rl._chunks(revs)
3828 3832
3829 3833 def docompress(compressor):
3830 3834 rl.clearcaches()
3831 3835
3832 3836 try:
3833 3837 # Swap in the requested compression engine.
3834 3838 oldcompressor = rl._compressor
3835 3839 rl._compressor = compressor
3836 3840 for chunk in chunks[0]:
3837 3841 rl.compress(chunk)
3838 3842 finally:
3839 3843 rl._compressor = oldcompressor
3840 3844
3841 3845 benches = [
3842 3846 (lambda: doread(), b'read'),
3843 3847 (lambda: doreadcachedfh(), b'read w/ reused fd'),
3844 3848 (lambda: doreadbatch(), b'read batch'),
3845 3849 (lambda: doreadbatchcachedfh(), b'read batch w/ reused fd'),
3846 3850 (lambda: dochunk(), b'chunk'),
3847 3851 (lambda: dochunkbatch(), b'chunk batch'),
3848 3852 ]
3849 3853
3850 3854 for engine in sorted(engines):
3851 3855 compressor = util.compengines[engine].revlogcompressor()
3852 3856 benches.append(
3853 3857 (
3854 3858 functools.partial(docompress, compressor),
3855 3859 b'compress w/ %s' % engine,
3856 3860 )
3857 3861 )
3858 3862
3859 3863 for fn, title in benches:
3860 3864 timer, fm = gettimer(ui, opts)
3861 3865 timer(fn, title=title)
3862 3866 fm.end()
3863 3867
3864 3868
3865 3869 @command(
3866 3870 b'perf::revlogrevision|perfrevlogrevision',
3867 3871 revlogopts
3868 3872 + formatteropts
3869 3873 + [(b'', b'cache', False, b'use caches instead of clearing')],
3870 3874 b'-c|-m|FILE REV',
3871 3875 )
3872 3876 def perfrevlogrevision(ui, repo, file_, rev=None, cache=None, **opts):
3873 3877 """Benchmark obtaining a revlog revision.
3874 3878
3875 3879 Obtaining a revlog revision consists of roughly the following steps:
3876 3880
3877 3881 1. Compute the delta chain
3878 3882 2. Slice the delta chain if applicable
3879 3883 3. Obtain the raw chunks for that delta chain
3880 3884 4. Decompress each raw chunk
3881 3885 5. Apply binary patches to obtain fulltext
3882 3886 6. Verify hash of fulltext
3883 3887
3884 3888 This command measures the time spent in each of these phases.
3885 3889 """
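# Rough mapping from the steps above to the benchmark titles defined below
# (sketch only; the helpers are private and set up later in this function):
#
#   1. deltachain          -> r._deltachain(rev)
#   2. slice-sparse-chain  -> slicechunk(r, chain, targetsize=size)
#   3. read / rawchunks    -> segmentforrevs(...) and getrawchunks(...)
#   4. decompress          -> r.decompress(chunk)
#   5. patch               -> mdiff.patches(text, bins)
#   6. hash                -> r.checkhash(text, node, rev=rev)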
3886 3890 opts = _byteskwargs(opts)
3887 3891
3888 3892 if opts.get(b'changelog') or opts.get(b'manifest'):
3889 3893 file_, rev = None, file_
3890 3894 elif rev is None:
3891 3895 raise error.CommandError(b'perfrevlogrevision', b'invalid arguments')
3892 3896
3893 3897 r = cmdutil.openrevlog(repo, b'perfrevlogrevision', file_, opts)
3894 3898
3895 3899 # _chunkraw was renamed to _getsegmentforrevs.
3896 3900 try:
3901 segmentforrevs = r._inner.get_segment_for_revs
3902 except AttributeError:
3903 try:
3897 3904 segmentforrevs = r._getsegmentforrevs
3898 3905 except AttributeError:
3899 3906 segmentforrevs = r._chunkraw
3900 3907
3901 3908 node = r.lookup(rev)
3902 3909 rev = r.rev(node)
3903 3910
3904 3911 if getattr(r, 'reading', None) is not None:
3905 3912
3906 3913 @contextlib.contextmanager
3907 3914 def lazy_reading(r):
3908 3915 with r.reading():
3909 3916 yield
3910 3917
3911 3918 else:
3912 3919
3913 3920 @contextlib.contextmanager
3914 3921 def lazy_reading(r):
3915 3922 yield
3916 3923
3917 3924 def getrawchunks(data, chain):
3918 3925 start = r.start
3919 3926 length = r.length
3920 3927 inline = r._inline
3921 3928 try:
3922 3929 iosize = r.index.entry_size
3923 3930 except AttributeError:
3924 3931 iosize = r._io.size
3925 3932 buffer = util.buffer
3926 3933
3927 3934 chunks = []
3928 3935 ladd = chunks.append
3929 3936 for idx, item in enumerate(chain):
3930 3937 offset = start(item[0])
3931 3938 bits = data[idx]
3932 3939 for rev in item:
3933 3940 chunkstart = start(rev)
3934 3941 if inline:
3935 3942 chunkstart += (rev + 1) * iosize
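# inline revlogs interleave index entries with the data, so skip the
# (rev + 1) index entries stored before this chunk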
3936 3943 chunklength = length(rev)
3937 3944 ladd(buffer(bits, chunkstart - offset, chunklength))
3938 3945
3939 3946 return chunks
3940 3947
3941 3948 def dodeltachain(rev):
3942 3949 if not cache:
3943 3950 r.clearcaches()
3944 3951 r._deltachain(rev)
3945 3952
3946 3953 def doread(chain):
3947 3954 if not cache:
3948 3955 r.clearcaches()
3949 3956 for item in slicedchain:
3950 3957 with lazy_reading(r):
3951 3958 segmentforrevs(item[0], item[-1])
3952 3959
3953 3960 def doslice(r, chain, size):
3954 3961 for s in slicechunk(r, chain, targetsize=size):
3955 3962 pass
3956 3963
3957 3964 def dorawchunks(data, chain):
3958 3965 if not cache:
3959 3966 r.clearcaches()
3960 3967 getrawchunks(data, chain)
3961 3968
3962 3969 def dodecompress(chunks):
3963 3970 decomp = r.decompress
3964 3971 for chunk in chunks:
3965 3972 decomp(chunk)
3966 3973
3967 3974 def dopatch(text, bins):
3968 3975 if not cache:
3969 3976 r.clearcaches()
3970 3977 mdiff.patches(text, bins)
3971 3978
3972 3979 def dohash(text):
3973 3980 if not cache:
3974 3981 r.clearcaches()
3975 3982 r.checkhash(text, node, rev=rev)
3976 3983
3977 3984 def dorevision():
3978 3985 if not cache:
3979 3986 r.clearcaches()
3980 3987 r.revision(node)
3981 3988
3982 3989 try:
3983 3990 from mercurial.revlogutils.deltas import slicechunk
3984 3991 except ImportError:
3985 3992 slicechunk = getattr(revlog, '_slicechunk', None)
3986 3993
3987 3994 size = r.length(rev)
3988 3995 chain = r._deltachain(rev)[0]
3989 3996
3990 3997 with_sparse_read = False
3991 3998 if hasattr(r, 'data_config'):
3992 3999 with_sparse_read = r.data_config.with_sparse_read
3993 4000 elif hasattr(r, '_withsparseread'):
3994 4001 with_sparse_read = r._withsparseread
3995 4002 if with_sparse_read:
3996 4003 slicedchain = (chain,)
3997 4004 else:
3998 4005 slicedchain = tuple(slicechunk(r, chain, targetsize=size))
3999 4006 data = [segmentforrevs(seg[0], seg[-1])[1] for seg in slicedchain]
4000 4007 rawchunks = getrawchunks(data, slicedchain)
4001 4008 bins = r._chunks(chain)
4002 4009 text = bytes(bins[0])
4003 4010 bins = bins[1:]
4004 4011 text = mdiff.patches(text, bins)
4005 4012
4006 4013 benches = [
4007 4014 (lambda: dorevision(), b'full'),
4008 4015 (lambda: dodeltachain(rev), b'deltachain'),
4009 4016 (lambda: doread(chain), b'read'),
4010 4017 ]
4011 4018
4012 4019 if with_sparse_read:
4013 4020 slicing = (lambda: doslice(r, chain, size), b'slice-sparse-chain')
4014 4021 benches.append(slicing)
4015 4022
4016 4023 benches.extend(
4017 4024 [
4018 4025 (lambda: dorawchunks(data, slicedchain), b'rawchunks'),
4019 4026 (lambda: dodecompress(rawchunks), b'decompress'),
4020 4027 (lambda: dopatch(text, bins), b'patch'),
4021 4028 (lambda: dohash(text), b'hash'),
4022 4029 ]
4023 4030 )
4024 4031
4025 4032 timer, fm = gettimer(ui, opts)
4026 4033 for fn, title in benches:
4027 4034 timer(fn, title=title)
4028 4035 fm.end()
4029 4036
4030 4037
4031 4038 @command(
4032 4039 b'perf::revset|perfrevset',
4033 4040 [
4034 4041 (b'C', b'clear', False, b'clear volatile cache between each call.'),
4035 4042 (b'', b'contexts', False, b'obtain changectx for each revision'),
4036 4043 ]
4037 4044 + formatteropts,
4038 4045 b"REVSET",
4039 4046 )
4040 4047 def perfrevset(ui, repo, expr, clear=False, contexts=False, **opts):
4041 4048 """benchmark the execution time of a revset
4042 4049
4043 4050 Use the --clear option if you need to evaluate the impact of building the
4044 4051 volatile revision set cache on revset execution. The volatile cache holds
4045 4052 filtered and obsolescence-related caches."""
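# Example invocations (hypothetical revset expressions):
#
#   $ hg perf::revset 'all()'
#   $ hg perf::revset --contexts 'heads(all())'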
4046 4053 opts = _byteskwargs(opts)
4047 4054
4048 4055 timer, fm = gettimer(ui, opts)
4049 4056
4050 4057 def d():
4051 4058 if clear:
4052 4059 repo.invalidatevolatilesets()
4053 4060 if contexts:
4054 4061 for ctx in repo.set(expr):
4055 4062 pass
4056 4063 else:
4057 4064 for r in repo.revs(expr):
4058 4065 pass
4059 4066
4060 4067 timer(d)
4061 4068 fm.end()
4062 4069
4063 4070
4064 4071 @command(
4065 4072 b'perf::volatilesets|perfvolatilesets',
4066 4073 [
4067 4074 (b'', b'clear-obsstore', False, b'drop obsstore between each call.'),
4068 4075 ]
4069 4076 + formatteropts,
4070 4077 )
4071 4078 def perfvolatilesets(ui, repo, *names, **opts):
4072 4079 """benchmark the computation of various volatile set
4073 4080
4074 4081 Volatile set computes element related to filtering and obsolescence."""
4075 4082 opts = _byteskwargs(opts)
4076 4083 timer, fm = gettimer(ui, opts)
4077 4084 repo = repo.unfiltered()
4078 4085
4079 4086 def getobs(name):
4080 4087 def d():
4081 4088 repo.invalidatevolatilesets()
4082 4089 if opts[b'clear_obsstore']:
4083 4090 clearfilecache(repo, b'obsstore')
4084 4091 obsolete.getrevs(repo, name)
4085 4092
4086 4093 return d
4087 4094
4088 4095 allobs = sorted(obsolete.cachefuncs)
4089 4096 if names:
4090 4097 allobs = [n for n in allobs if n in names]
4091 4098
4092 4099 for name in allobs:
4093 4100 timer(getobs(name), title=name)
4094 4101
4095 4102 def getfiltered(name):
4096 4103 def d():
4097 4104 repo.invalidatevolatilesets()
4098 4105 if opts[b'clear_obsstore']:
4099 4106 clearfilecache(repo, b'obsstore')
4100 4107 repoview.filterrevs(repo, name)
4101 4108
4102 4109 return d
4103 4110
4104 4111 allfilter = sorted(repoview.filtertable)
4105 4112 if names:
4106 4113 allfilter = [n for n in allfilter if n in names]
4107 4114
4108 4115 for name in allfilter:
4109 4116 timer(getfiltered(name), title=name)
4110 4117 fm.end()
4111 4118
4112 4119
4113 4120 @command(
4114 4121 b'perf::branchmap|perfbranchmap',
4115 4122 [
4116 4123 (b'f', b'full', False, b'Includes build time of subset'),
4117 4124 (
4118 4125 b'',
4119 4126 b'clear-revbranch',
4120 4127 False,
4121 4128 b'purge the revbranch cache between computation',
4122 4129 ),
4123 4130 ]
4124 4131 + formatteropts,
4125 4132 )
4126 4133 def perfbranchmap(ui, repo, *filternames, **opts):
4127 4134 """benchmark the update of a branchmap
4128 4135
4129 4136 This benchmarks the full repo.branchmap() call with read and write disabled
4130 4137 """
4131 4138 opts = _byteskwargs(opts)
4132 4139 full = opts.get(b"full", False)
4133 4140 clear_revbranch = opts.get(b"clear_revbranch", False)
4134 4141 timer, fm = gettimer(ui, opts)
4135 4142
4136 4143 def getbranchmap(filtername):
4137 4144 """generate a benchmark function for the filtername"""
4138 4145 if filtername is None:
4139 4146 view = repo
4140 4147 else:
4141 4148 view = repo.filtered(filtername)
4142 4149 if util.safehasattr(view._branchcaches, '_per_filter'):
4143 4150 filtered = view._branchcaches._per_filter
4144 4151 else:
4145 4152 # older versions
4146 4153 filtered = view._branchcaches
4147 4154
4148 4155 def d():
4149 4156 if clear_revbranch:
4150 4157 repo.revbranchcache()._clear()
4151 4158 if full:
4152 4159 view._branchcaches.clear()
4153 4160 else:
4154 4161 filtered.pop(filtername, None)
4155 4162 view.branchmap()
4156 4163
4157 4164 return d
4158 4165
4159 4166 # add filters from smaller subsets to bigger subsets
4160 4167 possiblefilters = set(repoview.filtertable)
4161 4168 if filternames:
4162 4169 possiblefilters &= set(filternames)
4163 4170 subsettable = getbranchmapsubsettable()
4164 4171 allfilters = []
4165 4172 while possiblefilters:
4166 4173 for name in possiblefilters:
4167 4174 subset = subsettable.get(name)
4168 4175 if subset not in possiblefilters:
4169 4176 break
4170 4177 else:
4171 4178 assert False, b'subset cycle %s!' % possiblefilters
4172 4179 allfilters.append(name)
4173 4180 possiblefilters.remove(name)
4174 4181
4175 4182 # warm the cache
4176 4183 if not full:
4177 4184 for name in allfilters:
4178 4185 repo.filtered(name).branchmap()
4179 4186 if not filternames or b'unfiltered' in filternames:
4180 4187 # add unfiltered
4181 4188 allfilters.append(None)
4182 4189
4183 4190 if util.safehasattr(branchmap.branchcache, 'fromfile'):
4184 4191 branchcacheread = safeattrsetter(branchmap.branchcache, b'fromfile')
4185 4192 branchcacheread.set(classmethod(lambda *args: None))
4186 4193 else:
4187 4194 # older versions
4188 4195 branchcacheread = safeattrsetter(branchmap, b'read')
4189 4196 branchcacheread.set(lambda *args: None)
4190 4197 branchcachewrite = safeattrsetter(branchmap.branchcache, b'write')
4191 4198 branchcachewrite.set(lambda *args: None)
4192 4199 try:
4193 4200 for name in allfilters:
4194 4201 printname = name
4195 4202 if name is None:
4196 4203 printname = b'unfiltered'
4197 4204 timer(getbranchmap(name), title=printname)
4198 4205 finally:
4199 4206 branchcacheread.restore()
4200 4207 branchcachewrite.restore()
4201 4208 fm.end()
4202 4209
4203 4210
4204 4211 @command(
4205 4212 b'perf::branchmapupdate|perfbranchmapupdate',
4206 4213 [
4207 4214 (b'', b'base', [], b'subset of revisions to start from'),
4208 4215 (b'', b'target', [], b'subset of revisions to end with'),
4209 4216 (b'', b'clear-caches', False, b'clear cache between each run'),
4210 4217 ]
4211 4218 + formatteropts,
4212 4219 )
4213 4220 def perfbranchmapupdate(ui, repo, base=(), target=(), **opts):
4214 4221 """benchmark branchmap update from for <base> revs to <target> revs
4215 4222
4216 4223 If `--clear-caches` is passed, the following items will be reset before
4217 4224 each update:
4218 4225 * the changelog instance and associated indexes
4219 4226 * the rev-branch-cache instance
4220 4227
4221 4228 Examples:
4222 4229
4223 4230 # update for the one last revision
4224 4231 $ hg perfbranchmapupdate --base 'not tip' --target 'tip'
4225 4232
4226 4233 # update for changes coming with a new branch
4227 4234 $ hg perfbranchmapupdate --base 'stable' --target 'default'
4228 4235 """
4229 4236 from mercurial import branchmap
4230 4237 from mercurial import repoview
4231 4238
4232 4239 opts = _byteskwargs(opts)
4233 4240 timer, fm = gettimer(ui, opts)
4234 4241 clearcaches = opts[b'clear_caches']
4235 4242 unfi = repo.unfiltered()
4236 4243 x = [None] # used to pass data between closure
4237 4244
4238 4245 # we use a `list` here to avoid possible side effect from smartset
4239 4246 baserevs = list(scmutil.revrange(repo, base))
4240 4247 targetrevs = list(scmutil.revrange(repo, target))
4241 4248 if not baserevs:
4242 4249 raise error.Abort(b'no revisions selected for --base')
4243 4250 if not targetrevs:
4244 4251 raise error.Abort(b'no revisions selected for --target')
4245 4252
4246 4253 # make sure the target branchmap also contains the one in the base
4247 4254 targetrevs = list(set(baserevs) | set(targetrevs))
4248 4255 targetrevs.sort()
4249 4256
4250 4257 cl = repo.changelog
4251 4258 allbaserevs = list(cl.ancestors(baserevs, inclusive=True))
4252 4259 allbaserevs.sort()
4253 4260 alltargetrevs = frozenset(cl.ancestors(targetrevs, inclusive=True))
4254 4261
4255 4262 newrevs = list(alltargetrevs.difference(allbaserevs))
4256 4263 newrevs.sort()
4257 4264
4258 4265 allrevs = frozenset(unfi.changelog.revs())
4259 4266 basefilterrevs = frozenset(allrevs.difference(allbaserevs))
4260 4267 targetfilterrevs = frozenset(allrevs.difference(alltargetrevs))
4261 4268
4262 4269 def basefilter(repo, visibilityexceptions=None):
4263 4270 return basefilterrevs
4264 4271
4265 4272 def targetfilter(repo, visibilityexceptions=None):
4266 4273 return targetfilterrevs
4267 4274
4268 4275 msg = b'benchmark of branchmap with %d revisions with %d new ones\n'
4269 4276 ui.status(msg % (len(allbaserevs), len(newrevs)))
4270 4277 if targetfilterrevs:
4271 4278 msg = b'(%d revisions still filtered)\n'
4272 4279 ui.status(msg % len(targetfilterrevs))
4273 4280
4274 4281 try:
4275 4282 repoview.filtertable[b'__perf_branchmap_update_base'] = basefilter
4276 4283 repoview.filtertable[b'__perf_branchmap_update_target'] = targetfilter
4277 4284
4278 4285 baserepo = repo.filtered(b'__perf_branchmap_update_base')
4279 4286 targetrepo = repo.filtered(b'__perf_branchmap_update_target')
4280 4287
4281 4288 # try to find an existing branchmap to reuse
4282 4289 subsettable = getbranchmapsubsettable()
4283 4290 candidatefilter = subsettable.get(None)
4284 4291 while candidatefilter is not None:
4285 4292 candidatebm = repo.filtered(candidatefilter).branchmap()
4286 4293 if candidatebm.validfor(baserepo):
4287 4294 filtered = repoview.filterrevs(repo, candidatefilter)
4288 4295 missing = [r for r in allbaserevs if r in filtered]
4289 4296 base = candidatebm.copy()
4290 4297 base.update(baserepo, missing)
4291 4298 break
4292 4299 candidatefilter = subsettable.get(candidatefilter)
4293 4300 else:
4294 4301 # no suitable subset was found
4295 4302 base = branchmap.branchcache()
4296 4303 base.update(baserepo, allbaserevs)
4297 4304
4298 4305 def setup():
4299 4306 x[0] = base.copy()
4300 4307 if clearcaches:
4301 4308 unfi._revbranchcache = None
4302 4309 clearchangelog(repo)
4303 4310
4304 4311 def bench():
4305 4312 x[0].update(targetrepo, newrevs)
4306 4313
4307 4314 timer(bench, setup=setup)
4308 4315 fm.end()
4309 4316 finally:
4310 4317 repoview.filtertable.pop(b'__perf_branchmap_update_base', None)
4311 4318 repoview.filtertable.pop(b'__perf_branchmap_update_target', None)
4312 4319
4313 4320
4314 4321 @command(
4315 4322 b'perf::branchmapload|perfbranchmapload',
4316 4323 [
4317 4324 (b'f', b'filter', b'', b'Specify repoview filter'),
4318 4325 (b'', b'list', False, b'List branchmap filter caches'),
4319 4326 (b'', b'clear-revlogs', False, b'refresh changelog and manifest'),
4320 4327 ]
4321 4328 + formatteropts,
4322 4329 )
4323 4330 def perfbranchmapload(ui, repo, filter=b'', list=False, **opts):
4324 4331 """benchmark reading the branchmap"""
4325 4332 opts = _byteskwargs(opts)
4326 4333 clearrevlogs = opts[b'clear_revlogs']
4327 4334
4328 4335 if list:
4329 4336 for name, kind, st in repo.cachevfs.readdir(stat=True):
4330 4337 if name.startswith(b'branch2'):
4331 4338 filtername = name.partition(b'-')[2] or b'unfiltered'
4332 4339 ui.status(
4333 4340 b'%s - %s\n' % (filtername, util.bytecount(st.st_size))
4334 4341 )
4335 4342 return
4336 4343 if not filter:
4337 4344 filter = None
4338 4345 subsettable = getbranchmapsubsettable()
4339 4346 if filter is None:
4340 4347 repo = repo.unfiltered()
4341 4348 else:
4342 4349 repo = repoview.repoview(repo, filter)
4343 4350
4344 4351 repo.branchmap() # make sure we have a relevant, up to date branchmap
4345 4352
4346 4353 try:
4347 4354 fromfile = branchmap.branchcache.fromfile
4348 4355 except AttributeError:
4349 4356 # older versions
4350 4357 fromfile = branchmap.read
4351 4358
4352 4359 currentfilter = filter
4353 4360 # try once without timer, the filter may not be cached
4354 4361 while fromfile(repo) is None:
4355 4362 currentfilter = subsettable.get(currentfilter)
4356 4363 if currentfilter is None:
4357 4364 raise error.Abort(
4358 4365 b'No branchmap cached for %s repo' % (filter or b'unfiltered')
4359 4366 )
4360 4367 repo = repo.filtered(currentfilter)
4361 4368 timer, fm = gettimer(ui, opts)
4362 4369
4363 4370 def setup():
4364 4371 if clearrevlogs:
4365 4372 clearchangelog(repo)
4366 4373
4367 4374 def bench():
4368 4375 fromfile(repo)
4369 4376
4370 4377 timer(bench, setup=setup)
4371 4378 fm.end()
4372 4379
4373 4380
4374 4381 @command(b'perf::loadmarkers|perfloadmarkers')
4375 4382 def perfloadmarkers(ui, repo):
4376 4383 """benchmark the time to parse the on-disk markers for a repo
4377 4384
4378 4385 Result is the number of markers in the repo."""
4379 4386 timer, fm = gettimer(ui)
4380 4387 svfs = getsvfs(repo)
4381 4388 timer(lambda: len(obsolete.obsstore(repo, svfs)))
4382 4389 fm.end()
4383 4390
4384 4391
4385 4392 @command(
4386 4393 b'perf::lrucachedict|perflrucachedict',
4387 4394 formatteropts
4388 4395 + [
4389 4396 (b'', b'costlimit', 0, b'maximum total cost of items in cache'),
4390 4397 (b'', b'mincost', 0, b'smallest cost of items in cache'),
4391 4398 (b'', b'maxcost', 100, b'maximum cost of items in cache'),
4392 4399 (b'', b'size', 4, b'size of cache'),
4393 4400 (b'', b'gets', 10000, b'number of key lookups'),
4394 4401 (b'', b'sets', 10000, b'number of key sets'),
4395 4402 (b'', b'mixed', 10000, b'number of mixed mode operations'),
4396 4403 (
4397 4404 b'',
4398 4405 b'mixedgetfreq',
4399 4406 50,
4400 4407 b'frequency of get vs set ops in mixed mode',
4401 4408 ),
4402 4409 ],
4403 4410 norepo=True,
4404 4411 )
4405 4412 def perflrucache(
4406 4413 ui,
4407 4414 mincost=0,
4408 4415 maxcost=100,
4409 4416 costlimit=0,
4410 4417 size=4,
4411 4418 gets=10000,
4412 4419 sets=10000,
4413 4420 mixed=10000,
4414 4421 mixedgetfreq=50,
4415 4422 **opts
4416 4423 ):
4417 4424 opts = _byteskwargs(opts)
4418 4425
4419 4426 def doinit():
4420 4427 for i in _xrange(10000):
4421 4428 util.lrucachedict(size)
4422 4429
4423 4430 costrange = list(range(mincost, maxcost + 1))
4424 4431
4425 4432 values = []
4426 4433 for i in _xrange(size):
4427 4434 values.append(random.randint(0, _maxint))
4428 4435
4429 4436 # Get mode fills the cache and tests raw lookup performance with no
4430 4437 # eviction.
4431 4438 getseq = []
4432 4439 for i in _xrange(gets):
4433 4440 getseq.append(random.choice(values))
4434 4441
4435 4442 def dogets():
4436 4443 d = util.lrucachedict(size)
4437 4444 for v in values:
4438 4445 d[v] = v
4439 4446 for key in getseq:
4440 4447 value = d[key]
4441 4448 value # silence pyflakes warning
4442 4449
4443 4450 def dogetscost():
4444 4451 d = util.lrucachedict(size, maxcost=costlimit)
4445 4452 for i, v in enumerate(values):
4446 4453 d.insert(v, v, cost=costs[i])
4447 4454 for key in getseq:
4448 4455 try:
4449 4456 value = d[key]
4450 4457 value # silence pyflakes warning
4451 4458 except KeyError:
4452 4459 pass
4453 4460
4454 4461 # Set mode tests insertion speed with cache eviction.
4455 4462 setseq = []
4456 4463 costs = []
4457 4464 for i in _xrange(sets):
4458 4465 setseq.append(random.randint(0, _maxint))
4459 4466 costs.append(random.choice(costrange))
4460 4467
4461 4468 def doinserts():
4462 4469 d = util.lrucachedict(size)
4463 4470 for v in setseq:
4464 4471 d.insert(v, v)
4465 4472
4466 4473 def doinsertscost():
4467 4474 d = util.lrucachedict(size, maxcost=costlimit)
4468 4475 for i, v in enumerate(setseq):
4469 4476 d.insert(v, v, cost=costs[i])
4470 4477
4471 4478 def dosets():
4472 4479 d = util.lrucachedict(size)
4473 4480 for v in setseq:
4474 4481 d[v] = v
4475 4482
4476 4483 # Mixed mode randomly performs gets and sets with eviction.
4477 4484 mixedops = []
4478 4485 for i in _xrange(mixed):
4479 4486 r = random.randint(0, 100)
4480 4487 if r < mixedgetfreq:
4481 4488 op = 0
4482 4489 else:
4483 4490 op = 1
4484 4491
4485 4492 mixedops.append(
4486 4493 (op, random.randint(0, size * 2), random.choice(costrange))
4487 4494 )
4488 4495
4489 4496 def domixed():
4490 4497 d = util.lrucachedict(size)
4491 4498
4492 4499 for op, v, cost in mixedops:
4493 4500 if op == 0:
4494 4501 try:
4495 4502 d[v]
4496 4503 except KeyError:
4497 4504 pass
4498 4505 else:
4499 4506 d[v] = v
4500 4507
4501 4508 def domixedcost():
4502 4509 d = util.lrucachedict(size, maxcost=costlimit)
4503 4510
4504 4511 for op, v, cost in mixedops:
4505 4512 if op == 0:
4506 4513 try:
4507 4514 d[v]
4508 4515 except KeyError:
4509 4516 pass
4510 4517 else:
4511 4518 d.insert(v, v, cost=cost)
4512 4519
4513 4520 benches = [
4514 4521 (doinit, b'init'),
4515 4522 ]
4516 4523
4517 4524 if costlimit:
4518 4525 benches.extend(
4519 4526 [
4520 4527 (dogetscost, b'gets w/ cost limit'),
4521 4528 (doinsertscost, b'inserts w/ cost limit'),
4522 4529 (domixedcost, b'mixed w/ cost limit'),
4523 4530 ]
4524 4531 )
4525 4532 else:
4526 4533 benches.extend(
4527 4534 [
4528 4535 (dogets, b'gets'),
4529 4536 (doinserts, b'inserts'),
4530 4537 (dosets, b'sets'),
4531 4538 (domixed, b'mixed'),
4532 4539 ]
4533 4540 )
4534 4541
4535 4542 for fn, title in benches:
4536 4543 timer, fm = gettimer(ui, opts)
4537 4544 timer(fn, title=title)
4538 4545 fm.end()
4539 4546
4540 4547
4541 4548 @command(
4542 4549 b'perf::write|perfwrite',
4543 4550 formatteropts
4544 4551 + [
4545 4552 (b'', b'write-method', b'write', b'ui write method'),
4546 4553 (b'', b'nlines', 100, b'number of lines'),
4547 4554 (b'', b'nitems', 100, b'number of items (per line)'),
4548 4555 (b'', b'item', b'x', b'item that is written'),
4549 4556 (b'', b'batch-line', None, b'pass whole line to write method at once'),
4550 4557 (b'', b'flush-line', None, b'flush after each line'),
4551 4558 ],
4552 4559 )
4553 4560 def perfwrite(ui, repo, **opts):
4554 4561 """microbenchmark ui.write (and others)"""
4555 4562 opts = _byteskwargs(opts)
4556 4563
4557 4564 write = getattr(ui, _sysstr(opts[b'write_method']))
4558 4565 nlines = int(opts[b'nlines'])
4559 4566 nitems = int(opts[b'nitems'])
4560 4567 item = opts[b'item']
4561 4568 batch_line = opts.get(b'batch_line')
4562 4569 flush_line = opts.get(b'flush_line')
4563 4570
4564 4571 if batch_line:
4565 4572 line = item * nitems + b'\n'
4566 4573
4567 4574 def benchmark():
4568 4575 for i in pycompat.xrange(nlines):
4569 4576 if batch_line:
4570 4577 write(line)
4571 4578 else:
4572 4579 for i in pycompat.xrange(nitems):
4573 4580 write(item)
4574 4581 write(b'\n')
4575 4582 if flush_line:
4576 4583 ui.flush()
4577 4584 ui.flush()
4578 4585
4579 4586 timer, fm = gettimer(ui, opts)
4580 4587 timer(benchmark)
4581 4588 fm.end()
4582 4589
4583 4590
4584 4591 def uisetup(ui):
4585 4592 if util.safehasattr(cmdutil, b'openrevlog') and not util.safehasattr(
4586 4593 commands, b'debugrevlogopts'
4587 4594 ):
4588 4595 # for "historical portability":
4589 4596 # In this case, Mercurial should be 1.9 (or a79fea6b3e77) -
4590 4597 # 3.7 (or 5606f7d0d063). Therefore, '--dir' option for
4591 4598 # openrevlog() should cause failure, because it has been
4592 4599 # available since 3.5 (or 49c583ca48c4).
4593 4600 def openrevlog(orig, repo, cmd, file_, opts):
4594 4601 if opts.get(b'dir') and not util.safehasattr(repo, b'dirlog'):
4595 4602 raise error.Abort(
4596 4603 b"This version doesn't support --dir option",
4597 4604 hint=b"use 3.5 or later",
4598 4605 )
4599 4606 return orig(repo, cmd, file_, opts)
4600 4607
4601 4608 name = _sysstr(b'openrevlog')
4602 4609 extensions.wrapfunction(cmdutil, name, openrevlog)
4603 4610
4604 4611
4605 4612 @command(
4606 4613 b'perf::progress|perfprogress',
4607 4614 formatteropts
4608 4615 + [
4609 4616 (b'', b'topic', b'topic', b'topic for progress messages'),
4610 4617 (b'c', b'total', 1000000, b'total value we are progressing to'),
4611 4618 ],
4612 4619 norepo=True,
4613 4620 )
4614 4621 def perfprogress(ui, topic=None, total=None, **opts):
4615 4622 """printing of progress bars"""
4616 4623 opts = _byteskwargs(opts)
4617 4624
4618 4625 timer, fm = gettimer(ui, opts)
4619 4626
4620 4627 def doprogress():
4621 4628 with ui.makeprogress(topic, total=total) as progress:
4622 4629 for i in _xrange(total):
4623 4630 progress.increment()
4624 4631
4625 4632 timer(doprogress)
4626 4633 fm.end()
@@ -1,3901 +1,3907 b''
1 1 # revlog.py - storage back-end for mercurial
2 2 # coding: utf8
3 3 #
4 4 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8
9 9 """Storage back-end for Mercurial.
10 10
11 11 This provides efficient delta storage with O(1) retrieve and append
12 12 and O(changes) merge between branches.
13 13 """
14 14
15 15
16 16 import binascii
17 17 import collections
18 18 import contextlib
19 19 import io
20 20 import os
21 21 import struct
22 22 import weakref
23 23 import zlib
24 24
25 25 # import stuff from node for others to import from revlog
26 26 from .node import (
27 27 bin,
28 28 hex,
29 29 nullrev,
30 30 sha1nodeconstants,
31 31 short,
32 32 wdirrev,
33 33 )
34 34 from .i18n import _
35 35 from .revlogutils.constants import (
36 36 ALL_KINDS,
37 37 CHANGELOGV2,
38 38 COMP_MODE_DEFAULT,
39 39 COMP_MODE_INLINE,
40 40 COMP_MODE_PLAIN,
41 41 DELTA_BASE_REUSE_NO,
42 42 DELTA_BASE_REUSE_TRY,
43 43 ENTRY_RANK,
44 44 FEATURES_BY_VERSION,
45 45 FLAG_GENERALDELTA,
46 46 FLAG_INLINE_DATA,
47 47 INDEX_HEADER,
48 48 KIND_CHANGELOG,
49 49 KIND_FILELOG,
50 50 RANK_UNKNOWN,
51 51 REVLOGV0,
52 52 REVLOGV1,
53 53 REVLOGV1_FLAGS,
54 54 REVLOGV2,
55 55 REVLOGV2_FLAGS,
56 56 REVLOG_DEFAULT_FLAGS,
57 57 REVLOG_DEFAULT_FORMAT,
58 58 REVLOG_DEFAULT_VERSION,
59 59 SUPPORTED_FLAGS,
60 60 )
61 61 from .revlogutils.flagutil import (
62 62 REVIDX_DEFAULT_FLAGS,
63 63 REVIDX_ELLIPSIS,
64 64 REVIDX_EXTSTORED,
65 65 REVIDX_FLAGS_ORDER,
66 66 REVIDX_HASCOPIESINFO,
67 67 REVIDX_ISCENSORED,
68 68 REVIDX_RAWTEXT_CHANGING_FLAGS,
69 69 )
70 70 from .thirdparty import attr
71 71 from . import (
72 72 ancestor,
73 73 dagop,
74 74 error,
75 75 mdiff,
76 76 policy,
77 77 pycompat,
78 78 revlogutils,
79 79 templatefilters,
80 80 util,
81 81 )
82 82 from .interfaces import (
83 83 repository,
84 84 util as interfaceutil,
85 85 )
86 86 from .revlogutils import (
87 87 deltas as deltautil,
88 88 docket as docketutil,
89 89 flagutil,
90 90 nodemap as nodemaputil,
91 91 randomaccessfile,
92 92 revlogv0,
93 93 rewrite,
94 94 sidedata as sidedatautil,
95 95 )
96 96 from .utils import (
97 97 storageutil,
98 98 stringutil,
99 99 )
100 100
101 101 # blanked usage of all the names to prevent pyflakes constraints
102 102 # We need these names available in the module for extensions.
103 103
104 104 REVLOGV0
105 105 REVLOGV1
106 106 REVLOGV2
107 107 CHANGELOGV2
108 108 FLAG_INLINE_DATA
109 109 FLAG_GENERALDELTA
110 110 REVLOG_DEFAULT_FLAGS
111 111 REVLOG_DEFAULT_FORMAT
112 112 REVLOG_DEFAULT_VERSION
113 113 REVLOGV1_FLAGS
114 114 REVLOGV2_FLAGS
115 115 REVIDX_ISCENSORED
116 116 REVIDX_ELLIPSIS
117 117 REVIDX_HASCOPIESINFO
118 118 REVIDX_EXTSTORED
119 119 REVIDX_DEFAULT_FLAGS
120 120 REVIDX_FLAGS_ORDER
121 121 REVIDX_RAWTEXT_CHANGING_FLAGS
122 122
123 123 parsers = policy.importmod('parsers')
124 124 rustancestor = policy.importrust('ancestor')
125 125 rustdagop = policy.importrust('dagop')
126 126 rustrevlog = policy.importrust('revlog')
127 127
128 128 # Aliased for performance.
129 129 _zlibdecompress = zlib.decompress
130 130
131 131 # max size of inline data embedded into a revlog
132 132 _maxinline = 131072
133 133
134 134 # Flag processors for REVIDX_ELLIPSIS.
135 135 def ellipsisreadprocessor(rl, text):
136 136 return text, False
137 137
138 138
139 139 def ellipsiswriteprocessor(rl, text):
140 140 return text, False
141 141
142 142
143 143 def ellipsisrawprocessor(rl, text):
144 144 return False
145 145
146 146
147 147 ellipsisprocessor = (
148 148 ellipsisreadprocessor,
149 149 ellipsiswriteprocessor,
150 150 ellipsisrawprocessor,
151 151 )
152 152
153 153
154 154 def _verify_revision(rl, skipflags, state, node):
155 155 """Verify the integrity of the given revlog ``node`` while providing a hook
156 156 point for extensions to influence the operation."""
157 157 if skipflags:
158 158 state[b'skipread'].add(node)
159 159 else:
160 160 # Side-effect: read content and verify hash.
161 161 rl.revision(node)
162 162
163 163
164 164 # True if a fast implementation for persistent-nodemap is available
165 165 #
166 166 # We also consider we have a "fast" implementation in "pure" python because
167 167 # people using pure don't really have performance considerations (and a
168 168 # wheelbarrow of other slowness sources)
169 169 HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or hasattr(
170 170 parsers, 'BaseIndexObject'
171 171 )
172 172
173 173
174 174 @interfaceutil.implementer(repository.irevisiondelta)
175 175 @attr.s(slots=True)
176 176 class revlogrevisiondelta:
177 177 node = attr.ib()
178 178 p1node = attr.ib()
179 179 p2node = attr.ib()
180 180 basenode = attr.ib()
181 181 flags = attr.ib()
182 182 baserevisionsize = attr.ib()
183 183 revision = attr.ib()
184 184 delta = attr.ib()
185 185 sidedata = attr.ib()
186 186 protocol_flags = attr.ib()
187 187 linknode = attr.ib(default=None)
188 188
189 189
190 190 @interfaceutil.implementer(repository.iverifyproblem)
191 191 @attr.s(frozen=True)
192 192 class revlogproblem:
193 193 warning = attr.ib(default=None)
194 194 error = attr.ib(default=None)
195 195 node = attr.ib(default=None)
196 196
197 197
198 198 def parse_index_v1(data, inline):
199 199 # call the C implementation to parse the index data
200 200 index, cache = parsers.parse_index2(data, inline)
201 201 return index, cache
202 202
203 203
204 204 def parse_index_v2(data, inline):
205 205 # call the C implementation to parse the index data
206 206 index, cache = parsers.parse_index2(data, inline, format=REVLOGV2)
207 207 return index, cache
208 208
209 209
210 210 def parse_index_cl_v2(data, inline):
211 211 # call the C implementation to parse the index data
212 212 index, cache = parsers.parse_index2(data, inline, format=CHANGELOGV2)
213 213 return index, cache
214 214
215 215
216 216 if hasattr(parsers, 'parse_index_devel_nodemap'):
217 217
218 218 def parse_index_v1_nodemap(data, inline):
219 219 index, cache = parsers.parse_index_devel_nodemap(data, inline)
220 220 return index, cache
221 221
222 222
223 223 else:
224 224 parse_index_v1_nodemap = None
225 225
226 226
227 227 def parse_index_v1_mixed(data, inline):
228 228 index, cache = parse_index_v1(data, inline)
229 229 return rustrevlog.MixedIndex(index), cache
230 230
231 231
232 232 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
233 233 # signed integer)
234 234 _maxentrysize = 0x7FFFFFFF
235 235
236 236 FILE_TOO_SHORT_MSG = _(
237 237 b'cannot read from revlog %s;'
238 238 b' expected %d bytes from offset %d, data size is %d'
239 239 )
240 240
241 241 hexdigits = b'0123456789abcdefABCDEF'
242 242
243 243
244 244 class _Config:
245 245 def copy(self):
246 246 return self.__class__(**self.__dict__)
247 247
248 248
249 249 @attr.s()
250 250 class FeatureConfig(_Config):
251 251 """Hold configuration values about the available revlog features"""
252 252
253 253 # the default compression engine
254 254 compression_engine = attr.ib(default=b'zlib')
255 255 # compression engines options
256 256 compression_engine_options = attr.ib(default=attr.Factory(dict))
257 257
258 258 # can we use censor on this revlog
259 259 censorable = attr.ib(default=False)
260 260 # does this revlog use the "side data" feature
261 261 has_side_data = attr.ib(default=False)
262 262 # might remove rank configuration once the computation has no impact
263 263 compute_rank = attr.ib(default=False)
264 264 # parent order is supposed to be semantically irrelevant, so we
265 265 # normally resort parents to ensure that the first parent is non-null,
266 266 # if there is a non-null parent at all.
267 267 # filelog abuses the parent order as flag to mark some instances of
268 268 # meta-encoded files, so allow it to disable this behavior.
269 269 canonical_parent_order = attr.ib(default=False)
270 270 # can ellipsis commit be used
271 271 enable_ellipsis = attr.ib(default=False)
272 272
273 273 def copy(self):
274 274 new = super().copy()
275 275 new.compression_engine_options = self.compression_engine_options.copy()
276 276 return new
277 277
278 278
279 279 @attr.s()
280 280 class DataConfig(_Config):
281 281 """Hold configuration value about how the revlog data are read"""
282 282
283 283 # should we try to open the "pending" version of the revlog
284 284 try_pending = attr.ib(default=False)
286 286 # should we try to open the "split" version of the revlog
286 286 try_split = attr.ib(default=False)
287 287 # When True, indexfile should be opened with checkambig=True at writing,
288 288 # to avoid file stat ambiguity.
289 289 check_ambig = attr.ib(default=False)
290 290
291 291 # If true, use mmap instead of reading to deal with large index
292 292 mmap_large_index = attr.ib(default=False)
293 293 # how much data is large
294 294 mmap_index_threshold = attr.ib(default=None)
295 295 # How much data to read and cache into the raw revlog data cache.
296 296 chunk_cache_size = attr.ib(default=65536)
297 297
298 298 # Allow sparse reading of the revlog data
299 299 with_sparse_read = attr.ib(default=False)
300 300 # minimal density of a sparse read chunk
301 301 sr_density_threshold = attr.ib(default=0.50)
302 302 # minimal size of data we skip when performing sparse read
303 303 sr_min_gap_size = attr.ib(default=262144)
304 304
305 305 # are delta encoded against arbitrary bases.
306 306 generaldelta = attr.ib(default=False)
307 307
308 308
309 309 @attr.s()
310 310 class DeltaConfig(_Config):
311 311 """Hold configuration value about how new delta are computed
312 312
313 313 Some attributes are duplicated from DataConfig to help havign each object
314 314 self contained.
315 315 """
316 316
317 317 # can delta be encoded against arbitrary bases.
318 318 general_delta = attr.ib(default=False)
319 319 # Allow sparse writing of the revlog data
320 320 sparse_revlog = attr.ib(default=False)
321 321 # maximum length of a delta chain
322 322 max_chain_len = attr.ib(default=None)
323 323 # Maximum distance between delta chain base start and end
324 324 max_deltachain_span = attr.ib(default=-1)
325 325 # If `upper_bound_comp` is not None, this is the expected maximal gain from
326 326 # compression for the data content.
327 327 upper_bound_comp = attr.ib(default=None)
328 328 # Should we try a delta against both parent
329 329 delta_both_parents = attr.ib(default=True)
330 330 # Test delta base candidate group by chunk of this maximal size.
331 331 candidate_group_chunk_size = attr.ib(default=0)
332 332 # Should we display debug information about delta computation
333 333 debug_delta = attr.ib(default=False)
334 334 # trust incoming delta by default
335 335 lazy_delta = attr.ib(default=True)
336 336 # trust the base of incoming delta by default
337 337 lazy_delta_base = attr.ib(default=False)
338 338
339 339
340 340 class _InnerRevlog:
341 341 """An inner layer of the revlog object
342 342
343 343 That layer exists to be able to delegate some operations to Rust; its
344 344 boundaries are arbitrary and based on what we can delegate to Rust.
345 345 """
346 346
347 347 def __init__(
348 348 self,
349 349 opener,
350 350 index,
351 351 index_file,
352 352 data_file,
353 353 sidedata_file,
354 354 inline,
355 355 data_config,
356 356 chunk_cache,
357 357 ):
358 358 self.opener = opener
359 359 self.index = index
360 360
361 361 self.index_file = index_file
362 362 self.data_file = data_file
363 363 self.sidedata_file = sidedata_file
364 364 self.inline = inline
365 365 self.data_config = data_config
366 366
367 367 # index
368 368
369 369 # 3-tuple of file handles being used for active writing.
370 370 self._writinghandles = None
371 371
372 372 self._segmentfile = randomaccessfile.randomaccessfile(
373 373 self.opener,
374 374 (self.index_file if self.inline else self.data_file),
375 375 self.data_config.chunk_cache_size,
376 376 chunk_cache,
377 377 )
378 378 self._segmentfile_sidedata = randomaccessfile.randomaccessfile(
379 379 self.opener,
380 380 self.sidedata_file,
381 381 self.data_config.chunk_cache_size,
382 382 )
383 383
384 384 # Derived from index values.
385 385
386 386 def start(self, rev):
387 387 """the offset of the data chunk for this revision"""
388 388 return int(self.index[rev][0] >> 16)
389 389
390 390 def length(self, rev):
391 391 """the length of the data chunk for this revision"""
392 392 return self.index[rev][1]
393 393
394 394 def end(self, rev):
395 395 """the end of the data chunk for this revision"""
396 396 return self.start(rev) + self.length(rev)
397 397
398 398 @contextlib.contextmanager
399 399 def reading(self):
400 400 """Context manager that keeps data and sidedata files open for reading"""
401 401 if len(self.index) == 0:
402 402 yield # nothing to be read
403 403 else:
404 404 with self._segmentfile.reading():
405 405 with self._segmentfile_sidedata.reading():
406 406 yield
407 407
408 408 @property
409 409 def is_writing(self):
410 410 """True is a writing context is open"""
411 411 return self._writinghandles is not None
412 412
413 413 @contextlib.contextmanager
414 414 def writing(self, transaction, data_end=None, sidedata_end=None):
415 415 """Open the revlog files for writing
416 416
417 417 Adding content to a revlog should be done within such a context.
418 418 """
419 419 if self.is_writing:
420 420 yield
421 421 else:
422 422 ifh = dfh = sdfh = None
423 423 try:
424 424 r = len(self.index)
425 425 # opening the data file.
426 426 dsize = 0
427 427 if r:
428 428 dsize = self.end(r - 1)
429 429 dfh = None
430 430 if not self.inline:
431 431 try:
432 432 dfh = self.opener(self.data_file, mode=b"r+")
433 433 if data_end is None:
434 434 dfh.seek(0, os.SEEK_END)
435 435 else:
436 436 dfh.seek(data_end, os.SEEK_SET)
437 437 except FileNotFoundError:
438 438 dfh = self.opener(self.data_file, mode=b"w+")
439 439 transaction.add(self.data_file, dsize)
440 440 if self.sidedata_file is not None:
441 441 assert sidedata_end is not None
442 442 # revlog-v2 does not inline, help Pytype
443 443 assert dfh is not None
444 444 try:
445 445 sdfh = self.opener(self.sidedata_file, mode=b"r+")
446 446 dfh.seek(sidedata_end, os.SEEK_SET)
447 447 except FileNotFoundError:
448 448 sdfh = self.opener(self.sidedata_file, mode=b"w+")
449 449 transaction.add(self.sidedata_file, sidedata_end)
450 450
451 451 # opening the index file.
452 452 isize = r * self.index.entry_size
453 453 ifh = self.__index_write_fp()
454 454 if self.inline:
455 455 transaction.add(self.index_file, dsize + isize)
456 456 else:
457 457 transaction.add(self.index_file, isize)
458 458 # exposing all file handles for writing.
459 459 self._writinghandles = (ifh, dfh, sdfh)
460 460 self._segmentfile.writing_handle = ifh if self.inline else dfh
461 461 self._segmentfile_sidedata.writing_handle = sdfh
462 462 yield
463 463 finally:
464 464 self._writinghandles = None
465 465 self._segmentfile.writing_handle = None
466 466 self._segmentfile_sidedata.writing_handle = None
467 467 if dfh is not None:
468 468 dfh.close()
469 469 if sdfh is not None:
470 470 sdfh.close()
471 471 # closing the index file last to avoid exposing references to
472 472 # potentially unflushed data content.
473 473 if ifh is not None:
474 474 ifh.close()
475 475
476 476 def __index_write_fp(self, index_end=None):
477 477 """internal method to open the index file for writing
478 478
479 479 You should not use this directly; use `_writing` instead
480 480 """
481 481 try:
482 482 f = self.opener(
483 483 self.index_file,
484 484 mode=b"r+",
485 485 checkambig=self.data_config.check_ambig,
486 486 )
487 487 if index_end is None:
488 488 f.seek(0, os.SEEK_END)
489 489 else:
490 490 f.seek(index_end, os.SEEK_SET)
491 491 return f
492 492 except FileNotFoundError:
493 493 return self.opener(
494 494 self.index_file,
495 495 mode=b"w+",
496 496 checkambig=self.data_config.check_ambig,
497 497 )
498 498
499 499 def __index_new_fp(self):
500 500 """internal method to create a new index file for writing
501 501
502 502 You should not use this unless you are upgrading from inline revlog
503 503 """
504 504 return self.opener(
505 505 self.index_file,
506 506 mode=b"w",
507 507 checkambig=self.data_config.check_ambig,
508 508 atomictemp=True,
509 509 )
510 510
511 def get_segment_for_revs(self, startrev, endrev):
512 """Obtain a segment of raw data corresponding to a range of revisions.
513
514 Accepts the start and end revisions of the range to obtain data for.
517
518 Requests for data may be satisfied by a cache.
519
520 Returns a 2-tuple of (offset, data) for the requested range of
521 revisions. Offset is the integer offset from the beginning of the
522 revlog and data is a str or buffer of the raw byte data.
523
524 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
525 to determine where each revision's data begins and ends.
526
527 API: we should consider making this a private part of the InnerRevlog
528 at some point.
529 """
530 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
531 # (functions are expensive).
532 index = self.index
533 istart = index[startrev]
534 start = int(istart[0] >> 16)
535 if startrev == endrev:
536 end = start + istart[1]
537 else:
538 iend = index[endrev]
539 end = int(iend[0] >> 16) + iend[1]
540
541 if self.inline:
542 start += (startrev + 1) * self.index.entry_size
543 end += (endrev + 1) * self.index.entry_size
544 length = end - start
545
546 return start, self._segmentfile.read_chunk(start, length)
547
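# Minimal usage sketch for the method above (assumed caller-side code, not
# part of this change); `inner` is an _InnerRevlog opened for reading, and
# the slicing below is only correct for non-inline revlogs (inline revlogs
# also need the index-entry adjustment shown in the method itself):
#
#   with inner.reading():
#       offset, data = inner.get_segment_for_revs(firstrev, lastrev)
#       for rev in range(firstrev, lastrev + 1):
#           start = inner.start(rev) - offset
#           chunk = data[start:start + inner.length(rev)]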
511 548
512 549 class revlog:
513 550 """
514 551 the underlying revision storage object
515 552
516 553 A revlog consists of two parts, an index and the revision data.
517 554
518 555 The index is a file with a fixed record size containing
519 556 information on each revision, including its nodeid (hash), the
520 557 nodeids of its parents, the position and offset of its data within
521 558 the data file, and the revision it's based on. Finally, each entry
522 559 contains a linkrev entry that can serve as a pointer to external
523 560 data.
524 561
525 562 The revision data itself is a linear collection of data chunks.
526 563 Each chunk represents a revision and is usually represented as a
527 564 delta against the previous chunk. To bound lookup time, runs of
528 565 deltas are limited to about 2 times the length of the original
529 566 version data. This makes retrieval of a version proportional to
530 567 its size, or O(1) relative to the number of revisions.
531 568
532 569 Both pieces of the revlog are written to in an append-only
533 570 fashion, which means we never need to rewrite a file to insert or
534 571 remove data, and can use some simple techniques to avoid the need
535 572 for locking while reading.
536 573
537 574 If checkambig, indexfile is opened with checkambig=True at
538 575 writing, to avoid file stat ambiguity.
539 576
540 577 If mmaplargeindex is True, and an mmapindexthreshold is set, the
541 578 index will be mmapped rather than read if it is larger than the
542 579 configured threshold.
543 580
544 581 If censorable is True, the revlog can have censored revisions.
545 582
546 583 If `upperboundcomp` is not None, this is the expected maximal gain from
547 584 compression for the data content.
548 585
549 586 `concurrencychecker` is an optional function that receives 3 arguments: a
550 587 file handle, a filename, and an expected position. It should check whether
551 588 the current position in the file handle is valid, and log/warn/fail (by
552 589 raising).
553 590
554 591 See mercurial/revlogutils/constants.py for details about the content of an
555 592 index entry.
556 593 """
557 594
558 595 _flagserrorclass = error.RevlogError
559 596
560 597 @staticmethod
561 598 def is_inline_index(header_bytes):
562 599 """Determine if a revlog is inline from the initial bytes of the index"""
563 600 header = INDEX_HEADER.unpack(header_bytes)[0]
564 601
565 602 _format_flags = header & ~0xFFFF
566 603 _format_version = header & 0xFFFF
567 604
568 605 features = FEATURES_BY_VERSION[_format_version]
569 606 return features[b'inline'](_format_flags)
570 607
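# Decoding sketch (illustration only; ``header_bytes`` stands for the first
# four bytes of any revlog index file, read by the caller):
#
#   header = INDEX_HEADER.unpack(header_bytes)[0]
#   format_version = header & 0xFFFF   # e.g. REVLOGV1
#   format_flags = header & ~0xFFFF    # e.g. FLAG_INLINE_DATA
#
# ``is_inline_index`` then asks the version-specific feature table whether
# those flags mark the revision data as stored inline in the index file.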
571 608 def __init__(
572 609 self,
573 610 opener,
574 611 target,
575 612 radix,
576 613 postfix=None, # only exist for `tmpcensored` now
577 614 checkambig=False,
578 615 mmaplargeindex=False,
579 616 censorable=False,
580 617 upperboundcomp=None,
581 618 persistentnodemap=False,
582 619 concurrencychecker=None,
583 620 trypending=False,
584 621 try_split=False,
585 622 canonical_parent_order=True,
586 623 ):
587 624 """
588 625 create a revlog object
589 626
590 627 opener is a function that abstracts the file opening operation
591 628 and can be used to implement COW semantics or the like.
592 629
593 630 `target`: a (KIND, ID) tuple that identifies the content stored in
594 631 this revlog. It helps the rest of the code to understand what the
595 632 revlog is about without having to resort to heuristics and index
596 633 filename analysis. Note that this must be reliably set by normal
597 634 code, but test, debug, or performance measurement code might not
598 635 set it to an accurate value.
599 636 """
600 637
601 638 self.radix = radix
602 639
603 640 self._docket_file = None
604 641 self._indexfile = None
605 642 self._datafile = None
606 643 self._sidedatafile = None
607 644 self._nodemap_file = None
608 645 self.postfix = postfix
609 646 self._trypending = trypending
610 647 self._try_split = try_split
611 648 self.opener = opener
612 649 if persistentnodemap:
613 650 self._nodemap_file = nodemaputil.get_nodemap_file(self)
614 651
615 652 assert target[0] in ALL_KINDS
616 653 assert len(target) == 2
617 654 self.target = target
618 655 if b'feature-config' in self.opener.options:
619 656 self.feature_config = self.opener.options[b'feature-config'].copy()
620 657 else:
621 658 self.feature_config = FeatureConfig()
622 659 self.feature_config.censorable = censorable
623 660 self.feature_config.canonical_parent_order = canonical_parent_order
624 661 if b'data-config' in self.opener.options:
625 662 self.data_config = self.opener.options[b'data-config'].copy()
626 663 else:
627 664 self.data_config = DataConfig()
628 665 self.data_config.check_ambig = checkambig
629 666 self.data_config.mmap_large_index = mmaplargeindex
630 667 if b'delta-config' in self.opener.options:
631 668 self.delta_config = self.opener.options[b'delta-config'].copy()
632 669 else:
633 670 self.delta_config = DeltaConfig()
634 671 self.delta_config.upper_bound_comp = upperboundcomp
635 672
636 673 # 3-tuple of (node, rev, text) for a raw revision.
637 674 self._revisioncache = None
638 675 # Maps rev to chain base rev.
639 676 self._chainbasecache = util.lrucachedict(100)
640 677
641 678 self.index = None
642 679 self._docket = None
643 680 self._nodemap_docket = None
644 681 # Mapping of partial identifiers to full nodes.
645 682 self._pcache = {}
646 683
647 684 # other optional features
648 685
649 686 # Make copy of flag processors so each revlog instance can support
650 687 # custom flags.
651 688 self._flagprocessors = dict(flagutil.flagprocessors)
652 689 # prevent nesting of addgroup
653 690 self._adding_group = None
654 691
655 692 chunk_cache = self._loadindex()
656 693 self._load_inner(chunk_cache)
657 694
658 695 self._concurrencychecker = concurrencychecker
659 696
660 697 @property
661 698 def _generaldelta(self):
662 699 """temporary compatibility proxy"""
663 700 util.nouideprecwarn(
664 701 b"use revlog.delta_config.general_delta", b"6.6", stacklevel=2
665 702 )
666 703 return self.delta_config.general_delta
667 704
668 705 @property
669 706 def _checkambig(self):
670 707 """temporary compatibility proxy"""
671 708 util.nouideprecwarn(
672 709 b"use revlog.data_config.checkambig", b"6.6", stacklevel=2
673 710 )
674 711 return self.data_config.check_ambig
675 712
676 713 @property
677 714 def _mmaplargeindex(self):
678 715 """temporary compatibility proxy"""
679 716 util.nouideprecwarn(
680 717 b"use revlog.data_config.mmap_large_index", b"6.6", stacklevel=2
681 718 )
682 719 return self.data_config.mmap_large_index
683 720
684 721 @property
685 722 def _censorable(self):
686 723 """temporary compatibility proxy"""
687 724 util.nouideprecwarn(
688 725 b"use revlog.feature_config.censorable", b"6.6", stacklevel=2
689 726 )
690 727 return self.feature_config.censorable
691 728
692 729 @property
693 730 def _chunkcachesize(self):
694 731 """temporary compatibility proxy"""
695 732 util.nouideprecwarn(
696 733 b"use revlog.data_config.chunk_cache_size", b"6.6", stacklevel=2
697 734 )
698 735 return self.data_config.chunk_cache_size
699 736
700 737 @property
701 738 def _maxchainlen(self):
702 739 """temporary compatibility proxy"""
703 740 util.nouideprecwarn(
704 741 b"use revlog.delta_config.max_chain_len", b"6.6", stacklevel=2
705 742 )
706 743 return self.delta_config.max_chain_len
707 744
708 745 @property
709 746 def _deltabothparents(self):
710 747 """temporary compatibility proxy"""
711 748 util.nouideprecwarn(
712 749 b"use revlog.delta_config.delta_both_parents", b"6.6", stacklevel=2
713 750 )
714 751 return self.delta_config.delta_both_parents
715 752
716 753 @property
717 754 def _candidate_group_chunk_size(self):
718 755 """temporary compatibility proxy"""
719 756 util.nouideprecwarn(
720 757 b"use revlog.delta_config.candidate_group_chunk_size",
721 758 b"6.6",
722 759 stacklevel=2,
723 760 )
724 761 return self.delta_config.candidate_group_chunk_size
725 762
726 763 @property
727 764 def _debug_delta(self):
728 765 """temporary compatibility proxy"""
729 766 util.nouideprecwarn(
730 767 b"use revlog.delta_config.debug_delta", b"6.6", stacklevel=2
731 768 )
732 769 return self.delta_config.debug_delta
733 770
734 771 @property
735 772 def _compengine(self):
736 773 """temporary compatibility proxy"""
737 774 util.nouideprecwarn(
738 775 b"use revlog.feature_config.compression_engine",
739 776 b"6.6",
740 777 stacklevel=2,
741 778 )
742 779 return self.feature_config.compression_engine
743 780
744 781 @property
745 782 def upperboundcomp(self):
746 783 """temporary compatibility proxy"""
747 784 util.nouideprecwarn(
748 785 b"use revlog.delta_config.upper_bound_comp",
749 786 b"6.6",
750 787 stacklevel=2,
751 788 )
752 789 return self.delta_config.upper_bound_comp
753 790
754 791 @property
755 792 def _compengineopts(self):
756 793 """temporary compatibility proxy"""
757 794 util.nouideprecwarn(
758 795 b"use revlog.feature_config.compression_engine_options",
759 796 b"6.6",
760 797 stacklevel=2,
761 798 )
762 799 return self.feature_config.compression_engine_options
763 800
764 801 @property
765 802 def _maxdeltachainspan(self):
766 803 """temporary compatibility proxy"""
767 804 util.nouideprecwarn(
768 805 b"use revlog.delta_config.max_deltachain_span", b"6.6", stacklevel=2
769 806 )
770 807 return self.delta_config.max_deltachain_span
771 808
772 809 @property
773 810 def _withsparseread(self):
774 811 """temporary compatibility proxy"""
775 812 util.nouideprecwarn(
776 813 b"use revlog.data_config.with_sparse_read", b"6.6", stacklevel=2
777 814 )
778 815 return self.data_config.with_sparse_read
779 816
780 817 @property
781 818 def _sparserevlog(self):
782 819 """temporary compatibility proxy"""
783 820 util.nouideprecwarn(
784 821 b"use revlog.delta_config.sparse_revlog", b"6.6", stacklevel=2
785 822 )
786 823 return self.delta_config.sparse_revlog
787 824
788 825 @property
789 826 def hassidedata(self):
790 827 """temporary compatibility proxy"""
791 828 util.nouideprecwarn(
792 829 b"use revlog.feature_config.has_side_data", b"6.6", stacklevel=2
793 830 )
794 831 return self.feature_config.has_side_data
795 832
796 833 @property
797 834 def _srdensitythreshold(self):
798 835 """temporary compatibility proxy"""
799 836 util.nouideprecwarn(
800 837 b"use revlog.data_config.sr_density_threshold",
801 838 b"6.6",
802 839 stacklevel=2,
803 840 )
804 841 return self.data_config.sr_density_threshold
805 842
806 843 @property
807 844 def _srmingapsize(self):
808 845 """temporary compatibility proxy"""
809 846 util.nouideprecwarn(
810 847 b"use revlog.data_config.sr_min_gap_size", b"6.6", stacklevel=2
811 848 )
812 849 return self.data_config.sr_min_gap_size
813 850
814 851 @property
815 852 def _compute_rank(self):
816 853 """temporary compatibility proxy"""
817 854 util.nouideprecwarn(
818 855 b"use revlog.feature_config.compute_rank", b"6.6", stacklevel=2
819 856 )
820 857 return self.feature_config.compute_rank
821 858
822 859 @property
823 860 def canonical_parent_order(self):
824 861 """temporary compatibility proxy"""
825 862 util.nouideprecwarn(
826 863 b"use revlog.feature_config.canonical_parent_order",
827 864 b"6.6",
828 865 stacklevel=2,
829 866 )
830 867 return self.feature_config.canonical_parent_order
831 868
832 869 @property
833 870 def _lazydelta(self):
834 871 """temporary compatibility proxy"""
835 872 util.nouideprecwarn(
836 873 b"use revlog.delta_config.lazy_delta", b"6.6", stacklevel=2
837 874 )
838 875 return self.delta_config.lazy_delta
839 876
840 877 @property
841 878 def _lazydeltabase(self):
842 879 """temporary compatibility proxy"""
843 880 util.nouideprecwarn(
844 881 b"use revlog.delta_config.lazy_delta_base", b"6.6", stacklevel=2
845 882 )
846 883 return self.delta_config.lazy_delta_base
847 884
848 885 def _init_opts(self):
849 886 """process options (from above/config) to setup associated default revlog mode
850 887
851 888 These values might be affected when actually reading on-disk information.
852 889
853 890 The relevant values are returned for use in _loadindex().
854 891
855 892 * newversionflags:
856 893 version header to use if we need to create a new revlog
857 894
858 895 * mmapindexthreshold:
860 897 minimal index size at which to start using mmap
860 897
861 898 * force_nodemap:
862 899 force the usage of a "development" version of the nodemap code
863 900 """
864 901 opts = self.opener.options
865 902
866 903 if b'changelogv2' in opts and self.revlog_kind == KIND_CHANGELOG:
867 904 new_header = CHANGELOGV2
868 905 compute_rank = opts.get(b'changelogv2.compute-rank', True)
869 906 self.feature_config.compute_rank = compute_rank
870 907 elif b'revlogv2' in opts:
871 908 new_header = REVLOGV2
872 909 elif b'revlogv1' in opts:
873 910 new_header = REVLOGV1 | FLAG_INLINE_DATA
874 911 if b'generaldelta' in opts:
875 912 new_header |= FLAG_GENERALDELTA
876 913 elif b'revlogv0' in self.opener.options:
877 914 new_header = REVLOGV0
878 915 else:
879 916 new_header = REVLOG_DEFAULT_VERSION
880 917
881 918 mmapindexthreshold = None
882 919 if self.data_config.mmap_large_index:
883 920 mmapindexthreshold = self.data_config.mmap_index_threshold
884 921 if self.feature_config.enable_ellipsis:
885 922 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
886 923
887 924 # revlog v0 doesn't have flag processors
888 925 for flag, processor in opts.get(b'flagprocessors', {}).items():
889 926 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
890 927
891 928 chunk_cache_size = self.data_config.chunk_cache_size
892 929 if chunk_cache_size <= 0:
893 930 raise error.RevlogError(
894 931 _(b'revlog chunk cache size %r is not greater than 0')
895 932 % chunk_cache_size
896 933 )
897 934 elif chunk_cache_size & (chunk_cache_size - 1):
898 935 raise error.RevlogError(
899 936 _(b'revlog chunk cache size %r is not a power of 2')
900 937 % chunk_cache_size
901 938 )
902 939 force_nodemap = opts.get(b'devel-force-nodemap', False)
903 940 return new_header, mmapindexthreshold, force_nodemap
904 941
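# Note on the chunk cache size validation above: ``x & (x - 1) == 0`` holds
# exactly when a positive ``x`` is a power of two, because a power of two
# has a single bit set and subtracting one only flips the bits below it.
# Illustrative values:
#
#   1024 & 1023 == 0      # accepted
#   1000 & 999  == 992    # non-zero, rejected with RevlogError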
905 942 def _get_data(self, filepath, mmap_threshold, size=None):
906 943 """return a file content with or without mmap
907 944
908 945 If the file is missing return the empty string"""
909 946 try:
910 947 with self.opener(filepath) as fp:
911 948 if mmap_threshold is not None:
912 949 file_size = self.opener.fstat(fp).st_size
913 950 if file_size >= mmap_threshold:
914 951 if size is not None:
915 952 # avoid potential mmap crash
916 953 size = min(file_size, size)
917 954 # TODO: should .close() to release resources without
918 955 # relying on Python GC
919 956 if size is None:
920 957 return util.buffer(util.mmapread(fp))
921 958 else:
922 959 return util.buffer(util.mmapread(fp, size))
923 960 if size is None:
924 961 return fp.read()
925 962 else:
926 963 return fp.read(size)
927 964 except FileNotFoundError:
928 965 return b''
929 966
930 967 def get_streams(self, max_linkrev, force_inline=False):
931 968 """return a list of streams that represent this revlog
932 969
933 970 This is used by stream-clone to do byte-for-byte copies of a repository.
934 971
935 972 This streams data for all revisions that refer to a changelog revision up
936 973 to `max_linkrev`.
937 974
938 975 If `force_inline` is set, it enforces that the stream will represent an inline revlog.
939 976
940 977 It returns a list of three-tuples:
941 978
942 979 [
943 980 (filename, bytes_stream, stream_size),
944 981 …
945 982 ]
946 983 """
947 984 n = len(self)
948 985 index = self.index
949 986 while n > 0:
950 987 linkrev = index[n - 1][4]
951 988 if linkrev < max_linkrev:
952 989 break
953 990 # note: this loop will rarely go through multiple iterations, since
954 991 # it only traverses commits created during the current streaming
955 992 # pull operation.
956 993 #
957 994 # If this becomes a problem, using a binary search should cap the
958 995 # runtime of this.
959 996 n = n - 1
960 997 if n == 0:
961 998 # no data to send
962 999 return []
963 1000 index_size = n * index.entry_size
964 1001 data_size = self.end(n - 1)
965 1002
966 1003 # XXX we might have been split (or stripped) since the object
967 1004 # initialization. We need to close this race too, perhaps by having a
968 1005 # way to pre-open the files we feed to the revlog and never closing
969 1006 # them before we are done streaming.
970 1007
971 1008 if self._inline:
972 1009
973 1010 def get_stream():
974 1011 with self.opener(self._indexfile, mode=b"r") as fp:
975 1012 yield None
976 1013 size = index_size + data_size
977 1014 if size <= 65536:
978 1015 yield fp.read(size)
979 1016 else:
980 1017 yield from util.filechunkiter(fp, limit=size)
981 1018
982 1019 inline_stream = get_stream()
983 1020 next(inline_stream)
984 1021 return [
985 1022 (self._indexfile, inline_stream, index_size + data_size),
986 1023 ]
987 1024 elif force_inline:
988 1025
989 1026 def get_stream():
990 1027 with self.reading():
991 1028 yield None
992 1029
993 1030 for rev in range(n):
994 1031 idx = self.index.entry_binary(rev)
995 1032 if rev == 0 and self._docket is None:
996 1033 # re-inject the inline flag
997 1034 header = self._format_flags
998 1035 header |= self._format_version
999 1036 header |= FLAG_INLINE_DATA
1000 1037 header = self.index.pack_header(header)
1001 1038 idx = header + idx
1002 1039 yield idx
1003 yield self._getsegmentforrevs(rev, rev)[1]
1040 yield self._inner.get_segment_for_revs(rev, rev)[1]
1004 1041
1005 1042 inline_stream = get_stream()
1006 1043 next(inline_stream)
1007 1044 return [
1008 1045 (self._indexfile, inline_stream, index_size + data_size),
1009 1046 ]
1010 1047 else:
1011 1048
1012 1049 def get_index_stream():
1013 1050 with self.opener(self._indexfile, mode=b"r") as fp:
1014 1051 yield None
1015 1052 if index_size <= 65536:
1016 1053 yield fp.read(index_size)
1017 1054 else:
1018 1055 yield from util.filechunkiter(fp, limit=index_size)
1019 1056
1020 1057 def get_data_stream():
1021 1058 with self._datafp() as fp:
1022 1059 yield None
1023 1060 if data_size <= 65536:
1024 1061 yield fp.read(data_size)
1025 1062 else:
1026 1063 yield from util.filechunkiter(fp, limit=data_size)
1027 1064
1028 1065 index_stream = get_index_stream()
1029 1066 next(index_stream)
1030 1067 data_stream = get_data_stream()
1031 1068 next(data_stream)
1032 1069 return [
1033 1070 (self._datafile, data_stream, data_size),
1034 1071 (self._indexfile, index_stream, index_size),
1035 1072 ]
1036 1073
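# Consumption sketch (hypothetical stream-clone sender; ``rl`` is a revlog
# and ``out`` a writable file object). Each returned stream lazily yields
# raw bytes, so a sender only has to forward the chunks:
#
#   for name, stream, size in rl.get_streams(max_linkrev):
#       # ``size`` bytes will be produced for the store entry ``name``
#       for chunk in stream:
#           out.write(chunk)
#
# The generators defined above yield ``None`` once before any data;
# ``get_streams`` already consumes that first value, so iterating a
# returned stream produces data chunks only.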
1037 1074 def _loadindex(self, docket=None):
1038 1075
1039 1076 new_header, mmapindexthreshold, force_nodemap = self._init_opts()
1040 1077
1041 1078 if self.postfix is not None:
1042 1079 entry_point = b'%s.i.%s' % (self.radix, self.postfix)
1043 1080 elif self._trypending and self.opener.exists(b'%s.i.a' % self.radix):
1044 1081 entry_point = b'%s.i.a' % self.radix
1045 1082 elif self._try_split and self.opener.exists(self._split_index_file):
1046 1083 entry_point = self._split_index_file
1047 1084 else:
1048 1085 entry_point = b'%s.i' % self.radix
1049 1086
1050 1087 if docket is not None:
1051 1088 self._docket = docket
1052 1089 self._docket_file = entry_point
1053 1090 else:
1054 1091 self._initempty = True
1055 1092 entry_data = self._get_data(entry_point, mmapindexthreshold)
1056 1093 if len(entry_data) > 0:
1057 1094 header = INDEX_HEADER.unpack(entry_data[:4])[0]
1058 1095 self._initempty = False
1059 1096 else:
1060 1097 header = new_header
1061 1098
1062 1099 self._format_flags = header & ~0xFFFF
1063 1100 self._format_version = header & 0xFFFF
1064 1101
1065 1102 supported_flags = SUPPORTED_FLAGS.get(self._format_version)
1066 1103 if supported_flags is None:
1067 1104 msg = _(b'unknown version (%d) in revlog %s')
1068 1105 msg %= (self._format_version, self.display_id)
1069 1106 raise error.RevlogError(msg)
1070 1107 elif self._format_flags & ~supported_flags:
1071 1108 msg = _(b'unknown flags (%#04x) in version %d revlog %s')
1072 1109 display_flag = self._format_flags >> 16
1073 1110 msg %= (display_flag, self._format_version, self.display_id)
1074 1111 raise error.RevlogError(msg)
1075 1112
1076 1113 features = FEATURES_BY_VERSION[self._format_version]
1077 1114 self._inline = features[b'inline'](self._format_flags)
1078 1115 self.delta_config.general_delta = features[b'generaldelta'](
1079 1116 self._format_flags
1080 1117 )
1081 1118 self.feature_config.has_side_data = features[b'sidedata']
1082 1119
1083 1120 if not features[b'docket']:
1084 1121 self._indexfile = entry_point
1085 1122 index_data = entry_data
1086 1123 else:
1087 1124 self._docket_file = entry_point
1088 1125 if self._initempty:
1089 1126 self._docket = docketutil.default_docket(self, header)
1090 1127 else:
1091 1128 self._docket = docketutil.parse_docket(
1092 1129 self, entry_data, use_pending=self._trypending
1093 1130 )
1094 1131
1095 1132 if self._docket is not None:
1096 1133 self._indexfile = self._docket.index_filepath()
1097 1134 index_data = b''
1098 1135 index_size = self._docket.index_end
1099 1136 if index_size > 0:
1100 1137 index_data = self._get_data(
1101 1138 self._indexfile, mmapindexthreshold, size=index_size
1102 1139 )
1103 1140 if len(index_data) < index_size:
1104 1141 msg = _(b'too few index data for %s: got %d, expected %d')
1105 1142 msg %= (self.display_id, len(index_data), index_size)
1106 1143 raise error.RevlogError(msg)
1107 1144
1108 1145 self._inline = False
1109 1146 # generaldelta implied by version 2 revlogs.
1110 1147 self.delta_config.general_delta = True
1111 1148 # the logic for persistent nodemap will be dealt with within the
1112 1149 # main docket, so disable it for now.
1113 1150 self._nodemap_file = None
1114 1151
1115 1152 if self._docket is not None:
1116 1153 self._datafile = self._docket.data_filepath()
1117 1154 self._sidedatafile = self._docket.sidedata_filepath()
1118 1155 elif self.postfix is None:
1119 1156 self._datafile = b'%s.d' % self.radix
1120 1157 else:
1121 1158 self._datafile = b'%s.d.%s' % (self.radix, self.postfix)
1122 1159
1123 1160 self.nodeconstants = sha1nodeconstants
1124 1161 self.nullid = self.nodeconstants.nullid
1125 1162
1126 1163 # sparse-revlog can't be on without general-delta (issue6056)
1127 1164 if not self.delta_config.general_delta:
1128 1165 self.delta_config.sparse_revlog = False
1129 1166
1130 1167 self._storedeltachains = True
1131 1168
1132 1169 devel_nodemap = (
1133 1170 self._nodemap_file
1134 1171 and force_nodemap
1135 1172 and parse_index_v1_nodemap is not None
1136 1173 )
1137 1174
1138 1175 use_rust_index = False
1139 1176 if rustrevlog is not None:
1140 1177 if self._nodemap_file is not None:
1141 1178 use_rust_index = True
1142 1179 else:
1143 1180 use_rust_index = self.opener.options.get(b'rust.index')
1144 1181
1145 1182 self._parse_index = parse_index_v1
1146 1183 if self._format_version == REVLOGV0:
1147 1184 self._parse_index = revlogv0.parse_index_v0
1148 1185 elif self._format_version == REVLOGV2:
1149 1186 self._parse_index = parse_index_v2
1150 1187 elif self._format_version == CHANGELOGV2:
1151 1188 self._parse_index = parse_index_cl_v2
1152 1189 elif devel_nodemap:
1153 1190 self._parse_index = parse_index_v1_nodemap
1154 1191 elif use_rust_index:
1155 1192 self._parse_index = parse_index_v1_mixed
1156 1193 try:
1157 1194 d = self._parse_index(index_data, self._inline)
1158 1195 index, chunkcache = d
1159 1196 use_nodemap = (
1160 1197 not self._inline
1161 1198 and self._nodemap_file is not None
1162 1199 and hasattr(index, 'update_nodemap_data')
1163 1200 )
1164 1201 if use_nodemap:
1165 1202 nodemap_data = nodemaputil.persisted_data(self)
1166 1203 if nodemap_data is not None:
1167 1204 docket = nodemap_data[0]
1168 1205 if (
1169 1206 len(d[0]) > docket.tip_rev
1170 1207 and d[0][docket.tip_rev][7] == docket.tip_node
1171 1208 ):
1172 1209 # no changelog tampering
1173 1210 self._nodemap_docket = docket
1174 1211 index.update_nodemap_data(*nodemap_data)
1175 1212 except (ValueError, IndexError):
1176 1213 raise error.RevlogError(
1177 1214 _(b"index %s is corrupted") % self.display_id
1178 1215 )
1179 1216 self.index = index
1180 1217 # revnum -> (chain-length, sum-delta-length)
1181 1218 self._chaininfocache = util.lrucachedict(500)
1182 1219 # revlog header -> revlog compressor
1183 1220 self._decompressors = {}
1184 1221
1185 1222 return chunkcache
1186 1223
1187 1224 def _load_inner(self, chunk_cache):
1188 1225 self._inner = _InnerRevlog(
1189 1226 opener=self.opener,
1190 1227 index=self.index,
1191 1228 index_file=self._indexfile,
1192 1229 data_file=self._datafile,
1193 1230 sidedata_file=self._sidedatafile,
1194 1231 inline=self._inline,
1195 1232 data_config=self.data_config,
1196 1233 chunk_cache=chunk_cache,
1197 1234 )
1198 1235
1199 1236 def get_revlog(self):
1200 1237 """simple function to mirror API of other not-really-revlog API"""
1201 1238 return self
1202 1239
1203 1240 @util.propertycache
1204 1241 def revlog_kind(self):
1205 1242 return self.target[0]
1206 1243
1207 1244 @util.propertycache
1208 1245 def display_id(self):
1209 1246 """The public facing "ID" of the revlog that we use in message"""
1210 1247 if self.revlog_kind == KIND_FILELOG:
1211 1248 # Reference the file without the "data/" prefix, so it is familiar
1212 1249 # to the user.
1213 1250 return self.target[1]
1214 1251 else:
1215 1252 return self.radix
1216 1253
1217 1254 def _get_decompressor(self, t):
1218 1255 try:
1219 1256 compressor = self._decompressors[t]
1220 1257 except KeyError:
1221 1258 try:
1222 1259 engine = util.compengines.forrevlogheader(t)
1223 1260 compressor = engine.revlogcompressor(
1224 1261 self.feature_config.compression_engine_options
1225 1262 )
1226 1263 self._decompressors[t] = compressor
1227 1264 except KeyError:
1228 1265 raise error.RevlogError(
1229 1266 _(b'unknown compression type %s') % binascii.hexlify(t)
1230 1267 )
1231 1268 return compressor
1232 1269
1233 1270 @util.propertycache
1234 1271 def _compressor(self):
1235 1272 engine = util.compengines[self.feature_config.compression_engine]
1236 1273 return engine.revlogcompressor(
1237 1274 self.feature_config.compression_engine_options
1238 1275 )
1239 1276
1240 1277 @util.propertycache
1241 1278 def _decompressor(self):
1242 1279 """the default decompressor"""
1243 1280 if self._docket is None:
1244 1281 return None
1245 1282 t = self._docket.default_compression_header
1246 1283 c = self._get_decompressor(t)
1247 1284 return c.decompress
1248 1285
1249 1286 def _datafp(self, mode=b'r'):
1250 1287 """file object for the revlog's data file"""
1251 1288 return self.opener(self._datafile, mode=mode)
1252 1289
1253 1290 def tiprev(self):
1254 1291 return len(self.index) - 1
1255 1292
1256 1293 def tip(self):
1257 1294 return self.node(self.tiprev())
1258 1295
1259 1296 def __contains__(self, rev):
1260 1297 return 0 <= rev < len(self)
1261 1298
1262 1299 def __len__(self):
1263 1300 return len(self.index)
1264 1301
1265 1302 def __iter__(self):
1266 1303 return iter(range(len(self)))
1267 1304
1268 1305 def revs(self, start=0, stop=None):
1269 1306 """iterate over all rev in this revlog (from start to stop)"""
1270 1307 return storageutil.iterrevs(len(self), start=start, stop=stop)
1271 1308
1272 1309 def hasnode(self, node):
1273 1310 try:
1274 1311 self.rev(node)
1275 1312 return True
1276 1313 except KeyError:
1277 1314 return False
1278 1315
1279 1316 def _candelta(self, baserev, rev):
1280 1317 """whether two revisions (baserev, rev) can be delta-ed or not"""
1281 1318 # Disable delta if either rev requires a content-changing flag
1282 1319 # processor (ex. LFS). This is because such flag processor can alter
1283 1320 # the rawtext content that the delta will be based on, and two clients
1284 1321 # could have the same revlog node with different flags (i.e. different
1285 1322 # rawtext contents) and the delta could be incompatible.
1286 1323 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
1287 1324 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
1288 1325 ):
1289 1326 return False
1290 1327 return True
1291 1328
1292 1329 def update_caches(self, transaction):
1293 1330 """update on disk cache
1294 1331
1295 1332 If a transaction is passed, the update may be delayed to transaction
1296 1333 commit."""
1297 1334 if self._nodemap_file is not None:
1298 1335 if transaction is None:
1299 1336 nodemaputil.update_persistent_nodemap(self)
1300 1337 else:
1301 1338 nodemaputil.setup_persistent_nodemap(transaction, self)
1302 1339
1303 1340 def clearcaches(self):
1304 1341 """Clear in-memory caches"""
1305 1342 self._revisioncache = None
1306 1343 self._chainbasecache.clear()
1307 1344 self._inner._segmentfile.clear_cache()
1308 1345 self._inner._segmentfile_sidedata.clear_cache()
1309 1346 self._pcache = {}
1310 1347 self._nodemap_docket = None
1311 1348 self.index.clearcaches()
1312 1349 # The python code is the one responsible for validating the docket, we
1313 1350 # end up having to refresh it here.
1314 1351 use_nodemap = (
1315 1352 not self._inline
1316 1353 and self._nodemap_file is not None
1317 1354 and hasattr(self.index, 'update_nodemap_data')
1318 1355 )
1319 1356 if use_nodemap:
1320 1357 nodemap_data = nodemaputil.persisted_data(self)
1321 1358 if nodemap_data is not None:
1322 1359 self._nodemap_docket = nodemap_data[0]
1323 1360 self.index.update_nodemap_data(*nodemap_data)
1324 1361
1325 1362 def rev(self, node):
1326 1363 """return the revision number associated with a <nodeid>"""
1327 1364 try:
1328 1365 return self.index.rev(node)
1329 1366 except TypeError:
1330 1367 raise
1331 1368 except error.RevlogError:
1332 1369 # parsers.c radix tree lookup failed
1333 1370 if (
1334 1371 node == self.nodeconstants.wdirid
1335 1372 or node in self.nodeconstants.wdirfilenodeids
1336 1373 ):
1337 1374 raise error.WdirUnsupported
1338 1375 raise error.LookupError(node, self.display_id, _(b'no node'))
1339 1376
1340 1377 # Accessors for index entries.
1341 1378
1342 1379 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
1343 1380 # are flags.
1344 1381 def start(self, rev):
1345 1382 return int(self.index[rev][0] >> 16)
1346 1383
1347 1384 def sidedata_cut_off(self, rev):
1348 1385 sd_cut_off = self.index[rev][8]
1349 1386 if sd_cut_off != 0:
1350 1387 return sd_cut_off
1351 1388 # This is some annoying dance, because entries without sidedata
1352 1389 # currently use 0 as their offset. (instead of previous-offset +
1353 1390 # previous-size)
1354 1391 #
1355 1392 # We should reconsider this sidedata → 0 sidedata_offset policy.
1356 1393 # In the meantime, we need this.
1357 1394 while 0 <= rev:
1358 1395 e = self.index[rev]
1359 1396 if e[9] != 0:
1360 1397 return e[8] + e[9]
1361 1398 rev -= 1
1362 1399 return 0
1363 1400
1364 1401 def flags(self, rev):
1365 1402 return self.index[rev][0] & 0xFFFF
1366 1403
1367 1404 def length(self, rev):
1368 1405 return self.index[rev][1]
1369 1406
1370 1407 def sidedata_length(self, rev):
1371 1408 if not self.feature_config.has_side_data:
1372 1409 return 0
1373 1410 return self.index[rev][9]
1374 1411
1375 1412 def rawsize(self, rev):
1376 1413 """return the length of the uncompressed text for a given revision"""
1377 1414 l = self.index[rev][2]
1378 1415 if l >= 0:
1379 1416 return l
1380 1417
1381 1418 t = self.rawdata(rev)
1382 1419 return len(t)
1383 1420
1384 1421 def size(self, rev):
1385 1422 """length of non-raw text (processed by a "read" flag processor)"""
1386 1423 # fast path: if no "read" flag processor could change the content,
1387 1424 # size is rawsize. note: ELLIPSIS is known to not change the content.
1388 1425 flags = self.flags(rev)
1389 1426 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
1390 1427 return self.rawsize(rev)
1391 1428
1392 1429 return len(self.revision(rev))
1393 1430
1394 1431 def fast_rank(self, rev):
1395 1432 """Return the rank of a revision if already known, or None otherwise.
1396 1433
1397 1434 The rank of a revision is the size of the sub-graph it defines as a
1398 1435 head. Equivalently, the rank of a revision `r` is the size of the set
1399 1436 `ancestors(r)`, `r` included.
1400 1437
1401 1438 This method returns the rank retrieved from the revlog in constant
1402 1439 time. It makes no attempt at computing unknown values for versions of
1403 1440 the revlog which do not persist the rank.
1404 1441 """
1405 1442 rank = self.index[rev][ENTRY_RANK]
1406 1443 if self._format_version != CHANGELOGV2 or rank == RANK_UNKNOWN:
1407 1444 return None
1408 1445 if rev == nullrev:
1409 1446 return 0 # convention
1410 1447 return rank
1411 1448
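# Worked example (illustrative): in a linear history ``0 <- 1 <- 2``, the
# rank of revision 2 is 3, because ``ancestors(2)`` including 2 itself is
# {0, 1, 2}. For a merge whose two parents have disjoint ancestor sets of
# sizes a and b, the rank is a + b + 1. ``fast_rank`` only reports such
# values when the index already stores them (CHANGELOGV2 with a known
# rank); otherwise it returns None rather than computing them.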
1412 1449 def chainbase(self, rev):
1413 1450 base = self._chainbasecache.get(rev)
1414 1451 if base is not None:
1415 1452 return base
1416 1453
1417 1454 index = self.index
1418 1455 iterrev = rev
1419 1456 base = index[iterrev][3]
1420 1457 while base != iterrev:
1421 1458 iterrev = base
1422 1459 base = index[iterrev][3]
1423 1460
1424 1461 self._chainbasecache[rev] = base
1425 1462 return base
1426 1463
1427 1464 def linkrev(self, rev):
1428 1465 return self.index[rev][4]
1429 1466
1430 1467 def parentrevs(self, rev):
1431 1468 try:
1432 1469 entry = self.index[rev]
1433 1470 except IndexError:
1434 1471 if rev == wdirrev:
1435 1472 raise error.WdirUnsupported
1436 1473 raise
1437 1474
1438 1475 if self.feature_config.canonical_parent_order and entry[5] == nullrev:
1439 1476 return entry[6], entry[5]
1440 1477 else:
1441 1478 return entry[5], entry[6]
1442 1479
1443 1480 # fast parentrevs(rev) where rev isn't filtered
1444 1481 _uncheckedparentrevs = parentrevs
1445 1482
1446 1483 def node(self, rev):
1447 1484 try:
1448 1485 return self.index[rev][7]
1449 1486 except IndexError:
1450 1487 if rev == wdirrev:
1451 1488 raise error.WdirUnsupported
1452 1489 raise
1453 1490
1454 1491 # Derived from index values.
1455 1492
1456 1493 def end(self, rev):
1457 1494 return self.start(rev) + self.length(rev)
1458 1495
1459 1496 def parents(self, node):
1460 1497 i = self.index
1461 1498 d = i[self.rev(node)]
1462 1499 # inline node() to avoid function call overhead
1463 1500 if self.feature_config.canonical_parent_order and d[5] == self.nullid:
1464 1501 return i[d[6]][7], i[d[5]][7]
1465 1502 else:
1466 1503 return i[d[5]][7], i[d[6]][7]
1467 1504
1468 1505 def chainlen(self, rev):
1469 1506 return self._chaininfo(rev)[0]
1470 1507
1471 1508 def _chaininfo(self, rev):
1472 1509 chaininfocache = self._chaininfocache
1473 1510 if rev in chaininfocache:
1474 1511 return chaininfocache[rev]
1475 1512 index = self.index
1476 1513 generaldelta = self.delta_config.general_delta
1477 1514 iterrev = rev
1478 1515 e = index[iterrev]
1479 1516 clen = 0
1480 1517 compresseddeltalen = 0
1481 1518 while iterrev != e[3]:
1482 1519 clen += 1
1483 1520 compresseddeltalen += e[1]
1484 1521 if generaldelta:
1485 1522 iterrev = e[3]
1486 1523 else:
1487 1524 iterrev -= 1
1488 1525 if iterrev in chaininfocache:
1489 1526 t = chaininfocache[iterrev]
1490 1527 clen += t[0]
1491 1528 compresseddeltalen += t[1]
1492 1529 break
1493 1530 e = index[iterrev]
1494 1531 else:
1495 1532 # Add text length of base since decompressing that also takes
1496 1533 # work. For cache hits the length is already included.
1497 1534 compresseddeltalen += e[1]
1498 1535 r = (clen, compresseddeltalen)
1499 1536 chaininfocache[rev] = r
1500 1537 return r
1501 1538
1502 1539 def _deltachain(self, rev, stoprev=None):
1503 1540 """Obtain the delta chain for a revision.
1504 1541
1505 1542 ``stoprev`` specifies a revision to stop at. If not specified, we
1506 1543 stop at the base of the chain.
1507 1544
1508 1545 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
1509 1546 revs in ascending order and ``stopped`` is a bool indicating whether
1510 1547 ``stoprev`` was hit.
1511 1548 """
1512 1549 generaldelta = self.delta_config.general_delta
1513 1550 # Try C implementation.
1514 1551 try:
1515 1552 return self.index.deltachain(rev, stoprev, generaldelta)
1516 1553 except AttributeError:
1517 1554 pass
1518 1555
1519 1556 chain = []
1520 1557
1521 1558 # Alias to prevent attribute lookup in tight loop.
1522 1559 index = self.index
1523 1560
1524 1561 iterrev = rev
1525 1562 e = index[iterrev]
1526 1563 while iterrev != e[3] and iterrev != stoprev:
1527 1564 chain.append(iterrev)
1528 1565 if generaldelta:
1529 1566 iterrev = e[3]
1530 1567 else:
1531 1568 iterrev -= 1
1532 1569 e = index[iterrev]
1533 1570
1534 1571 if iterrev == stoprev:
1535 1572 stopped = True
1536 1573 else:
1537 1574 chain.append(iterrev)
1538 1575 stopped = False
1539 1576
1540 1577 chain.reverse()
1541 1578 return chain, stopped
1542 1579
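# Usage sketch (hypothetical caller): reconstructing a revision amounts to
# walking this chain and applying each delta in order, starting from the
# full text stored at the chain base:
#
#   chain, stopped = rl._deltachain(rev)
#   # chain is ascending, chain[-1] == rev, and chain[0] is the base
#   # (a full snapshot unless ``stopped`` is True)
#
# With general delta the base may be any earlier revision; without it the
# chain is simply a run of consecutive revision numbers ending at ``rev``.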
1543 1580 def ancestors(self, revs, stoprev=0, inclusive=False):
1544 1581 """Generate the ancestors of 'revs' in reverse revision order.
1545 1582 Does not generate revs lower than stoprev.
1546 1583
1547 1584 See the documentation for ancestor.lazyancestors for more details."""
1548 1585
1549 1586 # first, make sure start revisions aren't filtered
1550 1587 revs = list(revs)
1551 1588 checkrev = self.node
1552 1589 for r in revs:
1553 1590 checkrev(r)
1554 1591 # and we're sure ancestors aren't filtered as well
1555 1592
1556 1593 if rustancestor is not None and self.index.rust_ext_compat:
1557 1594 lazyancestors = rustancestor.LazyAncestors
1558 1595 arg = self.index
1559 1596 else:
1560 1597 lazyancestors = ancestor.lazyancestors
1561 1598 arg = self._uncheckedparentrevs
1562 1599 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
1563 1600
1564 1601 def descendants(self, revs):
1565 1602 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
1566 1603
1567 1604 def findcommonmissing(self, common=None, heads=None):
1568 1605 """Return a tuple of the ancestors of common and the ancestors of heads
1569 1606 that are not ancestors of common. In revset terminology, we return the
1570 1607 tuple:
1571 1608
1572 1609 ::common, (::heads) - (::common)
1573 1610
1574 1611 The list is sorted by revision number, meaning it is
1575 1612 topologically sorted.
1576 1613
1577 1614 'heads' and 'common' are both lists of node IDs. If heads is
1578 1615 not supplied, uses all of the revlog's heads. If common is not
1579 1616 supplied, uses nullid."""
1580 1617 if common is None:
1581 1618 common = [self.nullid]
1582 1619 if heads is None:
1583 1620 heads = self.heads()
1584 1621
1585 1622 common = [self.rev(n) for n in common]
1586 1623 heads = [self.rev(n) for n in heads]
1587 1624
1588 1625 # we want the ancestors, but inclusive
1589 1626 class lazyset:
1590 1627 def __init__(self, lazyvalues):
1591 1628 self.addedvalues = set()
1592 1629 self.lazyvalues = lazyvalues
1593 1630
1594 1631 def __contains__(self, value):
1595 1632 return value in self.addedvalues or value in self.lazyvalues
1596 1633
1597 1634 def __iter__(self):
1598 1635 added = self.addedvalues
1599 1636 for r in added:
1600 1637 yield r
1601 1638 for r in self.lazyvalues:
1602 1639 if not r in added:
1603 1640 yield r
1604 1641
1605 1642 def add(self, value):
1606 1643 self.addedvalues.add(value)
1607 1644
1608 1645 def update(self, values):
1609 1646 self.addedvalues.update(values)
1610 1647
1611 1648 has = lazyset(self.ancestors(common))
1612 1649 has.add(nullrev)
1613 1650 has.update(common)
1614 1651
1615 1652 # take all ancestors from heads that aren't in has
1616 1653 missing = set()
1617 1654 visit = collections.deque(r for r in heads if r not in has)
1618 1655 while visit:
1619 1656 r = visit.popleft()
1620 1657 if r in missing:
1621 1658 continue
1622 1659 else:
1623 1660 missing.add(r)
1624 1661 for p in self.parentrevs(r):
1625 1662 if p not in has:
1626 1663 visit.append(p)
1627 1664 missing = list(missing)
1628 1665 missing.sort()
1629 1666 return has, [self.node(miss) for miss in missing]
1630 1667
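# Worked example (illustrative): given the small graph
#
#   0 -- 1 -- 2 -- 4
#         \
#          3
#
# ``findcommonmissing(common=[node(1)], heads=[node(4)])`` returns the
# inclusive ancestor set of ``common`` ({nullrev, 0, 1}) and the nodes of
# revisions {2, 4}: everything reachable from the heads that is not an
# ancestor of ``common``. Revision 3 is left out because it is not an
# ancestor of any requested head.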
1631 1668 def incrementalmissingrevs(self, common=None):
1632 1669 """Return an object that can be used to incrementally compute the
1633 1670 revision numbers of the ancestors of arbitrary sets that are not
1634 1671 ancestors of common. This is an ancestor.incrementalmissingancestors
1635 1672 object.
1636 1673
1637 1674 'common' is a list of revision numbers. If common is not supplied, uses
1638 1675 nullrev.
1639 1676 """
1640 1677 if common is None:
1641 1678 common = [nullrev]
1642 1679
1643 1680 if rustancestor is not None and self.index.rust_ext_compat:
1644 1681 return rustancestor.MissingAncestors(self.index, common)
1645 1682 return ancestor.incrementalmissingancestors(self.parentrevs, common)
1646 1683
1647 1684 def findmissingrevs(self, common=None, heads=None):
1648 1685 """Return the revision numbers of the ancestors of heads that
1649 1686 are not ancestors of common.
1650 1687
1651 1688 More specifically, return a list of revision numbers corresponding to
1652 1689 nodes N such that every N satisfies the following constraints:
1653 1690
1654 1691 1. N is an ancestor of some node in 'heads'
1655 1692 2. N is not an ancestor of any node in 'common'
1656 1693
1657 1694 The list is sorted by revision number, meaning it is
1658 1695 topologically sorted.
1659 1696
1660 1697 'heads' and 'common' are both lists of revision numbers. If heads is
1661 1698 not supplied, uses all of the revlog's heads. If common is not
1662 1699 supplied, uses nullid."""
1663 1700 if common is None:
1664 1701 common = [nullrev]
1665 1702 if heads is None:
1666 1703 heads = self.headrevs()
1667 1704
1668 1705 inc = self.incrementalmissingrevs(common=common)
1669 1706 return inc.missingancestors(heads)
1670 1707
1671 1708 def findmissing(self, common=None, heads=None):
1672 1709 """Return the ancestors of heads that are not ancestors of common.
1673 1710
1674 1711 More specifically, return a list of nodes N such that every N
1675 1712 satisfies the following constraints:
1676 1713
1677 1714 1. N is an ancestor of some node in 'heads'
1678 1715 2. N is not an ancestor of any node in 'common'
1679 1716
1680 1717 The list is sorted by revision number, meaning it is
1681 1718 topologically sorted.
1682 1719
1683 1720 'heads' and 'common' are both lists of node IDs. If heads is
1684 1721 not supplied, uses all of the revlog's heads. If common is not
1685 1722 supplied, uses nullid."""
1686 1723 if common is None:
1687 1724 common = [self.nullid]
1688 1725 if heads is None:
1689 1726 heads = self.heads()
1690 1727
1691 1728 common = [self.rev(n) for n in common]
1692 1729 heads = [self.rev(n) for n in heads]
1693 1730
1694 1731 inc = self.incrementalmissingrevs(common=common)
1695 1732 return [self.node(r) for r in inc.missingancestors(heads)]
1696 1733
1697 1734 def nodesbetween(self, roots=None, heads=None):
1698 1735 """Return a topological path from 'roots' to 'heads'.
1699 1736
1700 1737 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1701 1738 topologically sorted list of all nodes N that satisfy both of
1702 1739 these constraints:
1703 1740
1704 1741 1. N is a descendant of some node in 'roots'
1705 1742 2. N is an ancestor of some node in 'heads'
1706 1743
1707 1744 Every node is considered to be both a descendant and an ancestor
1708 1745 of itself, so every reachable node in 'roots' and 'heads' will be
1709 1746 included in 'nodes'.
1710 1747
1711 1748 'outroots' is the list of reachable nodes in 'roots', i.e., the
1712 1749 subset of 'roots' that is returned in 'nodes'. Likewise,
1713 1750 'outheads' is the subset of 'heads' that is also in 'nodes'.
1714 1751
1715 1752 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1716 1753 unspecified, uses nullid as the only root. If 'heads' is
1717 1754 unspecified, uses the list of all of the revlog's heads."""
1718 1755 nonodes = ([], [], [])
1719 1756 if roots is not None:
1720 1757 roots = list(roots)
1721 1758 if not roots:
1722 1759 return nonodes
1723 1760 lowestrev = min([self.rev(n) for n in roots])
1724 1761 else:
1725 1762 roots = [self.nullid] # Everybody's a descendant of nullid
1726 1763 lowestrev = nullrev
1727 1764 if (lowestrev == nullrev) and (heads is None):
1728 1765 # We want _all_ the nodes!
1729 1766 return (
1730 1767 [self.node(r) for r in self],
1731 1768 [self.nullid],
1732 1769 list(self.heads()),
1733 1770 )
1734 1771 if heads is None:
1735 1772 # All nodes are ancestors, so the latest ancestor is the last
1736 1773 # node.
1737 1774 highestrev = len(self) - 1
1738 1775 # Set ancestors to None to signal that every node is an ancestor.
1739 1776 ancestors = None
1740 1777 # Set heads to an empty dictionary for later discovery of heads
1741 1778 heads = {}
1742 1779 else:
1743 1780 heads = list(heads)
1744 1781 if not heads:
1745 1782 return nonodes
1746 1783 ancestors = set()
1747 1784 # Turn heads into a dictionary so we can remove 'fake' heads.
1748 1785 # Also, later we will be using it to filter out the heads we can't
1749 1786 # find from roots.
1750 1787 heads = dict.fromkeys(heads, False)
1751 1788 # Start at the top and keep marking parents until we're done.
1752 1789 nodestotag = set(heads)
1753 1790 # Remember where the top was so we can use it as a limit later.
1754 1791 highestrev = max([self.rev(n) for n in nodestotag])
1755 1792 while nodestotag:
1756 1793 # grab a node to tag
1757 1794 n = nodestotag.pop()
1758 1795 # Never tag nullid
1759 1796 if n == self.nullid:
1760 1797 continue
1761 1798 # A node's revision number represents its place in a
1762 1799 # topologically sorted list of nodes.
1763 1800 r = self.rev(n)
1764 1801 if r >= lowestrev:
1765 1802 if n not in ancestors:
1766 1803 # If we are possibly a descendant of one of the roots
1767 1804 # and we haven't already been marked as an ancestor
1768 1805 ancestors.add(n) # Mark as ancestor
1769 1806 # Add non-nullid parents to list of nodes to tag.
1770 1807 nodestotag.update(
1771 1808 [p for p in self.parents(n) if p != self.nullid]
1772 1809 )
1773 1810 elif n in heads: # We've seen it before, is it a fake head?
1774 1811 # So it is, real heads should not be the ancestors of
1775 1812 # any other heads.
1776 1813 heads.pop(n)
1777 1814 if not ancestors:
1778 1815 return nonodes
1779 1816 # Now that we have our set of ancestors, we want to remove any
1780 1817 # roots that are not ancestors.
1781 1818
1782 1819 # If one of the roots was nullid, everything is included anyway.
1783 1820 if lowestrev > nullrev:
1784 1821 # But, since we weren't, let's recompute the lowest rev to not
1785 1822 # include roots that aren't ancestors.
1786 1823
1787 1824 # Filter out roots that aren't ancestors of heads
1788 1825 roots = [root for root in roots if root in ancestors]
1789 1826 # Recompute the lowest revision
1790 1827 if roots:
1791 1828 lowestrev = min([self.rev(root) for root in roots])
1792 1829 else:
1793 1830 # No more roots? Return empty list
1794 1831 return nonodes
1795 1832 else:
1796 1833 # We are descending from nullid, and don't need to care about
1797 1834 # any other roots.
1798 1835 lowestrev = nullrev
1799 1836 roots = [self.nullid]
1800 1837 # Transform our roots list into a set.
1801 1838 descendants = set(roots)
1802 1839 # Also, keep the original roots so we can filter out roots that aren't
1803 1840 # 'real' roots (i.e. are descended from other roots).
1804 1841 roots = descendants.copy()
1805 1842 # Our topologically sorted list of output nodes.
1806 1843 orderedout = []
1807 1844 # Don't start at nullid since we don't want nullid in our output list,
1808 1845 # and if nullid shows up in descendants, empty parents will look like
1809 1846 # they're descendants.
1810 1847 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1811 1848 n = self.node(r)
1812 1849 isdescendant = False
1813 1850 if lowestrev == nullrev: # Everybody is a descendant of nullid
1814 1851 isdescendant = True
1815 1852 elif n in descendants:
1816 1853 # n is already a descendant
1817 1854 isdescendant = True
1818 1855 # This check only needs to be done here because all the roots
1819 1856 # will start being marked as descendants before the loop.
1820 1857 if n in roots:
1821 1858 # If n was a root, check if it's a 'real' root.
1822 1859 p = tuple(self.parents(n))
1823 1860 # If any of its parents are descendants, it's not a root.
1824 1861 if (p[0] in descendants) or (p[1] in descendants):
1825 1862 roots.remove(n)
1826 1863 else:
1827 1864 p = tuple(self.parents(n))
1828 1865 # A node is a descendant if either of its parents is a
1829 1866 # descendant. (We seeded the descendants set with the roots
1830 1867 # up there, remember?)
1831 1868 if (p[0] in descendants) or (p[1] in descendants):
1832 1869 descendants.add(n)
1833 1870 isdescendant = True
1834 1871 if isdescendant and ((ancestors is None) or (n in ancestors)):
1835 1872 # Only include nodes that are both descendants and ancestors.
1836 1873 orderedout.append(n)
1837 1874 if (ancestors is not None) and (n in heads):
1838 1875 # We're trying to figure out which heads are reachable
1839 1876 # from roots.
1840 1877 # Mark this head as having been reached
1841 1878 heads[n] = True
1842 1879 elif ancestors is None:
1843 1880 # Otherwise, we're trying to discover the heads.
1844 1881 # Assume this is a head because if it isn't, the next step
1845 1882 # will eventually remove it.
1846 1883 heads[n] = True
1847 1884 # But, obviously its parents aren't.
1848 1885 for p in self.parents(n):
1849 1886 heads.pop(p, None)
1850 1887 heads = [head for head, flag in heads.items() if flag]
1851 1888 roots = list(roots)
1852 1889 assert orderedout
1853 1890 assert roots
1854 1891 assert heads
1855 1892 return (orderedout, roots, heads)
1856 1893
1857 1894 def headrevs(self, revs=None):
1858 1895 if revs is None:
1859 1896 try:
1860 1897 return self.index.headrevs()
1861 1898 except AttributeError:
1862 1899 return self._headrevs()
1863 1900 if rustdagop is not None and self.index.rust_ext_compat:
1864 1901 return rustdagop.headrevs(self.index, revs)
1865 1902 return dagop.headrevs(revs, self._uncheckedparentrevs)
1866 1903
1867 1904 def computephases(self, roots):
1868 1905 return self.index.computephasesmapsets(roots)
1869 1906
1870 1907 def _headrevs(self):
1871 1908 count = len(self)
1872 1909 if not count:
1873 1910 return [nullrev]
1874 1911 # we won't iter over filtered revs so nobody is a head at start
1875 1912 ishead = [0] * (count + 1)
1876 1913 index = self.index
1877 1914 for r in self:
1878 1915 ishead[r] = 1 # I may be a head
1879 1916 e = index[r]
1880 1917 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1881 1918 return [r for r, val in enumerate(ishead) if val]
1882 1919
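# Worked example (illustrative): for the graph ``0 <- 1 <- 2`` with an
# extra branch ``1 <- 3``, the pure-python fallback above marks each rev
# as a potential head and immediately clears the flag of its parents.
# Since parents always have smaller numbers, a flag survives only if no
# later revision names that rev as a parent:
#
#   rev 1 clears ishead[0]; revs 2 and 3 both clear ishead[1]
#   result: [2, 3]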
1883 1920 def heads(self, start=None, stop=None):
1884 1921 """return the list of all nodes that have no children
1885 1922
1886 1923 if start is specified, only heads that are descendants of
1887 1924 start will be returned
1888 1925 if stop is specified, it will consider all the revs from stop
1889 1926 as if they had no children
1890 1927 """
1891 1928 if start is None and stop is None:
1892 1929 if not len(self):
1893 1930 return [self.nullid]
1894 1931 return [self.node(r) for r in self.headrevs()]
1895 1932
1896 1933 if start is None:
1897 1934 start = nullrev
1898 1935 else:
1899 1936 start = self.rev(start)
1900 1937
1901 1938 stoprevs = {self.rev(n) for n in stop or []}
1902 1939
1903 1940 revs = dagop.headrevssubset(
1904 1941 self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
1905 1942 )
1906 1943
1907 1944 return [self.node(rev) for rev in revs]
1908 1945
1909 1946 def children(self, node):
1910 1947 """find the children of a given node"""
1911 1948 c = []
1912 1949 p = self.rev(node)
1913 1950 for r in self.revs(start=p + 1):
1914 1951 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1915 1952 if prevs:
1916 1953 for pr in prevs:
1917 1954 if pr == p:
1918 1955 c.append(self.node(r))
1919 1956 elif p == nullrev:
1920 1957 c.append(self.node(r))
1921 1958 return c
1922 1959
1923 1960 def commonancestorsheads(self, a, b):
1924 1961 """calculate all the heads of the common ancestors of nodes a and b"""
1925 1962 a, b = self.rev(a), self.rev(b)
1926 1963 ancs = self._commonancestorsheads(a, b)
1927 1964 return pycompat.maplist(self.node, ancs)
1928 1965
1929 1966 def _commonancestorsheads(self, *revs):
1930 1967 """calculate all the heads of the common ancestors of revs"""
1931 1968 try:
1932 1969 ancs = self.index.commonancestorsheads(*revs)
1933 1970 except (AttributeError, OverflowError): # C implementation failed
1934 1971 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1935 1972 return ancs
1936 1973
1937 1974 def isancestor(self, a, b):
1938 1975 """return True if node a is an ancestor of node b
1939 1976
1940 1977 A revision is considered an ancestor of itself."""
1941 1978 a, b = self.rev(a), self.rev(b)
1942 1979 return self.isancestorrev(a, b)
1943 1980
1944 1981 def isancestorrev(self, a, b):
1945 1982 """return True if revision a is an ancestor of revision b
1946 1983
1947 1984 A revision is considered an ancestor of itself.
1948 1985
1949 1986 The implementation of this is trivial but the use of
1950 1987 reachableroots is not."""
1951 1988 if a == nullrev:
1952 1989 return True
1953 1990 elif a == b:
1954 1991 return True
1955 1992 elif a > b:
1956 1993 return False
1957 1994 return bool(self.reachableroots(a, [b], [a], includepath=False))
1958 1995
1959 1996 def reachableroots(self, minroot, heads, roots, includepath=False):
1960 1997 """return (heads(::(<roots> and <roots>::<heads>)))
1961 1998
1962 1999 If includepath is True, return (<roots>::<heads>)."""
1963 2000 try:
1964 2001 return self.index.reachableroots2(
1965 2002 minroot, heads, roots, includepath
1966 2003 )
1967 2004 except AttributeError:
1968 2005 return dagop._reachablerootspure(
1969 2006 self.parentrevs, minroot, roots, heads, includepath
1970 2007 )
1971 2008
1972 2009 def ancestor(self, a, b):
1973 2010 """calculate the "best" common ancestor of nodes a and b"""
1974 2011
1975 2012 a, b = self.rev(a), self.rev(b)
1976 2013 try:
1977 2014 ancs = self.index.ancestors(a, b)
1978 2015 except (AttributeError, OverflowError):
1979 2016 ancs = ancestor.ancestors(self.parentrevs, a, b)
1980 2017 if ancs:
1981 2018 # choose a consistent winner when there's a tie
1982 2019 return min(map(self.node, ancs))
1983 2020 return self.nullid
1984 2021
1985 2022 def _match(self, id):
1986 2023 if isinstance(id, int):
1987 2024 # rev
1988 2025 return self.node(id)
1989 2026 if len(id) == self.nodeconstants.nodelen:
1990 2027 # possibly a binary node
1991 2028 # odds of a binary node being all hex in ASCII are 1 in 10**25
1992 2029 try:
1993 2030 node = id
1994 2031 self.rev(node) # quick search the index
1995 2032 return node
1996 2033 except error.LookupError:
1997 2034 pass # may be partial hex id
1998 2035 try:
1999 2036 # str(rev)
2000 2037 rev = int(id)
2001 2038 if b"%d" % rev != id:
2002 2039 raise ValueError
2003 2040 if rev < 0:
2004 2041 rev = len(self) + rev
2005 2042 if rev < 0 or rev >= len(self):
2006 2043 raise ValueError
2007 2044 return self.node(rev)
2008 2045 except (ValueError, OverflowError):
2009 2046 pass
2010 2047 if len(id) == 2 * self.nodeconstants.nodelen:
2011 2048 try:
2012 2049 # a full hex nodeid?
2013 2050 node = bin(id)
2014 2051 self.rev(node)
2015 2052 return node
2016 2053 except (binascii.Error, error.LookupError):
2017 2054 pass
2018 2055
2019 2056 def _partialmatch(self, id):
2021 2058 # we don't care about wdirfilenodeids as they should always be full hashes
2021 2058 maybewdir = self.nodeconstants.wdirhex.startswith(id)
2022 2059 ambiguous = False
2023 2060 try:
2024 2061 partial = self.index.partialmatch(id)
2025 2062 if partial and self.hasnode(partial):
2026 2063 if maybewdir:
2027 2064 # single 'ff...' match in radix tree, ambiguous with wdir
2028 2065 ambiguous = True
2029 2066 else:
2030 2067 return partial
2031 2068 elif maybewdir:
2032 2069 # no 'ff...' match in radix tree, wdir identified
2033 2070 raise error.WdirUnsupported
2034 2071 else:
2035 2072 return None
2036 2073 except error.RevlogError:
2037 2074 # parsers.c radix tree lookup gave multiple matches
2038 2075 # fast path: for unfiltered changelog, radix tree is accurate
2039 2076 if not getattr(self, 'filteredrevs', None):
2040 2077 ambiguous = True
2041 2078 # fall through to slow path that filters hidden revisions
2042 2079 except (AttributeError, ValueError):
2043 2080 # we are pure python, or key is not hex
2044 2081 pass
2045 2082 if ambiguous:
2046 2083 raise error.AmbiguousPrefixLookupError(
2047 2084 id, self.display_id, _(b'ambiguous identifier')
2048 2085 )
2049 2086
2050 2087 if id in self._pcache:
2051 2088 return self._pcache[id]
2052 2089
2053 2090 if len(id) <= 40:
2054 2091 # hex(node)[:...]
2055 2092 l = len(id) // 2 * 2 # grab an even number of digits
2056 2093 try:
2057 2094 # we're dropping the last digit, so let's check that it's hex,
2058 2095 # to avoid the expensive computation below if it's not
2059 2096 if len(id) % 2 > 0:
2060 2097 if not (id[-1] in hexdigits):
2061 2098 return None
2062 2099 prefix = bin(id[:l])
2063 2100 except binascii.Error:
2064 2101 pass
2065 2102 else:
2066 2103 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
2067 2104 nl = [
2068 2105 n for n in nl if hex(n).startswith(id) and self.hasnode(n)
2069 2106 ]
2070 2107 if self.nodeconstants.nullhex.startswith(id):
2071 2108 nl.append(self.nullid)
2072 2109 if len(nl) > 0:
2073 2110 if len(nl) == 1 and not maybewdir:
2074 2111 self._pcache[id] = nl[0]
2075 2112 return nl[0]
2076 2113 raise error.AmbiguousPrefixLookupError(
2077 2114 id, self.display_id, _(b'ambiguous identifier')
2078 2115 )
2079 2116 if maybewdir:
2080 2117 raise error.WdirUnsupported
2081 2118 return None
2082 2119
2083 2120 def lookup(self, id):
2084 2121 """locate a node based on:
2085 2122 - revision number or str(revision number)
2086 2123 - nodeid or subset of hex nodeid
2087 2124 """
2088 2125 n = self._match(id)
2089 2126 if n is not None:
2090 2127 return n
2091 2128 n = self._partialmatch(id)
2092 2129 if n:
2093 2130 return n
2094 2131
2095 2132 raise error.LookupError(id, self.display_id, _(b'no match found'))
2096 2133
2097 2134 def shortest(self, node, minlength=1):
2098 2135 """Find the shortest unambiguous prefix that matches node."""
2099 2136
2100 2137 def isvalid(prefix):
2101 2138 try:
2102 2139 matchednode = self._partialmatch(prefix)
2103 2140 except error.AmbiguousPrefixLookupError:
2104 2141 return False
2105 2142 except error.WdirUnsupported:
2106 2143 # single 'ff...' match
2107 2144 return True
2108 2145 if matchednode is None:
2109 2146 raise error.LookupError(node, self.display_id, _(b'no node'))
2110 2147 return True
2111 2148
2112 2149 def maybewdir(prefix):
2113 2150 return all(c == b'f' for c in pycompat.iterbytestr(prefix))
2114 2151
2115 2152 hexnode = hex(node)
2116 2153
2117 2154 def disambiguate(hexnode, minlength):
2118 2155 """Disambiguate against wdirid."""
2119 2156 for length in range(minlength, len(hexnode) + 1):
2120 2157 prefix = hexnode[:length]
2121 2158 if not maybewdir(prefix):
2122 2159 return prefix
2123 2160
2124 2161 if not getattr(self, 'filteredrevs', None):
2125 2162 try:
2126 2163 length = max(self.index.shortest(node), minlength)
2127 2164 return disambiguate(hexnode, length)
2128 2165 except error.RevlogError:
2129 2166 if node != self.nodeconstants.wdirid:
2130 2167 raise error.LookupError(
2131 2168 node, self.display_id, _(b'no node')
2132 2169 )
2133 2170 except AttributeError:
2134 2171 # Fall through to pure code
2135 2172 pass
2136 2173
2137 2174 if node == self.nodeconstants.wdirid:
2138 2175 for length in range(minlength, len(hexnode) + 1):
2139 2176 prefix = hexnode[:length]
2140 2177 if isvalid(prefix):
2141 2178 return prefix
2142 2179
2143 2180 for length in range(minlength, len(hexnode) + 1):
2144 2181 prefix = hexnode[:length]
2145 2182 if isvalid(prefix):
2146 2183 return disambiguate(hexnode, length)
2147 2184
2148 2185 def cmp(self, node, text):
2149 2186 """compare text with a given file revision
2150 2187
2151 2188 returns True if text is different than what is stored.
2152 2189 """
2153 2190 p1, p2 = self.parents(node)
2154 2191 return storageutil.hashrevisionsha1(text, p1, p2) != node
2155 2192
2156 def _getsegmentforrevs(self, startrev, endrev):
2157 """Obtain a segment of raw data corresponding to a range of revisions.
2158
2159 Accepts the start and end revisions and an optional already-open
2160 file handle to be used for reading. If the file handle is read, its
2161 seek position will not be preserved.
2162
2163 Requests for data may be satisfied by a cache.
2164
2165 Returns a 2-tuple of (offset, data) for the requested range of
2166 revisions. Offset is the integer offset from the beginning of the
2167 revlog and data is a str or buffer of the raw byte data.
2168
2169 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
2170 to determine where each revision's data begins and ends.
2171 """
2172 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
2173 # (functions are expensive).
2174 index = self.index
2175 istart = index[startrev]
2176 start = int(istart[0] >> 16)
2177 if startrev == endrev:
2178 end = start + istart[1]
2179 else:
2180 iend = index[endrev]
2181 end = int(iend[0] >> 16) + iend[1]
2182
2183 if self._inline:
2184 start += (startrev + 1) * self.index.entry_size
2185 end += (endrev + 1) * self.index.entry_size
2186 length = end - start
2187
2188 return start, self._inner._segmentfile.read_chunk(start, length)
2189
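# Not part of this change: a sketch of how a caller consumes the
# (offset, data) pair returned by the relocated get_segment_for_revs API,
# assuming a non-inline revlog and an ascending, gap-free list of revisions.
# Each per-revision chunk is a slice of the shared segment, located with the
# existing start()/length() accessors, mirroring what _chunks does below.
def iter_raw_chunks(rl, revs):
    offset, data = rl._inner.get_segment_for_revs(revs[0], revs[-1])
    for rev in revs:
        begin = rl.start(rev) - offset
        yield bytes(data[begin : begin + rl.length(rev)])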
2190 2193 def _chunk(self, rev):
2191 2194 """Obtain a single decompressed chunk for a revision.
2192 2195
2193 2196         Accepts an integer revision.
2196 2199
2197 2200 Returns a str holding uncompressed data for the requested revision.
2198 2201 """
2199 2202 compression_mode = self.index[rev][10]
2200 data = self._getsegmentforrevs(rev, rev)[1]
2203 data = self._inner.get_segment_for_revs(rev, rev)[1]
2201 2204 if compression_mode == COMP_MODE_PLAIN:
2202 2205 return data
2203 2206 elif compression_mode == COMP_MODE_DEFAULT:
2204 2207 return self._decompressor(data)
2205 2208 elif compression_mode == COMP_MODE_INLINE:
2206 2209 return self.decompress(data)
2207 2210 else:
2208 2211 msg = b'unknown compression mode %d'
2209 2212 msg %= compression_mode
2210 2213 raise error.RevlogError(msg)
2211 2214
2212 2215 def _chunks(self, revs, targetsize=None):
2213 2216 """Obtain decompressed chunks for the specified revisions.
2214 2217
2215 2218         Accepts an iterable of numeric revisions that are assumed to be in
2216 2219         ascending order.
2219 2222
2220 2223 This function is similar to calling ``self._chunk()`` multiple times,
2221 2224 but is faster.
2222 2225
2223 2226 Returns a list with decompressed data for each requested revision.
2224 2227 """
2225 2228 if not revs:
2226 2229 return []
2227 2230 start = self.start
2228 2231 length = self.length
2229 2232 inline = self._inline
2230 2233 iosize = self.index.entry_size
2231 2234 buffer = util.buffer
2232 2235
2233 2236 l = []
2234 2237 ladd = l.append
2235 2238
2236 2239 if not self.data_config.with_sparse_read:
2237 2240 slicedchunks = (revs,)
2238 2241 else:
2239 2242 slicedchunks = deltautil.slicechunk(
2240 2243 self, revs, targetsize=targetsize
2241 2244 )
2242 2245
2243 2246 for revschunk in slicedchunks:
2244 2247 firstrev = revschunk[0]
2245 2248 # Skip trailing revisions with empty diff
2246 2249 for lastrev in revschunk[::-1]:
2247 2250 if length(lastrev) != 0:
2248 2251 break
2249 2252
2250 2253 try:
2251 offset, data = self._getsegmentforrevs(firstrev, lastrev)
2254 offset, data = self._inner.get_segment_for_revs(
2255 firstrev,
2256 lastrev,
2257 )
2252 2258 except OverflowError:
2253 2259 # issue4215 - we can't cache a run of chunks greater than
2254 2260 # 2G on Windows
2255 2261 return [self._chunk(rev) for rev in revschunk]
2256 2262
2257 2263 decomp = self.decompress
2258 2264 # self._decompressor might be None, but will not be used in that case
2259 2265 def_decomp = self._decompressor
2260 2266 for rev in revschunk:
2261 2267 chunkstart = start(rev)
2262 2268 if inline:
2263 2269 chunkstart += (rev + 1) * iosize
2264 2270 chunklength = length(rev)
2265 2271 comp_mode = self.index[rev][10]
2266 2272 c = buffer(data, chunkstart - offset, chunklength)
2267 2273 if comp_mode == COMP_MODE_PLAIN:
2268 2274 ladd(c)
2269 2275 elif comp_mode == COMP_MODE_INLINE:
2270 2276 ladd(decomp(c))
2271 2277 elif comp_mode == COMP_MODE_DEFAULT:
2272 2278 ladd(def_decomp(c))
2273 2279 else:
2274 2280 msg = b'unknown compression mode %d'
2275 2281 msg %= comp_mode
2276 2282 raise error.RevlogError(msg)
2277 2283
2278 2284 return l
2279 2285
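# Illustrative helper (an assumption, not code from this change) restating
# the inline-offset adjustment used in _chunks above: inline revlogs
# interleave index entries with data, so a revision's data starts after the
# (rev + 1) index entries that precede it.
def physical_data_offset(rl, rev):
    offset = rl.start(rev)
    if rl._inline:
        offset += (rev + 1) * rl.index.entry_size
    return offset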
2280 2286 def deltaparent(self, rev):
2281 2287 """return deltaparent of the given revision"""
2282 2288 base = self.index[rev][3]
2283 2289 if base == rev:
2284 2290 return nullrev
2285 2291 elif self.delta_config.general_delta:
2286 2292 return base
2287 2293 else:
2288 2294 return rev - 1
2289 2295
2290 2296 def issnapshot(self, rev):
2291 2297 """tells whether rev is a snapshot"""
2292 2298 if not self.delta_config.sparse_revlog:
2293 2299 return self.deltaparent(rev) == nullrev
2294 2300 elif hasattr(self.index, 'issnapshot'):
2295 2301 # directly assign the method to cache the testing and access
2296 2302 self.issnapshot = self.index.issnapshot
2297 2303 return self.issnapshot(rev)
2298 2304 if rev == nullrev:
2299 2305 return True
2300 2306 entry = self.index[rev]
2301 2307 base = entry[3]
2302 2308 if base == rev:
2303 2309 return True
2304 2310 if base == nullrev:
2305 2311 return True
2306 2312 p1 = entry[5]
2307 2313 while self.length(p1) == 0:
2308 2314 b = self.deltaparent(p1)
2309 2315 if b == p1:
2310 2316 break
2311 2317 p1 = b
2312 2318 p2 = entry[6]
2313 2319 while self.length(p2) == 0:
2314 2320 b = self.deltaparent(p2)
2315 2321 if b == p2:
2316 2322 break
2317 2323 p2 = b
2318 2324 if base == p1 or base == p2:
2319 2325 return False
2320 2326 return self.issnapshot(base)
2321 2327
2322 2328 def snapshotdepth(self, rev):
2323 2329         """number of snapshots in the chain before this one"""
2324 2330         if not self.issnapshot(rev):
2325 2331             raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
2326 2332 return len(self._deltachain(rev)[0]) - 1
2327 2333
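# Hedged sketch combining the predicates above; `describe_storage` is a
# made-up helper name and `rl` is any revlog instance.
def describe_storage(rl, rev):
    if rl.issnapshot(rev):
        # snapshotdepth only accepts snapshot revisions, hence the guard
        return b'snapshot at depth %d' % rl.snapshotdepth(rev)
    return b'delta against r%d' % rl.deltaparent(rev)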
2328 2334 def revdiff(self, rev1, rev2):
2329 2335 """return or calculate a delta between two revisions
2330 2336
2331 2337 The delta calculated is in binary form and is intended to be written to
2332 2338 revlog data directly. So this function needs raw revision data.
2333 2339 """
2334 2340 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
2335 2341 return bytes(self._chunk(rev2))
2336 2342
2337 2343 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
2338 2344
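# A minimal round-trip sketch (an assumption, not part of this change): the
# binary delta produced by revdiff can be re-applied with mdiff.patches, the
# same routine _rawtext uses below, to rebuild the second revision.
def check_revdiff_roundtrip(rl, rev1, rev2):
    delta = rl.revdiff(rev1, rev2)
    rebuilt = mdiff.patches(rl.rawdata(rev1), [delta])
    return rebuilt == rl.rawdata(rev2)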
2339 2345 def revision(self, nodeorrev):
2340 2346 """return an uncompressed revision of a given node or revision
2341 2347 number.
2342 2348 """
2343 2349 return self._revisiondata(nodeorrev)
2344 2350
2345 2351 def sidedata(self, nodeorrev):
2346 2352 """a map of extra data related to the changeset but not part of the hash
2347 2353
2348 2354         This function currently returns a dictionary. However, more
2349 2355         advanced mapping objects will likely be used in the future for more
2350 2356         efficient/lazy code.
2351 2357 """
2352 2358 # deal with <nodeorrev> argument type
2353 2359 if isinstance(nodeorrev, int):
2354 2360 rev = nodeorrev
2355 2361 else:
2356 2362 rev = self.rev(nodeorrev)
2357 2363 return self._sidedata(rev)
2358 2364
2359 2365 def _revisiondata(self, nodeorrev, raw=False):
2360 2366 # deal with <nodeorrev> argument type
2361 2367 if isinstance(nodeorrev, int):
2362 2368 rev = nodeorrev
2363 2369 node = self.node(rev)
2364 2370 else:
2365 2371 node = nodeorrev
2366 2372 rev = None
2367 2373
2368 2374 # fast path the special `nullid` rev
2369 2375 if node == self.nullid:
2370 2376 return b""
2371 2377
2372 2378 # ``rawtext`` is the text as stored inside the revlog. Might be the
2373 2379 # revision or might need to be processed to retrieve the revision.
2374 2380 rev, rawtext, validated = self._rawtext(node, rev)
2375 2381
2376 2382 if raw and validated:
2377 2383             # if we don't want to process the raw text and the raw
2378 2384             # text is already cached, we can exit early.
2379 2385 return rawtext
2380 2386 if rev is None:
2381 2387 rev = self.rev(node)
2382 2388         # the revlog's flags for this revision
2383 2389         # (these usually alter its state or content)
2384 2390 flags = self.flags(rev)
2385 2391
2386 2392 if validated and flags == REVIDX_DEFAULT_FLAGS:
2387 2393 # no extra flags set, no flag processor runs, text = rawtext
2388 2394 return rawtext
2389 2395
2390 2396 if raw:
2391 2397 validatehash = flagutil.processflagsraw(self, rawtext, flags)
2392 2398 text = rawtext
2393 2399 else:
2394 2400 r = flagutil.processflagsread(self, rawtext, flags)
2395 2401 text, validatehash = r
2396 2402 if validatehash:
2397 2403 self.checkhash(text, node, rev=rev)
2398 2404 if not validated:
2399 2405 self._revisioncache = (node, rev, rawtext)
2400 2406
2401 2407 return text
2402 2408
2403 2409 def _rawtext(self, node, rev):
2404 2410 """return the possibly unvalidated rawtext for a revision
2405 2411
2406 2412 returns (rev, rawtext, validated)
2407 2413 """
2408 2414
2409 2415 # revision in the cache (could be useful to apply delta)
2410 2416 cachedrev = None
2411 2417 # An intermediate text to apply deltas to
2412 2418 basetext = None
2413 2419
2414 2420 # Check if we have the entry in cache
2415 2421 # The cache entry looks like (node, rev, rawtext)
2416 2422 if self._revisioncache:
2417 2423 if self._revisioncache[0] == node:
2418 2424 return (rev, self._revisioncache[2], True)
2419 2425 cachedrev = self._revisioncache[1]
2420 2426
2421 2427 if rev is None:
2422 2428 rev = self.rev(node)
2423 2429
2424 2430 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
2425 2431 if stopped:
2426 2432 basetext = self._revisioncache[2]
2427 2433
2428 2434 # drop cache to save memory, the caller is expected to
2429 2435 # update self._revisioncache after validating the text
2430 2436 self._revisioncache = None
2431 2437
2432 2438 targetsize = None
2433 2439 rawsize = self.index[rev][2]
2434 2440 if 0 <= rawsize:
2435 2441 targetsize = 4 * rawsize
2436 2442
2437 2443 bins = self._chunks(chain, targetsize=targetsize)
2438 2444 if basetext is None:
2439 2445 basetext = bytes(bins[0])
2440 2446 bins = bins[1:]
2441 2447
2442 2448 rawtext = mdiff.patches(basetext, bins)
2443 2449 del basetext # let us have a chance to free memory early
2444 2450 return (rev, rawtext, False)
2445 2451
2446 2452 def _sidedata(self, rev):
2447 2453 """Return the sidedata for a given revision number."""
2448 2454 index_entry = self.index[rev]
2449 2455 sidedata_offset = index_entry[8]
2450 2456 sidedata_size = index_entry[9]
2451 2457
2452 2458 if self._inline:
2453 2459 sidedata_offset += self.index.entry_size * (1 + rev)
2454 2460 if sidedata_size == 0:
2455 2461 return {}
2456 2462
2457 2463 if self._docket.sidedata_end < sidedata_offset + sidedata_size:
2458 2464 filename = self._sidedatafile
2459 2465 end = self._docket.sidedata_end
2460 2466 offset = sidedata_offset
2461 2467 length = sidedata_size
2462 2468 m = FILE_TOO_SHORT_MSG % (filename, length, offset, end)
2463 2469 raise error.RevlogError(m)
2464 2470
2465 2471 comp_segment = self._inner._segmentfile_sidedata.read_chunk(
2466 2472 sidedata_offset, sidedata_size
2467 2473 )
2468 2474
2469 2475 comp = self.index[rev][11]
2470 2476 if comp == COMP_MODE_PLAIN:
2471 2477 segment = comp_segment
2472 2478 elif comp == COMP_MODE_DEFAULT:
2473 2479 segment = self._decompressor(comp_segment)
2474 2480 elif comp == COMP_MODE_INLINE:
2475 2481 segment = self.decompress(comp_segment)
2476 2482 else:
2477 2483 msg = b'unknown compression mode %d'
2478 2484 msg %= comp
2479 2485 raise error.RevlogError(msg)
2480 2486
2481 2487 sidedata = sidedatautil.deserialize_sidedata(segment)
2482 2488 return sidedata
2483 2489
2484 2490 def rawdata(self, nodeorrev):
2485 2491 """return an uncompressed raw data of a given node or revision number."""
2486 2492 return self._revisiondata(nodeorrev, raw=True)
2487 2493
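# Usage sketch for the three read-side entry points above; `rl` and `node`
# are placeholders rather than anything introduced by this change.
def read_all_forms(rl, node):
    text = rl.revision(node)   # flag processors applied
    raw = rl.rawdata(node)     # raw stored form, after delta resolution
    extra = rl.sidedata(node)  # dict of sidedata, possibly empty
    return text, raw, extra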
2488 2494 def hash(self, text, p1, p2):
2489 2495 """Compute a node hash.
2490 2496
2491 2497 Available as a function so that subclasses can replace the hash
2492 2498 as needed.
2493 2499 """
2494 2500 return storageutil.hashrevisionsha1(text, p1, p2)
2495 2501
2496 2502 def checkhash(self, text, node, p1=None, p2=None, rev=None):
2497 2503 """Check node hash integrity.
2498 2504
2499 2505 Available as a function so that subclasses can extend hash mismatch
2500 2506 behaviors as needed.
2501 2507 """
2502 2508 try:
2503 2509 if p1 is None and p2 is None:
2504 2510 p1, p2 = self.parents(node)
2505 2511 if node != self.hash(text, p1, p2):
2506 2512 # Clear the revision cache on hash failure. The revision cache
2507 2513 # only stores the raw revision and clearing the cache does have
2508 2514 # the side-effect that we won't have a cache hit when the raw
2509 2515 # revision data is accessed. But this case should be rare and
2510 2516 # it is extra work to teach the cache about the hash
2511 2517 # verification state.
2512 2518 if self._revisioncache and self._revisioncache[0] == node:
2513 2519 self._revisioncache = None
2514 2520
2515 2521 revornode = rev
2516 2522 if revornode is None:
2517 2523 revornode = templatefilters.short(hex(node))
2518 2524 raise error.RevlogError(
2519 2525 _(b"integrity check failed on %s:%s")
2520 2526 % (self.display_id, pycompat.bytestr(revornode))
2521 2527 )
2522 2528 except error.RevlogError:
2523 2529 if self.feature_config.censorable and storageutil.iscensoredtext(
2524 2530 text
2525 2531 ):
2526 2532 raise error.CensoredNodeError(self.display_id, node, text)
2527 2533 raise
2528 2534
2529 2535 @property
2530 2536 def _split_index_file(self):
2531 2537 """the path where to expect the index of an ongoing splitting operation
2532 2538
2533 2539 The file will only exist if a splitting operation is in progress, but
2534 2540 it is always expected at the same location."""
2535 2541 parts = self.radix.split(b'/')
2536 2542 if len(parts) > 1:
2537 2543             # adds a '-s' suffix to the `data/` or `meta/` base
2538 2544 head = parts[0] + b'-s'
2539 2545 mids = parts[1:-1]
2540 2546 tail = parts[-1] + b'.i'
2541 2547 pieces = [head] + mids + [tail]
2542 2548 return b'/'.join(pieces)
2543 2549 else:
2544 2550 # the revlog is stored at the root of the store (changelog or
2545 2551 # manifest), no risk of collision.
2546 2552 return self.radix + b'.i.s'
2547 2553
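# Stand-alone restatement of the naming scheme above, for illustration only
# (the helper name and sample radixes are made up):
#   b'data/foo.txt' -> b'data-s/foo.txt.i'
#   b'00changelog'  -> b'00changelog.i.s'
def split_index_name(radix):
    parts = radix.split(b'/')
    if len(parts) > 1:
        return b'/'.join([parts[0] + b'-s'] + parts[1:-1] + [parts[-1] + b'.i'])
    return radix + b'.i.s'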
2548 2554 def _enforceinlinesize(self, tr, side_write=True):
2549 2555 """Check if the revlog is too big for inline and convert if so.
2550 2556
2551 2557 This should be called after revisions are added to the revlog. If the
2552 2558 revlog has grown too large to be an inline revlog, it will convert it
2553 2559 to use multiple index and data files.
2554 2560 """
2555 2561 tiprev = len(self) - 1
2556 2562 total_size = self.start(tiprev) + self.length(tiprev)
2557 2563 if not self._inline or total_size < _maxinline:
2558 2564 return
2559 2565
2560 2566 troffset = tr.findoffset(self._indexfile)
2561 2567 if troffset is None:
2562 2568 raise error.RevlogError(
2563 2569 _(b"%s not found in the transaction") % self._indexfile
2564 2570 )
2565 2571 if troffset:
2566 2572 tr.addbackup(self._indexfile, for_offset=True)
2567 2573 tr.add(self._datafile, 0)
2568 2574
2569 2575 existing_handles = False
2570 2576 if self._inner._writinghandles is not None:
2571 2577 existing_handles = True
2572 2578 fp = self._inner._writinghandles[0]
2573 2579 fp.flush()
2574 2580 fp.close()
2575 2581 # We can't use the cached file handle after close(). So prevent
2576 2582 # its usage.
2577 2583 self._inner._writinghandles = None
2578 2584 self._inner._segmentfile.writing_handle = None
2579 2585 # No need to deal with sidedata writing handle as it is only
2580 2586 # relevant with revlog-v2 which is never inline, not reaching
2581 2587 # this code
2582 2588 if side_write:
2583 2589 old_index_file_path = self._indexfile
2584 2590 new_index_file_path = self._split_index_file
2585 2591 opener = self.opener
2586 2592 weak_self = weakref.ref(self)
2587 2593
2588 2594             # the "split" index replaces the real index when the transaction is finalized
2589 2595 def finalize_callback(tr):
2590 2596 opener.rename(
2591 2597 new_index_file_path,
2592 2598 old_index_file_path,
2593 2599 checkambig=True,
2594 2600 )
2595 2601 maybe_self = weak_self()
2596 2602 if maybe_self is not None:
2597 2603 maybe_self._indexfile = old_index_file_path
2598 2604 maybe_self._inner.index_file = maybe_self._indexfile
2599 2605
2600 2606 def abort_callback(tr):
2601 2607 maybe_self = weak_self()
2602 2608 if maybe_self is not None:
2603 2609 maybe_self._indexfile = old_index_file_path
2604 2610 maybe_self._inner.index_file = old_index_file_path
2605 2611
2606 2612 tr.registertmp(new_index_file_path)
2607 2613 if self.target[1] is not None:
2608 2614 callback_id = b'000-revlog-split-%d-%s' % self.target
2609 2615 else:
2610 2616 callback_id = b'000-revlog-split-%d' % self.target[0]
2611 2617 tr.addfinalize(callback_id, finalize_callback)
2612 2618 tr.addabort(callback_id, abort_callback)
2613 2619
2614 2620 new_dfh = self._datafp(b'w+')
2615 2621 new_dfh.truncate(0) # drop any potentially existing data
2616 2622 try:
2617 2623 with self.reading():
2618 2624 for r in self:
2619 new_dfh.write(self._getsegmentforrevs(r, r)[1])
2625 new_dfh.write(self._inner.get_segment_for_revs(r, r)[1])
2620 2626 new_dfh.flush()
2621 2627
2622 2628 if side_write:
2623 2629 self._indexfile = new_index_file_path
2624 2630 self._inner.index_file = self._indexfile
2625 2631 with self._inner._InnerRevlog__index_new_fp() as fp:
2626 2632 self._format_flags &= ~FLAG_INLINE_DATA
2627 2633 self._inline = False
2628 2634 self._inner.inline = False
2629 2635 for i in self:
2630 2636 e = self.index.entry_binary(i)
2631 2637 if i == 0 and self._docket is None:
2632 2638 header = self._format_flags | self._format_version
2633 2639 header = self.index.pack_header(header)
2634 2640 e = header + e
2635 2641 fp.write(e)
2636 2642 if self._docket is not None:
2637 2643 self._docket.index_end = fp.tell()
2638 2644
2639 2645             # If we don't use side-write, the temp file replaces the real
2640 2646             # index when we exit the context manager
2641 2647
2642 2648 nodemaputil.setup_persistent_nodemap(tr, self)
2643 2649 self._inner._segmentfile = randomaccessfile.randomaccessfile(
2644 2650 self.opener,
2645 2651 self._datafile,
2646 2652 self.data_config.chunk_cache_size,
2647 2653 )
2648 2654
2649 2655 if existing_handles:
2650 2656 # switched from inline to conventional reopen the index
2651 2657 index_end = None
2652 2658 if self._docket is not None:
2653 2659 index_end = self._docket.index_end
2654 2660 ifh = self._inner._InnerRevlog__index_write_fp(
2655 2661 index_end=index_end
2656 2662 )
2657 2663 self._inner._writinghandles = (ifh, new_dfh, None)
2658 2664 self._inner._segmentfile.writing_handle = new_dfh
2659 2665 new_dfh = None
2660 2666 # No need to deal with sidedata writing handle as it is only
2661 2667 # relevant with revlog-v2 which is never inline, not reaching
2662 2668 # this code
2663 2669 finally:
2664 2670 if new_dfh is not None:
2665 2671 new_dfh.close()
2666 2672
2667 2673 def _nodeduplicatecallback(self, transaction, node):
2668 2674 """called when trying to add a node already stored."""
2669 2675
2670 2676 @contextlib.contextmanager
2671 2677 def reading(self):
2672 2678 with self._inner.reading():
2673 2679 yield
2674 2680
2675 2681 @contextlib.contextmanager
2676 2682 def _writing(self, transaction):
2677 2683 if self._trypending:
2678 2684 msg = b'try to write in a `trypending` revlog: %s'
2679 2685 msg %= self.display_id
2680 2686 raise error.ProgrammingError(msg)
2681 2687 if self._inner.is_writing:
2682 2688 yield
2683 2689 else:
2684 2690 data_end = None
2685 2691 sidedata_end = None
2686 2692 if self._docket is not None:
2687 2693 data_end = self._docket.data_end
2688 2694 sidedata_end = self._docket.sidedata_end
2689 2695 with self._inner.writing(
2690 2696 transaction,
2691 2697 data_end=data_end,
2692 2698 sidedata_end=sidedata_end,
2693 2699 ):
2694 2700 yield
2695 2701 if self._docket is not None:
2696 2702 self._write_docket(transaction)
2697 2703
2698 2704 def _write_docket(self, transaction):
2699 2705 """write the current docket on disk
2700 2706
2701 2707         Exists as a method to help the changelog implement transaction logic
2702 2708 
2703 2709         We could also imagine using the same transaction logic for all revlogs
2704 2710         since dockets are cheap."""
2705 2711 self._docket.write(transaction)
2706 2712
2707 2713 def addrevision(
2708 2714 self,
2709 2715 text,
2710 2716 transaction,
2711 2717 link,
2712 2718 p1,
2713 2719 p2,
2714 2720 cachedelta=None,
2715 2721 node=None,
2716 2722 flags=REVIDX_DEFAULT_FLAGS,
2717 2723 deltacomputer=None,
2718 2724 sidedata=None,
2719 2725 ):
2720 2726 """add a revision to the log
2721 2727
2722 2728 text - the revision data to add
2723 2729 transaction - the transaction object used for rollback
2724 2730 link - the linkrev data to add
2725 2731 p1, p2 - the parent nodeids of the revision
2726 2732 cachedelta - an optional precomputed delta
2727 2733 node - nodeid of revision; typically node is not specified, and it is
2728 2734 computed by default as hash(text, p1, p2), however subclasses might
2729 2735 use different hashing method (and override checkhash() in such case)
2730 2736 flags - the known flags to set on the revision
2731 2737 deltacomputer - an optional deltacomputer instance shared between
2732 2738 multiple calls
2733 2739 """
2734 2740 if link == nullrev:
2735 2741 raise error.RevlogError(
2736 2742 _(b"attempted to add linkrev -1 to %s") % self.display_id
2737 2743 )
2738 2744
2739 2745 if sidedata is None:
2740 2746 sidedata = {}
2741 2747 elif sidedata and not self.feature_config.has_side_data:
2742 2748 raise error.ProgrammingError(
2743 2749                 _(b"trying to add sidedata to a revlog that does not support them")
2744 2750 )
2745 2751
2746 2752 if flags:
2747 2753 node = node or self.hash(text, p1, p2)
2748 2754
2749 2755 rawtext, validatehash = flagutil.processflagswrite(self, text, flags)
2750 2756
2751 2757 # If the flag processor modifies the revision data, ignore any provided
2752 2758 # cachedelta.
2753 2759 if rawtext != text:
2754 2760 cachedelta = None
2755 2761
2756 2762 if len(rawtext) > _maxentrysize:
2757 2763 raise error.RevlogError(
2758 2764 _(
2759 2765 b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
2760 2766 )
2761 2767 % (self.display_id, len(rawtext))
2762 2768 )
2763 2769
2764 2770 node = node or self.hash(rawtext, p1, p2)
2765 2771 rev = self.index.get_rev(node)
2766 2772 if rev is not None:
2767 2773 return rev
2768 2774
2769 2775 if validatehash:
2770 2776 self.checkhash(rawtext, node, p1=p1, p2=p2)
2771 2777
2772 2778 return self.addrawrevision(
2773 2779 rawtext,
2774 2780 transaction,
2775 2781 link,
2776 2782 p1,
2777 2783 p2,
2778 2784 node,
2779 2785 flags,
2780 2786 cachedelta=cachedelta,
2781 2787 deltacomputer=deltacomputer,
2782 2788 sidedata=sidedata,
2783 2789 )
2784 2790
2785 2791 def addrawrevision(
2786 2792 self,
2787 2793 rawtext,
2788 2794 transaction,
2789 2795 link,
2790 2796 p1,
2791 2797 p2,
2792 2798 node,
2793 2799 flags,
2794 2800 cachedelta=None,
2795 2801 deltacomputer=None,
2796 2802 sidedata=None,
2797 2803 ):
2798 2804 """add a raw revision with known flags, node and parents
2799 2805 useful when reusing a revision not stored in this revlog (ex: received
2800 2806 over wire, or read from an external bundle).
2801 2807 """
2802 2808 with self._writing(transaction):
2803 2809 return self._addrevision(
2804 2810 node,
2805 2811 rawtext,
2806 2812 transaction,
2807 2813 link,
2808 2814 p1,
2809 2815 p2,
2810 2816 flags,
2811 2817 cachedelta,
2812 2818 deltacomputer=deltacomputer,
2813 2819 sidedata=sidedata,
2814 2820 )
2815 2821
2816 2822 def compress(self, data):
2817 2823 """Generate a possibly-compressed representation of data."""
2818 2824 if not data:
2819 2825 return b'', data
2820 2826
2821 2827 compressed = self._compressor.compress(data)
2822 2828
2823 2829 if compressed:
2824 2830 # The revlog compressor added the header in the returned data.
2825 2831 return b'', compressed
2826 2832
2827 2833 if data[0:1] == b'\0':
2828 2834 return b'', data
2829 2835 return b'u', data
2830 2836
2831 2837 def decompress(self, data):
2832 2838 """Decompress a revlog chunk.
2833 2839
2834 2840 The chunk is expected to begin with a header identifying the
2835 2841 format type so it can be routed to an appropriate decompressor.
2836 2842 """
2837 2843 if not data:
2838 2844 return data
2839 2845
2840 2846 # Revlogs are read much more frequently than they are written and many
2841 2847 # chunks only take microseconds to decompress, so performance is
2842 2848 # important here.
2843 2849 #
2844 2850 # We can make a few assumptions about revlogs:
2845 2851 #
2846 2852 # 1) the majority of chunks will be compressed (as opposed to inline
2847 2853 # raw data).
2848 2854 # 2) decompressing *any* data will likely by at least 10x slower than
2849 2855 # returning raw inline data.
2850 2856 # 3) we want to prioritize common and officially supported compression
2851 2857 # engines
2852 2858 #
2853 2859 # It follows that we want to optimize for "decompress compressed data
2854 2860 # when encoded with common and officially supported compression engines"
2855 2861 # case over "raw data" and "data encoded by less common or non-official
2856 2862 # compression engines." That is why we have the inline lookup first
2857 2863 # followed by the compengines lookup.
2858 2864 #
2859 2865 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
2860 2866 # compressed chunks. And this matters for changelog and manifest reads.
2861 2867 t = data[0:1]
2862 2868
2863 2869 if t == b'x':
2864 2870 try:
2865 2871 return _zlibdecompress(data)
2866 2872 except zlib.error as e:
2867 2873 raise error.RevlogError(
2868 2874 _(b'revlog decompress error: %s')
2869 2875 % stringutil.forcebytestr(e)
2870 2876 )
2871 2877 # '\0' is more common than 'u' so it goes first.
2872 2878 elif t == b'\0':
2873 2879 return data
2874 2880 elif t == b'u':
2875 2881 return util.buffer(data, 1)
2876 2882
2877 2883 compressor = self._get_decompressor(t)
2878 2884
2879 2885 return compressor.decompress(data)
2880 2886
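# Minimal round-trip sketch of the header convention explained above; the
# revlog instance and payload are assumptions. compress() returns a
# (header, data) pair and decompress() expects the header glued back on,
# exactly as _writeentry writes data[0] followed by data[1].
def compress_roundtrip(rl, payload=b'some revision text\n'):
    header, packed = rl.compress(payload)
    return bytes(rl.decompress(header + packed)) == payload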
2881 2887 def _addrevision(
2882 2888 self,
2883 2889 node,
2884 2890 rawtext,
2885 2891 transaction,
2886 2892 link,
2887 2893 p1,
2888 2894 p2,
2889 2895 flags,
2890 2896 cachedelta,
2891 2897 alwayscache=False,
2892 2898 deltacomputer=None,
2893 2899 sidedata=None,
2894 2900 ):
2895 2901 """internal function to add revisions to the log
2896 2902
2897 2903 see addrevision for argument descriptions.
2898 2904
2899 2905 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2900 2906
2901 2907 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2902 2908 be used.
2903 2909
2904 2910 invariants:
2905 2911 - rawtext is optional (can be None); if not set, cachedelta must be set.
2906 2912 if both are set, they must correspond to each other.
2907 2913 """
2908 2914 if node == self.nullid:
2909 2915 raise error.RevlogError(
2910 2916 _(b"%s: attempt to add null revision") % self.display_id
2911 2917 )
2912 2918 if (
2913 2919 node == self.nodeconstants.wdirid
2914 2920 or node in self.nodeconstants.wdirfilenodeids
2915 2921 ):
2916 2922 raise error.RevlogError(
2917 2923 _(b"%s: attempt to add wdir revision") % self.display_id
2918 2924 )
2919 2925 if self._inner._writinghandles is None:
2920 2926 msg = b'adding revision outside `revlog._writing` context'
2921 2927 raise error.ProgrammingError(msg)
2922 2928
2923 2929 btext = [rawtext]
2924 2930
2925 2931 curr = len(self)
2926 2932 prev = curr - 1
2927 2933
2928 2934 offset = self._get_data_offset(prev)
2929 2935
2930 2936 if self._concurrencychecker:
2931 2937 ifh, dfh, sdfh = self._inner._writinghandles
2932 2938 # XXX no checking for the sidedata file
2933 2939 if self._inline:
2934 2940 # offset is "as if" it were in the .d file, so we need to add on
2935 2941 # the size of the entry metadata.
2936 2942 self._concurrencychecker(
2937 2943 ifh, self._indexfile, offset + curr * self.index.entry_size
2938 2944 )
2939 2945 else:
2940 2946 # Entries in the .i are a consistent size.
2941 2947 self._concurrencychecker(
2942 2948 ifh, self._indexfile, curr * self.index.entry_size
2943 2949 )
2944 2950 self._concurrencychecker(dfh, self._datafile, offset)
2945 2951
2946 2952 p1r, p2r = self.rev(p1), self.rev(p2)
2947 2953
2948 2954 # full versions are inserted when the needed deltas
2949 2955 # become comparable to the uncompressed text
2950 2956 if rawtext is None:
2951 2957 # need rawtext size, before changed by flag processors, which is
2952 2958 # the non-raw size. use revlog explicitly to avoid filelog's extra
2953 2959 # logic that might remove metadata size.
2954 2960 textlen = mdiff.patchedsize(
2955 2961 revlog.size(self, cachedelta[0]), cachedelta[1]
2956 2962 )
2957 2963 else:
2958 2964 textlen = len(rawtext)
2959 2965
2960 2966 if deltacomputer is None:
2961 2967 write_debug = None
2962 2968 if self.delta_config.debug_delta:
2963 2969 write_debug = transaction._report
2964 2970 deltacomputer = deltautil.deltacomputer(
2965 2971 self, write_debug=write_debug
2966 2972 )
2967 2973
2968 2974 if cachedelta is not None and len(cachedelta) == 2:
2969 2975 # If the cached delta has no information about how it should be
2970 2976 # reused, add the default reuse instruction according to the
2971 2977 # revlog's configuration.
2972 2978 if (
2973 2979 self.delta_config.general_delta
2974 2980 and self.delta_config.lazy_delta_base
2975 2981 ):
2976 2982 delta_base_reuse = DELTA_BASE_REUSE_TRY
2977 2983 else:
2978 2984 delta_base_reuse = DELTA_BASE_REUSE_NO
2979 2985 cachedelta = (cachedelta[0], cachedelta[1], delta_base_reuse)
2980 2986
2981 2987 revinfo = revlogutils.revisioninfo(
2982 2988 node,
2983 2989 p1,
2984 2990 p2,
2985 2991 btext,
2986 2992 textlen,
2987 2993 cachedelta,
2988 2994 flags,
2989 2995 )
2990 2996
2991 2997 deltainfo = deltacomputer.finddeltainfo(revinfo)
2992 2998
2993 2999 compression_mode = COMP_MODE_INLINE
2994 3000 if self._docket is not None:
2995 3001 default_comp = self._docket.default_compression_header
2996 3002 r = deltautil.delta_compression(default_comp, deltainfo)
2997 3003 compression_mode, deltainfo = r
2998 3004
2999 3005 sidedata_compression_mode = COMP_MODE_INLINE
3000 3006 if sidedata and self.feature_config.has_side_data:
3001 3007 sidedata_compression_mode = COMP_MODE_PLAIN
3002 3008 serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
3003 3009 sidedata_offset = self._docket.sidedata_end
3004 3010 h, comp_sidedata = self.compress(serialized_sidedata)
3005 3011 if (
3006 3012 h != b'u'
3007 3013 and comp_sidedata[0:1] != b'\0'
3008 3014 and len(comp_sidedata) < len(serialized_sidedata)
3009 3015 ):
3010 3016 assert not h
3011 3017 if (
3012 3018 comp_sidedata[0:1]
3013 3019 == self._docket.default_compression_header
3014 3020 ):
3015 3021 sidedata_compression_mode = COMP_MODE_DEFAULT
3016 3022 serialized_sidedata = comp_sidedata
3017 3023 else:
3018 3024 sidedata_compression_mode = COMP_MODE_INLINE
3019 3025 serialized_sidedata = comp_sidedata
3020 3026 else:
3021 3027 serialized_sidedata = b""
3022 3028 # Don't store the offset if the sidedata is empty, that way
3023 3029 # we can easily detect empty sidedata and they will be no different
3024 3030 # than ones we manually add.
3025 3031 sidedata_offset = 0
3026 3032
3027 3033 rank = RANK_UNKNOWN
3028 3034 if self.feature_config.compute_rank:
3029 3035 if (p1r, p2r) == (nullrev, nullrev):
3030 3036 rank = 1
3031 3037 elif p1r != nullrev and p2r == nullrev:
3032 3038 rank = 1 + self.fast_rank(p1r)
3033 3039 elif p1r == nullrev and p2r != nullrev:
3034 3040 rank = 1 + self.fast_rank(p2r)
3035 3041 else: # merge node
3036 3042 if rustdagop is not None and self.index.rust_ext_compat:
3037 3043 rank = rustdagop.rank(self.index, p1r, p2r)
3038 3044 else:
3039 3045 pmin, pmax = sorted((p1r, p2r))
3040 3046 rank = 1 + self.fast_rank(pmax)
3041 3047 rank += sum(1 for _ in self.findmissingrevs([pmax], [pmin]))
3042 3048
3043 3049 e = revlogutils.entry(
3044 3050 flags=flags,
3045 3051 data_offset=offset,
3046 3052 data_compressed_length=deltainfo.deltalen,
3047 3053 data_uncompressed_length=textlen,
3048 3054 data_compression_mode=compression_mode,
3049 3055 data_delta_base=deltainfo.base,
3050 3056 link_rev=link,
3051 3057 parent_rev_1=p1r,
3052 3058 parent_rev_2=p2r,
3053 3059 node_id=node,
3054 3060 sidedata_offset=sidedata_offset,
3055 3061 sidedata_compressed_length=len(serialized_sidedata),
3056 3062 sidedata_compression_mode=sidedata_compression_mode,
3057 3063 rank=rank,
3058 3064 )
3059 3065
3060 3066 self.index.append(e)
3061 3067 entry = self.index.entry_binary(curr)
3062 3068 if curr == 0 and self._docket is None:
3063 3069 header = self._format_flags | self._format_version
3064 3070 header = self.index.pack_header(header)
3065 3071 entry = header + entry
3066 3072 self._writeentry(
3067 3073 transaction,
3068 3074 entry,
3069 3075 deltainfo.data,
3070 3076 link,
3071 3077 offset,
3072 3078 serialized_sidedata,
3073 3079 sidedata_offset,
3074 3080 )
3075 3081
3076 3082 rawtext = btext[0]
3077 3083
3078 3084 if alwayscache and rawtext is None:
3079 3085 rawtext = deltacomputer.buildtext(revinfo)
3080 3086
3081 3087 if type(rawtext) == bytes: # only accept immutable objects
3082 3088 self._revisioncache = (node, curr, rawtext)
3083 3089 self._chainbasecache[curr] = deltainfo.chainbase
3084 3090 return curr
3085 3091
3086 3092 def _get_data_offset(self, prev):
3087 3093 """Returns the current offset in the (in-transaction) data file.
3088 3094         Versions < 2 of the revlog can get this in O(1), revlog v2 needs a docket
3089 3095 file to store that information: since sidedata can be rewritten to the
3090 3096 end of the data file within a transaction, you can have cases where, for
3091 3097 example, rev `n` does not have sidedata while rev `n - 1` does, leading
3092 3098 to `n - 1`'s sidedata being written after `n`'s data.
3093 3099
3094 3100 TODO cache this in a docket file before getting out of experimental."""
3095 3101 if self._docket is None:
3096 3102 return self.end(prev)
3097 3103 else:
3098 3104 return self._docket.data_end
3099 3105
3100 3106 def _writeentry(
3101 3107 self, transaction, entry, data, link, offset, sidedata, sidedata_offset
3102 3108 ):
3103 3109 # Files opened in a+ mode have inconsistent behavior on various
3104 3110 # platforms. Windows requires that a file positioning call be made
3105 3111 # when the file handle transitions between reads and writes. See
3106 3112 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
3107 3113 # platforms, Python or the platform itself can be buggy. Some versions
3108 3114 # of Solaris have been observed to not append at the end of the file
3109 3115 # if the file was seeked to before the end. See issue4943 for more.
3110 3116 #
3111 3117 # We work around this issue by inserting a seek() before writing.
3112 3118 # Note: This is likely not necessary on Python 3. However, because
3113 3119 # the file handle is reused for reads and may be seeked there, we need
3114 3120 # to be careful before changing this.
3115 3121 if self._inner._writinghandles is None:
3116 3122 msg = b'adding revision outside `revlog._writing` context'
3117 3123 raise error.ProgrammingError(msg)
3118 3124 ifh, dfh, sdfh = self._inner._writinghandles
3119 3125 if self._docket is None:
3120 3126 ifh.seek(0, os.SEEK_END)
3121 3127 else:
3122 3128 ifh.seek(self._docket.index_end, os.SEEK_SET)
3123 3129 if dfh:
3124 3130 if self._docket is None:
3125 3131 dfh.seek(0, os.SEEK_END)
3126 3132 else:
3127 3133 dfh.seek(self._docket.data_end, os.SEEK_SET)
3128 3134 if sdfh:
3129 3135 sdfh.seek(self._docket.sidedata_end, os.SEEK_SET)
3130 3136
3131 3137 curr = len(self) - 1
3132 3138 if not self._inline:
3133 3139 transaction.add(self._datafile, offset)
3134 3140 if self._sidedatafile:
3135 3141 transaction.add(self._sidedatafile, sidedata_offset)
3136 3142 transaction.add(self._indexfile, curr * len(entry))
3137 3143 if data[0]:
3138 3144 dfh.write(data[0])
3139 3145 dfh.write(data[1])
3140 3146 if sidedata:
3141 3147 sdfh.write(sidedata)
3142 3148 ifh.write(entry)
3143 3149 else:
3144 3150 offset += curr * self.index.entry_size
3145 3151 transaction.add(self._indexfile, offset)
3146 3152 ifh.write(entry)
3147 3153 ifh.write(data[0])
3148 3154 ifh.write(data[1])
3149 3155 assert not sidedata
3150 3156 self._enforceinlinesize(transaction)
3151 3157 if self._docket is not None:
3152 3158 # revlog-v2 always has 3 writing handles, help Pytype
3153 3159 wh1 = self._inner._writinghandles[0]
3154 3160 wh2 = self._inner._writinghandles[1]
3155 3161 wh3 = self._inner._writinghandles[2]
3156 3162 assert wh1 is not None
3157 3163 assert wh2 is not None
3158 3164 assert wh3 is not None
3159 3165 self._docket.index_end = wh1.tell()
3160 3166 self._docket.data_end = wh2.tell()
3161 3167 self._docket.sidedata_end = wh3.tell()
3162 3168
3163 3169 nodemaputil.setup_persistent_nodemap(transaction, self)
3164 3170
3165 3171 def addgroup(
3166 3172 self,
3167 3173 deltas,
3168 3174 linkmapper,
3169 3175 transaction,
3170 3176 alwayscache=False,
3171 3177 addrevisioncb=None,
3172 3178 duplicaterevisioncb=None,
3173 3179 debug_info=None,
3174 3180 delta_base_reuse_policy=None,
3175 3181 ):
3176 3182 """
3177 3183 add a delta group
3178 3184
3179 3185 given a set of deltas, add them to the revision log. the
3180 3186 first delta is against its parent, which should be in our
3181 3187 log, the rest are against the previous delta.
3182 3188
3183 3189 If ``addrevisioncb`` is defined, it will be called with arguments of
3184 3190 this revlog and the node that was added.
3185 3191 """
3186 3192
3187 3193 if self._adding_group:
3188 3194 raise error.ProgrammingError(b'cannot nest addgroup() calls')
3189 3195
3190 3196 # read the default delta-base reuse policy from revlog config if the
3191 3197 # group did not specify one.
3192 3198 if delta_base_reuse_policy is None:
3193 3199 if (
3194 3200 self.delta_config.general_delta
3195 3201 and self.delta_config.lazy_delta_base
3196 3202 ):
3197 3203 delta_base_reuse_policy = DELTA_BASE_REUSE_TRY
3198 3204 else:
3199 3205 delta_base_reuse_policy = DELTA_BASE_REUSE_NO
3200 3206
3201 3207 self._adding_group = True
3202 3208 empty = True
3203 3209 try:
3204 3210 with self._writing(transaction):
3205 3211 write_debug = None
3206 3212 if self.delta_config.debug_delta:
3207 3213 write_debug = transaction._report
3208 3214 deltacomputer = deltautil.deltacomputer(
3209 3215 self,
3210 3216 write_debug=write_debug,
3211 3217 debug_info=debug_info,
3212 3218 )
3213 3219 # loop through our set of deltas
3214 3220 for data in deltas:
3215 3221 (
3216 3222 node,
3217 3223 p1,
3218 3224 p2,
3219 3225 linknode,
3220 3226 deltabase,
3221 3227 delta,
3222 3228 flags,
3223 3229 sidedata,
3224 3230 ) = data
3225 3231 link = linkmapper(linknode)
3226 3232 flags = flags or REVIDX_DEFAULT_FLAGS
3227 3233
3228 3234 rev = self.index.get_rev(node)
3229 3235 if rev is not None:
3230 3236 # this can happen if two branches make the same change
3231 3237 self._nodeduplicatecallback(transaction, rev)
3232 3238 if duplicaterevisioncb:
3233 3239 duplicaterevisioncb(self, rev)
3234 3240 empty = False
3235 3241 continue
3236 3242
3237 3243 for p in (p1, p2):
3238 3244 if not self.index.has_node(p):
3239 3245 raise error.LookupError(
3240 3246 p, self.radix, _(b'unknown parent')
3241 3247 )
3242 3248
3243 3249 if not self.index.has_node(deltabase):
3244 3250 raise error.LookupError(
3245 3251 deltabase, self.display_id, _(b'unknown delta base')
3246 3252 )
3247 3253
3248 3254 baserev = self.rev(deltabase)
3249 3255
3250 3256 if baserev != nullrev and self.iscensored(baserev):
3251 3257 # if base is censored, delta must be full replacement in a
3252 3258 # single patch operation
3253 3259 hlen = struct.calcsize(b">lll")
3254 3260 oldlen = self.rawsize(baserev)
3255 3261 newlen = len(delta) - hlen
3256 3262 if delta[:hlen] != mdiff.replacediffheader(
3257 3263 oldlen, newlen
3258 3264 ):
3259 3265 raise error.CensoredBaseError(
3260 3266 self.display_id, self.node(baserev)
3261 3267 )
3262 3268
3263 3269 if not flags and self._peek_iscensored(baserev, delta):
3264 3270 flags |= REVIDX_ISCENSORED
3265 3271
3266 3272 # We assume consumers of addrevisioncb will want to retrieve
3267 3273 # the added revision, which will require a call to
3268 3274 # revision(). revision() will fast path if there is a cache
3269 3275 # hit. So, we tell _addrevision() to always cache in this case.
3270 3276 # We're only using addgroup() in the context of changegroup
3271 3277 # generation so the revision data can always be handled as raw
3272 3278 # by the flagprocessor.
3273 3279 rev = self._addrevision(
3274 3280 node,
3275 3281 None,
3276 3282 transaction,
3277 3283 link,
3278 3284 p1,
3279 3285 p2,
3280 3286 flags,
3281 3287 (baserev, delta, delta_base_reuse_policy),
3282 3288 alwayscache=alwayscache,
3283 3289 deltacomputer=deltacomputer,
3284 3290 sidedata=sidedata,
3285 3291 )
3286 3292
3287 3293 if addrevisioncb:
3288 3294 addrevisioncb(self, rev)
3289 3295 empty = False
3290 3296 finally:
3291 3297 self._adding_group = False
3292 3298 return not empty
3293 3299
3294 3300 def iscensored(self, rev):
3295 3301 """Check if a file revision is censored."""
3296 3302 if not self.feature_config.censorable:
3297 3303 return False
3298 3304
3299 3305 return self.flags(rev) & REVIDX_ISCENSORED
3300 3306
3301 3307 def _peek_iscensored(self, baserev, delta):
3302 3308 """Quickly check if a delta produces a censored revision."""
3303 3309 if not self.feature_config.censorable:
3304 3310 return False
3305 3311
3306 3312 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
3307 3313
3308 3314 def getstrippoint(self, minlink):
3309 3315 """find the minimum rev that must be stripped to strip the linkrev
3310 3316
3311 3317 Returns a tuple containing the minimum rev and a set of all revs that
3312 3318 have linkrevs that will be broken by this strip.
3313 3319 """
3314 3320 return storageutil.resolvestripinfo(
3315 3321 minlink,
3316 3322 len(self) - 1,
3317 3323 self.headrevs(),
3318 3324 self.linkrev,
3319 3325 self.parentrevs,
3320 3326 )
3321 3327
3322 3328 def strip(self, minlink, transaction):
3323 3329 """truncate the revlog on the first revision with a linkrev >= minlink
3324 3330
3325 3331 This function is called when we're stripping revision minlink and
3326 3332 its descendants from the repository.
3327 3333
3328 3334 We have to remove all revisions with linkrev >= minlink, because
3329 3335 the equivalent changelog revisions will be renumbered after the
3330 3336 strip.
3331 3337
3332 3338 So we truncate the revlog on the first of these revisions, and
3333 3339 trust that the caller has saved the revisions that shouldn't be
3334 3340 removed and that it'll re-add them after this truncation.
3335 3341 """
3336 3342 if len(self) == 0:
3337 3343 return
3338 3344
3339 3345 rev, _ = self.getstrippoint(minlink)
3340 3346 if rev == len(self):
3341 3347 return
3342 3348
3343 3349 # first truncate the files on disk
3344 3350 data_end = self.start(rev)
3345 3351 if not self._inline:
3346 3352 transaction.add(self._datafile, data_end)
3347 3353 end = rev * self.index.entry_size
3348 3354 else:
3349 3355 end = data_end + (rev * self.index.entry_size)
3350 3356
3351 3357 if self._sidedatafile:
3352 3358 sidedata_end = self.sidedata_cut_off(rev)
3353 3359 transaction.add(self._sidedatafile, sidedata_end)
3354 3360
3355 3361 transaction.add(self._indexfile, end)
3356 3362 if self._docket is not None:
3357 3363             # XXX we could leverage the docket while stripping. However, it is
3358 3364             # not powerful enough at the time of this comment
3359 3365 self._docket.index_end = end
3360 3366 self._docket.data_end = data_end
3361 3367 self._docket.sidedata_end = sidedata_end
3362 3368 self._docket.write(transaction, stripping=True)
3363 3369
3364 3370 # then reset internal state in memory to forget those revisions
3365 3371 self._revisioncache = None
3366 3372 self._chaininfocache = util.lrucachedict(500)
3367 3373 self._inner._segmentfile.clear_cache()
3368 3374 self._inner._segmentfile_sidedata.clear_cache()
3369 3375
3370 3376 del self.index[rev:-1]
3371 3377
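# Illustrative only: how getstrippoint feeds strip, as described above. The
# helper name is made up; `tr` is an open transaction.
def strip_from_linkrev(rl, tr, minlink):
    rev, broken = rl.getstrippoint(minlink)
    if rev < len(rl):
        rl.strip(minlink, tr)  # truncates revision `rev` and everything after
    return broken  # revs whose linkrevs the caller must still handle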
3372 3378 def checksize(self):
3373 3379 """Check size of index and data files
3374 3380
3375 3381 return a (dd, di) tuple.
3376 3382 - dd: extra bytes for the "data" file
3377 3383 - di: extra bytes for the "index" file
3378 3384
3379 3385 A healthy revlog will return (0, 0).
3380 3386 """
3381 3387 expected = 0
3382 3388 if len(self):
3383 3389 expected = max(0, self.end(len(self) - 1))
3384 3390
3385 3391 try:
3386 3392 with self._datafp() as f:
3387 3393 f.seek(0, io.SEEK_END)
3388 3394 actual = f.tell()
3389 3395 dd = actual - expected
3390 3396 except FileNotFoundError:
3391 3397 dd = 0
3392 3398
3393 3399 try:
3394 3400 f = self.opener(self._indexfile)
3395 3401 f.seek(0, io.SEEK_END)
3396 3402 actual = f.tell()
3397 3403 f.close()
3398 3404 s = self.index.entry_size
3399 3405 i = max(0, actual // s)
3400 3406 di = actual - (i * s)
3401 3407 if self._inline:
3402 3408 databytes = 0
3403 3409 for r in self:
3404 3410 databytes += max(0, self.length(r))
3405 3411 dd = 0
3406 3412 di = actual - len(self) * s - databytes
3407 3413 except FileNotFoundError:
3408 3414 di = 0
3409 3415
3410 3416 return (dd, di)
3411 3417
3412 3418 def files(self):
3413 3419 """return list of files that compose this revlog"""
3414 3420 res = [self._indexfile]
3415 3421 if self._docket_file is None:
3416 3422 if not self._inline:
3417 3423 res.append(self._datafile)
3418 3424 else:
3419 3425 res.append(self._docket_file)
3420 3426 res.extend(self._docket.old_index_filepaths(include_empty=False))
3421 3427 if self._docket.data_end:
3422 3428 res.append(self._datafile)
3423 3429 res.extend(self._docket.old_data_filepaths(include_empty=False))
3424 3430 if self._docket.sidedata_end:
3425 3431 res.append(self._sidedatafile)
3426 3432 res.extend(self._docket.old_sidedata_filepaths(include_empty=False))
3427 3433 return res
3428 3434
3429 3435 def emitrevisions(
3430 3436 self,
3431 3437 nodes,
3432 3438 nodesorder=None,
3433 3439 revisiondata=False,
3434 3440 assumehaveparentrevisions=False,
3435 3441 deltamode=repository.CG_DELTAMODE_STD,
3436 3442 sidedata_helpers=None,
3437 3443 debug_info=None,
3438 3444 ):
3439 3445 if nodesorder not in (b'nodes', b'storage', b'linear', None):
3440 3446 raise error.ProgrammingError(
3441 3447 b'unhandled value for nodesorder: %s' % nodesorder
3442 3448 )
3443 3449
3444 3450 if nodesorder is None and not self.delta_config.general_delta:
3445 3451 nodesorder = b'storage'
3446 3452
3447 3453 if (
3448 3454 not self._storedeltachains
3449 3455 and deltamode != repository.CG_DELTAMODE_PREV
3450 3456 ):
3451 3457 deltamode = repository.CG_DELTAMODE_FULL
3452 3458
3453 3459 return storageutil.emitrevisions(
3454 3460 self,
3455 3461 nodes,
3456 3462 nodesorder,
3457 3463 revlogrevisiondelta,
3458 3464 deltaparentfn=self.deltaparent,
3459 3465 candeltafn=self._candelta,
3460 3466 rawsizefn=self.rawsize,
3461 3467 revdifffn=self.revdiff,
3462 3468 flagsfn=self.flags,
3463 3469 deltamode=deltamode,
3464 3470 revisiondata=revisiondata,
3465 3471 assumehaveparentrevisions=assumehaveparentrevisions,
3466 3472 sidedata_helpers=sidedata_helpers,
3467 3473 debug_info=debug_info,
3468 3474 )
3469 3475
3470 3476 DELTAREUSEALWAYS = b'always'
3471 3477 DELTAREUSESAMEREVS = b'samerevs'
3472 3478 DELTAREUSENEVER = b'never'
3473 3479
3474 3480 DELTAREUSEFULLADD = b'fulladd'
3475 3481
3476 3482 DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}
3477 3483
3478 3484 def clone(
3479 3485 self,
3480 3486 tr,
3481 3487 destrevlog,
3482 3488 addrevisioncb=None,
3483 3489 deltareuse=DELTAREUSESAMEREVS,
3484 3490 forcedeltabothparents=None,
3485 3491 sidedata_helpers=None,
3486 3492 ):
3487 3493 """Copy this revlog to another, possibly with format changes.
3488 3494
3489 3495 The destination revlog will contain the same revisions and nodes.
3490 3496 However, it may not be bit-for-bit identical due to e.g. delta encoding
3491 3497 differences.
3492 3498
3493 3499         The ``deltareuse`` argument controls how deltas from the existing revlog
3494 3500 are preserved in the destination revlog. The argument can have the
3495 3501 following values:
3496 3502
3497 3503 DELTAREUSEALWAYS
3498 3504 Deltas will always be reused (if possible), even if the destination
3499 3505 revlog would not select the same revisions for the delta. This is the
3500 3506 fastest mode of operation.
3501 3507 DELTAREUSESAMEREVS
3502 3508 Deltas will be reused if the destination revlog would pick the same
3503 3509 revisions for the delta. This mode strikes a balance between speed
3504 3510 and optimization.
3505 3511 DELTAREUSENEVER
3506 3512 Deltas will never be reused. This is the slowest mode of execution.
3507 3513 This mode can be used to recompute deltas (e.g. if the diff/delta
3508 3514 algorithm changes).
3509 3515 DELTAREUSEFULLADD
3510 3516             Revisions will be re-added as if they were new content. This is
3511 3517             slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
3512 3518             e.g. large file detection and handling.
3513 3519
3514 3520 Delta computation can be slow, so the choice of delta reuse policy can
3515 3521 significantly affect run time.
3516 3522
3517 3523 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
3518 3524 two extremes. Deltas will be reused if they are appropriate. But if the
3519 3525 delta could choose a better revision, it will do so. This means if you
3520 3526 are converting a non-generaldelta revlog to a generaldelta revlog,
3521 3527 deltas will be recomputed if the delta's parent isn't a parent of the
3522 3528 revision.
3523 3529
3524 3530 In addition to the delta policy, the ``forcedeltabothparents``
3525 3531         argument controls whether to force computing deltas against both parents
3526 3532         for merges. If not set, the destination revlog's existing setting is used.
3527 3533
3528 3534 See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
3529 3535 `sidedata_helpers`.
3530 3536 """
3531 3537 if deltareuse not in self.DELTAREUSEALL:
3532 3538 raise ValueError(
3533 3539 _(b'value for deltareuse invalid: %s') % deltareuse
3534 3540 )
3535 3541
3536 3542 if len(destrevlog):
3537 3543 raise ValueError(_(b'destination revlog is not empty'))
3538 3544
3539 3545 if getattr(self, 'filteredrevs', None):
3540 3546 raise ValueError(_(b'source revlog has filtered revisions'))
3541 3547 if getattr(destrevlog, 'filteredrevs', None):
3542 3548 raise ValueError(_(b'destination revlog has filtered revisions'))
3543 3549
3544 3550 # lazydelta and lazydeltabase controls whether to reuse a cached delta,
3545 3551 # if possible.
3546 3552 old_delta_config = destrevlog.delta_config
3547 3553 destrevlog.delta_config = destrevlog.delta_config.copy()
3548 3554
3549 3555 try:
3550 3556 if deltareuse == self.DELTAREUSEALWAYS:
3551 3557 destrevlog.delta_config.lazy_delta_base = True
3552 3558 destrevlog.delta_config.lazy_delta = True
3553 3559 elif deltareuse == self.DELTAREUSESAMEREVS:
3554 3560 destrevlog.delta_config.lazy_delta_base = False
3555 3561 destrevlog.delta_config.lazy_delta = True
3556 3562 elif deltareuse == self.DELTAREUSENEVER:
3557 3563 destrevlog.delta_config.lazy_delta_base = False
3558 3564 destrevlog.delta_config.lazy_delta = False
3559 3565
3560 3566 delta_both_parents = (
3561 3567 forcedeltabothparents or old_delta_config.delta_both_parents
3562 3568 )
3563 3569 destrevlog.delta_config.delta_both_parents = delta_both_parents
3564 3570
3565 3571 with self.reading(), destrevlog._writing(tr):
3566 3572 self._clone(
3567 3573 tr,
3568 3574 destrevlog,
3569 3575 addrevisioncb,
3570 3576 deltareuse,
3571 3577 forcedeltabothparents,
3572 3578 sidedata_helpers,
3573 3579 )
3574 3580
3575 3581 finally:
3576 3582 destrevlog.delta_config = old_delta_config
3577 3583
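# Hedged usage sketch of the cloning API documented above; `src`, `dst` and
# `tr` stand for an existing revlog, an empty destination revlog and an open
# transaction, none of which are provided by this change.
def recompress_into(src, dst, tr):
    # DELTAREUSENEVER forces every delta to be recomputed, per the docstring
    src.clone(tr, dst, deltareuse=src.DELTAREUSENEVER)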
3578 3584 def _clone(
3579 3585 self,
3580 3586 tr,
3581 3587 destrevlog,
3582 3588 addrevisioncb,
3583 3589 deltareuse,
3584 3590 forcedeltabothparents,
3585 3591 sidedata_helpers,
3586 3592 ):
3587 3593 """perform the core duty of `revlog.clone` after parameter processing"""
3588 3594 write_debug = None
3589 3595 if self.delta_config.debug_delta:
3590 3596 write_debug = tr._report
3591 3597 deltacomputer = deltautil.deltacomputer(
3592 3598 destrevlog,
3593 3599 write_debug=write_debug,
3594 3600 )
3595 3601 index = self.index
3596 3602 for rev in self:
3597 3603 entry = index[rev]
3598 3604
3599 3605 # Some classes override linkrev to take filtered revs into
3600 3606 # account. Use raw entry from index.
3601 3607 flags = entry[0] & 0xFFFF
3602 3608 linkrev = entry[4]
3603 3609 p1 = index[entry[5]][7]
3604 3610 p2 = index[entry[6]][7]
3605 3611 node = entry[7]
3606 3612
3607 3613 # (Possibly) reuse the delta from the revlog if allowed and
3608 3614 # the revlog chunk is a delta.
3609 3615 cachedelta = None
3610 3616 rawtext = None
3611 3617 if deltareuse == self.DELTAREUSEFULLADD:
3612 3618 text = self._revisiondata(rev)
3613 3619 sidedata = self.sidedata(rev)
3614 3620
3615 3621 if sidedata_helpers is not None:
3616 3622 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3617 3623 self, sidedata_helpers, sidedata, rev
3618 3624 )
3619 3625 flags = flags | new_flags[0] & ~new_flags[1]
3620 3626
3621 3627 destrevlog.addrevision(
3622 3628 text,
3623 3629 tr,
3624 3630 linkrev,
3625 3631 p1,
3626 3632 p2,
3627 3633 cachedelta=cachedelta,
3628 3634 node=node,
3629 3635 flags=flags,
3630 3636 deltacomputer=deltacomputer,
3631 3637 sidedata=sidedata,
3632 3638 )
3633 3639 else:
3634 3640 if destrevlog.delta_config.lazy_delta:
3635 3641 dp = self.deltaparent(rev)
3636 3642 if dp != nullrev:
3637 3643 cachedelta = (dp, bytes(self._chunk(rev)))
3638 3644
3639 3645 sidedata = None
3640 3646 if not cachedelta:
3641 3647 try:
3642 3648 rawtext = self._revisiondata(rev)
3643 3649 except error.CensoredNodeError as censored:
3644 3650 assert flags & REVIDX_ISCENSORED
3645 3651 rawtext = censored.tombstone
3646 3652 sidedata = self.sidedata(rev)
3647 3653 if sidedata is None:
3648 3654 sidedata = self.sidedata(rev)
3649 3655
3650 3656 if sidedata_helpers is not None:
3651 3657 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3652 3658 self, sidedata_helpers, sidedata, rev
3653 3659 )
3654 3660 flags = flags | new_flags[0] & ~new_flags[1]
3655 3661
3656 3662 destrevlog._addrevision(
3657 3663 node,
3658 3664 rawtext,
3659 3665 tr,
3660 3666 linkrev,
3661 3667 p1,
3662 3668 p2,
3663 3669 flags,
3664 3670 cachedelta,
3665 3671 deltacomputer=deltacomputer,
3666 3672 sidedata=sidedata,
3667 3673 )
3668 3674
3669 3675 if addrevisioncb:
3670 3676 addrevisioncb(self, rev, node)
3671 3677
3672 3678 def censorrevision(self, tr, censornode, tombstone=b''):
3673 3679 if self._format_version == REVLOGV0:
3674 3680 raise error.RevlogError(
3675 3681 _(b'cannot censor with version %d revlogs')
3676 3682 % self._format_version
3677 3683 )
3678 3684 elif self._format_version == REVLOGV1:
3679 3685 rewrite.v1_censor(self, tr, censornode, tombstone)
3680 3686 else:
3681 3687 rewrite.v2_censor(self, tr, censornode, tombstone)
3682 3688
3683 3689 def verifyintegrity(self, state):
3684 3690 """Verifies the integrity of the revlog.
3685 3691
3686 3692 Yields ``revlogproblem`` instances describing problems that are
3687 3693 found.
3688 3694 """
3689 3695 dd, di = self.checksize()
3690 3696 if dd:
3691 3697 yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
3692 3698 if di:
3693 3699 yield revlogproblem(error=_(b'index contains %d extra bytes') % di)
3694 3700
3695 3701 version = self._format_version
3696 3702
3697 3703 # The verifier tells us what revlog version we should expect.
3698 3704 if version != state[b'expectedversion']:
3699 3705 yield revlogproblem(
3700 3706 warning=_(b"warning: '%s' uses revlog format %d; expected %d")
3701 3707 % (self.display_id, version, state[b'expectedversion'])
3702 3708 )
3703 3709
3704 3710 state[b'skipread'] = set()
3705 3711 state[b'safe_renamed'] = set()
3706 3712
3707 3713 for rev in self:
3708 3714 node = self.node(rev)
3709 3715
3710 3716 # Verify contents. 4 cases to care about:
3711 3717 #
3712 3718 # common: the most common case
3713 3719 # rename: with a rename
3714 3720 # meta: file content starts with b'\1\n', the metadata
3715 3721 # header defined in filelog.py, but without a rename
3716 3722 # ext: content stored externally
3717 3723 #
3718 3724 # More formally, their differences are shown below:
3719 3725 #
3720 3726 # | common | rename | meta | ext
3721 3727 # -------------------------------------------------------
3722 3728 # flags() | 0 | 0 | 0 | not 0
3723 3729 # renamed() | False | True | False | ?
3724 3730 # rawtext[0:2]=='\1\n'| False | True | True | ?
3725 3731 #
3726 3732 # "rawtext" means the raw text stored in revlog data, which
3727 3733 # could be retrieved by "rawdata(rev)". "text"
3728 3734 # mentioned below is "revision(rev)".
3729 3735 #
3730 3736 # There are 3 different lengths stored physically:
3731 3737 # 1. L1: rawsize, stored in revlog index
3732 3738 # 2. L2: len(rawtext), stored in revlog data
3733 3739 # 3. L3: len(text), stored in revlog data if flags==0, or
3734 3740 # possibly somewhere else if flags!=0
3735 3741 #
3736 3742 # L1 should be equal to L2. L3 could be different from them.
3737 3743 # "text" may or may not affect commit hash depending on flag
3738 3744 # processors (see flagutil.addflagprocessor).
3739 3745 #
3740 3746 # | common | rename | meta | ext
3741 3747 # -------------------------------------------------
3742 3748 # rawsize() | L1 | L1 | L1 | L1
3743 3749 # size() | L1 | L2-LM | L1(*) | L1 (?)
3744 3750 # len(rawtext) | L2 | L2 | L2 | L2
3745 3751 # len(text) | L2 | L2 | L2 | L3
3746 3752 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
3747 3753 #
3748 3754 # LM: length of metadata, depending on rawtext
3749 3755 # (*): not ideal, see comment in filelog.size
3750 3756 # (?): could be "- len(meta)" if the resolved content has
3751 3757 # rename metadata
3752 3758 #
3753 3759 # Checks that need to be done:
3754 3760 # 1. length check: L1 == L2, in all cases.
3755 3761 # 2. hash check: depending on flag processor, we may need to
3756 3762 # use either "text" (external), or "rawtext" (in revlog).
3757 3763
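# A small worked example of the rename case (hypothetical sizes): if the
# rename metadata header is LM bytes long and the actual content is C bytes,
# then L1 == L2 == LM + C -- the length check below compares the full
# rawtext -- while len(read()) == C once the header is stripped.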
3758 3764 try:
3759 3765 skipflags = state.get(b'skipflags', 0)
3760 3766 if skipflags:
3761 3767 skipflags &= self.flags(rev)
3762 3768
3763 3769 _verify_revision(self, skipflags, state, node)
3764 3770
3765 3771 l1 = self.rawsize(rev)
3766 3772 l2 = len(self.rawdata(node))
3767 3773
3768 3774 if l1 != l2:
3769 3775 yield revlogproblem(
3770 3776 error=_(b'unpacked size is %d, %d expected') % (l2, l1),
3771 3777 node=node,
3772 3778 )
3773 3779
3774 3780 except error.CensoredNodeError:
3775 3781 if state[b'erroroncensored']:
3776 3782 yield revlogproblem(
3777 3783 error=_(b'censored file data'), node=node
3778 3784 )
3779 3785 state[b'skipread'].add(node)
3780 3786 except Exception as e:
3781 3787 yield revlogproblem(
3782 3788 error=_(b'unpacking %s: %s')
3783 3789 % (short(node), stringutil.forcebytestr(e)),
3784 3790 node=node,
3785 3791 )
3786 3792 state[b'skipread'].add(node)
3787 3793
3788 3794 def storageinfo(
3789 3795 self,
3790 3796 exclusivefiles=False,
3791 3797 sharedfiles=False,
3792 3798 revisionscount=False,
3793 3799 trackedsize=False,
3794 3800 storedsize=False,
3795 3801 ):
3796 3802 d = {}
3797 3803
3798 3804 if exclusivefiles:
3799 3805 d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
3800 3806 if not self._inline:
3801 3807 d[b'exclusivefiles'].append((self.opener, self._datafile))
3802 3808
3803 3809 if sharedfiles:
3804 3810 d[b'sharedfiles'] = []
3805 3811
3806 3812 if revisionscount:
3807 3813 d[b'revisionscount'] = len(self)
3808 3814
3809 3815 if trackedsize:
3810 3816 d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))
3811 3817
3812 3818 if storedsize:
3813 3819 d[b'storedsize'] = sum(
3814 3820 self.opener.stat(path).st_size for path in self.files()
3815 3821 )
3816 3822
3817 3823 return d
3818 3824
3819 3825 def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
3820 3826 if not self.feature_config.has_side_data:
3821 3827 return
3822 3828 # revlog formats with sidedata support do not support inline data
3823 3829 assert not self._inline
3824 3830 if not helpers[1] and not helpers[2]:
3825 3831 # Nothing to generate or remove
3826 3832 return
3827 3833
3828 3834 new_entries = []
3829 3835 # append the new sidedata
3830 3836 with self._writing(transaction):
3831 3837 ifh, dfh, sdfh = self._inner._writinghandles
3832 3838 dfh.seek(self._docket.sidedata_end, os.SEEK_SET)
3833 3839
3834 3840 current_offset = sdfh.tell()
3835 3841 for rev in range(startrev, endrev + 1):
3836 3842 entry = self.index[rev]
3837 3843 new_sidedata, flags = sidedatautil.run_sidedata_helpers(
3838 3844 store=self,
3839 3845 sidedata_helpers=helpers,
3840 3846 sidedata={},
3841 3847 rev=rev,
3842 3848 )
3843 3849
3844 3850 serialized_sidedata = sidedatautil.serialize_sidedata(
3845 3851 new_sidedata
3846 3852 )
3847 3853
3848 3854 sidedata_compression_mode = COMP_MODE_INLINE
3849 3855 if serialized_sidedata and self.feature_config.has_side_data:
3850 3856 sidedata_compression_mode = COMP_MODE_PLAIN
3851 3857 h, comp_sidedata = self.compress(serialized_sidedata)
3852 3858 if (
3853 3859 h != b'u'
3854 3860 and comp_sidedata[0] != b'\0'
3855 3861 and len(comp_sidedata) < len(serialized_sidedata)
3856 3862 ):
3857 3863 assert not h
3858 3864 if (
3859 3865 comp_sidedata[0]
3860 3866 == self._docket.default_compression_header
3861 3867 ):
3862 3868 sidedata_compression_mode = COMP_MODE_DEFAULT
3863 3869 serialized_sidedata = comp_sidedata
3864 3870 else:
3865 3871 sidedata_compression_mode = COMP_MODE_INLINE
3866 3872 serialized_sidedata = comp_sidedata
3867 3873 if entry[8] != 0 or entry[9] != 0:
3868 3874 # rewriting entries that already have sidedata is not
3869 3875 # supported yet, because it introduces garbage data in the
3870 3876 # revlog.
3871 3877 msg = b"rewriting existing sidedata is not supported yet"
3872 3878 raise error.Abort(msg)
3873 3879
3874 3880 # Apply (potential) flags to add and to remove after running
3875 3881 # the sidedata helpers
3876 3882 new_offset_flags = entry[0] | flags[0] & ~flags[1]
3877 3883 entry_update = (
3878 3884 current_offset,
3879 3885 len(serialized_sidedata),
3880 3886 new_offset_flags,
3881 3887 sidedata_compression_mode,
3882 3888 )
3883 3889
3884 3890 # the sidedata computation might have moved the file cursors around
3885 3891 sdfh.seek(current_offset, os.SEEK_SET)
3886 3892 sdfh.write(serialized_sidedata)
3887 3893 new_entries.append(entry_update)
3888 3894 current_offset += len(serialized_sidedata)
3889 3895 self._docket.sidedata_end = sdfh.tell()
3890 3896
3891 3897 # rewrite the new index entries
3892 3898 ifh.seek(startrev * self.index.entry_size)
3893 3899 for i, e in enumerate(new_entries):
3894 3900 rev = startrev + i
3895 3901 self.index.replace_sidedata_info(rev, *e)
3896 3902 packed = self.index.entry_binary(rev)
3897 3903 if rev == 0 and self._docket is None:
3898 3904 header = self._format_flags | self._format_version
3899 3905 header = self.index.pack_header(header)
3900 3906 packed = header + packed
3901 3907 ifh.write(packed)
@@ -1,943 +1,943 b''
1 1 # revlogutils/debug.py - utility used for revlog debugging
2 2 #
3 3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 4 # Copyright 2022 Octobus <contact@octobus.net>
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8
9 9 import collections
10 10 import string
11 11
12 12 from .. import (
13 13 mdiff,
14 14 node as nodemod,
15 15 revlogutils,
16 16 )
17 17
18 18 from . import (
19 19 constants,
20 20 deltas as deltautil,
21 21 )
22 22
23 23 INDEX_ENTRY_DEBUG_COLUMN = []
24 24
25 25 NODE_SIZE = object()
26 26
27 27
28 28 class _column_base:
29 29 """constains the definition of a revlog column
30 30
31 31 name: the column header,
32 32 value_func: the function called to get a value,
33 33 size: the width of the column,
34 34 verbose_only: only include the column in verbose mode.
35 35 """
36 36
37 37 def __init__(self, name, value_func, size=None, verbose=False):
38 38 self.name = name
39 39 self.value_func = value_func
40 40 if size is not NODE_SIZE:
41 41 if size is None:
42 42 size = 8 # arbitrary default
43 43 size = max(len(name), size)
44 44 self._size = size
45 45 self.verbose_only = verbose
46 46
47 47 def get_size(self, node_size):
48 48 if self._size is NODE_SIZE:
49 49 return node_size
50 50 else:
51 51 return self._size
52 52
53 53
54 54 def debug_column(name, size=None, verbose=False):
55 55 """decorated function is registered as a column
56 56
57 57 name: the name of the column,
58 58 size: the expected size of the column.
59 59 """
60 60
61 61 def register(func):
62 62 entry = _column_base(
63 63 name=name,
64 64 value_func=func,
65 65 size=size,
66 66 verbose=verbose,
67 67 )
68 68 INDEX_ENTRY_DEBUG_COLUMN.append(entry)
69 69 return entry
70 70
71 71 return register
72 72
73 73
74 74 @debug_column(b"rev", size=6)
75 75 def _rev(index, rev, entry, hexfn):
76 76 return b"%d" % rev
77 77
78 78
79 79 @debug_column(b"rank", size=6, verbose=True)
80 80 def rank(index, rev, entry, hexfn):
81 81 return b"%d" % entry[constants.ENTRY_RANK]
82 82
83 83
84 84 @debug_column(b"linkrev", size=6)
85 85 def _linkrev(index, rev, entry, hexfn):
86 86 return b"%d" % entry[constants.ENTRY_LINK_REV]
87 87
88 88
89 89 @debug_column(b"nodeid", size=NODE_SIZE)
90 90 def _nodeid(index, rev, entry, hexfn):
91 91 return hexfn(entry[constants.ENTRY_NODE_ID])
92 92
93 93
94 94 @debug_column(b"p1-rev", size=6, verbose=True)
95 95 def _p1_rev(index, rev, entry, hexfn):
96 96 return b"%d" % entry[constants.ENTRY_PARENT_1]
97 97
98 98
99 99 @debug_column(b"p1-nodeid", size=NODE_SIZE)
100 100 def _p1_node(index, rev, entry, hexfn):
101 101 parent = entry[constants.ENTRY_PARENT_1]
102 102 p_entry = index[parent]
103 103 return hexfn(p_entry[constants.ENTRY_NODE_ID])
104 104
105 105
106 106 @debug_column(b"p2-rev", size=6, verbose=True)
107 107 def _p2_rev(index, rev, entry, hexfn):
108 108 return b"%d" % entry[constants.ENTRY_PARENT_2]
109 109
110 110
111 111 @debug_column(b"p2-nodeid", size=NODE_SIZE)
112 112 def _p2_node(index, rev, entry, hexfn):
113 113 parent = entry[constants.ENTRY_PARENT_2]
114 114 p_entry = index[parent]
115 115 return hexfn(p_entry[constants.ENTRY_NODE_ID])
116 116
117 117
118 118 @debug_column(b"full-size", size=20, verbose=True)
119 119 def full_size(index, rev, entry, hexfn):
120 120 return b"%d" % entry[constants.ENTRY_DATA_UNCOMPRESSED_LENGTH]
121 121
122 122
123 123 @debug_column(b"delta-base", size=6, verbose=True)
124 124 def delta_base(index, rev, entry, hexfn):
125 125 return b"%d" % entry[constants.ENTRY_DELTA_BASE]
126 126
127 127
128 128 @debug_column(b"flags", size=2, verbose=True)
129 129 def flags(index, rev, entry, hexfn):
130 130 field = entry[constants.ENTRY_DATA_OFFSET]
131 131 field &= 0xFFFF
132 132 return b"%d" % field
133 133
134 134
135 135 @debug_column(b"comp-mode", size=4, verbose=True)
136 136 def compression_mode(index, rev, entry, hexfn):
137 137 return b"%d" % entry[constants.ENTRY_DATA_COMPRESSION_MODE]
138 138
139 139
140 140 @debug_column(b"data-offset", size=20, verbose=True)
141 141 def data_offset(index, rev, entry, hexfn):
142 142 field = entry[constants.ENTRY_DATA_OFFSET]
143 143 field >>= 16
144 144 return b"%d" % field
145 145
146 146
147 147 @debug_column(b"chunk-size", size=10, verbose=True)
148 148 def data_chunk_size(index, rev, entry, hexfn):
149 149 return b"%d" % entry[constants.ENTRY_DATA_COMPRESSED_LENGTH]
150 150
151 151
152 152 @debug_column(b"sd-comp-mode", size=7, verbose=True)
153 153 def sidedata_compression_mode(index, rev, entry, hexfn):
154 154 compression = entry[constants.ENTRY_SIDEDATA_COMPRESSION_MODE]
155 155 if compression == constants.COMP_MODE_PLAIN:
156 156 return b"plain"
157 157 elif compression == constants.COMP_MODE_DEFAULT:
158 158 return b"default"
159 159 elif compression == constants.COMP_MODE_INLINE:
160 160 return b"inline"
161 161 else:
162 162 return b"%d" % compression
163 163
164 164
165 165 @debug_column(b"sidedata-offset", size=20, verbose=True)
166 166 def sidedata_offset(index, rev, entry, hexfn):
167 167 return b"%d" % entry[constants.ENTRY_SIDEDATA_OFFSET]
168 168
169 169
170 170 @debug_column(b"sd-chunk-size", size=10, verbose=True)
171 171 def sidedata_chunk_size(index, rev, entry, hexfn):
172 172 return b"%d" % entry[constants.ENTRY_SIDEDATA_COMPRESSED_LENGTH]
173 173
174 174
175 175 def debug_index(
176 176 ui,
177 177 repo,
178 178 formatter,
179 179 revlog,
180 180 full_node,
181 181 ):
182 182 """display index data for a revlog"""
183 183 if full_node:
184 184 hexfn = nodemod.hex
185 185 else:
186 186 hexfn = nodemod.short
187 187
188 188 idlen = 12
189 189 for i in revlog:
190 190 idlen = len(hexfn(revlog.node(i)))
191 191 break
192 192
193 193 fm = formatter
194 194
195 195 header_pieces = []
196 196 for column in INDEX_ENTRY_DEBUG_COLUMN:
197 197 if column.verbose_only and not ui.verbose:
198 198 continue
199 199 size = column.get_size(idlen)
200 200 name = column.name
201 201 header_pieces.append(name.rjust(size))
202 202
203 203 fm.plain(b' '.join(header_pieces) + b'\n')
204 204
205 205 index = revlog.index
206 206
207 207 for rev in revlog:
208 208 fm.startitem()
209 209 entry = index[rev]
210 210 first = True
211 211 for column in INDEX_ENTRY_DEBUG_COLUMN:
212 212 if column.verbose_only and not ui.verbose:
213 213 continue
214 214 if not first:
215 215 fm.plain(b' ')
216 216 first = False
217 217
218 218 size = column.get_size(idlen)
219 219 value = column.value_func(index, rev, entry, hexfn)
220 220 display = b"%%%ds" % size
221 221 fm.write(column.name, display, value)
222 222 fm.plain(b'\n')
223 223
224 224 fm.end()
225 225
226 226
227 227 def dump(ui, revlog):
228 228 """perform the work for `hg debugrevlog --dump"""
229 229 # XXX seems redundant with debug index ?
230 230 r = revlog
231 231 numrevs = len(r)
232 232 ui.write(
233 233 (
234 234 b"# rev p1rev p2rev start end deltastart base p1 p2"
235 235 b" rawsize totalsize compression heads chainlen\n"
236 236 )
237 237 )
238 238 ts = 0
239 239 heads = set()
240 240
241 241 for rev in range(numrevs):
242 242 dbase = r.deltaparent(rev)
243 243 if dbase == -1:
244 244 dbase = rev
245 245 cbase = r.chainbase(rev)
246 246 clen = r.chainlen(rev)
247 247 p1, p2 = r.parentrevs(rev)
248 248 rs = r.rawsize(rev)
249 249 ts = ts + rs
250 250 heads -= set(r.parentrevs(rev))
251 251 heads.add(rev)
252 252 try:
253 253 compression = ts / r.end(rev)
254 254 except ZeroDivisionError:
255 255 compression = 0
256 256 ui.write(
257 257 b"%5d %5d %5d %5d %5d %10d %4d %4d %4d %7d %9d "
258 258 b"%11d %5d %8d\n"
259 259 % (
260 260 rev,
261 261 p1,
262 262 p2,
263 263 r.start(rev),
264 264 r.end(rev),
265 265 r.start(dbase),
266 266 r.start(cbase),
267 267 r.start(p1),
268 268 r.start(p2),
269 269 rs,
270 270 ts,
271 271 compression,
272 272 len(heads),
273 273 clen,
274 274 )
275 275 )
276 276
277 277
278 278 def debug_revlog(ui, revlog):
279 279 """code for `hg debugrevlog`"""
280 280 r = revlog
281 281 format = r._format_version
282 282 v = r._format_flags
283 283 flags = []
284 284 gdelta = False
285 285 if v & constants.FLAG_INLINE_DATA:
286 286 flags.append(b'inline')
287 287 if v & constants.FLAG_GENERALDELTA:
288 288 gdelta = True
289 289 flags.append(b'generaldelta')
290 290 if not flags:
291 291 flags = [b'(none)']
292 292
293 293 ### the total size of the stored content if uncompressed.
294 294 full_text_total_size = 0
295 295 ### tracks merge vs single parent
296 296 nummerges = 0
297 297
298 298 ### tracks how each "delta" is built
299 299 # nodelta
300 300 numempty = 0
301 301 numemptytext = 0
302 302 numemptydelta = 0
303 303 # full file content
304 304 numfull = 0
305 305 # intermediate snapshot against a prior snapshot
306 306 numsemi = 0
307 307 # snapshot count per depth
308 308 numsnapdepth = collections.defaultdict(lambda: 0)
309 309 # number of snapshots with a non-ancestor delta
310 310 numsnapdepth_nad = collections.defaultdict(lambda: 0)
311 311 # delta against previous revision
312 312 numprev = 0
313 313 # delta against prev, where prev is a non-ancestor
314 314 numprev_nad = 0
315 315 # delta against first or second parent (not prev)
316 316 nump1 = 0
317 317 nump2 = 0
318 318 # delta against neither prev nor parents
319 319 numother = 0
320 320 # delta against other that is a non-ancestor
321 321 numother_nad = 0
322 322 # delta against prev that are also first or second parent
323 323 # (details of `numprev`)
324 324 nump1prev = 0
325 325 nump2prev = 0
326 326
327 327 # data about the delta chain of each rev
328 328 chainlengths = []
329 329 chainbases = []
330 330 chainspans = []
331 331
332 332 # data about each revision
333 333 datasize = [None, 0, 0]
334 334 fullsize = [None, 0, 0]
335 335 semisize = [None, 0, 0]
336 336 # snapshot count per depth
337 337 snapsizedepth = collections.defaultdict(lambda: [None, 0, 0])
338 338 deltasize = [None, 0, 0]
339 339 chunktypecounts = {}
340 340 chunktypesizes = {}
341 341
342 342 def addsize(size, l):
343 343 if l[0] is None or size < l[0]:
344 344 l[0] = size
345 345 if size > l[1]:
346 346 l[1] = size
347 347 l[2] += size
348 348
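# e.g. starting from l = [None, 0, 0], addsize(5, l) followed by addsize(2, l)
# leaves l == [2, 5, 7]: the smallest size, the largest size and the total.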
349 349 with r.reading():
350 350 numrevs = len(r)
351 351 for rev in range(numrevs):
352 352 p1, p2 = r.parentrevs(rev)
353 353 delta = r.deltaparent(rev)
354 354 if format > 0:
355 355 s = r.rawsize(rev)
356 356 full_text_total_size += s
357 357 addsize(s, datasize)
358 358 if p2 != nodemod.nullrev:
359 359 nummerges += 1
360 360 size = r.length(rev)
361 361 if delta == nodemod.nullrev:
362 362 chainlengths.append(0)
363 363 chainbases.append(r.start(rev))
364 364 chainspans.append(size)
365 365 if size == 0:
366 366 numempty += 1
367 367 numemptytext += 1
368 368 else:
369 369 numfull += 1
370 370 numsnapdepth[0] += 1
371 371 addsize(size, fullsize)
372 372 addsize(size, snapsizedepth[0])
373 373 else:
374 374 nad = (
375 375 delta != p1
376 376 and delta != p2
377 377 and not r.isancestorrev(delta, rev)
378 378 )
379 379 chainlengths.append(chainlengths[delta] + 1)
380 380 baseaddr = chainbases[delta]
381 381 revaddr = r.start(rev)
382 382 chainbases.append(baseaddr)
383 383 chainspans.append((revaddr - baseaddr) + size)
384 384 if size == 0:
385 385 numempty += 1
386 386 numemptydelta += 1
387 387 elif r.issnapshot(rev):
388 388 addsize(size, semisize)
389 389 numsemi += 1
390 390 depth = r.snapshotdepth(rev)
391 391 numsnapdepth[depth] += 1
392 392 if nad:
393 393 numsnapdepth_nad[depth] += 1
394 394 addsize(size, snapsizedepth[depth])
395 395 else:
396 396 addsize(size, deltasize)
397 397 if delta == rev - 1:
398 398 numprev += 1
399 399 if delta == p1:
400 400 nump1prev += 1
401 401 elif delta == p2:
402 402 nump2prev += 1
403 403 elif nad:
404 404 numprev_nad += 1
405 405 elif delta == p1:
406 406 nump1 += 1
407 407 elif delta == p2:
408 408 nump2 += 1
409 409 elif delta != nodemod.nullrev:
410 410 numother += 1
411 411 numother_nad += 1
412 412
413 413 # Obtain data on the raw chunks in the revlog.
414 if hasattr(r, '_getsegmentforrevs'):
415 segment = r._getsegmentforrevs(rev, rev)[1]
414 if hasattr(r, '_inner'):
415 segment = r._inner.get_segment_for_revs(rev, rev)[1]
416 416 else:
417 417 segment = r._revlog._getsegmentforrevs(rev, rev)[1]
418 418 if segment:
419 419 chunktype = bytes(segment[0:1])
420 420 else:
421 421 chunktype = b'empty'
422 422
423 423 if chunktype not in chunktypecounts:
424 424 chunktypecounts[chunktype] = 0
425 425 chunktypesizes[chunktype] = 0
426 426
427 427 chunktypecounts[chunktype] += 1
428 428 chunktypesizes[chunktype] += size
429 429
430 430 # Adjust size min value for empty cases
431 431 for size in (datasize, fullsize, semisize, deltasize):
432 432 if size[0] is None:
433 433 size[0] = 0
434 434
435 435 numdeltas = numrevs - numfull - numempty - numsemi
436 436 numoprev = numprev - nump1prev - nump2prev - numprev_nad
437 437 num_other_ancestors = numother - numother_nad
438 438 totalrawsize = datasize[2]
439 439 datasize[2] /= numrevs
440 440 fulltotal = fullsize[2]
441 441 if numfull == 0:
442 442 fullsize[2] = 0
443 443 else:
444 444 fullsize[2] /= numfull
445 445 semitotal = semisize[2]
446 446 snaptotal = {}
447 447 if numsemi > 0:
448 448 semisize[2] /= numsemi
449 449 for depth in snapsizedepth:
450 450 snaptotal[depth] = snapsizedepth[depth][2]
451 451 snapsizedepth[depth][2] /= numsnapdepth[depth]
452 452
453 453 deltatotal = deltasize[2]
454 454 if numdeltas > 0:
455 455 deltasize[2] /= numdeltas
456 456 totalsize = fulltotal + semitotal + deltatotal
457 457 avgchainlen = sum(chainlengths) / numrevs
458 458 maxchainlen = max(chainlengths)
459 459 maxchainspan = max(chainspans)
460 460 compratio = 1
461 461 if totalsize:
462 462 compratio = totalrawsize / totalsize
463 463
464 464 basedfmtstr = b'%%%dd\n'
465 465 basepcfmtstr = b'%%%dd %s(%%5.2f%%%%)\n'
466 466
467 467 def dfmtstr(max):
468 468 return basedfmtstr % len(str(max))
469 469
470 470 def pcfmtstr(max, padding=0):
471 471 return basepcfmtstr % (len(str(max)), b' ' * padding)
472 472
473 473 def pcfmt(value, total):
474 474 if total:
475 475 return (value, 100 * float(value) / total)
476 476 else:
477 477 return value, 100.0
478 478
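# For instance (hypothetical numbers): pcfmtstr(9999) builds
# b'%4d (%5.2f%%)\n' and pcfmt(3, 12) returns (3, 25.0), so a formatted line
# would render as b'   3 (25.00%)\n' -- a count followed by its share of the
# total.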
479 479 ui.writenoi18n(b'format : %d\n' % format)
480 480 ui.writenoi18n(b'flags : %s\n' % b', '.join(flags))
481 481
482 482 ui.write(b'\n')
483 483 fmt = pcfmtstr(totalsize)
484 484 fmt2 = dfmtstr(totalsize)
485 485 ui.writenoi18n(b'revisions : ' + fmt2 % numrevs)
486 486 ui.writenoi18n(b' merges : ' + fmt % pcfmt(nummerges, numrevs))
487 487 ui.writenoi18n(
488 488 b' normal : ' + fmt % pcfmt(numrevs - nummerges, numrevs)
489 489 )
490 490 ui.writenoi18n(b'revisions : ' + fmt2 % numrevs)
491 491 ui.writenoi18n(b' empty : ' + fmt % pcfmt(numempty, numrevs))
492 492 ui.writenoi18n(
493 493 b' text : '
494 494 + fmt % pcfmt(numemptytext, numemptytext + numemptydelta)
495 495 )
496 496 ui.writenoi18n(
497 497 b' delta : '
498 498 + fmt % pcfmt(numemptydelta, numemptytext + numemptydelta)
499 499 )
500 500 ui.writenoi18n(
501 501 b' snapshot : ' + fmt % pcfmt(numfull + numsemi, numrevs)
502 502 )
503 503 for depth in sorted(numsnapdepth):
504 504 base = b' lvl-%-3d : ' % depth
505 505 count = fmt % pcfmt(numsnapdepth[depth], numrevs)
506 506 pieces = [base, count]
507 507 if numsnapdepth_nad[depth]:
508 508 pieces[-1] = count = count[:-1] # drop the final '\n'
509 509 more = b' non-ancestor-bases: '
510 510 anc_count = fmt
511 511 anc_count %= pcfmt(numsnapdepth_nad[depth], numsnapdepth[depth])
512 512 pieces.append(more)
513 513 pieces.append(anc_count)
514 514 ui.write(b''.join(pieces))
515 515 ui.writenoi18n(b' deltas : ' + fmt % pcfmt(numdeltas, numrevs))
516 516 ui.writenoi18n(b'revision size : ' + fmt2 % totalsize)
517 517 ui.writenoi18n(
518 518 b' snapshot : ' + fmt % pcfmt(fulltotal + semitotal, totalsize)
519 519 )
520 520 for depth in sorted(numsnapdepth):
521 521 ui.write(
522 522 (b' lvl-%-3d : ' % depth)
523 523 + fmt % pcfmt(snaptotal[depth], totalsize)
524 524 )
525 525 ui.writenoi18n(b' deltas : ' + fmt % pcfmt(deltatotal, totalsize))
526 526
527 527 letters = string.ascii_letters.encode('ascii')
528 528
529 529 def fmtchunktype(chunktype):
530 530 if chunktype == b'empty':
531 531 return b' %s : ' % chunktype
532 532 elif chunktype in letters:
533 533 return b' 0x%s (%s) : ' % (nodemod.hex(chunktype), chunktype)
534 534 else:
535 535 return b' 0x%s : ' % nodemod.hex(chunktype)
536 536
537 537 ui.write(b'\n')
538 538 ui.writenoi18n(b'chunks : ' + fmt2 % numrevs)
539 539 for chunktype in sorted(chunktypecounts):
540 540 ui.write(fmtchunktype(chunktype))
541 541 ui.write(fmt % pcfmt(chunktypecounts[chunktype], numrevs))
542 542 ui.writenoi18n(b'chunks size : ' + fmt2 % totalsize)
543 543 for chunktype in sorted(chunktypecounts):
544 544 ui.write(fmtchunktype(chunktype))
545 545 ui.write(fmt % pcfmt(chunktypesizes[chunktype], totalsize))
546 546
547 547 ui.write(b'\n')
548 548 b_total = b"%d" % full_text_total_size
549 549 p_total = []
550 550 while len(b_total) > 3:
551 551 p_total.append(b_total[-3:])
552 552 b_total = b_total[:-3]
553 553 p_total.append(b_total)
554 554 p_total.reverse()
555 555 b_total = b' '.join(p_total)
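# e.g. a total of 1234567 is split from the end into b'567', b'234', b'1',
# then reversed and joined to display as b'1 234 567'.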
556 556
557 557 ui.write(b'\n')
558 558 ui.writenoi18n(b'total-stored-content: %s bytes\n' % b_total)
559 559 ui.write(b'\n')
560 560 fmt = dfmtstr(max(avgchainlen, maxchainlen, maxchainspan, compratio))
561 561 ui.writenoi18n(b'avg chain length : ' + fmt % avgchainlen)
562 562 ui.writenoi18n(b'max chain length : ' + fmt % maxchainlen)
563 563 ui.writenoi18n(b'max chain reach : ' + fmt % maxchainspan)
564 564 ui.writenoi18n(b'compression ratio : ' + fmt % compratio)
565 565
566 566 if format > 0:
567 567 ui.write(b'\n')
568 568 ui.writenoi18n(
569 569 b'uncompressed data size (min/max/avg) : %d / %d / %d\n'
570 570 % tuple(datasize)
571 571 )
572 572 ui.writenoi18n(
573 573 b'full revision size (min/max/avg) : %d / %d / %d\n'
574 574 % tuple(fullsize)
575 575 )
576 576 ui.writenoi18n(
577 577 b'inter-snapshot size (min/max/avg) : %d / %d / %d\n'
578 578 % tuple(semisize)
579 579 )
580 580 for depth in sorted(snapsizedepth):
581 581 if depth == 0:
582 582 continue
583 583 ui.writenoi18n(
584 584 b' level-%-3d (min/max/avg) : %d / %d / %d\n'
585 585 % ((depth,) + tuple(snapsizedepth[depth]))
586 586 )
587 587 ui.writenoi18n(
588 588 b'delta size (min/max/avg) : %d / %d / %d\n'
589 589 % tuple(deltasize)
590 590 )
591 591
592 592 if numdeltas > 0:
593 593 ui.write(b'\n')
594 594 fmt = pcfmtstr(numdeltas)
595 595 fmt2 = pcfmtstr(numdeltas, 4)
596 596 ui.writenoi18n(
597 597 b'deltas against prev : ' + fmt % pcfmt(numprev, numdeltas)
598 598 )
599 599 if numprev > 0:
600 600 ui.writenoi18n(
601 601 b' where prev = p1 : ' + fmt2 % pcfmt(nump1prev, numprev)
602 602 )
603 603 ui.writenoi18n(
604 604 b' where prev = p2 : ' + fmt2 % pcfmt(nump2prev, numprev)
605 605 )
606 606 ui.writenoi18n(
607 607 b' other-ancestor : ' + fmt2 % pcfmt(numoprev, numprev)
608 608 )
609 609 ui.writenoi18n(
610 610 b' unrelated : ' + fmt2 % pcfmt(numoprev, numprev)
611 611 )
612 612 if gdelta:
613 613 ui.writenoi18n(
614 614 b'deltas against p1 : ' + fmt % pcfmt(nump1, numdeltas)
615 615 )
616 616 ui.writenoi18n(
617 617 b'deltas against p2 : ' + fmt % pcfmt(nump2, numdeltas)
618 618 )
619 619 ui.writenoi18n(
620 620 b'deltas against ancs : '
621 621 + fmt % pcfmt(num_other_ancestors, numdeltas)
622 622 )
623 623 ui.writenoi18n(
624 624 b'deltas against other : '
625 625 + fmt % pcfmt(numother_nad, numdeltas)
626 626 )
627 627
628 628
629 629 def debug_delta_find(ui, revlog, rev, base_rev=nodemod.nullrev):
630 630 """display the search process for a delta"""
631 631 deltacomputer = deltautil.deltacomputer(
632 632 revlog,
633 633 write_debug=ui.write,
634 634 debug_search=not ui.quiet,
635 635 )
636 636
637 637 node = revlog.node(rev)
638 638 p1r, p2r = revlog.parentrevs(rev)
639 639 p1 = revlog.node(p1r)
640 640 p2 = revlog.node(p2r)
641 641 full_text = revlog.revision(rev)
642 642 btext = [full_text]
643 643 textlen = len(btext[0])
644 644 cachedelta = None
645 645 flags = revlog.flags(rev)
646 646
647 647 if base_rev != nodemod.nullrev:
648 648 base_text = revlog.revision(base_rev)
649 649 delta = mdiff.textdiff(base_text, full_text)
650 650
651 651 cachedelta = (base_rev, delta, constants.DELTA_BASE_REUSE_TRY)
652 652 btext = [None]
653 653
654 654 revinfo = revlogutils.revisioninfo(
655 655 node,
656 656 p1,
657 657 p2,
658 658 btext,
659 659 textlen,
660 660 cachedelta,
661 661 flags,
662 662 )
663 663
664 664 fh = revlog._datafp()
665 665 deltacomputer.finddeltainfo(revinfo, fh, target_rev=rev)
666 666
667 667
668 668 def debug_revlog_stats(
669 669 repo, fm, changelog: bool, manifest: bool, filelogs: bool
670 670 ):
671 671 """Format revlog statistics for debugging purposes
672 672
673 673 fm: the output formatter.
674 674 """
675 675 fm.plain(b'rev-count data-size inl type target \n')
676 676
677 677 revlog_entries = [e for e in repo.store.walk() if e.is_revlog]
678 678 revlog_entries.sort(key=lambda e: (e.revlog_type, e.target_id))
679 679
680 680 for entry in revlog_entries:
681 681 if not changelog and entry.is_changelog:
682 682 continue
683 683 elif not manifest and entry.is_manifestlog:
684 684 continue
685 685 elif not filelogs and entry.is_filelog:
686 686 continue
687 687 rlog = entry.get_revlog_instance(repo).get_revlog()
688 688 fm.startitem()
689 689 nb_rev = len(rlog)
690 690 inline = rlog._inline
691 691 data_size = rlog._get_data_offset(nb_rev - 1)
692 692
693 693 target = rlog.target
694 694 revlog_type = b'unknown'
695 695 revlog_target = b''
696 696 if target[0] == constants.KIND_CHANGELOG:
697 697 revlog_type = b'changelog'
698 698 elif target[0] == constants.KIND_MANIFESTLOG:
699 699 revlog_type = b'manifest'
700 700 revlog_target = target[1]
701 701 elif target[0] == constants.KIND_FILELOG:
702 702 revlog_type = b'file'
703 703 revlog_target = target[1]
704 704
705 705 fm.write(b'revlog.rev-count', b'%9d', nb_rev)
706 706 fm.write(b'revlog.data-size', b'%12d', data_size)
707 707
708 708 fm.write(b'revlog.inline', b' %-3s', b'yes' if inline else b'no')
709 709 fm.write(b'revlog.type', b' %-9s', revlog_type)
710 710 fm.write(b'revlog.target', b' %s', revlog_target)
711 711
712 712 fm.plain(b'\n')
713 713
714 714
715 715 class DeltaChainAuditor:
716 716 def __init__(self, revlog):
717 717 self._revlog = revlog
718 718 self._index = self._revlog.index
719 719 self._generaldelta = revlog.delta_config.general_delta
720 720 self._chain_size_cache = {}
721 722 # safeguard to avoid crashing on corrupted revlogs
722 722 self._total_revs = len(self._index)
723 723
724 724 def revinfo(self, rev, size_info=True, dist_info=True, sparse_info=True):
725 725 e = self._index[rev]
726 726 compsize = e[constants.ENTRY_DATA_COMPRESSED_LENGTH]
727 727 uncompsize = e[constants.ENTRY_DATA_UNCOMPRESSED_LENGTH]
728 728
729 729 base = e[constants.ENTRY_DELTA_BASE]
730 730 p1 = e[constants.ENTRY_PARENT_1]
731 731 p2 = e[constants.ENTRY_PARENT_2]
732 732
733 734 # If a parent of a revision has an empty delta, we never try to
734 735 # delta against that parent, but directly against the delta base of
735 736 # that parent (recursively). This avoids adding a useless entry in
736 737 # the chain.
737 738 #
738 739 # However, we need to detect that as a special case so the
739 740 # delta-type is not simply reported as "other".
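# e.g. (hypothetical revs) if p1 is rev 5 and rev 5 stores an empty delta on
# top of rev 3, p1_base walks down to 3 (and further while the deltas stay
# empty), so a delta based on rev 3 is later reported as 'skip1' instead of
# 'other'.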
740 740 p1_base = p1
741 741 if p1 != nodemod.nullrev and p1 < self._total_revs:
742 742 e1 = self._index[p1]
743 743 while e1[constants.ENTRY_DATA_COMPRESSED_LENGTH] == 0:
744 744 new_base = e1[constants.ENTRY_DELTA_BASE]
745 745 if (
746 746 new_base == p1_base
747 747 or new_base == nodemod.nullrev
748 748 or new_base >= self._total_revs
749 749 ):
750 750 break
751 751 p1_base = new_base
752 752 e1 = self._index[p1_base]
753 753 p2_base = p2
754 754 if p2 != nodemod.nullrev and p2 < self._total_revs:
755 755 e2 = self._index[p2]
756 756 while e2[constants.ENTRY_DATA_COMPRESSED_LENGTH] == 0:
757 757 new_base = e2[constants.ENTRY_DELTA_BASE]
758 758 if (
759 759 new_base == p2_base
760 760 or new_base == nodemod.nullrev
761 761 or new_base >= self._total_revs
762 762 ):
763 763 break
764 764 p2_base = new_base
765 765 e2 = self._index[p2_base]
766 766
767 767 if self._generaldelta:
768 768 if base == p1:
769 769 deltatype = b'p1'
770 770 elif base == p2:
771 771 deltatype = b'p2'
772 772 elif base == rev:
773 773 deltatype = b'base'
774 774 elif base == p1_base:
775 775 deltatype = b'skip1'
776 776 elif base == p2_base:
777 777 deltatype = b'skip2'
778 778 elif self._revlog.issnapshot(rev):
779 779 deltatype = b'snap'
780 780 elif base == rev - 1:
781 781 deltatype = b'prev'
782 782 else:
783 783 deltatype = b'other'
784 784 else:
785 785 if base == rev:
786 786 deltatype = b'base'
787 787 else:
788 788 deltatype = b'prev'
789 789
790 790 chain = self._revlog._deltachain(rev)[0]
791 791
792 792 data = {
793 793 'p1': p1,
794 794 'p2': p2,
795 795 'compressed_size': compsize,
796 796 'uncompressed_size': uncompsize,
797 797 'deltatype': deltatype,
798 798 'chain': chain,
799 799 }
800 800
801 801 if size_info or dist_info or sparse_info:
802 802 chain_size = 0
803 803 for iter_rev in reversed(chain):
804 804 cached = self._chain_size_cache.get(iter_rev)
805 805 if cached is not None:
806 806 chain_size += cached
807 807 break
808 808 e = self._index[iter_rev]
809 809 chain_size += e[constants.ENTRY_DATA_COMPRESSED_LENGTH]
810 810 self._chain_size_cache[rev] = chain_size
811 811 data['chain_size'] = chain_size
812 812
813 813 return data
814 814
815 815
816 816 def debug_delta_chain(
817 817 revlog,
818 818 revs=None,
819 819 size_info=True,
820 820 dist_info=True,
821 821 sparse_info=True,
822 822 ):
823 823 auditor = DeltaChainAuditor(revlog)
824 824 r = revlog
825 825 start = r.start
826 826 length = r.length
827 827 withsparseread = revlog.data_config.with_sparse_read
828 828
829 829 header = (
830 830 b' rev'
831 831 b' p1'
832 832 b' p2'
833 833 b' chain#'
834 834 b' chainlen'
835 835 b' prev'
836 836 b' delta'
837 837 )
838 838 if size_info:
839 839 header += b' size' b' rawsize' b' chainsize' b' ratio'
840 840 if dist_info:
841 841 header += b' lindist' b' extradist' b' extraratio'
842 842 if withsparseread and sparse_info:
843 843 header += b' readsize' b' largestblk' b' rddensity' b' srchunks'
844 844 header += b'\n'
845 845 yield header
846 846
847 847 if revs is None:
848 848 all_revs = iter(r)
849 849 else:
850 850 revlog_size = len(r)
851 851 all_revs = sorted(rev for rev in revs if rev < revlog_size)
852 852
853 853 chainbases = {}
854 854 for rev in all_revs:
855 855 info = auditor.revinfo(
856 856 rev,
857 857 size_info=size_info,
858 858 dist_info=dist_info,
859 859 sparse_info=sparse_info,
860 860 )
861 861 comp = info['compressed_size']
862 862 uncomp = info['uncompressed_size']
863 863 chain = info['chain']
864 864 chainbase = chain[0]
865 865 chainid = chainbases.setdefault(chainbase, len(chainbases) + 1)
866 866 if dist_info:
867 867 basestart = start(chainbase)
868 868 revstart = start(rev)
869 869 lineardist = revstart + comp - basestart
870 870 extradist = lineardist - info['chain_size']
871 871 try:
872 872 prevrev = chain[-2]
873 873 except IndexError:
874 874 prevrev = -1
875 875
876 876 if size_info:
877 877 chainsize = info['chain_size']
878 878 if uncomp != 0:
879 879 chainratio = float(chainsize) / float(uncomp)
880 880 else:
881 881 chainratio = chainsize
882 882
883 883 if dist_info:
884 884 if chainsize != 0:
885 885 extraratio = float(extradist) / float(chainsize)
886 886 else:
887 887 extraratio = extradist
888 888
889 889 # label, display-format, data-key, value
890 890 entry = [
891 891 (b'rev', b'%7d', 'rev', rev),
892 892 (b'p1', b'%7d', 'p1', info['p1']),
893 893 (b'p2', b'%7d', 'p2', info['p2']),
894 894 (b'chainid', b'%7d', 'chainid', chainid),
895 895 (b'chainlen', b'%8d', 'chainlen', len(chain)),
896 896 (b'prevrev', b'%8d', 'prevrev', prevrev),
897 897 (b'deltatype', b'%7s', 'deltatype', info['deltatype']),
898 898 ]
899 899 if size_info:
900 900 entry.extend(
901 901 [
902 902 (b'compsize', b'%10d', 'compsize', comp),
903 903 (b'uncompsize', b'%10d', 'uncompsize', uncomp),
904 904 (b'chainsize', b'%10d', 'chainsize', chainsize),
905 905 (b'chainratio', b'%9.5f', 'chainratio', chainratio),
906 906 ]
907 907 )
908 908 if dist_info:
909 909 entry.extend(
910 910 [
911 911 (b'lindist', b'%9d', 'lindist', lineardist),
912 912 (b'extradist', b'%9d', 'extradist', extradist),
913 913 (b'extraratio', b'%10.5f', 'extraratio', extraratio),
914 914 ]
915 915 )
916 916 if withsparseread and sparse_info:
917 917 chainsize = info['chain_size']
918 918 readsize = 0
919 919 largestblock = 0
920 920 srchunks = 0
921 921
922 922 for revschunk in deltautil.slicechunk(r, chain):
923 923 srchunks += 1
924 924 blkend = start(revschunk[-1]) + length(revschunk[-1])
925 925 blksize = blkend - start(revschunk[0])
926 926
927 927 readsize += blksize
928 928 if largestblock < blksize:
929 929 largestblock = blksize
930 930
931 931 if readsize:
932 932 readdensity = float(chainsize) / float(readsize)
933 933 else:
934 934 readdensity = 1
935 935 entry.extend(
936 936 [
937 937 (b'readsize', b'%10d', 'readsize', readsize),
938 938 (b'largestblock', b'%10d', 'largestblock', largestblock),
939 939 (b'readdensity', b'%9.5f', 'readdensity', readdensity),
940 940 (b'srchunks', b'%8d', 'srchunks', srchunks),
941 941 ]
942 942 )
943 943 yield entry
@@ -1,875 +1,876 b''
1 1 # censor code related to censoring revision
2 2 # coding: utf8
3 3 #
4 4 # Copyright 2021 Pierre-Yves David <pierre-yves.david@octobus.net>
5 5 # Copyright 2015 Google, Inc <martinvonz@google.com>
6 6 #
7 7 # This software may be used and distributed according to the terms of the
8 8 # GNU General Public License version 2 or any later version.
9 9
10 10 import binascii
11 11 import contextlib
12 12 import os
13 13 import struct
14 14
15 15 from ..node import (
16 16 nullrev,
17 17 )
18 18 from .constants import (
19 19 COMP_MODE_PLAIN,
20 20 ENTRY_DATA_COMPRESSED_LENGTH,
21 21 ENTRY_DATA_COMPRESSION_MODE,
22 22 ENTRY_DATA_OFFSET,
23 23 ENTRY_DATA_UNCOMPRESSED_LENGTH,
24 24 ENTRY_DELTA_BASE,
25 25 ENTRY_LINK_REV,
26 26 ENTRY_NODE_ID,
27 27 ENTRY_PARENT_1,
28 28 ENTRY_PARENT_2,
29 29 ENTRY_SIDEDATA_COMPRESSED_LENGTH,
30 30 ENTRY_SIDEDATA_COMPRESSION_MODE,
31 31 ENTRY_SIDEDATA_OFFSET,
32 32 REVIDX_ISCENSORED,
33 33 REVLOGV0,
34 34 REVLOGV1,
35 35 )
36 36 from ..i18n import _
37 37
38 38 from .. import (
39 39 error,
40 40 mdiff,
41 41 pycompat,
42 42 revlogutils,
43 43 util,
44 44 )
45 45 from ..utils import (
46 46 storageutil,
47 47 )
48 48 from . import (
49 49 constants,
50 50 deltas,
51 51 )
52 52
53 53
54 54 def v1_censor(rl, tr, censornode, tombstone=b''):
55 55 """censors a revision in a "version 1" revlog"""
56 56 assert rl._format_version == constants.REVLOGV1, rl._format_version
57 57
58 58 # avoid cycle
59 59 from .. import revlog
60 60
61 61 censorrev = rl.rev(censornode)
62 62 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
63 63
64 64 # Rewriting the revlog in place is hard. Our strategy for censoring is
65 65 # to create a new revlog, copy all revisions to it, then replace the
66 66 # revlogs on transaction close.
67 67 #
68 68 # This is a bit dangerous. We could easily have a mismatch of state.
69 69 newrl = revlog.revlog(
70 70 rl.opener,
71 71 target=rl.target,
72 72 radix=rl.radix,
73 73 postfix=b'tmpcensored',
74 74 censorable=True,
75 75 )
76 76 newrl._format_version = rl._format_version
77 77 newrl._format_flags = rl._format_flags
78 78 newrl.delta_config.general_delta = rl.delta_config.general_delta
79 79 newrl._parse_index = rl._parse_index
80 80
81 81 for rev in rl.revs():
82 82 node = rl.node(rev)
83 83 p1, p2 = rl.parents(node)
84 84
85 85 if rev == censorrev:
86 86 newrl.addrawrevision(
87 87 tombstone,
88 88 tr,
89 89 rl.linkrev(censorrev),
90 90 p1,
91 91 p2,
92 92 censornode,
93 93 constants.REVIDX_ISCENSORED,
94 94 )
95 95
96 96 if newrl.deltaparent(rev) != nullrev:
97 97 m = _(b'censored revision stored as delta; cannot censor')
98 98 h = _(
99 99 b'censoring of revlogs is not fully implemented;'
100 100 b' please report this bug'
101 101 )
102 102 raise error.Abort(m, hint=h)
103 103 continue
104 104
105 105 if rl.iscensored(rev):
106 106 if rl.deltaparent(rev) != nullrev:
107 107 m = _(
108 108 b'cannot censor due to censored '
109 109 b'revision having delta stored'
110 110 )
111 111 raise error.Abort(m)
112 112 rawtext = rl._chunk(rev)
113 113 else:
114 114 rawtext = rl.rawdata(rev)
115 115
116 116 newrl.addrawrevision(
117 117 rawtext, tr, rl.linkrev(rev), p1, p2, node, rl.flags(rev)
118 118 )
119 119
120 120 tr.addbackup(rl._indexfile, location=b'store')
121 121 if not rl._inline:
122 122 tr.addbackup(rl._datafile, location=b'store')
123 123
124 124 rl.opener.rename(newrl._indexfile, rl._indexfile)
125 125 if not rl._inline:
126 126 rl.opener.rename(newrl._datafile, rl._datafile)
127 127
128 128 rl.clearcaches()
129 rl._loadindex()
129 chunk_cache = rl._loadindex()
130 rl._load_inner(chunk_cache)
130 131
131 132
132 133 def v2_censor(revlog, tr, censornode, tombstone=b''):
133 134 """censors a revision in a "version 2" revlog"""
134 135 assert revlog._format_version != REVLOGV0, revlog._format_version
135 136 assert revlog._format_version != REVLOGV1, revlog._format_version
136 137
137 138 censor_revs = {revlog.rev(censornode)}
138 139 _rewrite_v2(revlog, tr, censor_revs, tombstone)
139 140
140 141
141 142 def _rewrite_v2(revlog, tr, censor_revs, tombstone=b''):
142 143 """rewrite a revlog to censor some of its content
143 144
144 145 General principle
145 146
146 147 We create new revlog files (index/data/sidedata) to copy the content of
147 148 the existing data without the censored data.
148 149
149 150 We need to compute a new delta for any revision that used the censored
150 151 revision as its delta base. As the cumulative size of the new deltas may be
151 152 large, we store them in a temporary file until they are stored in their
152 153 final destination.
153 154
154 155 All data before the censored data can be blindly copied. The rest needs
155 156 to be copied as we go and the associated index entry needs adjustment.
156 157 """
157 158 assert revlog._format_version != REVLOGV0, revlog._format_version
158 159 assert revlog._format_version != REVLOGV1, revlog._format_version
159 160
160 161 old_index = revlog.index
161 162 docket = revlog._docket
162 163
163 164 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
164 165
165 166 first_excl_rev = min(censor_revs)
166 167
167 168 first_excl_entry = revlog.index[first_excl_rev]
168 169 index_cutoff = revlog.index.entry_size * first_excl_rev
169 170 data_cutoff = first_excl_entry[ENTRY_DATA_OFFSET] >> 16
170 171 sidedata_cutoff = revlog.sidedata_cut_off(first_excl_rev)
171 172
172 173 with pycompat.unnamedtempfile(mode=b"w+b") as tmp_storage:
173 174 # rev β†’ (new_base, data_start, data_end, compression_mode)
174 175 rewritten_entries = _precompute_rewritten_delta(
175 176 revlog,
176 177 old_index,
177 178 censor_revs,
178 179 tmp_storage,
179 180 )
180 181
181 182 all_files = _setup_new_files(
182 183 revlog,
183 184 index_cutoff,
184 185 data_cutoff,
185 186 sidedata_cutoff,
186 187 )
187 188
188 189 # we don't need to open the old index file since its content already
189 190 # exists in a usable form in `old_index`.
190 191 with all_files() as open_files:
191 192 (
192 193 old_data_file,
193 194 old_sidedata_file,
194 195 new_index_file,
195 196 new_data_file,
196 197 new_sidedata_file,
197 198 ) = open_files
198 199
199 200 # writing the censored revision
200 201
201 202 # Writing all subsequent revisions
202 203 for rev in range(first_excl_rev, len(old_index)):
203 204 if rev in censor_revs:
204 205 _rewrite_censor(
205 206 revlog,
206 207 old_index,
207 208 open_files,
208 209 rev,
209 210 tombstone,
210 211 )
211 212 else:
212 213 _rewrite_simple(
213 214 revlog,
214 215 old_index,
215 216 open_files,
216 217 rev,
217 218 rewritten_entries,
218 219 tmp_storage,
219 220 )
220 221 docket.write(transaction=None, stripping=True)
221 222
222 223
223 224 def _precompute_rewritten_delta(
224 225 revlog,
225 226 old_index,
226 227 excluded_revs,
227 228 tmp_storage,
228 229 ):
229 230 """Compute new delta for revisions whose delta is based on revision that
230 231 will not survive as is.
231 232
232 233 Return a mapping: {rev β†’ (new_base, data_start, data_end, compression_mode)}
233 234 """
234 235 dc = deltas.deltacomputer(revlog)
235 236 rewritten_entries = {}
236 237 first_excl_rev = min(excluded_revs)
237 238 with revlog.reading():
238 239 for rev in range(first_excl_rev, len(old_index)):
239 240 if rev in excluded_revs:
240 241 # this revision will be preserved as is, so we don't need to
241 242 # consider recomputing a delta.
242 243 continue
243 244 entry = old_index[rev]
244 245 if entry[ENTRY_DELTA_BASE] not in excluded_revs:
245 246 continue
246 247 # This is a revision that uses the censored revision as the base
247 248 # for its delta. We need to compute a new delta for it.
248 249 if entry[ENTRY_DATA_UNCOMPRESSED_LENGTH] == 0:
249 250 # this revision is empty, we can delta against nullrev
250 251 rewritten_entries[rev] = (nullrev, 0, 0, COMP_MODE_PLAIN)
251 252 else:
252 253
253 254 text = revlog.rawdata(rev)
254 255 info = revlogutils.revisioninfo(
255 256 node=entry[ENTRY_NODE_ID],
256 257 p1=revlog.node(entry[ENTRY_PARENT_1]),
257 258 p2=revlog.node(entry[ENTRY_PARENT_2]),
258 259 btext=[text],
259 260 textlen=len(text),
260 261 cachedelta=None,
261 262 flags=entry[ENTRY_DATA_OFFSET] & 0xFFFF,
262 263 )
263 264 d = dc.finddeltainfo(
264 265 info, excluded_bases=excluded_revs, target_rev=rev
265 266 )
266 267 default_comp = revlog._docket.default_compression_header
267 268 comp_mode, d = deltas.delta_compression(default_comp, d)
268 269 # using `tell` is a bit lazy, but we are not here for speed
269 270 start = tmp_storage.tell()
270 271 tmp_storage.write(d.data[1])
271 272 end = tmp_storage.tell()
272 273 rewritten_entries[rev] = (d.base, start, end, comp_mode)
273 274 return rewritten_entries
274 275
275 276
276 277 def _setup_new_files(
277 278 revlog,
278 279 index_cutoff,
279 280 data_cutoff,
280 281 sidedata_cutoff,
281 282 ):
282 283 """
283 284
284 285 return a context manager to open all the relevant files:
285 286 - old_data_file,
286 287 - old_sidedata_file,
287 288 - new_index_file,
288 289 - new_data_file,
289 290 - new_sidedata_file,
290 291
291 292 The old_index_file is not here because it is accessed through the
292 293 `old_index` object of the caller function.
293 294 """
294 295 docket = revlog._docket
295 296 old_index_filepath = revlog.opener.join(docket.index_filepath())
296 297 old_data_filepath = revlog.opener.join(docket.data_filepath())
297 298 old_sidedata_filepath = revlog.opener.join(docket.sidedata_filepath())
298 299
299 300 new_index_filepath = revlog.opener.join(docket.new_index_file())
300 301 new_data_filepath = revlog.opener.join(docket.new_data_file())
301 302 new_sidedata_filepath = revlog.opener.join(docket.new_sidedata_file())
302 303
303 304 util.copyfile(old_index_filepath, new_index_filepath, nb_bytes=index_cutoff)
304 305 util.copyfile(old_data_filepath, new_data_filepath, nb_bytes=data_cutoff)
305 306 util.copyfile(
306 307 old_sidedata_filepath,
307 308 new_sidedata_filepath,
308 309 nb_bytes=sidedata_cutoff,
309 310 )
310 311 revlog.opener.register_file(docket.index_filepath())
311 312 revlog.opener.register_file(docket.data_filepath())
312 313 revlog.opener.register_file(docket.sidedata_filepath())
313 314
314 315 docket.index_end = index_cutoff
315 316 docket.data_end = data_cutoff
316 317 docket.sidedata_end = sidedata_cutoff
317 318
318 319 # reload the revlog internal information
319 320 revlog.clearcaches()
320 321 revlog._loadindex(docket=docket)
321 322
322 323 @contextlib.contextmanager
323 324 def all_files_opener():
324 325 # hide the opening in a helper function to please check-code, black
325 326 # and various python versions at the same time
326 327 with open(old_data_filepath, 'rb') as old_data_file:
327 328 with open(old_sidedata_filepath, 'rb') as old_sidedata_file:
328 329 with open(new_index_filepath, 'r+b') as new_index_file:
329 330 with open(new_data_filepath, 'r+b') as new_data_file:
330 331 with open(
331 332 new_sidedata_filepath, 'r+b'
332 333 ) as new_sidedata_file:
333 334 new_index_file.seek(0, os.SEEK_END)
334 335 assert new_index_file.tell() == index_cutoff
335 336 new_data_file.seek(0, os.SEEK_END)
336 337 assert new_data_file.tell() == data_cutoff
337 338 new_sidedata_file.seek(0, os.SEEK_END)
338 339 assert new_sidedata_file.tell() == sidedata_cutoff
339 340 yield (
340 341 old_data_file,
341 342 old_sidedata_file,
342 343 new_index_file,
343 344 new_data_file,
344 345 new_sidedata_file,
345 346 )
346 347
347 348 return all_files_opener
348 349
349 350
350 351 def _rewrite_simple(
351 352 revlog,
352 353 old_index,
353 354 all_files,
354 355 rev,
355 356 rewritten_entries,
356 357 tmp_storage,
357 358 ):
358 359 """append a normal revision to the index after the rewritten one(s)"""
359 360 (
360 361 old_data_file,
361 362 old_sidedata_file,
362 363 new_index_file,
363 364 new_data_file,
364 365 new_sidedata_file,
365 366 ) = all_files
366 367 entry = old_index[rev]
367 368 flags = entry[ENTRY_DATA_OFFSET] & 0xFFFF
368 369 old_data_offset = entry[ENTRY_DATA_OFFSET] >> 16
369 370
370 371 if rev not in rewritten_entries:
371 372 old_data_file.seek(old_data_offset)
372 373 new_data_size = entry[ENTRY_DATA_COMPRESSED_LENGTH]
373 374 new_data = old_data_file.read(new_data_size)
374 375 data_delta_base = entry[ENTRY_DELTA_BASE]
375 376 d_comp_mode = entry[ENTRY_DATA_COMPRESSION_MODE]
376 377 else:
377 378 (
378 379 data_delta_base,
379 380 start,
380 381 end,
381 382 d_comp_mode,
382 383 ) = rewritten_entries[rev]
383 384 new_data_size = end - start
384 385 tmp_storage.seek(start)
385 386 new_data = tmp_storage.read(new_data_size)
386 387
387 388 # It might be faster to group contiguous read/write operations;
388 389 # however, this is censoring, an operation that is not focused
389 390 # on stellar performance, so this optimisation has not been
390 391 # written yet.
391 392 new_data_offset = new_data_file.tell()
392 393 new_data_file.write(new_data)
393 394
394 395 sidedata_size = entry[ENTRY_SIDEDATA_COMPRESSED_LENGTH]
395 396 new_sidedata_offset = new_sidedata_file.tell()
396 397 if 0 < sidedata_size:
397 398 old_sidedata_offset = entry[ENTRY_SIDEDATA_OFFSET]
398 399 old_sidedata_file.seek(old_sidedata_offset)
399 400 new_sidedata = old_sidedata_file.read(sidedata_size)
400 401 new_sidedata_file.write(new_sidedata)
401 402
402 403 data_uncompressed_length = entry[ENTRY_DATA_UNCOMPRESSED_LENGTH]
403 404 sd_com_mode = entry[ENTRY_SIDEDATA_COMPRESSION_MODE]
404 405 assert data_delta_base <= rev, (data_delta_base, rev)
405 406
406 407 new_entry = revlogutils.entry(
407 408 flags=flags,
408 409 data_offset=new_data_offset,
409 410 data_compressed_length=new_data_size,
410 411 data_uncompressed_length=data_uncompressed_length,
411 412 data_delta_base=data_delta_base,
412 413 link_rev=entry[ENTRY_LINK_REV],
413 414 parent_rev_1=entry[ENTRY_PARENT_1],
414 415 parent_rev_2=entry[ENTRY_PARENT_2],
415 416 node_id=entry[ENTRY_NODE_ID],
416 417 sidedata_offset=new_sidedata_offset,
417 418 sidedata_compressed_length=sidedata_size,
418 419 data_compression_mode=d_comp_mode,
419 420 sidedata_compression_mode=sd_com_mode,
420 421 )
421 422 revlog.index.append(new_entry)
422 423 entry_bin = revlog.index.entry_binary(rev)
423 424 new_index_file.write(entry_bin)
424 425
425 426 revlog._docket.index_end = new_index_file.tell()
426 427 revlog._docket.data_end = new_data_file.tell()
427 428 revlog._docket.sidedata_end = new_sidedata_file.tell()
428 429
429 430
430 431 def _rewrite_censor(
431 432 revlog,
432 433 old_index,
433 434 all_files,
434 435 rev,
435 436 tombstone,
436 437 ):
437 438 """rewrite and append a censored revision"""
438 439 (
439 440 old_data_file,
440 441 old_sidedata_file,
441 442 new_index_file,
442 443 new_data_file,
443 444 new_sidedata_file,
444 445 ) = all_files
445 446 entry = old_index[rev]
446 447
447 448 # XXX consider trying the default compression too
448 449 new_data_size = len(tombstone)
449 450 new_data_offset = new_data_file.tell()
450 451 new_data_file.write(tombstone)
451 452
452 453 # we are not adding any sidedata as they might leak info about the censored version
453 454
454 455 link_rev = entry[ENTRY_LINK_REV]
455 456
456 457 p1 = entry[ENTRY_PARENT_1]
457 458 p2 = entry[ENTRY_PARENT_2]
458 459
459 460 new_entry = revlogutils.entry(
460 461 flags=constants.REVIDX_ISCENSORED,
461 462 data_offset=new_data_offset,
462 463 data_compressed_length=new_data_size,
463 464 data_uncompressed_length=new_data_size,
464 465 data_delta_base=rev,
465 466 link_rev=link_rev,
466 467 parent_rev_1=p1,
467 468 parent_rev_2=p2,
468 469 node_id=entry[ENTRY_NODE_ID],
469 470 sidedata_offset=0,
470 471 sidedata_compressed_length=0,
471 472 data_compression_mode=COMP_MODE_PLAIN,
472 473 sidedata_compression_mode=COMP_MODE_PLAIN,
473 474 )
474 475 revlog.index.append(new_entry)
475 476 entry_bin = revlog.index.entry_binary(rev)
476 477 new_index_file.write(entry_bin)
477 478 revlog._docket.index_end = new_index_file.tell()
478 479 revlog._docket.data_end = new_data_file.tell()
479 480
480 481
481 482 def _get_filename_from_filelog_index(path):
482 483 # Drop the extension and the `data/` prefix
483 484 path_part = path.rsplit(b'.', 1)[0].split(b'/', 1)
484 485 if len(path_part) < 2:
485 486 msg = _(b"cannot recognize filelog from filename: '%s'")
486 487 msg %= path
487 488 raise error.Abort(msg)
488 489
489 490 return path_part[1]
490 491
491 492
492 493 def _filelog_from_filename(repo, path):
493 494 """Returns the filelog for the given `path`. Stolen from `engine.py`"""
494 495
495 496 from .. import filelog # avoid cycle
496 497
497 498 fl = filelog.filelog(repo.svfs, path)
498 499 return fl
499 500
500 501
501 502 def _write_swapped_parents(repo, rl, rev, offset, fp):
502 503 """Swaps p1 and p2 and overwrites the revlog entry for `rev` in `fp`"""
503 504 from ..pure import parsers # avoid cycle
504 505
505 506 if repo._currentlock(repo._lockref) is None:
506 507 # Let's be paranoid about it
507 508 msg = "repo needs to be locked to rewrite parents"
508 509 raise error.ProgrammingError(msg)
509 510
510 511 index_format = parsers.IndexObject.index_format
511 512 entry = rl.index[rev]
512 513 new_entry = list(entry)
513 514 new_entry[5], new_entry[6] = entry[6], entry[5]
514 515 packed = index_format.pack(*new_entry[:8])
515 516 fp.seek(offset)
516 517 fp.write(packed)
517 518
518 519
519 520 def _reorder_filelog_parents(repo, fl, to_fix):
520 521 """
521 522 Swaps p1 and p2 for all `to_fix` revisions of filelog `fl` and writes the
522 523 new version to disk, overwriting the old one with a rename.
523 524 """
524 525 from ..pure import parsers # avoid cycle
525 526
526 527 ui = repo.ui
527 528 assert len(to_fix) > 0
528 529 rl = fl._revlog
529 530 if rl._format_version != constants.REVLOGV1:
530 531 msg = "expected version 1 revlog, got version '%d'" % rl._format_version
531 532 raise error.ProgrammingError(msg)
532 533
533 534 index_file = rl._indexfile
534 535 new_file_path = index_file + b'.tmp-parents-fix'
535 536 repaired_msg = _(b"repaired revision %d of 'filelog %s'\n")
536 537
537 538 with ui.uninterruptible():
538 539 try:
539 540 util.copyfile(
540 541 rl.opener.join(index_file),
541 542 rl.opener.join(new_file_path),
542 543 checkambig=rl.data_config.check_ambig,
543 544 )
544 545
545 546 with rl.opener(new_file_path, mode=b"r+") as fp:
546 547 if rl._inline:
547 548 index = parsers.InlinedIndexObject(fp.read())
548 549 for rev in fl.revs():
549 550 if rev in to_fix:
550 551 offset = index._calculate_index(rev)
551 552 _write_swapped_parents(repo, rl, rev, offset, fp)
552 553 ui.write(repaired_msg % (rev, index_file))
553 554 else:
554 555 index_format = parsers.IndexObject.index_format
555 556 for rev in to_fix:
556 557 offset = rev * index_format.size
557 558 _write_swapped_parents(repo, rl, rev, offset, fp)
558 559 ui.write(repaired_msg % (rev, index_file))
559 560
560 561 rl.opener.rename(new_file_path, index_file)
561 562 rl.clearcaches()
562 563 rl._loadindex()
563 564 finally:
564 565 util.tryunlink(new_file_path)
565 566
566 567
567 568 def _is_revision_affected(fl, filerev, metadata_cache=None):
568 569 full_text = lambda: fl._revlog.rawdata(filerev)
569 570 parent_revs = lambda: fl._revlog.parentrevs(filerev)
570 571 return _is_revision_affected_inner(
571 572 full_text, parent_revs, filerev, metadata_cache
572 573 )
573 574
574 575
575 576 def _is_revision_affected_inner(
576 577 full_text,
577 578 parents_revs,
578 579 filerev,
579 580 metadata_cache=None,
580 581 ):
581 582 """Mercurial currently (5.9rc0) uses `p1 == nullrev and p2 != nullrev` as a
582 583 special meaning compared to the reverse in the context of filelog-based
583 584 copytracing. issue6528 exists because new code assumed that parent ordering
584 585 didn't matter, so this detects if the revision contains metadata (since
585 586 it's only used for filelog-based copytracing) and its parents are in the
586 587 "wrong" order."""
587 588 try:
588 589 raw_text = full_text()
589 590 except error.CensoredNodeError:
590 591 # We don't care about censored nodes as they never carry metadata
591 592 return False
592 593
593 594 # raw text can be a `memoryview`, which doesn't implement `startswith`
594 595 has_meta = bytes(raw_text[:2]) == b'\x01\n'
595 596 if metadata_cache is not None:
596 597 metadata_cache[filerev] = has_meta
597 598 if has_meta:
598 599 (p1, p2) = parents_revs()
599 600 if p1 != nullrev and p2 == nullrev:
600 601 return True
601 602 return False
602 603
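# Editorial sketch, not part of rewrite.py: what the b'\x01\n' test above is
# looking for. Filelog copy metadata is stored as a small header delimited by
# that two-byte marker; the values below are made up for illustration:
_SKETCH_RAW_TEXT = (
    b'\x01\n'
    b'copy: old/name.txt\n'
    b'copyrev: 0123456789abcdef0123456789abcdef01234567\n'
    b'\x01\n'
    b'actual file content\n'
)
assert bytes(_SKETCH_RAW_TEXT[:2]) == b'\x01\n'  # i.e. has_meta would be True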
603 604
604 605 def _is_revision_affected_fast(repo, fl, filerev, metadata_cache):
605 606 rl = fl._revlog
606 607 is_censored = lambda: rl.iscensored(filerev)
607 608 delta_base = lambda: rl.deltaparent(filerev)
608 609 delta = lambda: rl._chunk(filerev)
609 610 full_text = lambda: rl.rawdata(filerev)
610 611 parent_revs = lambda: rl.parentrevs(filerev)
611 612 return _is_revision_affected_fast_inner(
612 613 is_censored,
613 614 delta_base,
614 615 delta,
615 616 full_text,
616 617 parent_revs,
617 618 filerev,
618 619 metadata_cache,
619 620 )
620 621
621 622
622 623 def _is_revision_affected_fast_inner(
623 624 is_censored,
624 625 delta_base,
625 626 delta,
626 627 full_text,
627 628 parent_revs,
628 629 filerev,
629 630 metadata_cache,
630 631 ):
631 632 """Optimization fast-path for `_is_revision_affected`.
632 633
633 634 `metadata_cache` is a dict of `{rev: has_metadata}` which allows any
634 635 revision to check whether its delta base has metadata, looking only at the
635 636 current delta instead of computing the full text.
636 637
637 638 This optimization only works if the revisions are looked at in order."""
638 639
639 640 if is_censored():
640 641 # Censored revisions don't contain metadata, so they cannot be affected
641 642 metadata_cache[filerev] = False
642 643 return False
643 644
644 645 p1, p2 = parent_revs()
645 646 if p1 == nullrev or p2 != nullrev:
646 647 return False
647 648
648 649 delta_parent = delta_base()
649 650 parent_has_metadata = metadata_cache.get(delta_parent)
650 651 if parent_has_metadata is None:
651 652 return _is_revision_affected_inner(
652 653 full_text,
653 654 parent_revs,
654 655 filerev,
655 656 metadata_cache,
656 657 )
657 658
658 659 chunk = delta()
659 660 if not len(chunk):
660 661 # No diff for this revision
661 662 return parent_has_metadata
662 663
663 664 header_length = 12
664 665 if len(chunk) < header_length:
665 666 raise error.Abort(_(b"patch cannot be decoded"))
666 667
667 668 start, _end, _length = struct.unpack(b">lll", chunk[:header_length])
668 669
669 670 if start < 2: # len(b'\x01\n') == 2
670 671 # This delta does *something* to the metadata marker (if any).
671 672 # Check it the slow way
672 673 is_affected = _is_revision_affected_inner(
673 674 full_text,
674 675 parent_revs,
675 676 filerev,
676 677 metadata_cache,
677 678 )
678 679 return is_affected
679 680
680 681 # The diff did not remove or add the metadata header, so this revision is
681 682 # in the same situation as its parent
682 683 metadata_cache[filerev] = parent_has_metadata
683 684 return parent_has_metadata
684 685
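# Editorial sketch, not part of rewrite.py: the 12-byte header decoded above
# is the first hunk of a bdiff/mpatch delta, packed big-endian as
# (start, end, length-of-new-data). A delta whose first hunk starts at offset
# 2 or later cannot touch the two-byte b'\x01\n' marker, which is why the
# metadata status can then be inherited from the delta parent. Illustration
# (hunk values made up):
import struct

_sketch_delta = struct.pack(b">lll", 2, 7, 3) + b"new"
_start, _end, _len = struct.unpack(b">lll", _sketch_delta[:12])
assert _start >= 2  # this hunk rewrites bytes [2, 7) and leaves the marker alone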
685 686
686 687 def _from_report(ui, repo, context, from_report, dry_run):
687 688 """
688 689 Fixes the revisions listed in the `from_report` file, but still checks that
689 690 the revisions are indeed affected, to prevent an unfortunate cyclic
690 691 situation where we'd swap well-ordered parents again.
691 692
692 693 See the doc for `debug_fix_issue6528` for the format documentation.
693 694 """
694 695 ui.write(_(b"loading report file '%s'\n") % from_report)
695 696
696 697 with context(), open(from_report, mode='rb') as f:
697 698 for line in f.read().split(b'\n'):
698 699 if not line:
699 700 continue
700 701 filenodes, filename = line.split(b' ', 1)
701 702 fl = _filelog_from_filename(repo, filename)
702 703 to_fix = set(
703 704 fl.rev(binascii.unhexlify(n)) for n in filenodes.split(b',')
704 705 )
705 706 excluded = set()
706 707
707 708 for filerev in to_fix:
708 709 if _is_revision_affected(fl, filerev):
709 710 msg = b"found affected revision %d for filelog '%s'\n"
710 711 ui.warn(msg % (filerev, filename))
711 712 else:
712 713 msg = _(b"revision %s of file '%s' is not affected\n")
713 714 msg %= (binascii.hexlify(fl.node(filerev)), filename)
714 715 ui.warn(msg)
715 716 excluded.add(filerev)
716 717
717 718 to_fix = to_fix - excluded
718 719 if not to_fix:
719 720 msg = _(b"no affected revisions were found for '%s'\n")
720 721 ui.write(msg % filename)
721 722 continue
722 723 if not dry_run:
723 724 _reorder_filelog_parents(repo, fl, sorted(to_fix))
724 725
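# Editorial sketch, not part of rewrite.py: the report file parsed above (and
# written out at the end of repair_issue6528 below) holds one line per
# affected filelog: comma-separated hex filenodes, one space, then the
# tracked path. Node values below are made up for illustration:
_SKETCH_REPORT_LINE = (
    b"0123456789abcdef0123456789abcdef01234567,"
    b"76543210fedcba9876543210fedcba9876543210 path/to/file.txt"
)
_sketch_nodes, _sketch_path = _SKETCH_REPORT_LINE.split(b' ', 1)
assert _sketch_path == b"path/to/file.txt"
assert len(_sketch_nodes.split(b',')) == 2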
725 726
726 727 def filter_delta_issue6528(revlog, deltas_iter):
727 728 """filter incomind deltas to repaire issue 6528 on the fly"""
728 729 metadata_cache = {}
729 730
730 731 deltacomputer = deltas.deltacomputer(revlog)
731 732
732 733 for rev, d in enumerate(deltas_iter, len(revlog)):
733 734 (
734 735 node,
735 736 p1_node,
736 737 p2_node,
737 738 linknode,
738 739 deltabase,
739 740 delta,
740 741 flags,
741 742 sidedata,
742 743 ) = d
743 744
744 745 if not revlog.index.has_node(deltabase):
745 746 raise error.LookupError(
746 747 deltabase, revlog.radix, _(b'unknown parent')
747 748 )
748 749 base_rev = revlog.rev(deltabase)
749 750 if not revlog.index.has_node(p1_node):
750 751 raise error.LookupError(p1_node, revlog.radix, _(b'unknown parent'))
751 752 p1_rev = revlog.rev(p1_node)
752 753 if not revlog.index.has_node(p2_node):
753 754 raise error.LookupError(p2_node, revlog.radix, _(b'unknown parent'))
754 755 p2_rev = revlog.rev(p2_node)
755 756
756 757 is_censored = lambda: bool(flags & REVIDX_ISCENSORED)
758 759 delta_base = lambda: base_rev
759 760 parent_revs = lambda: (p1_rev, p2_rev)
760 761
761 762 def full_text():
762 763 # note: being able to reuse the full text computation in the
763 764 # underlying addrevision would be useful; however, this is a bit too
764 765 # intrusive for the "quick" issue6528 fix we are writing before the
765 766 # 5.8 release
766 767 textlen = mdiff.patchedsize(revlog.size(base_rev), delta)
767 768
768 769 revinfo = revlogutils.revisioninfo(
769 770 node,
770 771 p1_node,
771 772 p2_node,
772 773 [None],
773 774 textlen,
774 775 (base_rev, delta),
775 776 flags,
776 777 )
777 778 return deltacomputer.buildtext(revinfo)
778 779
779 780 is_affected = _is_revision_affected_fast_inner(
780 781 is_censored,
781 782 delta_base,
782 783 lambda: delta,
783 784 full_text,
784 785 parent_revs,
785 786 rev,
786 787 metadata_cache,
787 788 )
788 789 if is_affected:
789 790 d = (
790 791 node,
791 792 p2_node,
792 793 p1_node,
793 794 linknode,
794 795 deltabase,
795 796 delta,
796 797 flags,
797 798 sidedata,
798 799 )
799 800 yield d
800 801
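# Editorial note, not part of rewrite.py: filter_delta_issue6528 is a
# generator, so it can wrap any iterator of the 8-tuples unpacked above,
# passing every delta through and swapping p1/p2 only for affected revisions.
# Hypothetical usage (names below are placeholders, not real API):
#
#     repaired = filter_delta_issue6528(some_filelog._revlog, incoming_deltas)
#     for delta_tuple in repaired:
#         ...  # hand the repaired stream to whatever applies the deltas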
801 802
802 803 def repair_issue6528(
803 804 ui, repo, dry_run=False, to_report=None, from_report=None, paranoid=False
804 805 ):
805 806 @contextlib.contextmanager
806 807 def context():
807 808 if dry_run or to_report: # No need for locking
808 809 yield
809 810 else:
810 811 with repo.wlock(), repo.lock():
811 812 yield
812 813
813 814 if from_report:
814 815 return _from_report(ui, repo, context, from_report, dry_run)
815 816
816 817 report_entries = []
817 818
818 819 with context():
819 820 files = list(
820 821 entry
821 822 for entry in repo.store.data_entries()
822 823 if entry.is_revlog and entry.is_filelog
823 824 )
824 825
825 826 progress = ui.makeprogress(
826 827 _(b"looking for affected revisions"),
827 828 unit=_(b"filelogs"),
828 829 total=len(files),
829 830 )
830 831 found_nothing = True
831 832
832 833 for entry in files:
833 834 progress.increment()
834 835 filename = entry.target_id
835 836 fl = _filelog_from_filename(repo, entry.target_id)
836 837
837 838 # Set of filerevs (or hex filenodes if `to_report`) that need fixing
838 839 to_fix = set()
839 840 metadata_cache = {}
840 841 for filerev in fl.revs():
841 842 affected = _is_revision_affected_fast(
842 843 repo, fl, filerev, metadata_cache
843 844 )
844 845 if paranoid:
845 846 slow = _is_revision_affected(fl, filerev)
846 847 if slow != affected:
847 848 msg = _(b"paranoid check failed for '%s' at node %s")
848 849 node = binascii.hexlify(fl.node(filerev))
849 850 raise error.Abort(msg % (filename, node))
850 851 if affected:
851 852 msg = b"found affected revision %d for file '%s'\n"
852 853 ui.warn(msg % (filerev, filename))
853 854 found_nothing = False
854 855 if not dry_run:
855 856 if to_report:
856 857 to_fix.add(binascii.hexlify(fl.node(filerev)))
857 858 else:
858 859 to_fix.add(filerev)
859 860
860 861 if to_fix:
861 862 to_fix = sorted(to_fix)
862 863 if to_report:
863 864 report_entries.append((filename, to_fix))
864 865 else:
865 866 _reorder_filelog_parents(repo, fl, to_fix)
866 867
867 868 if found_nothing:
868 869 ui.write(_(b"no affected revisions were found\n"))
869 870
870 871 if to_report and report_entries:
871 872 with open(to_report, mode="wb") as f:
872 873 for path, to_fix in report_entries:
873 874 f.write(b"%s %s\n" % (b",".join(to_fix), path))
874 875
875 876 progress.complete()
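# Editorial note, not part of rewrite.py: repair_issue6528 is the backend of a
# debug command (in recent Mercurial releases, `hg debug-repair-issue6528`,
# whose flags map onto the dry_run/to_report/from_report/paranoid parameters
# above). Treat the exact command name and flags as an assumption of this note
# rather than something stated in this file.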