revlog: move the `_chunk` method on the inner object...
marmoute - r51985:9c8df10e default
@@ -1,4635 +1,4637 @@
1 1 # perf.py - performance test routines
2 2 '''helper extension to measure performance
3 3
4 4 Configurations
5 5 ==============
6 6
7 7 ``perf``
8 8 --------
9 9
10 10 ``all-timing``
11 11 When set, additional statistics will be reported for each benchmark: best,
12 12 worst, median, and average. If not set, only the best timing is reported
13 13 (default: off).
14 14
15 15 ``presleep``
16 16 number of seconds to wait before any group of runs (default: 1)
17 17
18 18 ``pre-run``
19 19 number of runs to perform before starting measurement.
20 20
21 21 ``profile-benchmark``
22 22 Enable profiling for the benchmarked section.
23 23 (Only the first iteration is profiled.)
24 24
25 25 ``run-limits``
26 26 Control the number of runs each benchmark will perform. The option value
27 27 should be a list of `<time>-<numberofrun>` pairs. After each run the
28 28 conditions are considered in order with the following logic:
29 29
30 30 If the benchmark has been running for <time> seconds, and we have performed
31 31 <numberofrun> iterations, stop the benchmark.
32 32
33 33 The default value is: `3.0-100, 10.0-3`
34 34
35 35 ``stub``
36 36 When set, benchmarks will only be run once; useful for testing
37 37 (default: off)
38 38 '''
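# The options above live in the ``[perf]`` section of a Mercurial config file.
# A minimal example (illustrative values only, not shipped defaults):
#
#   [perf]
#   all-timing = yes
#   presleep = 0
#   pre-run = 2
#   run-limits = 5.0-50, 30.0-5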
39 39
40 40 # "historical portability" policy of perf.py:
41 41 #
42 42 # We have to do:
43 43 # - make perf.py "loadable" with as wide Mercurial version as possible
44 44 # This doesn't mean that perf commands work correctly with that Mercurial.
45 45 # BTW, perf.py itself has been available since 1.1 (or eb240755386d).
46 46 # - make historical perf command work correctly with as wide Mercurial
47 47 # version as possible
48 48 #
49 49 # We have to do, if possible with reasonable cost:
50 50 # - make recent perf command for historical feature work correctly
51 51 # with early Mercurial
52 52 #
53 53 # We don't have to do:
54 54 # - make perf command for recent feature work correctly with early
55 55 # Mercurial
56 56
57 57 import contextlib
58 58 import functools
59 59 import gc
60 60 import os
61 61 import random
62 62 import shutil
63 63 import struct
64 64 import sys
65 65 import tempfile
66 66 import threading
67 67 import time
68 68
69 69 import mercurial.revlog
70 70 from mercurial import (
71 71 changegroup,
72 72 cmdutil,
73 73 commands,
74 74 copies,
75 75 error,
76 76 extensions,
77 77 hg,
78 78 mdiff,
79 79 merge,
80 80 util,
81 81 )
82 82
83 83 # for "historical portability":
84 84 # try to import modules separately (in dict order), and ignore
85 85 # failure, because these aren't available with early Mercurial
86 86 try:
87 87 from mercurial import branchmap # since 2.5 (or bcee63733aad)
88 88 except ImportError:
89 89 pass
90 90 try:
91 91 from mercurial import obsolete # since 2.3 (or ad0d6c2b3279)
92 92 except ImportError:
93 93 pass
94 94 try:
95 95 from mercurial import registrar # since 3.7 (or 37d50250b696)
96 96
97 97 dir(registrar) # forcibly load it
98 98 except ImportError:
99 99 registrar = None
100 100 try:
101 101 from mercurial import repoview # since 2.5 (or 3a6ddacb7198)
102 102 except ImportError:
103 103 pass
104 104 try:
105 105 from mercurial.utils import repoviewutil # since 5.0
106 106 except ImportError:
107 107 repoviewutil = None
108 108 try:
109 109 from mercurial import scmutil # since 1.9 (or 8b252e826c68)
110 110 except ImportError:
111 111 pass
112 112 try:
113 113 from mercurial import setdiscovery # since 1.9 (or cb98fed52495)
114 114 except ImportError:
115 115 pass
116 116
117 117 try:
118 118 from mercurial import profiling
119 119 except ImportError:
120 120 profiling = None
121 121
122 122 try:
123 123 from mercurial.revlogutils import constants as revlog_constants
124 124
125 125 perf_rl_kind = (revlog_constants.KIND_OTHER, b'created-by-perf')
126 126
127 127 def revlog(opener, *args, **kwargs):
128 128 return mercurial.revlog.revlog(opener, perf_rl_kind, *args, **kwargs)
129 129
130 130
131 131 except (ImportError, AttributeError):
132 132 perf_rl_kind = None
133 133
134 134 def revlog(opener, *args, **kwargs):
135 135 return mercurial.revlog.revlog(opener, *args, **kwargs)
136 136
137 137
138 138 def identity(a):
139 139 return a
140 140
141 141
142 142 try:
143 143 from mercurial import pycompat
144 144
145 145 getargspec = pycompat.getargspec # added to module after 4.5
146 146 _byteskwargs = pycompat.byteskwargs # since 4.1 (or fbc3f73dc802)
147 147 _sysstr = pycompat.sysstr # since 4.0 (or 2219f4f82ede)
148 148 _bytestr = pycompat.bytestr # since 4.2 (or b70407bd84d5)
149 149 _xrange = pycompat.xrange # since 4.8 (or 7eba8f83129b)
150 150 fsencode = pycompat.fsencode # since 3.9 (or f4a5e0e86a7e)
151 151 if pycompat.ispy3:
152 152 _maxint = sys.maxsize # per py3 docs for replacing maxint
153 153 else:
154 154 _maxint = sys.maxint
155 155 except (NameError, ImportError, AttributeError):
156 156 import inspect
157 157
158 158 getargspec = inspect.getargspec
159 159 _byteskwargs = identity
160 160 _bytestr = str
161 161 fsencode = identity # no py3 support
162 162 _maxint = sys.maxint # no py3 support
163 163 _sysstr = lambda x: x # no py3 support
164 164 _xrange = xrange
165 165
166 166 try:
167 167 # 4.7+
168 168 queue = pycompat.queue.Queue
169 169 except (NameError, AttributeError, ImportError):
170 170 # <4.7.
171 171 try:
172 172 queue = pycompat.queue
173 173 except (NameError, AttributeError, ImportError):
174 174 import Queue as queue
175 175
176 176 try:
177 177 from mercurial import logcmdutil
178 178
179 179 makelogtemplater = logcmdutil.maketemplater
180 180 except (AttributeError, ImportError):
181 181 try:
182 182 makelogtemplater = cmdutil.makelogtemplater
183 183 except (AttributeError, ImportError):
184 184 makelogtemplater = None
185 185
186 186 # for "historical portability":
187 187 # define util.safehasattr forcibly, because util.safehasattr has been
188 188 # available since 1.9.3 (or 94b200a11cf7)
189 189 _undefined = object()
190 190
191 191
192 192 def safehasattr(thing, attr):
193 193 return getattr(thing, _sysstr(attr), _undefined) is not _undefined
194 194
195 195
196 196 setattr(util, 'safehasattr', safehasattr)
197 197
198 198 # for "historical portability":
199 199 # define util.timer forcibly, because util.timer has been available
200 200 # since ae5d60bb70c9
201 201 if safehasattr(time, 'perf_counter'):
202 202 util.timer = time.perf_counter
203 203 elif os.name == b'nt':
204 204 util.timer = time.clock
205 205 else:
206 206 util.timer = time.time
207 207
208 208 # for "historical portability":
209 209 # use locally defined empty option list, if formatteropts isn't
210 210 # available, because commands.formatteropts has been available since
211 211 # 3.2 (or 7a7eed5176a4), even though formatting itself has been
212 212 # available since 2.2 (or ae5f92e154d3)
213 213 formatteropts = getattr(
214 214 cmdutil, "formatteropts", getattr(commands, "formatteropts", [])
215 215 )
216 216
217 217 # for "historical portability":
218 218 # use locally defined option list, if debugrevlogopts isn't available,
219 219 # because commands.debugrevlogopts has been available since 3.7 (or
220 220 # 5606f7d0d063), even though cmdutil.openrevlog() has been available
221 221 # since 1.9 (or a79fea6b3e77).
222 222 revlogopts = getattr(
223 223 cmdutil,
224 224 "debugrevlogopts",
225 225 getattr(
226 226 commands,
227 227 "debugrevlogopts",
228 228 [
229 229 (b'c', b'changelog', False, b'open changelog'),
230 230 (b'm', b'manifest', False, b'open manifest'),
231 231 (b'', b'dir', False, b'open directory manifest'),
232 232 ],
233 233 ),
234 234 )
235 235
236 236 cmdtable = {}
237 237
238 238
239 239 # for "historical portability":
240 240 # define parsealiases locally, because cmdutil.parsealiases has been
241 241 # available since 1.5 (or 6252852b4332)
242 242 def parsealiases(cmd):
243 243 return cmd.split(b"|")
244 244
245 245
246 246 if safehasattr(registrar, 'command'):
247 247 command = registrar.command(cmdtable)
248 248 elif safehasattr(cmdutil, 'command'):
249 249 command = cmdutil.command(cmdtable)
250 250 if 'norepo' not in getargspec(command).args:
251 251 # for "historical portability":
252 252 # wrap original cmdutil.command, because "norepo" option has
253 253 # been available since 3.1 (or 75a96326cecb)
254 254 _command = command
255 255
256 256 def command(name, options=(), synopsis=None, norepo=False):
257 257 if norepo:
258 258 commands.norepo += b' %s' % b' '.join(parsealiases(name))
259 259 return _command(name, list(options), synopsis)
260 260
261 261
262 262 else:
263 263 # for "historical portability":
264 264 # define "@command" annotation locally, because cmdutil.command
265 265 # has been available since 1.9 (or 2daa5179e73f)
266 266 def command(name, options=(), synopsis=None, norepo=False):
267 267 def decorator(func):
268 268 if synopsis:
269 269 cmdtable[name] = func, list(options), synopsis
270 270 else:
271 271 cmdtable[name] = func, list(options)
272 272 if norepo:
273 273 commands.norepo += b' %s' % b' '.join(parsealiases(name))
274 274 return func
275 275
276 276 return decorator
277 277
278 278
279 279 try:
280 280 import mercurial.registrar
281 281 import mercurial.configitems
282 282
283 283 configtable = {}
284 284 configitem = mercurial.registrar.configitem(configtable)
285 285 configitem(
286 286 b'perf',
287 287 b'presleep',
288 288 default=mercurial.configitems.dynamicdefault,
289 289 experimental=True,
290 290 )
291 291 configitem(
292 292 b'perf',
293 293 b'stub',
294 294 default=mercurial.configitems.dynamicdefault,
295 295 experimental=True,
296 296 )
297 297 configitem(
298 298 b'perf',
299 299 b'parentscount',
300 300 default=mercurial.configitems.dynamicdefault,
301 301 experimental=True,
302 302 )
303 303 configitem(
304 304 b'perf',
305 305 b'all-timing',
306 306 default=mercurial.configitems.dynamicdefault,
307 307 experimental=True,
308 308 )
309 309 configitem(
310 310 b'perf',
311 311 b'pre-run',
312 312 default=mercurial.configitems.dynamicdefault,
313 313 )
314 314 configitem(
315 315 b'perf',
316 316 b'profile-benchmark',
317 317 default=mercurial.configitems.dynamicdefault,
318 318 )
319 319 configitem(
320 320 b'perf',
321 321 b'run-limits',
322 322 default=mercurial.configitems.dynamicdefault,
323 323 experimental=True,
324 324 )
325 325 except (ImportError, AttributeError):
326 326 pass
327 327 except TypeError:
328 328 # compatibility fix for a11fd395e83f
329 329 # hg version: 5.2
330 330 configitem(
331 331 b'perf',
332 332 b'presleep',
333 333 default=mercurial.configitems.dynamicdefault,
334 334 )
335 335 configitem(
336 336 b'perf',
337 337 b'stub',
338 338 default=mercurial.configitems.dynamicdefault,
339 339 )
340 340 configitem(
341 341 b'perf',
342 342 b'parentscount',
343 343 default=mercurial.configitems.dynamicdefault,
344 344 )
345 345 configitem(
346 346 b'perf',
347 347 b'all-timing',
348 348 default=mercurial.configitems.dynamicdefault,
349 349 )
350 350 configitem(
351 351 b'perf',
352 352 b'pre-run',
353 353 default=mercurial.configitems.dynamicdefault,
354 354 )
355 355 configitem(
356 356 b'perf',
357 357 b'profile-benchmark',
358 358 default=mercurial.configitems.dynamicdefault,
359 359 )
360 360 configitem(
361 361 b'perf',
362 362 b'run-limits',
363 363 default=mercurial.configitems.dynamicdefault,
364 364 )
365 365
366 366
367 367 def getlen(ui):
368 368 if ui.configbool(b"perf", b"stub", False):
369 369 return lambda x: 1
370 370 return len
371 371
372 372
373 373 class noop:
374 374 """dummy context manager"""
375 375
376 376 def __enter__(self):
377 377 pass
378 378
379 379 def __exit__(self, *args):
380 380 pass
381 381
382 382
383 383 NOOPCTX = noop()
384 384
385 385
386 386 def gettimer(ui, opts=None):
387 387 """return a timer function and formatter: (timer, formatter)
388 388
389 389 This function exists to gather the creation of formatter in a single
390 390 place instead of duplicating it in all performance commands."""
391 391
392 392 # enforce an idle period before execution to counteract power management
393 393 # experimental config: perf.presleep
394 394 time.sleep(getint(ui, b"perf", b"presleep", 1))
395 395
396 396 if opts is None:
397 397 opts = {}
398 398 # redirect all to stderr unless buffer api is in use
399 399 if not ui._buffers:
400 400 ui = ui.copy()
401 401 uifout = safeattrsetter(ui, b'fout', ignoremissing=True)
402 402 if uifout:
403 403 # for "historical portability":
404 404 # ui.fout/ferr have been available since 1.9 (or 4e1ccd4c2b6d)
405 405 uifout.set(ui.ferr)
406 406
407 407 # get a formatter
408 408 uiformatter = getattr(ui, 'formatter', None)
409 409 if uiformatter:
410 410 fm = uiformatter(b'perf', opts)
411 411 else:
412 412 # for "historical portability":
413 413 # define formatter locally, because ui.formatter has been
414 414 # available since 2.2 (or ae5f92e154d3)
415 415 from mercurial import node
416 416
417 417 class defaultformatter:
418 418 """Minimized composition of baseformatter and plainformatter"""
419 419
420 420 def __init__(self, ui, topic, opts):
421 421 self._ui = ui
422 422 if ui.debugflag:
423 423 self.hexfunc = node.hex
424 424 else:
425 425 self.hexfunc = node.short
426 426
427 427 def __nonzero__(self):
428 428 return False
429 429
430 430 __bool__ = __nonzero__
431 431
432 432 def startitem(self):
433 433 pass
434 434
435 435 def data(self, **data):
436 436 pass
437 437
438 438 def write(self, fields, deftext, *fielddata, **opts):
439 439 self._ui.write(deftext % fielddata, **opts)
440 440
441 441 def condwrite(self, cond, fields, deftext, *fielddata, **opts):
442 442 if cond:
443 443 self._ui.write(deftext % fielddata, **opts)
444 444
445 445 def plain(self, text, **opts):
446 446 self._ui.write(text, **opts)
447 447
448 448 def end(self):
449 449 pass
450 450
451 451 fm = defaultformatter(ui, b'perf', opts)
452 452
453 453 # stub function, runs code only once instead of in a loop
454 454 # experimental config: perf.stub
455 455 if ui.configbool(b"perf", b"stub", False):
456 456 return functools.partial(stub_timer, fm), fm
457 457
458 458 # experimental config: perf.all-timing
459 459 displayall = ui.configbool(b"perf", b"all-timing", True)
460 460
461 461 # experimental config: perf.run-limits
462 462 limitspec = ui.configlist(b"perf", b"run-limits", [])
463 463 limits = []
464 464 for item in limitspec:
465 465 parts = item.split(b'-', 1)
466 466 if len(parts) < 2:
467 467 ui.warn((b'malformed run limit entry, missing "-": %s\n' % item))
468 468 continue
469 469 try:
470 470 time_limit = float(_sysstr(parts[0]))
471 471 except ValueError as e:
472 472 ui.warn(
473 473 (
474 474 b'malformed run limit entry, %s: %s\n'
475 475 % (_bytestr(e), item)
476 476 )
477 477 )
478 478 continue
479 479 try:
480 480 run_limit = int(_sysstr(parts[1]))
481 481 except ValueError as e:
482 482 ui.warn(
483 483 (
484 484 b'malformed run limit entry, %s: %s\n'
485 485 % (_bytestr(e), item)
486 486 )
487 487 )
488 488 continue
489 489 limits.append((time_limit, run_limit))
490 490 if not limits:
491 491 limits = DEFAULTLIMITS
492 492
493 493 profiler = None
494 494 if profiling is not None:
495 495 if ui.configbool(b"perf", b"profile-benchmark", False):
496 496 profiler = profiling.profile(ui)
497 497
498 498 prerun = getint(ui, b"perf", b"pre-run", 0)
499 499 t = functools.partial(
500 500 _timer,
501 501 fm,
502 502 displayall=displayall,
503 503 limits=limits,
504 504 prerun=prerun,
505 505 profiler=profiler,
506 506 )
507 507 return t, fm
508 508
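# Typical use of gettimer() by the perf commands below (a sketch of the
# pattern used throughout this file):
#
#   timer, fm = gettimer(ui, opts)
#   timer(d, setup=s)  # ``d`` runs the benchmarked code, ``s`` resets caches
#   fm.end()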
509 509
510 510 def stub_timer(fm, func, setup=None, title=None):
511 511 if setup is not None:
512 512 setup()
513 513 func()
514 514
515 515
516 516 @contextlib.contextmanager
517 517 def timeone():
518 518 r = []
519 519 ostart = os.times()
520 520 cstart = util.timer()
521 521 yield r
522 522 cstop = util.timer()
523 523 ostop = os.times()
524 524 a, b = ostart, ostop
525 525 r.append((cstop - cstart, b[0] - a[0], b[1] - a[1]))
526 526
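# The single tuple collected by timeone() is (wall-clock, user CPU, system CPU)
# deltas for the timed block; formatone() below reads it in that order.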
527 527
528 528 # list of stop conditions (elapsed time, minimal run count)
529 529 DEFAULTLIMITS = (
530 530 (3.0, 100),
531 531 (10.0, 3),
532 532 )
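# Worked example with the defaults above: a benchmark keeps iterating until at
# least 3.0s have elapsed *and* at least 100 runs were made, or until at least
# 10.0s have elapsed *and* at least 3 runs were made (see the stop-condition
# loop in _timer() below).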
533 533
534 534
535 535 @contextlib.contextmanager
536 536 def noop_context():
537 537 yield
538 538
539 539
540 540 def _timer(
541 541 fm,
542 542 func,
543 543 setup=None,
544 544 context=noop_context,
545 545 title=None,
546 546 displayall=False,
547 547 limits=DEFAULTLIMITS,
548 548 prerun=0,
549 549 profiler=None,
550 550 ):
551 551 gc.collect()
552 552 results = []
553 553 begin = util.timer()
554 554 count = 0
555 555 if profiler is None:
556 556 profiler = NOOPCTX
557 557 for i in range(prerun):
558 558 if setup is not None:
559 559 setup()
560 560 with context():
561 561 func()
562 562 keepgoing = True
563 563 while keepgoing:
564 564 if setup is not None:
565 565 setup()
566 566 with context():
567 567 with profiler:
568 568 with timeone() as item:
569 569 r = func()
570 570 profiler = NOOPCTX
571 571 count += 1
572 572 results.append(item[0])
573 573 cstop = util.timer()
574 574 # Look for a stop condition.
575 575 elapsed = cstop - begin
576 576 for t, mincount in limits:
577 577 if elapsed >= t and count >= mincount:
578 578 keepgoing = False
579 579 break
580 580
581 581 formatone(fm, results, title=title, result=r, displayall=displayall)
582 582
583 583
584 584 def formatone(fm, timings, title=None, result=None, displayall=False):
585 585 count = len(timings)
586 586
587 587 fm.startitem()
588 588
589 589 if title:
590 590 fm.write(b'title', b'! %s\n', title)
591 591 if result:
592 592 fm.write(b'result', b'! result: %s\n', result)
593 593
594 594 def display(role, entry):
595 595 prefix = b''
596 596 if role != b'best':
597 597 prefix = b'%s.' % role
598 598 fm.plain(b'!')
599 599 fm.write(prefix + b'wall', b' wall %f', entry[0])
600 600 fm.write(prefix + b'comb', b' comb %f', entry[1] + entry[2])
601 601 fm.write(prefix + b'user', b' user %f', entry[1])
602 602 fm.write(prefix + b'sys', b' sys %f', entry[2])
603 603 fm.write(prefix + b'count', b' (%s of %%d)' % role, count)
604 604 fm.plain(b'\n')
605 605
606 606 timings.sort()
607 607 min_val = timings[0]
608 608 display(b'best', min_val)
609 609 if displayall:
610 610 max_val = timings[-1]
611 611 display(b'max', max_val)
612 612 avg = tuple([sum(x) / count for x in zip(*timings)])
613 613 display(b'avg', avg)
614 614 median = timings[len(timings) // 2]
615 615 display(b'median', median)
616 616
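# With the default/plain formatter, each display() call above emits one line
# of the form (illustrative numbers):
#
#   ! wall 0.001234 comb 0.010000 user 0.010000 sys 0.000000 (best of 100)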
617 617
618 618 # utilities for historical portability
619 619
620 620
621 621 def getint(ui, section, name, default):
622 622 # for "historical portability":
623 623 # ui.configint has been available since 1.9 (or fa2b596db182)
624 624 v = ui.config(section, name, None)
625 625 if v is None:
626 626 return default
627 627 try:
628 628 return int(v)
629 629 except ValueError:
630 630 raise error.ConfigError(
631 631 b"%s.%s is not an integer ('%s')" % (section, name, v)
632 632 )
633 633
634 634
635 635 def safeattrsetter(obj, name, ignoremissing=False):
636 636 """Ensure that 'obj' has 'name' attribute before subsequent setattr
637 637
638 638 This function aborts if 'obj' doesn't have the 'name' attribute
639 639 at runtime. This avoids overlooking a future removal of an
640 640 attribute that the performance measurement relies on.
641 641
642 642 The returned object can be used to (1) assign a new value to the
643 643 attribute and (2) restore its original value.
644 644
645 645 If 'ignoremissing' is true, a missing 'name' attribute doesn't cause
646 646 an abort, and this function returns None. This is useful to examine
647 647 an attribute which isn't guaranteed to exist in all Mercurial
648 648 versions.
649 649 """
650 650 if not util.safehasattr(obj, name):
651 651 if ignoremissing:
652 652 return None
653 653 raise error.Abort(
654 654 (
655 655 b"missing attribute %s of %s might break assumption"
656 656 b" of performance measurement"
657 657 )
658 658 % (name, obj)
659 659 )
660 660
661 661 origvalue = getattr(obj, _sysstr(name))
662 662
663 663 class attrutil:
664 664 def set(self, newvalue):
665 665 setattr(obj, _sysstr(name), newvalue)
666 666
667 667 def restore(self):
668 668 setattr(obj, _sysstr(name), origvalue)
669 669
670 670 return attrutil()
671 671
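# Example use of safeattrsetter() (a sketch mirroring gettimer() above):
#
#   uifout = safeattrsetter(ui, b'fout', ignoremissing=True)
#   if uifout:
#       uifout.set(ui.ferr)  # redirect output for the duration of the run
#   # ... and uifout.restore() would put the original value back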
672 672
673 673 # utilities to examine each internal API changes
674 674
675 675
676 676 def getbranchmapsubsettable():
677 677 # for "historical portability":
678 678 # subsettable is defined in:
679 679 # - branchmap since 2.9 (or 175c6fd8cacc)
680 680 # - repoview since 2.5 (or 59a9f18d4587)
681 681 # - repoviewutil since 5.0
682 682 for mod in (branchmap, repoview, repoviewutil):
683 683 subsettable = getattr(mod, 'subsettable', None)
684 684 if subsettable:
685 685 return subsettable
686 686
687 687 # bisecting in bcee63733aad::59a9f18d4587 can reach here (both
688 688 # branchmap and repoview modules exist, but subsettable attribute
689 689 # doesn't)
690 690 raise error.Abort(
691 691 b"perfbranchmap not available with this Mercurial",
692 692 hint=b"use 2.5 or later",
693 693 )
694 694
695 695
696 696 def getsvfs(repo):
697 697 """Return appropriate object to access files under .hg/store"""
698 698 # for "historical portability":
699 699 # repo.svfs has been available since 2.3 (or 7034365089bf)
700 700 svfs = getattr(repo, 'svfs', None)
701 701 if svfs:
702 702 return svfs
703 703 else:
704 704 return getattr(repo, 'sopener')
705 705
706 706
707 707 def getvfs(repo):
708 708 """Return appropriate object to access files under .hg"""
709 709 # for "historical portability":
710 710 # repo.vfs has been available since 2.3 (or 7034365089bf)
711 711 vfs = getattr(repo, 'vfs', None)
712 712 if vfs:
713 713 return vfs
714 714 else:
715 715 return getattr(repo, 'opener')
716 716
717 717
718 718 def repocleartagscachefunc(repo):
719 719 """Return the function to clear tags cache according to repo internal API"""
720 720 if util.safehasattr(repo, b'_tagscache'): # since 2.0 (or 9dca7653b525)
721 721 # in this case, setattr(repo, '_tagscache', None) or so isn't
722 722 # correct way to clear tags cache, because existing code paths
723 723 # expect _tagscache to be a structured object.
724 724 def clearcache():
725 725 # _tagscache has been filteredpropertycache since 2.5 (or
726 726 # 98c867ac1330), and delattr() can't work in such case
727 727 if '_tagscache' in vars(repo):
728 728 del repo.__dict__['_tagscache']
729 729
730 730 return clearcache
731 731
732 732 repotags = safeattrsetter(repo, b'_tags', ignoremissing=True)
733 733 if repotags: # since 1.4 (or 5614a628d173)
734 734 return lambda: repotags.set(None)
735 735
736 736 repotagscache = safeattrsetter(repo, b'tagscache', ignoremissing=True)
737 737 if repotagscache: # since 0.6 (or d7df759d0e97)
738 738 return lambda: repotagscache.set(None)
739 739
740 740 # Mercurial earlier than 0.6 (or d7df759d0e97) logically reaches
741 741 # this point, but it isn't so problematic, because:
742 742 # - repo.tags of such Mercurial isn't "callable", and repo.tags()
743 743 # in perftags() causes failure soon
744 744 # - perf.py itself has been available since 1.1 (or eb240755386d)
745 745 raise error.Abort(b"tags API of this hg command is unknown")
746 746
747 747
748 748 # utilities to clear cache
749 749
750 750
751 751 def clearfilecache(obj, attrname):
752 752 unfiltered = getattr(obj, 'unfiltered', None)
753 753 if unfiltered is not None:
754 754 obj = obj.unfiltered()
755 755 if attrname in vars(obj):
756 756 delattr(obj, attrname)
757 757 obj._filecache.pop(attrname, None)
758 758
759 759
760 760 def clearchangelog(repo):
761 761 if repo is not repo.unfiltered():
762 762 object.__setattr__(repo, '_clcachekey', None)
763 763 object.__setattr__(repo, '_clcache', None)
764 764 clearfilecache(repo.unfiltered(), 'changelog')
765 765
766 766
767 767 # perf commands
768 768
769 769
770 770 @command(b'perf::walk|perfwalk', formatteropts)
771 771 def perfwalk(ui, repo, *pats, **opts):
772 772 opts = _byteskwargs(opts)
773 773 timer, fm = gettimer(ui, opts)
774 774 m = scmutil.match(repo[None], pats, {})
775 775 timer(
776 776 lambda: len(
777 777 list(
778 778 repo.dirstate.walk(m, subrepos=[], unknown=True, ignored=False)
779 779 )
780 780 )
781 781 )
782 782 fm.end()
783 783
784 784
785 785 @command(b'perf::annotate|perfannotate', formatteropts)
786 786 def perfannotate(ui, repo, f, **opts):
787 787 opts = _byteskwargs(opts)
788 788 timer, fm = gettimer(ui, opts)
789 789 fc = repo[b'.'][f]
790 790 timer(lambda: len(fc.annotate(True)))
791 791 fm.end()
792 792
793 793
794 794 @command(
795 795 b'perf::status|perfstatus',
796 796 [
797 797 (b'u', b'unknown', False, b'ask status to look for unknown files'),
798 798 (b'', b'dirstate', False, b'benchmark the internal dirstate call'),
799 799 ]
800 800 + formatteropts,
801 801 )
802 802 def perfstatus(ui, repo, **opts):
803 803 """benchmark the performance of a single status call
804 804
805 805 The repository data are preserved between each call.
806 806
807 807 By default, only the status of the tracked files is requested. If
808 808 `--unknown` is passed, the status of "unknown" files is also requested.
809 809 """
810 810 opts = _byteskwargs(opts)
811 811 # m = match.always(repo.root, repo.getcwd())
812 812 # timer(lambda: sum(map(len, repo.dirstate.status(m, [], False, False,
813 813 # False))))
814 814 timer, fm = gettimer(ui, opts)
815 815 if opts[b'dirstate']:
816 816 dirstate = repo.dirstate
817 817 m = scmutil.matchall(repo)
818 818 unknown = opts[b'unknown']
819 819
820 820 def status_dirstate():
821 821 s = dirstate.status(
822 822 m, subrepos=[], ignored=False, clean=False, unknown=unknown
823 823 )
824 824 sum(map(bool, s))
825 825
826 826 if util.safehasattr(dirstate, 'running_status'):
827 827 with dirstate.running_status(repo):
828 828 timer(status_dirstate)
829 829 dirstate.invalidate()
830 830 else:
831 831 timer(status_dirstate)
832 832 else:
833 833 timer(lambda: sum(map(len, repo.status(unknown=opts[b'unknown']))))
834 834 fm.end()
835 835
836 836
837 837 @command(b'perf::addremove|perfaddremove', formatteropts)
838 838 def perfaddremove(ui, repo, **opts):
839 839 opts = _byteskwargs(opts)
840 840 timer, fm = gettimer(ui, opts)
841 841 try:
842 842 oldquiet = repo.ui.quiet
843 843 repo.ui.quiet = True
844 844 matcher = scmutil.match(repo[None])
845 845 opts[b'dry_run'] = True
846 846 if 'uipathfn' in getargspec(scmutil.addremove).args:
847 847 uipathfn = scmutil.getuipathfn(repo)
848 848 timer(lambda: scmutil.addremove(repo, matcher, b"", uipathfn, opts))
849 849 else:
850 850 timer(lambda: scmutil.addremove(repo, matcher, b"", opts))
851 851 finally:
852 852 repo.ui.quiet = oldquiet
853 853 fm.end()
854 854
855 855
856 856 def clearcaches(cl):
857 857 # behave somewhat consistently across internal API changes
858 858 if util.safehasattr(cl, b'clearcaches'):
859 859 cl.clearcaches()
860 860 elif util.safehasattr(cl, b'_nodecache'):
861 861 # <= hg-5.2
862 862 from mercurial.node import nullid, nullrev
863 863
864 864 cl._nodecache = {nullid: nullrev}
865 865 cl._nodepos = None
866 866
867 867
868 868 @command(b'perf::heads|perfheads', formatteropts)
869 869 def perfheads(ui, repo, **opts):
870 870 """benchmark the computation of a changelog heads"""
871 871 opts = _byteskwargs(opts)
872 872 timer, fm = gettimer(ui, opts)
873 873 cl = repo.changelog
874 874
875 875 def s():
876 876 clearcaches(cl)
877 877
878 878 def d():
879 879 len(cl.headrevs())
880 880
881 881 timer(d, setup=s)
882 882 fm.end()
883 883
884 884
885 885 def _default_clear_on_disk_tags_cache(repo):
886 886 from mercurial import tags
887 887
888 888 repo.cachevfs.tryunlink(tags._filename(repo))
889 889
890 890
891 891 def _default_clear_on_disk_tags_fnodes_cache(repo):
892 892 from mercurial import tags
893 893
894 894 repo.cachevfs.tryunlink(tags._fnodescachefile)
895 895
896 896
897 897 def _default_forget_fnodes(repo, revs):
898 898 """function used by the perf extension to prune some entries from the
899 899 fnodes cache"""
900 900 from mercurial import tags
901 901
902 902 missing_1 = b'\xff' * 4
903 903 missing_2 = b'\xff' * 20
904 904 cache = tags.hgtagsfnodescache(repo.unfiltered())
905 905 for r in revs:
906 906 cache._writeentry(r * tags._fnodesrecsize, missing_1, missing_2)
907 907 cache.write()
908 908
909 909
910 910 @command(
911 911 b'perf::tags|perftags',
912 912 formatteropts
913 913 + [
914 914 (b'', b'clear-revlogs', False, b'refresh changelog and manifest'),
915 915 (
916 916 b'',
917 917 b'clear-on-disk-cache',
918 918 False,
919 919 b'clear on disk tags cache (DESTRUCTIVE)',
920 920 ),
921 921 (
922 922 b'',
923 923 b'clear-fnode-cache-all',
924 924 False,
925 925 b'clear on disk file node cache (DESTRUCTIVE)',
926 926 ),
927 927 (
928 928 b'',
929 929 b'clear-fnode-cache-rev',
930 930 [],
931 931 b'clear on disk file node cache (DESTRUCTIVE)',
932 932 b'REVS',
933 933 ),
934 934 (
935 935 b'',
936 936 b'update-last',
937 937 b'',
938 938 b'simulate an update over the last N revisions (DESTRUCTIVE)',
939 939 b'N',
940 940 ),
941 941 ],
942 942 )
943 943 def perftags(ui, repo, **opts):
944 944 """Benchmark tags retrieval in various situation
945 945
946 946 The option marked as (DESTRUCTIVE) will alter the on-disk cache, possibly
947 947 altering performance after the command was run. However, it does not
948 948 destroy any stored data.
949 949 """
950 950 from mercurial import tags
951 951
952 952 opts = _byteskwargs(opts)
953 953 timer, fm = gettimer(ui, opts)
954 954 repocleartagscache = repocleartagscachefunc(repo)
955 955 clearrevlogs = opts[b'clear_revlogs']
956 956 clear_disk = opts[b'clear_on_disk_cache']
957 957 clear_fnode = opts[b'clear_fnode_cache_all']
958 958
959 959 clear_fnode_revs = opts[b'clear_fnode_cache_rev']
960 960 update_last_str = opts[b'update_last']
961 961 update_last = None
962 962 if update_last_str:
963 963 try:
964 964 update_last = int(update_last_str)
965 965 except ValueError:
966 966 msg = b'could not parse value for update-last: "%s"'
967 967 msg %= update_last_str
968 968 hint = b'value should be an integer'
969 969 raise error.Abort(msg, hint=hint)
970 970
971 971 clear_disk_fn = getattr(
972 972 tags,
973 973 "clear_cache_on_disk",
974 974 _default_clear_on_disk_tags_cache,
975 975 )
976 976 clear_fnodes_fn = getattr(
977 977 tags,
978 978 "clear_cache_fnodes",
979 979 _default_clear_on_disk_tags_fnodes_cache,
980 980 )
981 981 clear_fnodes_rev_fn = getattr(
982 982 tags,
983 983 "forget_fnodes",
984 984 _default_forget_fnodes,
985 985 )
986 986
987 987 clear_revs = []
988 988 if clear_fnode_revs:
989 989 clear_revs.extend(scmutil.revrange(repo, clear_fnode_revs))
990 990
991 991 if update_last:
992 992 revset = b'last(all(), %d)' % update_last
993 993 last_revs = repo.unfiltered().revs(revset)
994 994 clear_revs.extend(last_revs)
995 995
996 996 from mercurial import repoview
997 997
998 998 rev_filter = {(b'experimental', b'extra-filter-revs'): revset}
999 999 with repo.ui.configoverride(rev_filter, source=b"perf"):
1000 1000 filter_id = repoview.extrafilter(repo.ui)
1001 1001
1002 1002 filter_name = b'%s%%%s' % (repo.filtername, filter_id)
1003 1003 pre_repo = repo.filtered(filter_name)
1004 1004 pre_repo.tags() # warm the cache
1005 1005 old_tags_path = repo.cachevfs.join(tags._filename(pre_repo))
1006 1006 new_tags_path = repo.cachevfs.join(tags._filename(repo))
1007 1007
1008 1008 clear_revs = sorted(set(clear_revs))
1009 1009
1010 1010 def s():
1011 1011 if update_last:
1012 1012 util.copyfile(old_tags_path, new_tags_path)
1013 1013 if clearrevlogs:
1014 1014 clearchangelog(repo)
1015 1015 clearfilecache(repo.unfiltered(), 'manifest')
1016 1016 if clear_disk:
1017 1017 clear_disk_fn(repo)
1018 1018 if clear_fnode:
1019 1019 clear_fnodes_fn(repo)
1020 1020 elif clear_revs:
1021 1021 clear_fnodes_rev_fn(repo, clear_revs)
1022 1022 repocleartagscache()
1023 1023
1024 1024 def t():
1025 1025 len(repo.tags())
1026 1026
1027 1027 timer(t, setup=s)
1028 1028 fm.end()
1029 1029
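# Example invocation of the command above (a sketch; the revision count is
# arbitrary): ``hg perf::tags --update-last 100`` simulates refreshing the
# tags cache after an update touching the last 100 revisions. As documented
# above, this alters the on-disk cache (DESTRUCTIVE) but destroys no data.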
1030 1030
1031 1031 @command(b'perf::ancestors|perfancestors', formatteropts)
1032 1032 def perfancestors(ui, repo, **opts):
1033 1033 opts = _byteskwargs(opts)
1034 1034 timer, fm = gettimer(ui, opts)
1035 1035 heads = repo.changelog.headrevs()
1036 1036
1037 1037 def d():
1038 1038 for a in repo.changelog.ancestors(heads):
1039 1039 pass
1040 1040
1041 1041 timer(d)
1042 1042 fm.end()
1043 1043
1044 1044
1045 1045 @command(b'perf::ancestorset|perfancestorset', formatteropts)
1046 1046 def perfancestorset(ui, repo, revset, **opts):
1047 1047 opts = _byteskwargs(opts)
1048 1048 timer, fm = gettimer(ui, opts)
1049 1049 revs = repo.revs(revset)
1050 1050 heads = repo.changelog.headrevs()
1051 1051
1052 1052 def d():
1053 1053 s = repo.changelog.ancestors(heads)
1054 1054 for rev in revs:
1055 1055 rev in s
1056 1056
1057 1057 timer(d)
1058 1058 fm.end()
1059 1059
1060 1060
1061 1061 @command(
1062 1062 b'perf::delta-find',
1063 1063 revlogopts + formatteropts,
1064 1064 b'-c|-m|FILE REV',
1065 1065 )
1066 1066 def perf_delta_find(ui, repo, arg_1, arg_2=None, **opts):
1067 1067 """benchmark the process of finding a valid delta for a revlog revision
1068 1068
1069 1069 When a revlog receives a new revision (e.g. from a commit, or from an
1070 1070 incoming bundle), it searches for a suitable delta-base to produce a delta.
1071 1071 This perf command measures how much time we spend in this process. It
1072 1072 operates on an already stored revision.
1073 1073
1074 1074 See `hg help debug-delta-find` for another related command.
1075 1075 """
1076 1076 from mercurial import revlogutils
1077 1077 import mercurial.revlogutils.deltas as deltautil
1078 1078
1079 1079 opts = _byteskwargs(opts)
1080 1080 if arg_2 is None:
1081 1081 file_ = None
1082 1082 rev = arg_1
1083 1083 else:
1084 1084 file_ = arg_1
1085 1085 rev = arg_2
1086 1086
1087 1087 repo = repo.unfiltered()
1088 1088
1089 1089 timer, fm = gettimer(ui, opts)
1090 1090
1091 1091 rev = int(rev)
1092 1092
1093 1093 revlog = cmdutil.openrevlog(repo, b'perf::delta-find', file_, opts)
1094 1094
1095 1095 deltacomputer = deltautil.deltacomputer(revlog)
1096 1096
1097 1097 node = revlog.node(rev)
1098 1098 p1r, p2r = revlog.parentrevs(rev)
1099 1099 p1 = revlog.node(p1r)
1100 1100 p2 = revlog.node(p2r)
1101 1101 full_text = revlog.revision(rev)
1102 1102 textlen = len(full_text)
1103 1103 cachedelta = None
1104 1104 flags = revlog.flags(rev)
1105 1105
1106 1106 revinfo = revlogutils.revisioninfo(
1107 1107 node,
1108 1108 p1,
1109 1109 p2,
1110 1110 [full_text], # btext
1111 1111 textlen,
1112 1112 cachedelta,
1113 1113 flags,
1114 1114 )
1115 1115
1116 1116 # Note: we should probably purge the potential caches (like the full
1117 1117 # manifest cache) between runs.
1118 1118 def find_one():
1119 1119 with revlog._datafp() as fh:
1120 1120 deltacomputer.finddeltainfo(revinfo, fh, target_rev=rev)
1121 1121
1122 1122 timer(find_one)
1123 1123 fm.end()
1124 1124
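# Example invocation of the command above (a sketch; the revision is
# arbitrary): ``hg perf::delta-find -m 1000`` times the delta-base search for
# manifest revision 1000, while ``hg perf::delta-find FILE REV`` does the same
# for a filelog revision.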
1125 1125
1126 1126 @command(b'perf::discovery|perfdiscovery', formatteropts, b'PATH')
1127 1127 def perfdiscovery(ui, repo, path, **opts):
1128 1128 """benchmark discovery between local repo and the peer at given path"""
1129 1129 repos = [repo, None]
1130 1130 timer, fm = gettimer(ui, opts)
1131 1131
1132 1132 try:
1133 1133 from mercurial.utils.urlutil import get_unique_pull_path_obj
1134 1134
1135 1135 path = get_unique_pull_path_obj(b'perfdiscovery', ui, path)
1136 1136 except ImportError:
1137 1137 try:
1138 1138 from mercurial.utils.urlutil import get_unique_pull_path
1139 1139
1140 1140 path = get_unique_pull_path(b'perfdiscovery', repo, ui, path)[0]
1141 1141 except ImportError:
1142 1142 path = ui.expandpath(path)
1143 1143
1144 1144 def s():
1145 1145 repos[1] = hg.peer(ui, opts, path)
1146 1146
1147 1147 def d():
1148 1148 setdiscovery.findcommonheads(ui, *repos)
1149 1149
1150 1150 timer(d, setup=s)
1151 1151 fm.end()
1152 1152
1153 1153
1154 1154 @command(
1155 1155 b'perf::bookmarks|perfbookmarks',
1156 1156 formatteropts
1157 1157 + [
1158 1158 (b'', b'clear-revlogs', False, b'refresh changelog and manifest'),
1159 1159 ],
1160 1160 )
1161 1161 def perfbookmarks(ui, repo, **opts):
1162 1162 """benchmark parsing bookmarks from disk to memory"""
1163 1163 opts = _byteskwargs(opts)
1164 1164 timer, fm = gettimer(ui, opts)
1165 1165
1166 1166 clearrevlogs = opts[b'clear_revlogs']
1167 1167
1168 1168 def s():
1169 1169 if clearrevlogs:
1170 1170 clearchangelog(repo)
1171 1171 clearfilecache(repo, b'_bookmarks')
1172 1172
1173 1173 def d():
1174 1174 repo._bookmarks
1175 1175
1176 1176 timer(d, setup=s)
1177 1177 fm.end()
1178 1178
1179 1179
1180 1180 @command(
1181 1181 b'perf::bundle',
1182 1182 [
1183 1183 (
1184 1184 b'r',
1185 1185 b'rev',
1186 1186 [],
1187 1187 b'changesets to bundle',
1188 1188 b'REV',
1189 1189 ),
1190 1190 (
1191 1191 b't',
1192 1192 b'type',
1193 1193 b'none',
1194 1194 b'bundlespec to use (see `hg help bundlespec`)',
1195 1195 b'TYPE',
1196 1196 ),
1197 1197 ]
1198 1198 + formatteropts,
1199 1199 b'REVS',
1200 1200 )
1201 1201 def perfbundle(ui, repo, *revs, **opts):
1202 1202 """benchmark the creation of a bundle from a repository
1203 1203
1204 1204 For now, this only supports "none" compression.
1205 1205 """
1206 1206 try:
1207 1207 from mercurial import bundlecaches
1208 1208
1209 1209 parsebundlespec = bundlecaches.parsebundlespec
1210 1210 except ImportError:
1211 1211 from mercurial import exchange
1212 1212
1213 1213 parsebundlespec = exchange.parsebundlespec
1214 1214
1215 1215 from mercurial import discovery
1216 1216 from mercurial import bundle2
1217 1217
1218 1218 opts = _byteskwargs(opts)
1219 1219 timer, fm = gettimer(ui, opts)
1220 1220
1221 1221 cl = repo.changelog
1222 1222 revs = list(revs)
1223 1223 revs.extend(opts.get(b'rev', ()))
1224 1224 revs = scmutil.revrange(repo, revs)
1225 1225 if not revs:
1226 1226 raise error.Abort(b"no revision specified")
1227 1227 # make it a consistent set (ie: without topological gaps)
1228 1228 old_len = len(revs)
1229 1229 revs = list(repo.revs(b"%ld::%ld", revs, revs))
1230 1230 if old_len != len(revs):
1231 1231 new_count = len(revs) - old_len
1232 1232 msg = b"add %d new revisions to make it a consistent set\n"
1233 1233 ui.write_err(msg % new_count)
1234 1234
1235 1235 targets = [cl.node(r) for r in repo.revs(b"heads(::%ld)", revs)]
1236 1236 bases = [cl.node(r) for r in repo.revs(b"heads(::%ld - %ld)", revs, revs)]
1237 1237 outgoing = discovery.outgoing(repo, bases, targets)
1238 1238
1239 1239 bundle_spec = opts.get(b'type')
1240 1240
1241 1241 bundle_spec = parsebundlespec(repo, bundle_spec, strict=False)
1242 1242
1243 1243 cgversion = bundle_spec.params.get(b"cg.version")
1244 1244 if cgversion is None:
1245 1245 if bundle_spec.version == b'v1':
1246 1246 cgversion = b'01'
1247 1247 if bundle_spec.version == b'v2':
1248 1248 cgversion = b'02'
1249 1249 if cgversion not in changegroup.supportedoutgoingversions(repo):
1250 1250 err = b"repository does not support bundle version %s"
1251 1251 raise error.Abort(err % cgversion)
1252 1252
1253 1253 if cgversion == b'01': # bundle1
1254 1254 bversion = b'HG10' + bundle_spec.wirecompression
1255 1255 bcompression = None
1256 1256 elif cgversion in (b'02', b'03'):
1257 1257 bversion = b'HG20'
1258 1258 bcompression = bundle_spec.wirecompression
1259 1259 else:
1260 1260 err = b'perf::bundle: unexpected changegroup version %s'
1261 1261 raise error.ProgrammingError(err % cgversion)
1262 1262
1263 1263 if bcompression is None:
1264 1264 bcompression = b'UN'
1265 1265
1266 1266 if bcompression != b'UN':
1267 1267 err = b'perf::bundle: compression currently unsupported: %s'
1268 1268 raise error.ProgrammingError(err % bcompression)
1269 1269
1270 1270 def do_bundle():
1271 1271 bundle2.writenewbundle(
1272 1272 ui,
1273 1273 repo,
1274 1274 b'perf::bundle',
1275 1275 os.devnull,
1276 1276 bversion,
1277 1277 outgoing,
1278 1278 bundle_spec.params,
1279 1279 )
1280 1280
1281 1281 timer(do_bundle)
1282 1282 fm.end()
1283 1283
1284 1284
1285 1285 @command(b'perf::bundleread|perfbundleread', formatteropts, b'BUNDLE')
1286 1286 def perfbundleread(ui, repo, bundlepath, **opts):
1287 1287 """Benchmark reading of bundle files.
1288 1288
1289 1289 This command is meant to isolate the I/O part of bundle reading as
1290 1290 much as possible.
1291 1291 """
1292 1292 from mercurial import (
1293 1293 bundle2,
1294 1294 exchange,
1295 1295 streamclone,
1296 1296 )
1297 1297
1298 1298 opts = _byteskwargs(opts)
1299 1299
1300 1300 def makebench(fn):
1301 1301 def run():
1302 1302 with open(bundlepath, b'rb') as fh:
1303 1303 bundle = exchange.readbundle(ui, fh, bundlepath)
1304 1304 fn(bundle)
1305 1305
1306 1306 return run
1307 1307
1308 1308 def makereadnbytes(size):
1309 1309 def run():
1310 1310 with open(bundlepath, b'rb') as fh:
1311 1311 bundle = exchange.readbundle(ui, fh, bundlepath)
1312 1312 while bundle.read(size):
1313 1313 pass
1314 1314
1315 1315 return run
1316 1316
1317 1317 def makestdioread(size):
1318 1318 def run():
1319 1319 with open(bundlepath, b'rb') as fh:
1320 1320 while fh.read(size):
1321 1321 pass
1322 1322
1323 1323 return run
1324 1324
1325 1325 # bundle1
1326 1326
1327 1327 def deltaiter(bundle):
1328 1328 for delta in bundle.deltaiter():
1329 1329 pass
1330 1330
1331 1331 def iterchunks(bundle):
1332 1332 for chunk in bundle.getchunks():
1333 1333 pass
1334 1334
1335 1335 # bundle2
1336 1336
1337 1337 def forwardchunks(bundle):
1338 1338 for chunk in bundle._forwardchunks():
1339 1339 pass
1340 1340
1341 1341 def iterparts(bundle):
1342 1342 for part in bundle.iterparts():
1343 1343 pass
1344 1344
1345 1345 def iterpartsseekable(bundle):
1346 1346 for part in bundle.iterparts(seekable=True):
1347 1347 pass
1348 1348
1349 1349 def seek(bundle):
1350 1350 for part in bundle.iterparts(seekable=True):
1351 1351 part.seek(0, os.SEEK_END)
1352 1352
1353 1353 def makepartreadnbytes(size):
1354 1354 def run():
1355 1355 with open(bundlepath, b'rb') as fh:
1356 1356 bundle = exchange.readbundle(ui, fh, bundlepath)
1357 1357 for part in bundle.iterparts():
1358 1358 while part.read(size):
1359 1359 pass
1360 1360
1361 1361 return run
1362 1362
1363 1363 benches = [
1364 1364 (makestdioread(8192), b'read(8k)'),
1365 1365 (makestdioread(16384), b'read(16k)'),
1366 1366 (makestdioread(32768), b'read(32k)'),
1367 1367 (makestdioread(131072), b'read(128k)'),
1368 1368 ]
1369 1369
1370 1370 with open(bundlepath, b'rb') as fh:
1371 1371 bundle = exchange.readbundle(ui, fh, bundlepath)
1372 1372
1373 1373 if isinstance(bundle, changegroup.cg1unpacker):
1374 1374 benches.extend(
1375 1375 [
1376 1376 (makebench(deltaiter), b'cg1 deltaiter()'),
1377 1377 (makebench(iterchunks), b'cg1 getchunks()'),
1378 1378 (makereadnbytes(8192), b'cg1 read(8k)'),
1379 1379 (makereadnbytes(16384), b'cg1 read(16k)'),
1380 1380 (makereadnbytes(32768), b'cg1 read(32k)'),
1381 1381 (makereadnbytes(131072), b'cg1 read(128k)'),
1382 1382 ]
1383 1383 )
1384 1384 elif isinstance(bundle, bundle2.unbundle20):
1385 1385 benches.extend(
1386 1386 [
1387 1387 (makebench(forwardchunks), b'bundle2 forwardchunks()'),
1388 1388 (makebench(iterparts), b'bundle2 iterparts()'),
1389 1389 (
1390 1390 makebench(iterpartsseekable),
1391 1391 b'bundle2 iterparts() seekable',
1392 1392 ),
1393 1393 (makebench(seek), b'bundle2 part seek()'),
1394 1394 (makepartreadnbytes(8192), b'bundle2 part read(8k)'),
1395 1395 (makepartreadnbytes(16384), b'bundle2 part read(16k)'),
1396 1396 (makepartreadnbytes(32768), b'bundle2 part read(32k)'),
1397 1397 (makepartreadnbytes(131072), b'bundle2 part read(128k)'),
1398 1398 ]
1399 1399 )
1400 1400 elif isinstance(bundle, streamclone.streamcloneapplier):
1401 1401 raise error.Abort(b'stream clone bundles not supported')
1402 1402 else:
1403 1403 raise error.Abort(b'unhandled bundle type: %s' % type(bundle))
1404 1404
1405 1405 for fn, title in benches:
1406 1406 timer, fm = gettimer(ui, opts)
1407 1407 timer(fn, title=title)
1408 1408 fm.end()
1409 1409
1410 1410
1411 1411 @command(
1412 1412 b'perf::changegroupchangelog|perfchangegroupchangelog',
1413 1413 formatteropts
1414 1414 + [
1415 1415 (b'', b'cgversion', b'02', b'changegroup version'),
1416 1416 (b'r', b'rev', b'', b'revisions to add to changegroup'),
1417 1417 ],
1418 1418 )
1419 1419 def perfchangegroupchangelog(ui, repo, cgversion=b'02', rev=None, **opts):
1420 1420 """Benchmark producing a changelog group for a changegroup.
1421 1421
1422 1422 This measures the time spent processing the changelog during a
1423 1423 bundle operation. This occurs during `hg bundle` and on a server
1424 1424 processing a `getbundle` wire protocol request (handles clones
1425 1425 and pull requests).
1426 1426
1427 1427 By default, all revisions are added to the changegroup.
1428 1428 """
1429 1429 opts = _byteskwargs(opts)
1430 1430 cl = repo.changelog
1431 1431 nodes = [cl.lookup(r) for r in repo.revs(rev or b'all()')]
1432 1432 bundler = changegroup.getbundler(cgversion, repo)
1433 1433
1434 1434 def d():
1435 1435 state, chunks = bundler._generatechangelog(cl, nodes)
1436 1436 for chunk in chunks:
1437 1437 pass
1438 1438
1439 1439 timer, fm = gettimer(ui, opts)
1440 1440
1441 1441 # Terminal printing can interfere with timing. So disable it.
1442 1442 with ui.configoverride({(b'progress', b'disable'): True}):
1443 1443 timer(d)
1444 1444
1445 1445 fm.end()
1446 1446
1447 1447
1448 1448 @command(b'perf::dirs|perfdirs', formatteropts)
1449 1449 def perfdirs(ui, repo, **opts):
1450 1450 opts = _byteskwargs(opts)
1451 1451 timer, fm = gettimer(ui, opts)
1452 1452 dirstate = repo.dirstate
1453 1453 b'a' in dirstate
1454 1454
1455 1455 def d():
1456 1456 dirstate.hasdir(b'a')
1457 1457 try:
1458 1458 del dirstate._map._dirs
1459 1459 except AttributeError:
1460 1460 pass
1461 1461
1462 1462 timer(d)
1463 1463 fm.end()
1464 1464
1465 1465
1466 1466 @command(
1467 1467 b'perf::dirstate|perfdirstate',
1468 1468 [
1469 1469 (
1470 1470 b'',
1471 1471 b'iteration',
1472 1472 None,
1473 1473 b'benchmark a full iteration for the dirstate',
1474 1474 ),
1475 1475 (
1476 1476 b'',
1477 1477 b'contains',
1478 1478 None,
1479 1479 b'benchmark a large amount of `nf in dirstate` calls',
1480 1480 ),
1481 1481 ]
1482 1482 + formatteropts,
1483 1483 )
1484 1484 def perfdirstate(ui, repo, **opts):
1485 1485 """benchmap the time of various distate operations
1486 1486
1487 1487 By default benchmark the time necessary to load a dirstate from scratch.
1488 1488 The dirstate is loaded to the point were a "contains" request can be
1489 1489 answered.
1490 1490 """
1491 1491 opts = _byteskwargs(opts)
1492 1492 timer, fm = gettimer(ui, opts)
1493 1493 b"a" in repo.dirstate
1494 1494
1495 1495 if opts[b'iteration'] and opts[b'contains']:
1496 1496 msg = b'only specify one of --iteration or --contains'
1497 1497 raise error.Abort(msg)
1498 1498
1499 1499 if opts[b'iteration']:
1500 1500 setup = None
1501 1501 dirstate = repo.dirstate
1502 1502
1503 1503 def d():
1504 1504 for f in dirstate:
1505 1505 pass
1506 1506
1507 1507 elif opts[b'contains']:
1508 1508 setup = None
1509 1509 dirstate = repo.dirstate
1510 1510 allfiles = list(dirstate)
1511 1511 # also add file paths that will be "missing" from the dirstate
1512 1512 allfiles.extend([f[::-1] for f in allfiles])
1513 1513
1514 1514 def d():
1515 1515 for f in allfiles:
1516 1516 f in dirstate
1517 1517
1518 1518 else:
1519 1519
1520 1520 def setup():
1521 1521 repo.dirstate.invalidate()
1522 1522
1523 1523 def d():
1524 1524 b"a" in repo.dirstate
1525 1525
1526 1526 timer(d, setup=setup)
1527 1527 fm.end()
1528 1528
1529 1529
1530 1530 @command(b'perf::dirstatedirs|perfdirstatedirs', formatteropts)
1531 1531 def perfdirstatedirs(ui, repo, **opts):
1532 1532 """benchmap a 'dirstate.hasdir' call from an empty `dirs` cache"""
1533 1533 opts = _byteskwargs(opts)
1534 1534 timer, fm = gettimer(ui, opts)
1535 1535 repo.dirstate.hasdir(b"a")
1536 1536
1537 1537 def setup():
1538 1538 try:
1539 1539 del repo.dirstate._map._dirs
1540 1540 except AttributeError:
1541 1541 pass
1542 1542
1543 1543 def d():
1544 1544 repo.dirstate.hasdir(b"a")
1545 1545
1546 1546 timer(d, setup=setup)
1547 1547 fm.end()
1548 1548
1549 1549
1550 1550 @command(b'perf::dirstatefoldmap|perfdirstatefoldmap', formatteropts)
1551 1551 def perfdirstatefoldmap(ui, repo, **opts):
1552 1552 """benchmap a `dirstate._map.filefoldmap.get()` request
1553 1553
1554 1554 The dirstate filefoldmap cache is dropped between every request.
1555 1555 """
1556 1556 opts = _byteskwargs(opts)
1557 1557 timer, fm = gettimer(ui, opts)
1558 1558 dirstate = repo.dirstate
1559 1559 dirstate._map.filefoldmap.get(b'a')
1560 1560
1561 1561 def setup():
1562 1562 del dirstate._map.filefoldmap
1563 1563
1564 1564 def d():
1565 1565 dirstate._map.filefoldmap.get(b'a')
1566 1566
1567 1567 timer(d, setup=setup)
1568 1568 fm.end()
1569 1569
1570 1570
1571 1571 @command(b'perf::dirfoldmap|perfdirfoldmap', formatteropts)
1572 1572 def perfdirfoldmap(ui, repo, **opts):
1573 1573 """benchmap a `dirstate._map.dirfoldmap.get()` request
1574 1574
1575 1575 The dirstate dirfoldmap cache is dropped between every request.
1576 1576 """
1577 1577 opts = _byteskwargs(opts)
1578 1578 timer, fm = gettimer(ui, opts)
1579 1579 dirstate = repo.dirstate
1580 1580 dirstate._map.dirfoldmap.get(b'a')
1581 1581
1582 1582 def setup():
1583 1583 del dirstate._map.dirfoldmap
1584 1584 try:
1585 1585 del dirstate._map._dirs
1586 1586 except AttributeError:
1587 1587 pass
1588 1588
1589 1589 def d():
1590 1590 dirstate._map.dirfoldmap.get(b'a')
1591 1591
1592 1592 timer(d, setup=setup)
1593 1593 fm.end()
1594 1594
1595 1595
1596 1596 @command(b'perf::dirstatewrite|perfdirstatewrite', formatteropts)
1597 1597 def perfdirstatewrite(ui, repo, **opts):
1598 1598 """benchmap the time it take to write a dirstate on disk"""
1599 1599 opts = _byteskwargs(opts)
1600 1600 timer, fm = gettimer(ui, opts)
1601 1601 ds = repo.dirstate
1602 1602 b"a" in ds
1603 1603
1604 1604 def setup():
1605 1605 ds._dirty = True
1606 1606
1607 1607 def d():
1608 1608 ds.write(repo.currenttransaction())
1609 1609
1610 1610 with repo.wlock():
1611 1611 timer(d, setup=setup)
1612 1612 fm.end()
1613 1613
1614 1614
1615 1615 def _getmergerevs(repo, opts):
1616 1616 """parse command argument to return rev involved in merge
1617 1617
1618 1618 input: options dictionnary with `rev`, `from` and `bse`
1619 1619 output: (localctx, otherctx, basectx)
1620 1620 """
1621 1621 if opts[b'from']:
1622 1622 fromrev = scmutil.revsingle(repo, opts[b'from'])
1623 1623 wctx = repo[fromrev]
1624 1624 else:
1625 1625 wctx = repo[None]
1626 1626 # we don't want working dir files to be stat'd in the benchmark, so
1627 1627 # prime that cache
1628 1628 wctx.dirty()
1629 1629 rctx = scmutil.revsingle(repo, opts[b'rev'], opts[b'rev'])
1630 1630 if opts[b'base']:
1631 1631 fromrev = scmutil.revsingle(repo, opts[b'base'])
1632 1632 ancestor = repo[fromrev]
1633 1633 else:
1634 1634 ancestor = wctx.ancestor(rctx)
1635 1635 return (wctx, rctx, ancestor)
1636 1636
1637 1637
1638 1638 @command(
1639 1639 b'perf::mergecalculate|perfmergecalculate',
1640 1640 [
1641 1641 (b'r', b'rev', b'.', b'rev to merge against'),
1642 1642 (b'', b'from', b'', b'rev to merge from'),
1643 1643 (b'', b'base', b'', b'the revision to use as base'),
1644 1644 ]
1645 1645 + formatteropts,
1646 1646 )
1647 1647 def perfmergecalculate(ui, repo, **opts):
1648 1648 opts = _byteskwargs(opts)
1649 1649 timer, fm = gettimer(ui, opts)
1650 1650
1651 1651 wctx, rctx, ancestor = _getmergerevs(repo, opts)
1652 1652
1653 1653 def d():
1654 1654 # acceptremote is True because we don't want prompts in the middle of
1655 1655 # our benchmark
1656 1656 merge.calculateupdates(
1657 1657 repo,
1658 1658 wctx,
1659 1659 rctx,
1660 1660 [ancestor],
1661 1661 branchmerge=False,
1662 1662 force=False,
1663 1663 acceptremote=True,
1664 1664 followcopies=True,
1665 1665 )
1666 1666
1667 1667 timer(d)
1668 1668 fm.end()
1669 1669
1670 1670
1671 1671 @command(
1672 1672 b'perf::mergecopies|perfmergecopies',
1673 1673 [
1674 1674 (b'r', b'rev', b'.', b'rev to merge against'),
1675 1675 (b'', b'from', b'', b'rev to merge from'),
1676 1676 (b'', b'base', b'', b'the revision to use as base'),
1677 1677 ]
1678 1678 + formatteropts,
1679 1679 )
1680 1680 def perfmergecopies(ui, repo, **opts):
1681 1681 """measure runtime of `copies.mergecopies`"""
1682 1682 opts = _byteskwargs(opts)
1683 1683 timer, fm = gettimer(ui, opts)
1684 1684 wctx, rctx, ancestor = _getmergerevs(repo, opts)
1685 1685
1686 1686 def d():
1687 1687 # acceptremote is True because we don't want prompts in the middle of
1688 1688 # our benchmark
1689 1689 copies.mergecopies(repo, wctx, rctx, ancestor)
1690 1690
1691 1691 timer(d)
1692 1692 fm.end()
1693 1693
1694 1694
1695 1695 @command(b'perf::pathcopies|perfpathcopies', [], b"REV REV")
1696 1696 def perfpathcopies(ui, repo, rev1, rev2, **opts):
1697 1697 """benchmark the copy tracing logic"""
1698 1698 opts = _byteskwargs(opts)
1699 1699 timer, fm = gettimer(ui, opts)
1700 1700 ctx1 = scmutil.revsingle(repo, rev1, rev1)
1701 1701 ctx2 = scmutil.revsingle(repo, rev2, rev2)
1702 1702
1703 1703 def d():
1704 1704 copies.pathcopies(ctx1, ctx2)
1705 1705
1706 1706 timer(d)
1707 1707 fm.end()
1708 1708
1709 1709
1710 1710 @command(
1711 1711 b'perf::phases|perfphases',
1712 1712 [
1713 1713 (b'', b'full', False, b'include file reading time too'),
1714 1714 ],
1715 1715 b"",
1716 1716 )
1717 1717 def perfphases(ui, repo, **opts):
1718 1718 """benchmark phasesets computation"""
1719 1719 opts = _byteskwargs(opts)
1720 1720 timer, fm = gettimer(ui, opts)
1721 1721 _phases = repo._phasecache
1722 1722 full = opts.get(b'full')
1723 1723
1724 1724 def d():
1725 1725 phases = _phases
1726 1726 if full:
1727 1727 clearfilecache(repo, b'_phasecache')
1728 1728 phases = repo._phasecache
1729 1729 phases.invalidate()
1730 1730 phases.loadphaserevs(repo)
1731 1731
1732 1732 timer(d)
1733 1733 fm.end()
1734 1734
1735 1735
1736 1736 @command(b'perf::phasesremote|perfphasesremote', [], b"[DEST]")
1737 1737 def perfphasesremote(ui, repo, dest=None, **opts):
1738 1738 """benchmark time needed to analyse phases of the remote server"""
1739 1739 from mercurial.node import bin
1740 1740 from mercurial import (
1741 1741 exchange,
1742 1742 hg,
1743 1743 phases,
1744 1744 )
1745 1745
1746 1746 opts = _byteskwargs(opts)
1747 1747 timer, fm = gettimer(ui, opts)
1748 1748
1749 1749 path = ui.getpath(dest, default=(b'default-push', b'default'))
1750 1750 if not path:
1751 1751 raise error.Abort(
1752 1752 b'default repository not configured!',
1753 1753 hint=b"see 'hg help config.paths'",
1754 1754 )
1755 1755 if util.safehasattr(path, 'main_path'):
1756 1756 path = path.get_push_variant()
1757 1757 dest = path.loc
1758 1758 else:
1759 1759 dest = path.pushloc or path.loc
1760 1760 ui.statusnoi18n(b'analysing phase of %s\n' % util.hidepassword(dest))
1761 1761 other = hg.peer(repo, opts, dest)
1762 1762
1763 1763 # easier to perform discovery through the operation
1764 1764 op = exchange.pushoperation(repo, other)
1765 1765 exchange._pushdiscoverychangeset(op)
1766 1766
1767 1767 remotesubset = op.fallbackheads
1768 1768
1769 1769 with other.commandexecutor() as e:
1770 1770 remotephases = e.callcommand(
1771 1771 b'listkeys', {b'namespace': b'phases'}
1772 1772 ).result()
1773 1773 del other
1774 1774 publishing = remotephases.get(b'publishing', False)
1775 1775 if publishing:
1776 1776 ui.statusnoi18n(b'publishing: yes\n')
1777 1777 else:
1778 1778 ui.statusnoi18n(b'publishing: no\n')
1779 1779
1780 1780 has_node = getattr(repo.changelog.index, 'has_node', None)
1781 1781 if has_node is None:
1782 1782 has_node = repo.changelog.nodemap.__contains__
1783 1783 nonpublishroots = 0
1784 1784 for nhex, phase in remotephases.iteritems():
1785 1785 if nhex == b'publishing': # ignore data related to publish option
1786 1786 continue
1787 1787 node = bin(nhex)
1788 1788 if has_node(node) and int(phase):
1789 1789 nonpublishroots += 1
1790 1790 ui.statusnoi18n(b'number of roots: %d\n' % len(remotephases))
1791 1791 ui.statusnoi18n(b'number of known non public roots: %d\n' % nonpublishroots)
1792 1792
1793 1793 def d():
1794 1794 phases.remotephasessummary(repo, remotesubset, remotephases)
1795 1795
1796 1796 timer(d)
1797 1797 fm.end()
1798 1798
1799 1799
1800 1800 @command(
1801 1801 b'perf::manifest|perfmanifest',
1802 1802 [
1803 1803 (b'm', b'manifest-rev', False, b'Look up a manifest node revision'),
1804 1804 (b'', b'clear-disk', False, b'clear on-disk caches too'),
1805 1805 ]
1806 1806 + formatteropts,
1807 1807 b'REV|NODE',
1808 1808 )
1809 1809 def perfmanifest(ui, repo, rev, manifest_rev=False, clear_disk=False, **opts):
1810 1810 """benchmark the time to read a manifest from disk and return a usable
1811 1811 dict-like object
1812 1812
1813 1813 Manifest caches are cleared before retrieval."""
1814 1814 opts = _byteskwargs(opts)
1815 1815 timer, fm = gettimer(ui, opts)
1816 1816 if not manifest_rev:
1817 1817 ctx = scmutil.revsingle(repo, rev, rev)
1818 1818 t = ctx.manifestnode()
1819 1819 else:
1820 1820 from mercurial.node import bin
1821 1821
1822 1822 if len(rev) == 40:
1823 1823 t = bin(rev)
1824 1824 else:
1825 1825 try:
1826 1826 rev = int(rev)
1827 1827
1828 1828 if util.safehasattr(repo.manifestlog, b'getstorage'):
1829 1829 t = repo.manifestlog.getstorage(b'').node(rev)
1830 1830 else:
1831 1831 t = repo.manifestlog._revlog.lookup(rev)
1832 1832 except ValueError:
1833 1833 raise error.Abort(
1834 1834 b'manifest revision must be integer or full node'
1835 1835 )
1836 1836
1837 1837 def d():
1838 1838 repo.manifestlog.clearcaches(clear_persisted_data=clear_disk)
1839 1839 repo.manifestlog[t].read()
1840 1840
1841 1841 timer(d)
1842 1842 fm.end()
1843 1843
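# Illustrative invocations of perf::manifest (a sketch only; `tip` and the
# manifest revision `0` below are placeholder values, not a recommended
# benchmark set):
#
#   $ hg perf::manifest tip
#   $ hg perf::manifest --manifest-rev 0 --clear-disk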
1844 1844
1845 1845 @command(b'perf::changeset|perfchangeset', formatteropts)
1846 1846 def perfchangeset(ui, repo, rev, **opts):
1847 1847 opts = _byteskwargs(opts)
1848 1848 timer, fm = gettimer(ui, opts)
1849 1849 n = scmutil.revsingle(repo, rev).node()
1850 1850
1851 1851 def d():
1852 1852 repo.changelog.read(n)
1853 1853 # repo.changelog._cache = None
1854 1854
1855 1855 timer(d)
1856 1856 fm.end()
1857 1857
1858 1858
1859 1859 @command(b'perf::ignore|perfignore', formatteropts)
1860 1860 def perfignore(ui, repo, **opts):
1861 1861 """benchmark operation related to computing ignore"""
1862 1862 opts = _byteskwargs(opts)
1863 1863 timer, fm = gettimer(ui, opts)
1864 1864 dirstate = repo.dirstate
1865 1865
1866 1866 def setupone():
1867 1867 dirstate.invalidate()
1868 1868 clearfilecache(dirstate, b'_ignore')
1869 1869
1870 1870 def runone():
1871 1871 dirstate._ignore
1872 1872
1873 1873 timer(runone, setup=setupone, title=b"load")
1874 1874 fm.end()
1875 1875
1876 1876
1877 1877 @command(
1878 1878 b'perf::index|perfindex',
1879 1879 [
1880 1880 (b'', b'rev', [], b'revision to be looked up (default tip)'),
1881 1881 (b'', b'no-lookup', None, b'do not revision lookup post creation'),
1882 1882 ]
1883 1883 + formatteropts,
1884 1884 )
1885 1885 def perfindex(ui, repo, **opts):
1886 1886 """benchmark index creation time followed by a lookup
1887 1887
1888 1888 The default is to look `tip` up. Depending on the index implementation,
1889 1889 the revision looked up can matters. For example, an implementation
1890 1890     the revision looked up can matter. For example, an implementation
1891 1891     scanning the index will have a faster lookup time for `--rev tip` than for
1892 1892     `--rev 0`. The number of looked up revisions and their order can also
1893 1893     matter.
1894 1894     Examples of useful sets to test:
1895 1895
1896 1896 * tip
1897 1897 * 0
1898 1898 * -10:
1899 1899 * :10
1900 1900 * -10: + :10
1901 1901 * :10: + -10:
1902 1902 * -10000:
1903 1903 * -10000: + 0
1904 1904
1905 1905 It is not currently possible to check for lookup of a missing node. For
1906 1906     deeper lookup benchmarking, check out the `perfnodemap` command."""
1907 1907 import mercurial.revlog
1908 1908
1909 1909 opts = _byteskwargs(opts)
1910 1910 timer, fm = gettimer(ui, opts)
1911 1911 mercurial.revlog._prereadsize = 2 ** 24 # disable lazy parser in old hg
1912 1912 if opts[b'no_lookup']:
1913 1913 if opts['rev']:
1914 1914 raise error.Abort('--no-lookup and --rev are mutually exclusive')
1915 1915 nodes = []
1916 1916 elif not opts[b'rev']:
1917 1917 nodes = [repo[b"tip"].node()]
1918 1918 else:
1919 1919 revs = scmutil.revrange(repo, opts[b'rev'])
1920 1920 cl = repo.changelog
1921 1921 nodes = [cl.node(r) for r in revs]
1922 1922
1923 1923 unfi = repo.unfiltered()
1924 1924 # find the filecache func directly
1925 1925     # This avoids polluting the benchmark with the filecache logic
1926 1926 makecl = unfi.__class__.changelog.func
1927 1927
1928 1928 def setup():
1929 1929 # probably not necessary, but for good measure
1930 1930 clearchangelog(unfi)
1931 1931
1932 1932 def d():
1933 1933 cl = makecl(unfi)
1934 1934 for n in nodes:
1935 1935 cl.rev(n)
1936 1936
1937 1937 timer(d, setup=setup)
1938 1938 fm.end()
1939 1939
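# Illustrative invocations of perf::index (a sketch only; it assumes the perf
# extension is enabled and simply reuses the revsets suggested in the
# docstring above):
#
#   $ hg perf::index --rev tip
#   $ hg perf::index --rev '-10000:' --rev 0
#   $ hg perf::index --no-lookup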
1940 1940
1941 1941 @command(
1942 1942 b'perf::nodemap|perfnodemap',
1943 1943 [
1944 1944 (b'', b'rev', [], b'revision to be looked up (default tip)'),
1945 1945 (b'', b'clear-caches', True, b'clear revlog cache between calls'),
1946 1946 ]
1947 1947 + formatteropts,
1948 1948 )
1949 1949 def perfnodemap(ui, repo, **opts):
1950 1950 """benchmark the time necessary to look up revision from a cold nodemap
1951 1951
1952 1952     Depending on the implementation, the amount and order of revisions we look
1953 1953     up can vary. Examples of useful sets to test:
1954 1954 * tip
1955 1955 * 0
1956 1956 * -10:
1957 1957 * :10
1958 1958 * -10: + :10
1959 1959 * :10: + -10:
1960 1960 * -10000:
1961 1961 * -10000: + 0
1962 1962
1963 1963     The command currently focuses on valid binary lookup. Benchmarking for
1964 1964 hexlookup, prefix lookup and missing lookup would also be valuable.
1965 1965 """
1966 1966 import mercurial.revlog
1967 1967
1968 1968 opts = _byteskwargs(opts)
1969 1969 timer, fm = gettimer(ui, opts)
1970 1970 mercurial.revlog._prereadsize = 2 ** 24 # disable lazy parser in old hg
1971 1971
1972 1972 unfi = repo.unfiltered()
1973 1973 clearcaches = opts[b'clear_caches']
1974 1974 # find the filecache func directly
1975 1975     # This avoids polluting the benchmark with the filecache logic
1976 1976 makecl = unfi.__class__.changelog.func
1977 1977 if not opts[b'rev']:
1978 1978 raise error.Abort(b'use --rev to specify revisions to look up')
1979 1979 revs = scmutil.revrange(repo, opts[b'rev'])
1980 1980 cl = repo.changelog
1981 1981 nodes = [cl.node(r) for r in revs]
1982 1982
1983 1983 # use a list to pass reference to a nodemap from one closure to the next
1984 1984 nodeget = [None]
1985 1985
1986 1986 def setnodeget():
1987 1987 # probably not necessary, but for good measure
1988 1988 clearchangelog(unfi)
1989 1989 cl = makecl(unfi)
1990 1990 if util.safehasattr(cl.index, 'get_rev'):
1991 1991 nodeget[0] = cl.index.get_rev
1992 1992 else:
1993 1993 nodeget[0] = cl.nodemap.get
1994 1994
1995 1995 def d():
1996 1996 get = nodeget[0]
1997 1997 for n in nodes:
1998 1998 get(n)
1999 1999
2000 2000 setup = None
2001 2001 if clearcaches:
2002 2002
2003 2003 def setup():
2004 2004 setnodeget()
2005 2005
2006 2006 else:
2007 2007 setnodeget()
2008 2008 d() # prewarm the data structure
2009 2009 timer(d, setup=setup)
2010 2010 fm.end()
2011 2011
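# Illustrative invocations of perf::nodemap (a sketch only; unlike
# perf::index, `--rev` is mandatory here and several values can be combined):
#
#   $ hg perf::nodemap --rev tip
#   $ hg perf::nodemap --rev '-10:' --rev ':10'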
2012 2012
2013 2013 @command(b'perf::startup|perfstartup', formatteropts)
2014 2014 def perfstartup(ui, repo, **opts):
2015 2015 opts = _byteskwargs(opts)
2016 2016 timer, fm = gettimer(ui, opts)
2017 2017
2018 2018 def d():
2019 2019 if os.name != 'nt':
2020 2020 os.system(
2021 2021 b"HGRCPATH= %s version -q > /dev/null" % fsencode(sys.argv[0])
2022 2022 )
2023 2023 else:
2024 2024 os.environ['HGRCPATH'] = r' '
2025 2025 os.system("%s version -q > NUL" % sys.argv[0])
2026 2026
2027 2027 timer(d)
2028 2028 fm.end()
2029 2029
2030 2030
2031 2031 def _find_stream_generator(version):
2032 2032 """find the proper generator function for this stream version"""
2033 2033 import mercurial.streamclone
2034 2034
2035 2035 available = {}
2036 2036
2037 2037 # try to fetch a v1 generator
2038 2038 generatev1 = getattr(mercurial.streamclone, "generatev1", None)
2039 2039 if generatev1 is not None:
2040 2040
2041 2041         def generate(repo):
2042 2042             entries, bytes, data = generatev1(repo)
2043 2043             return data
2044 2044 
2045 2045         available[b'v1'] = generate
2046 2046 # try to fetch a v2 generator
2047 2047 generatev2 = getattr(mercurial.streamclone, "generatev2", None)
2048 2048 if generatev2 is not None:
2049 2049
2050 2050 def generate(repo):
2051 2051 entries, bytes, data = generatev2(repo, None, None, True)
2052 2052 return data
2053 2053
2054 2054 available[b'v2'] = generate
2055 2055 # try to fetch a v3 generator
2056 2056 generatev3 = getattr(mercurial.streamclone, "generatev3", None)
2057 2057 if generatev3 is not None:
2058 2058
2059 2059 def generate(repo):
2060 2060 entries, bytes, data = generatev3(repo, None, None, True)
2061 2061 return data
2062 2062
2063 2063 available[b'v3-exp'] = generate
2064 2064
2065 2065 # resolve the request
2066 2066 if version == b"latest":
2067 2067         # latest is the highest non-experimental version
2068 2068 latest_key = max(v for v in available if b'-exp' not in v)
2069 2069 return available[latest_key]
2070 2070 elif version in available:
2071 2071 return available[version]
2072 2072 else:
2073 2073         msg = b"unknown or unavailable version: %s"
2074 2074 msg %= version
2075 2075 hint = b"available versions: %s"
2076 2076 hint %= b', '.join(sorted(available))
2077 2077 raise error.Abort(msg, hint=hint)
2078 2078
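# A minimal sketch of how the resolution above behaves, assuming all three
# generators exist in the running Mercurial (the b'v9' value is deliberately
# bogus):
#
#   _find_stream_generator(b'latest')  # -> the b'v2' wrapper, b'v3-exp' is skipped
#   _find_stream_generator(b'v3-exp')  # -> the experimental v3 wrapper
#   _find_stream_generator(b'v9')      # -> raises error.Abort listing known versions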
2079 2079
2080 2080 @command(
2081 2081 b'perf::stream-locked-section',
2082 2082 [
2083 2083 (
2084 2084 b'',
2085 2085 b'stream-version',
2086 2086 b'latest',
2087 2087             b'stream version to use ("v1", "v2", "v3-exp" or "latest" (the default))',
2088 2088 ),
2089 2089 ]
2090 2090 + formatteropts,
2091 2091 )
2092 2092 def perf_stream_clone_scan(ui, repo, stream_version, **opts):
2093 2093 """benchmark the initial, repo-locked, section of a stream-clone"""
2094 2094
2095 2095 opts = _byteskwargs(opts)
2096 2096 timer, fm = gettimer(ui, opts)
2097 2097
2098 2098 # deletion of the generator may trigger some cleanup that we do not want to
2099 2099 # measure
2100 2100 result_holder = [None]
2101 2101
2102 2102 def setupone():
2103 2103 result_holder[0] = None
2104 2104
2105 2105 generate = _find_stream_generator(stream_version)
2106 2106
2107 2107 def runone():
2108 2108         # the lock is held for the duration of the initialisation
2109 2109 result_holder[0] = generate(repo)
2110 2110
2111 2111 timer(runone, setup=setupone, title=b"load")
2112 2112 fm.end()
2113 2113
2114 2114
2115 2115 @command(
2116 2116 b'perf::stream-generate',
2117 2117 [
2118 2118 (
2119 2119 b'',
2120 2120 b'stream-version',
2121 2121 b'latest',
2122 2122             b'stream version to use ("v1", "v2" or "latest" (the default))',
2123 2123 ),
2124 2124 ]
2125 2125 + formatteropts,
2126 2126 )
2127 2127 def perf_stream_clone_generate(ui, repo, stream_version, **opts):
2128 2128 """benchmark the full generation of a stream clone"""
2129 2129
2130 2130 opts = _byteskwargs(opts)
2131 2131 timer, fm = gettimer(ui, opts)
2132 2132
2133 2133 # deletion of the generator may trigger some cleanup that we do not want to
2134 2134 # measure
2135 2135
2136 2136 generate = _find_stream_generator(stream_version)
2137 2137
2138 2138 def runone():
2139 2139         # the lock is held for the duration of the initialisation
2140 2140 for chunk in generate(repo):
2141 2141 pass
2142 2142
2143 2143 timer(runone, title=b"generate")
2144 2144 fm.end()
2145 2145
2146 2146
2147 2147 @command(
2148 2148 b'perf::stream-consume',
2149 2149 formatteropts,
2150 2150 )
2151 2151 def perf_stream_clone_consume(ui, repo, filename, **opts):
2152 2152 """benchmark the full application of a stream clone
2153 2153
2154 2154 This include the creation of the repository
2155 2155     This includes the creation of the repository.
2156 2156 # try except to appease check code
2157 2157 msg = b"mercurial too old, missing necessary module: %s"
2158 2158 try:
2159 2159 from mercurial import bundle2
2160 2160 except ImportError as exc:
2161 2161 msg %= _bytestr(exc)
2162 2162 raise error.Abort(msg)
2163 2163 try:
2164 2164 from mercurial import exchange
2165 2165 except ImportError as exc:
2166 2166 msg %= _bytestr(exc)
2167 2167 raise error.Abort(msg)
2168 2168 try:
2169 2169 from mercurial import hg
2170 2170 except ImportError as exc:
2171 2171 msg %= _bytestr(exc)
2172 2172 raise error.Abort(msg)
2173 2173 try:
2174 2174 from mercurial import localrepo
2175 2175 except ImportError as exc:
2176 2176 msg %= _bytestr(exc)
2177 2177 raise error.Abort(msg)
2178 2178
2179 2179 opts = _byteskwargs(opts)
2180 2180 timer, fm = gettimer(ui, opts)
2181 2181
2182 2182 # deletion of the generator may trigger some cleanup that we do not want to
2183 2183 # measure
2184 2184 if not (os.path.isfile(filename) and os.access(filename, os.R_OK)):
2185 2185 raise error.Abort("not a readable file: %s" % filename)
2186 2186
2187 2187 run_variables = [None, None]
2188 2188
2189 2189 @contextlib.contextmanager
2190 2190 def context():
2191 2191 with open(filename, mode='rb') as bundle:
2192 2192 with tempfile.TemporaryDirectory() as tmp_dir:
2193 2193 tmp_dir = fsencode(tmp_dir)
2194 2194 run_variables[0] = bundle
2195 2195 run_variables[1] = tmp_dir
2196 2196 yield
2197 2197 run_variables[0] = None
2198 2198 run_variables[1] = None
2199 2199
2200 2200 def runone():
2201 2201 bundle = run_variables[0]
2202 2202 tmp_dir = run_variables[1]
2203 2203 # only pass ui when no srcrepo
2204 2204 localrepo.createrepository(
2205 2205 repo.ui, tmp_dir, requirements=repo.requirements
2206 2206 )
2207 2207 target = hg.repository(repo.ui, tmp_dir)
2208 2208 gen = exchange.readbundle(target.ui, bundle, bundle.name)
2209 2209 # stream v1
2210 2210 if util.safehasattr(gen, 'apply'):
2211 2211 gen.apply(target)
2212 2212 else:
2213 2213 with target.transaction(b"perf::stream-consume") as tr:
2214 2214 bundle2.applybundle(
2215 2215 target,
2216 2216 gen,
2217 2217 tr,
2218 2218 source=b'unbundle',
2219 2219 url=filename,
2220 2220 )
2221 2221
2222 2222 timer(runone, context=context, title=b"consume")
2223 2223 fm.end()
2224 2224
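# Illustrative usage of perf::stream-consume (a sketch only; `stream.hg` is a
# placeholder name and the debug command below is just one way to obtain a
# stream bundle, depending on the Mercurial version):
#
#   $ hg debugcreatestreamclonebundle stream.hg   (run in the source repository)
#   $ hg perf::stream-consume stream.hg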
2225 2225
2226 2226 @command(b'perf::parents|perfparents', formatteropts)
2227 2227 def perfparents(ui, repo, **opts):
2228 2228 """benchmark the time necessary to fetch one changeset's parents.
2229 2229
2230 2230 The fetch is done using the `node identifier`, traversing all object layers
2231 2231 from the repository object. The first N revisions will be used for this
2232 2232 benchmark. N is controlled by the ``perf.parentscount`` config option
2233 2233 (default: 1000).
2234 2234 """
2235 2235 opts = _byteskwargs(opts)
2236 2236 timer, fm = gettimer(ui, opts)
2237 2237 # control the number of commits perfparents iterates over
2238 2238 # experimental config: perf.parentscount
2239 2239 count = getint(ui, b"perf", b"parentscount", 1000)
2240 2240 if len(repo.changelog) < count:
2241 2241 raise error.Abort(b"repo needs %d commits for this test" % count)
2242 2242 repo = repo.unfiltered()
2243 2243 nl = [repo.changelog.node(i) for i in _xrange(count)]
2244 2244
2245 2245 def d():
2246 2246 for n in nl:
2247 2247 repo.changelog.parents(n)
2248 2248
2249 2249 timer(d)
2250 2250 fm.end()
2251 2251
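# Example configuration for the ``perf.parentscount`` knob used above (a
# sketch only; 100 is an arbitrary value, it just has to stay at or below the
# repository length):
#
#   [perf]
#   parentscount = 100
#
#   $ hg perf::parents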
2252 2252
2253 2253 @command(b'perf::ctxfiles|perfctxfiles', formatteropts)
2254 2254 def perfctxfiles(ui, repo, x, **opts):
2255 2255 opts = _byteskwargs(opts)
2256 2256 x = int(x)
2257 2257 timer, fm = gettimer(ui, opts)
2258 2258
2259 2259 def d():
2260 2260 len(repo[x].files())
2261 2261
2262 2262 timer(d)
2263 2263 fm.end()
2264 2264
2265 2265
2266 2266 @command(b'perf::rawfiles|perfrawfiles', formatteropts)
2267 2267 def perfrawfiles(ui, repo, x, **opts):
2268 2268 opts = _byteskwargs(opts)
2269 2269 x = int(x)
2270 2270 timer, fm = gettimer(ui, opts)
2271 2271 cl = repo.changelog
2272 2272
2273 2273 def d():
2274 2274 len(cl.read(x)[3])
2275 2275
2276 2276 timer(d)
2277 2277 fm.end()
2278 2278
2279 2279
2280 2280 @command(b'perf::lookup|perflookup', formatteropts)
2281 2281 def perflookup(ui, repo, rev, **opts):
2282 2282 opts = _byteskwargs(opts)
2283 2283 timer, fm = gettimer(ui, opts)
2284 2284 timer(lambda: len(repo.lookup(rev)))
2285 2285 fm.end()
2286 2286
2287 2287
2288 2288 @command(
2289 2289 b'perf::linelogedits|perflinelogedits',
2290 2290 [
2291 2291 (b'n', b'edits', 10000, b'number of edits'),
2292 2292 (b'', b'max-hunk-lines', 10, b'max lines in a hunk'),
2293 2293 ],
2294 2294 norepo=True,
2295 2295 )
2296 2296 def perflinelogedits(ui, **opts):
2297 2297 from mercurial import linelog
2298 2298
2299 2299 opts = _byteskwargs(opts)
2300 2300
2301 2301 edits = opts[b'edits']
2302 2302 maxhunklines = opts[b'max_hunk_lines']
2303 2303
2304 2304 maxb1 = 100000
2305 2305 random.seed(0)
2306 2306 randint = random.randint
2307 2307 currentlines = 0
2308 2308 arglist = []
2309 2309 for rev in _xrange(edits):
2310 2310 a1 = randint(0, currentlines)
2311 2311 a2 = randint(a1, min(currentlines, a1 + maxhunklines))
2312 2312 b1 = randint(0, maxb1)
2313 2313 b2 = randint(b1, b1 + maxhunklines)
2314 2314 currentlines += (b2 - b1) - (a2 - a1)
2315 2315 arglist.append((rev, a1, a2, b1, b2))
2316 2316
2317 2317 def d():
2318 2318 ll = linelog.linelog()
2319 2319 for args in arglist:
2320 2320 ll.replacelines(*args)
2321 2321
2322 2322 timer, fm = gettimer(ui, opts)
2323 2323 timer(d)
2324 2324 fm.end()
2325 2325
2326 2326
2327 2327 @command(b'perf::revrange|perfrevrange', formatteropts)
2328 2328 def perfrevrange(ui, repo, *specs, **opts):
2329 2329 opts = _byteskwargs(opts)
2330 2330 timer, fm = gettimer(ui, opts)
2331 2331 revrange = scmutil.revrange
2332 2332 timer(lambda: len(revrange(repo, specs)))
2333 2333 fm.end()
2334 2334
2335 2335
2336 2336 @command(b'perf::nodelookup|perfnodelookup', formatteropts)
2337 2337 def perfnodelookup(ui, repo, rev, **opts):
2338 2338 opts = _byteskwargs(opts)
2339 2339 timer, fm = gettimer(ui, opts)
2340 2340 import mercurial.revlog
2341 2341
2342 2342 mercurial.revlog._prereadsize = 2 ** 24 # disable lazy parser in old hg
2343 2343 n = scmutil.revsingle(repo, rev).node()
2344 2344
2345 2345 try:
2346 2346 cl = revlog(getsvfs(repo), radix=b"00changelog")
2347 2347 except TypeError:
2348 2348 cl = revlog(getsvfs(repo), indexfile=b"00changelog.i")
2349 2349
2350 2350 def d():
2351 2351 cl.rev(n)
2352 2352 clearcaches(cl)
2353 2353
2354 2354 timer(d)
2355 2355 fm.end()
2356 2356
2357 2357
2358 2358 @command(
2359 2359 b'perf::log|perflog',
2360 2360 [(b'', b'rename', False, b'ask log to follow renames')] + formatteropts,
2361 2361 )
2362 2362 def perflog(ui, repo, rev=None, **opts):
2363 2363 opts = _byteskwargs(opts)
2364 2364 if rev is None:
2365 2365 rev = []
2366 2366 timer, fm = gettimer(ui, opts)
2367 2367 ui.pushbuffer()
2368 2368 timer(
2369 2369 lambda: commands.log(
2370 2370 ui, repo, rev=rev, date=b'', user=b'', copies=opts.get(b'rename')
2371 2371 )
2372 2372 )
2373 2373 ui.popbuffer()
2374 2374 fm.end()
2375 2375
2376 2376
2377 2377 @command(b'perf::moonwalk|perfmoonwalk', formatteropts)
2378 2378 def perfmoonwalk(ui, repo, **opts):
2379 2379 """benchmark walking the changelog backwards
2380 2380
2381 2381 This also loads the changelog data for each revision in the changelog.
2382 2382 """
2383 2383 opts = _byteskwargs(opts)
2384 2384 timer, fm = gettimer(ui, opts)
2385 2385
2386 2386 def moonwalk():
2387 2387 for i in repo.changelog.revs(start=(len(repo) - 1), stop=-1):
2388 2388 ctx = repo[i]
2389 2389 ctx.branch() # read changelog data (in addition to the index)
2390 2390
2391 2391 timer(moonwalk)
2392 2392 fm.end()
2393 2393
2394 2394
2395 2395 @command(
2396 2396 b'perf::templating|perftemplating',
2397 2397 [
2398 2398 (b'r', b'rev', [], b'revisions to run the template on'),
2399 2399 ]
2400 2400 + formatteropts,
2401 2401 )
2402 2402 def perftemplating(ui, repo, testedtemplate=None, **opts):
2403 2403 """test the rendering time of a given template"""
2404 2404 if makelogtemplater is None:
2405 2405 raise error.Abort(
2406 2406 b"perftemplating not available with this Mercurial",
2407 2407 hint=b"use 4.3 or later",
2408 2408 )
2409 2409
2410 2410 opts = _byteskwargs(opts)
2411 2411
2412 2412 nullui = ui.copy()
2413 2413 nullui.fout = open(os.devnull, 'wb')
2414 2414 nullui.disablepager()
2415 2415 revs = opts.get(b'rev')
2416 2416 if not revs:
2417 2417 revs = [b'all()']
2418 2418 revs = list(scmutil.revrange(repo, revs))
2419 2419
2420 2420 defaulttemplate = (
2421 2421 b'{date|shortdate} [{rev}:{node|short}]'
2422 2422 b' {author|person}: {desc|firstline}\n'
2423 2423 )
2424 2424 if testedtemplate is None:
2425 2425 testedtemplate = defaulttemplate
2426 2426 displayer = makelogtemplater(nullui, repo, testedtemplate)
2427 2427
2428 2428 def format():
2429 2429 for r in revs:
2430 2430 ctx = repo[r]
2431 2431 displayer.show(ctx)
2432 2432 displayer.flush(ctx)
2433 2433
2434 2434 timer, fm = gettimer(ui, opts)
2435 2435 timer(format)
2436 2436 fm.end()
2437 2437
2438 2438
2439 2439 def _displaystats(ui, opts, entries, data):
2440 2440 # use a second formatter because the data are quite different, not sure
2441 2441 # how it flies with the templater.
2442 2442 fm = ui.formatter(b'perf-stats', opts)
2443 2443 for key, title in entries:
2444 2444 values = data[key]
2445 2445         nbvalues = len(values)
2446 2446 values.sort()
2447 2447 stats = {
2448 2448 'key': key,
2449 2449 'title': title,
2450 2450 'nbitems': len(values),
2451 2451 'min': values[0][0],
2452 2452 '10%': values[(nbvalues * 10) // 100][0],
2453 2453 '25%': values[(nbvalues * 25) // 100][0],
2454 2454 '50%': values[(nbvalues * 50) // 100][0],
2455 2455 '75%': values[(nbvalues * 75) // 100][0],
2456 2456 '80%': values[(nbvalues * 80) // 100][0],
2457 2457 '85%': values[(nbvalues * 85) // 100][0],
2458 2458 '90%': values[(nbvalues * 90) // 100][0],
2459 2459 '95%': values[(nbvalues * 95) // 100][0],
2460 2460 '99%': values[(nbvalues * 99) // 100][0],
2461 2461 'max': values[-1][0],
2462 2462 }
2463 2463 fm.startitem()
2464 2464 fm.data(**stats)
2465 2465 # make node pretty for the human output
2466 2466 fm.plain('### %s (%d items)\n' % (title, len(values)))
2467 2467 lines = [
2468 2468 'min',
2469 2469 '10%',
2470 2470 '25%',
2471 2471 '50%',
2472 2472 '75%',
2473 2473 '80%',
2474 2474 '85%',
2475 2475 '90%',
2476 2476 '95%',
2477 2477 '99%',
2478 2478 'max',
2479 2479 ]
2480 2480 for l in lines:
2481 2481 fm.plain('%s: %s\n' % (l, stats[l]))
2482 2482 fm.end()
2483 2483
2484 2484
2485 2485 @command(
2486 2486 b'perf::helper-mergecopies|perfhelper-mergecopies',
2487 2487 formatteropts
2488 2488 + [
2489 2489 (b'r', b'revs', [], b'restrict search to these revisions'),
2490 2490 (b'', b'timing', False, b'provides extra data (costly)'),
2491 2491 (b'', b'stats', False, b'provides statistic about the measured data'),
2492 2492 ],
2493 2493 )
2494 2494 def perfhelpermergecopies(ui, repo, revs=[], **opts):
2495 2495 """find statistics about potential parameters for `perfmergecopies`
2496 2496
2497 2497     This command finds (base, p1, p2) triplets relevant for copytracing
2498 2498 benchmarking in the context of a merge. It reports values for some of the
2499 2499 parameters that impact merge copy tracing time during merge.
2500 2500
2501 2501 If `--timing` is set, rename detection is run and the associated timing
2502 2502 will be reported. The extra details come at the cost of slower command
2503 2503 execution.
2504 2504
2505 2505 Since rename detection is only run once, other factors might easily
2506 2506 affect the precision of the timing. However it should give a good
2507 2507 approximation of which revision triplets are very costly.
2508 2508 """
2509 2509 opts = _byteskwargs(opts)
2510 2510 fm = ui.formatter(b'perf', opts)
2511 2511 dotiming = opts[b'timing']
2512 2512 dostats = opts[b'stats']
2513 2513
2514 2514 output_template = [
2515 2515 ("base", "%(base)12s"),
2516 2516 ("p1", "%(p1.node)12s"),
2517 2517 ("p2", "%(p2.node)12s"),
2518 2518 ("p1.nb-revs", "%(p1.nbrevs)12d"),
2519 2519 ("p1.nb-files", "%(p1.nbmissingfiles)12d"),
2520 2520 ("p1.renames", "%(p1.renamedfiles)12d"),
2521 2521 ("p1.time", "%(p1.time)12.3f"),
2522 2522 ("p2.nb-revs", "%(p2.nbrevs)12d"),
2523 2523 ("p2.nb-files", "%(p2.nbmissingfiles)12d"),
2524 2524 ("p2.renames", "%(p2.renamedfiles)12d"),
2525 2525 ("p2.time", "%(p2.time)12.3f"),
2526 2526 ("renames", "%(nbrenamedfiles)12d"),
2527 2527 ("total.time", "%(time)12.3f"),
2528 2528 ]
2529 2529 if not dotiming:
2530 2530 output_template = [
2531 2531 i
2532 2532 for i in output_template
2533 2533 if not ('time' in i[0] or 'renames' in i[0])
2534 2534 ]
2535 2535 header_names = [h for (h, v) in output_template]
2536 2536 output = ' '.join([v for (h, v) in output_template]) + '\n'
2537 2537 header = ' '.join(['%12s'] * len(header_names)) + '\n'
2538 2538 fm.plain(header % tuple(header_names))
2539 2539
2540 2540 if not revs:
2541 2541 revs = ['all()']
2542 2542 revs = scmutil.revrange(repo, revs)
2543 2543
2544 2544 if dostats:
2545 2545 alldata = {
2546 2546 'nbrevs': [],
2547 2547 'nbmissingfiles': [],
2548 2548 }
2549 2549 if dotiming:
2550 2550 alldata['parentnbrenames'] = []
2551 2551 alldata['totalnbrenames'] = []
2552 2552 alldata['parenttime'] = []
2553 2553 alldata['totaltime'] = []
2554 2554
2555 2555 roi = repo.revs('merge() and %ld', revs)
2556 2556 for r in roi:
2557 2557 ctx = repo[r]
2558 2558 p1 = ctx.p1()
2559 2559 p2 = ctx.p2()
2560 2560 bases = repo.changelog._commonancestorsheads(p1.rev(), p2.rev())
2561 2561 for b in bases:
2562 2562 b = repo[b]
2563 2563 p1missing = copies._computeforwardmissing(b, p1)
2564 2564 p2missing = copies._computeforwardmissing(b, p2)
2565 2565 data = {
2566 2566 b'base': b.hex(),
2567 2567 b'p1.node': p1.hex(),
2568 2568 b'p1.nbrevs': len(repo.revs('only(%d, %d)', p1.rev(), b.rev())),
2569 2569 b'p1.nbmissingfiles': len(p1missing),
2570 2570 b'p2.node': p2.hex(),
2571 2571 b'p2.nbrevs': len(repo.revs('only(%d, %d)', p2.rev(), b.rev())),
2572 2572 b'p2.nbmissingfiles': len(p2missing),
2573 2573 }
2574 2574 if dostats:
2575 2575 if p1missing:
2576 2576 alldata['nbrevs'].append(
2577 2577 (data['p1.nbrevs'], b.hex(), p1.hex())
2578 2578 )
2579 2579 alldata['nbmissingfiles'].append(
2580 2580 (data['p1.nbmissingfiles'], b.hex(), p1.hex())
2581 2581 )
2582 2582 if p2missing:
2583 2583 alldata['nbrevs'].append(
2584 2584 (data['p2.nbrevs'], b.hex(), p2.hex())
2585 2585 )
2586 2586 alldata['nbmissingfiles'].append(
2587 2587 (data['p2.nbmissingfiles'], b.hex(), p2.hex())
2588 2588 )
2589 2589 if dotiming:
2590 2590 begin = util.timer()
2591 2591 mergedata = copies.mergecopies(repo, p1, p2, b)
2592 2592 end = util.timer()
2593 2593 # not very stable timing since we did only one run
2594 2594 data['time'] = end - begin
2595 2595 # mergedata contains five dicts: "copy", "movewithdir",
2596 2596 # "diverge", "renamedelete" and "dirmove".
2597 2597                 # The first 4 are about renamed files, so let's count them.
2598 2598 renames = len(mergedata[0])
2599 2599 renames += len(mergedata[1])
2600 2600 renames += len(mergedata[2])
2601 2601 renames += len(mergedata[3])
2602 2602 data['nbrenamedfiles'] = renames
2603 2603 begin = util.timer()
2604 2604 p1renames = copies.pathcopies(b, p1)
2605 2605 end = util.timer()
2606 2606 data['p1.time'] = end - begin
2607 2607 begin = util.timer()
2608 2608 p2renames = copies.pathcopies(b, p2)
2609 2609 end = util.timer()
2610 2610 data['p2.time'] = end - begin
2611 2611 data['p1.renamedfiles'] = len(p1renames)
2612 2612 data['p2.renamedfiles'] = len(p2renames)
2613 2613
2614 2614 if dostats:
2615 2615 if p1missing:
2616 2616 alldata['parentnbrenames'].append(
2617 2617 (data['p1.renamedfiles'], b.hex(), p1.hex())
2618 2618 )
2619 2619 alldata['parenttime'].append(
2620 2620 (data['p1.time'], b.hex(), p1.hex())
2621 2621 )
2622 2622 if p2missing:
2623 2623 alldata['parentnbrenames'].append(
2624 2624 (data['p2.renamedfiles'], b.hex(), p2.hex())
2625 2625 )
2626 2626 alldata['parenttime'].append(
2627 2627 (data['p2.time'], b.hex(), p2.hex())
2628 2628 )
2629 2629 if p1missing or p2missing:
2630 2630 alldata['totalnbrenames'].append(
2631 2631 (
2632 2632 data['nbrenamedfiles'],
2633 2633 b.hex(),
2634 2634 p1.hex(),
2635 2635 p2.hex(),
2636 2636 )
2637 2637 )
2638 2638 alldata['totaltime'].append(
2639 2639 (data['time'], b.hex(), p1.hex(), p2.hex())
2640 2640 )
2641 2641 fm.startitem()
2642 2642 fm.data(**data)
2643 2643 # make node pretty for the human output
2644 2644 out = data.copy()
2645 2645 out['base'] = fm.hexfunc(b.node())
2646 2646 out['p1.node'] = fm.hexfunc(p1.node())
2647 2647 out['p2.node'] = fm.hexfunc(p2.node())
2648 2648 fm.plain(output % out)
2649 2649
2650 2650 fm.end()
2651 2651 if dostats:
2652 2652 # use a second formatter because the data are quite different, not sure
2653 2653 # how it flies with the templater.
2654 2654 entries = [
2655 2655 ('nbrevs', 'number of revision covered'),
2656 2656 ('nbmissingfiles', 'number of missing files at head'),
2657 2657 ]
2658 2658 if dotiming:
2659 2659 entries.append(
2660 2660 ('parentnbrenames', 'rename from one parent to base')
2661 2661 )
2662 2662 entries.append(('totalnbrenames', 'total number of renames'))
2663 2663 entries.append(('parenttime', 'time for one parent'))
2664 2664 entries.append(('totaltime', 'time for both parents'))
2665 2665 _displaystats(ui, opts, entries, alldata)
2666 2666
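# Illustrative invocation of perf::helper-mergecopies (a sketch only; the
# revset simply narrows the search to recent merges):
#
#   $ hg perf::helper-mergecopies --revs '-1000:' --timing --stats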
2667 2667
2668 2668 @command(
2669 2669 b'perf::helper-pathcopies|perfhelper-pathcopies',
2670 2670 formatteropts
2671 2671 + [
2672 2672 (b'r', b'revs', [], b'restrict search to these revisions'),
2673 2673 (b'', b'timing', False, b'provides extra data (costly)'),
2674 2674 (b'', b'stats', False, b'provides statistic about the measured data'),
2675 2675 ],
2676 2676 )
2677 2677 def perfhelperpathcopies(ui, repo, revs=[], **opts):
2678 2678     """find statistics about potential parameters for `perftracecopies`
2679 2679 
2680 2680     This command finds source-destination pairs relevant for copytracing testing.
2681 2681     It reports values for some of the parameters that impact copy tracing time.
2682 2682 
2683 2683     If `--timing` is set, rename detection is run and the associated timing
2684 2684     will be reported. The extra details come at the cost of a slower command
2685 2685 execution.
2686 2686
2687 2687 Since the rename detection is only run once, other factors might easily
2688 2688 affect the precision of the timing. However it should give a good
2689 2689 approximation of which revision pairs are very costly.
2690 2690 """
2691 2691 opts = _byteskwargs(opts)
2692 2692 fm = ui.formatter(b'perf', opts)
2693 2693 dotiming = opts[b'timing']
2694 2694 dostats = opts[b'stats']
2695 2695
2696 2696 if dotiming:
2697 2697 header = '%12s %12s %12s %12s %12s %12s\n'
2698 2698 output = (
2699 2699 "%(source)12s %(destination)12s "
2700 2700 "%(nbrevs)12d %(nbmissingfiles)12d "
2701 2701 "%(nbrenamedfiles)12d %(time)18.5f\n"
2702 2702 )
2703 2703 header_names = (
2704 2704 "source",
2705 2705 "destination",
2706 2706 "nb-revs",
2707 2707 "nb-files",
2708 2708 "nb-renames",
2709 2709 "time",
2710 2710 )
2711 2711 fm.plain(header % header_names)
2712 2712 else:
2713 2713 header = '%12s %12s %12s %12s\n'
2714 2714 output = (
2715 2715 "%(source)12s %(destination)12s "
2716 2716 "%(nbrevs)12d %(nbmissingfiles)12d\n"
2717 2717 )
2718 2718 fm.plain(header % ("source", "destination", "nb-revs", "nb-files"))
2719 2719
2720 2720 if not revs:
2721 2721 revs = ['all()']
2722 2722 revs = scmutil.revrange(repo, revs)
2723 2723
2724 2724 if dostats:
2725 2725 alldata = {
2726 2726 'nbrevs': [],
2727 2727 'nbmissingfiles': [],
2728 2728 }
2729 2729 if dotiming:
2730 2730 alldata['nbrenames'] = []
2731 2731 alldata['time'] = []
2732 2732
2733 2733 roi = repo.revs('merge() and %ld', revs)
2734 2734 for r in roi:
2735 2735 ctx = repo[r]
2736 2736 p1 = ctx.p1().rev()
2737 2737 p2 = ctx.p2().rev()
2738 2738 bases = repo.changelog._commonancestorsheads(p1, p2)
2739 2739 for p in (p1, p2):
2740 2740 for b in bases:
2741 2741 base = repo[b]
2742 2742 parent = repo[p]
2743 2743 missing = copies._computeforwardmissing(base, parent)
2744 2744 if not missing:
2745 2745 continue
2746 2746 data = {
2747 2747 b'source': base.hex(),
2748 2748 b'destination': parent.hex(),
2749 2749 b'nbrevs': len(repo.revs('only(%d, %d)', p, b)),
2750 2750 b'nbmissingfiles': len(missing),
2751 2751 }
2752 2752 if dostats:
2753 2753 alldata['nbrevs'].append(
2754 2754 (
2755 2755 data['nbrevs'],
2756 2756 base.hex(),
2757 2757 parent.hex(),
2758 2758 )
2759 2759 )
2760 2760 alldata['nbmissingfiles'].append(
2761 2761 (
2762 2762 data['nbmissingfiles'],
2763 2763 base.hex(),
2764 2764 parent.hex(),
2765 2765 )
2766 2766 )
2767 2767 if dotiming:
2768 2768 begin = util.timer()
2769 2769 renames = copies.pathcopies(base, parent)
2770 2770 end = util.timer()
2771 2771 # not very stable timing since we did only one run
2772 2772 data['time'] = end - begin
2773 2773 data['nbrenamedfiles'] = len(renames)
2774 2774 if dostats:
2775 2775 alldata['time'].append(
2776 2776 (
2777 2777 data['time'],
2778 2778 base.hex(),
2779 2779 parent.hex(),
2780 2780 )
2781 2781 )
2782 2782 alldata['nbrenames'].append(
2783 2783 (
2784 2784 data['nbrenamedfiles'],
2785 2785 base.hex(),
2786 2786 parent.hex(),
2787 2787 )
2788 2788 )
2789 2789 fm.startitem()
2790 2790 fm.data(**data)
2791 2791 out = data.copy()
2792 2792 out['source'] = fm.hexfunc(base.node())
2793 2793 out['destination'] = fm.hexfunc(parent.node())
2794 2794 fm.plain(output % out)
2795 2795
2796 2796 fm.end()
2797 2797 if dostats:
2798 2798 entries = [
2799 2799 ('nbrevs', 'number of revision covered'),
2800 2800 ('nbmissingfiles', 'number of missing files at head'),
2801 2801 ]
2802 2802 if dotiming:
2803 2803 entries.append(('nbrenames', 'renamed files'))
2804 2804 entries.append(('time', 'time'))
2805 2805 _displaystats(ui, opts, entries, alldata)
2806 2806
2807 2807
2808 2808 @command(b'perf::cca|perfcca', formatteropts)
2809 2809 def perfcca(ui, repo, **opts):
2810 2810 opts = _byteskwargs(opts)
2811 2811 timer, fm = gettimer(ui, opts)
2812 2812 timer(lambda: scmutil.casecollisionauditor(ui, False, repo.dirstate))
2813 2813 fm.end()
2814 2814
2815 2815
2816 2816 @command(b'perf::fncacheload|perffncacheload', formatteropts)
2817 2817 def perffncacheload(ui, repo, **opts):
2818 2818 opts = _byteskwargs(opts)
2819 2819 timer, fm = gettimer(ui, opts)
2820 2820 s = repo.store
2821 2821
2822 2822 def d():
2823 2823 s.fncache._load()
2824 2824
2825 2825 timer(d)
2826 2826 fm.end()
2827 2827
2828 2828
2829 2829 @command(b'perf::fncachewrite|perffncachewrite', formatteropts)
2830 2830 def perffncachewrite(ui, repo, **opts):
2831 2831 opts = _byteskwargs(opts)
2832 2832 timer, fm = gettimer(ui, opts)
2833 2833 s = repo.store
2834 2834 lock = repo.lock()
2835 2835 s.fncache._load()
2836 2836 tr = repo.transaction(b'perffncachewrite')
2837 2837 tr.addbackup(b'fncache')
2838 2838
2839 2839 def d():
2840 2840 s.fncache._dirty = True
2841 2841 s.fncache.write(tr)
2842 2842
2843 2843 timer(d)
2844 2844 tr.close()
2845 2845 lock.release()
2846 2846 fm.end()
2847 2847
2848 2848
2849 2849 @command(b'perf::fncacheencode|perffncacheencode', formatteropts)
2850 2850 def perffncacheencode(ui, repo, **opts):
2851 2851 opts = _byteskwargs(opts)
2852 2852 timer, fm = gettimer(ui, opts)
2853 2853 s = repo.store
2854 2854 s.fncache._load()
2855 2855
2856 2856 def d():
2857 2857 for p in s.fncache.entries:
2858 2858 s.encode(p)
2859 2859
2860 2860 timer(d)
2861 2861 fm.end()
2862 2862
2863 2863
2864 2864 def _bdiffworker(q, blocks, xdiff, ready, done):
2865 2865 while not done.is_set():
2866 2866 pair = q.get()
2867 2867 while pair is not None:
2868 2868 if xdiff:
2869 2869 mdiff.bdiff.xdiffblocks(*pair)
2870 2870 elif blocks:
2871 2871 mdiff.bdiff.blocks(*pair)
2872 2872 else:
2873 2873 mdiff.textdiff(*pair)
2874 2874 q.task_done()
2875 2875 pair = q.get()
2876 2876 q.task_done() # for the None one
2877 2877 with ready:
2878 2878 ready.wait()
2879 2879
2880 2880
2881 2881 def _manifestrevision(repo, mnode):
2882 2882 ml = repo.manifestlog
2883 2883
2884 2884 if util.safehasattr(ml, b'getstorage'):
2885 2885 store = ml.getstorage(b'')
2886 2886 else:
2887 2887 store = ml._revlog
2888 2888
2889 2889 return store.revision(mnode)
2890 2890
2891 2891
2892 2892 @command(
2893 2893 b'perf::bdiff|perfbdiff',
2894 2894 revlogopts
2895 2895 + formatteropts
2896 2896 + [
2897 2897 (
2898 2898 b'',
2899 2899 b'count',
2900 2900 1,
2901 2901 b'number of revisions to test (when using --startrev)',
2902 2902 ),
2903 2903 (b'', b'alldata', False, b'test bdiffs for all associated revisions'),
2904 2904 (b'', b'threads', 0, b'number of thread to use (disable with 0)'),
2905 2905 (b'', b'blocks', False, b'test computing diffs into blocks'),
2906 2906 (b'', b'xdiff', False, b'use xdiff algorithm'),
2907 2907 ],
2908 2908 b'-c|-m|FILE REV',
2909 2909 )
2910 2910 def perfbdiff(ui, repo, file_, rev=None, count=None, threads=0, **opts):
2911 2911 """benchmark a bdiff between revisions
2912 2912
2913 2913 By default, benchmark a bdiff between its delta parent and itself.
2914 2914
2915 2915 With ``--count``, benchmark bdiffs between delta parents and self for N
2916 2916 revisions starting at the specified revision.
2917 2917
2918 2918 With ``--alldata``, assume the requested revision is a changeset and
2919 2919 measure bdiffs for all changes related to that changeset (manifest
2920 2920 and filelogs).
2921 2921 """
2922 2922 opts = _byteskwargs(opts)
2923 2923
2924 2924 if opts[b'xdiff'] and not opts[b'blocks']:
2925 2925 raise error.CommandError(b'perfbdiff', b'--xdiff requires --blocks')
2926 2926
2927 2927 if opts[b'alldata']:
2928 2928 opts[b'changelog'] = True
2929 2929
2930 2930 if opts.get(b'changelog') or opts.get(b'manifest'):
2931 2931 file_, rev = None, file_
2932 2932 elif rev is None:
2933 2933 raise error.CommandError(b'perfbdiff', b'invalid arguments')
2934 2934
2935 2935 blocks = opts[b'blocks']
2936 2936 xdiff = opts[b'xdiff']
2937 2937 textpairs = []
2938 2938
2939 2939 r = cmdutil.openrevlog(repo, b'perfbdiff', file_, opts)
2940 2940
2941 2941 startrev = r.rev(r.lookup(rev))
2942 2942 for rev in range(startrev, min(startrev + count, len(r) - 1)):
2943 2943 if opts[b'alldata']:
2944 2944 # Load revisions associated with changeset.
2945 2945 ctx = repo[rev]
2946 2946 mtext = _manifestrevision(repo, ctx.manifestnode())
2947 2947 for pctx in ctx.parents():
2948 2948 pman = _manifestrevision(repo, pctx.manifestnode())
2949 2949 textpairs.append((pman, mtext))
2950 2950
2951 2951 # Load filelog revisions by iterating manifest delta.
2952 2952 man = ctx.manifest()
2953 2953 pman = ctx.p1().manifest()
2954 2954 for filename, change in pman.diff(man).items():
2955 2955 fctx = repo.file(filename)
2956 2956 f1 = fctx.revision(change[0][0] or -1)
2957 2957 f2 = fctx.revision(change[1][0] or -1)
2958 2958 textpairs.append((f1, f2))
2959 2959 else:
2960 2960 dp = r.deltaparent(rev)
2961 2961 textpairs.append((r.revision(dp), r.revision(rev)))
2962 2962
2963 2963 withthreads = threads > 0
2964 2964 if not withthreads:
2965 2965
2966 2966 def d():
2967 2967 for pair in textpairs:
2968 2968 if xdiff:
2969 2969 mdiff.bdiff.xdiffblocks(*pair)
2970 2970 elif blocks:
2971 2971 mdiff.bdiff.blocks(*pair)
2972 2972 else:
2973 2973 mdiff.textdiff(*pair)
2974 2974
2975 2975 else:
2976 2976 q = queue()
2977 2977 for i in _xrange(threads):
2978 2978 q.put(None)
2979 2979 ready = threading.Condition()
2980 2980 done = threading.Event()
2981 2981 for i in _xrange(threads):
2982 2982 threading.Thread(
2983 2983 target=_bdiffworker, args=(q, blocks, xdiff, ready, done)
2984 2984 ).start()
2985 2985 q.join()
2986 2986
2987 2987 def d():
2988 2988 for pair in textpairs:
2989 2989 q.put(pair)
2990 2990 for i in _xrange(threads):
2991 2991 q.put(None)
2992 2992 with ready:
2993 2993 ready.notify_all()
2994 2994 q.join()
2995 2995
2996 2996 timer, fm = gettimer(ui, opts)
2997 2997 timer(d)
2998 2998 fm.end()
2999 2999
3000 3000 if withthreads:
3001 3001 done.set()
3002 3002 for i in _xrange(threads):
3003 3003 q.put(None)
3004 3004 with ready:
3005 3005 ready.notify_all()
3006 3006
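# Illustrative invocations of perf::bdiff (a sketch only; revision 1000 is a
# placeholder): diff ten manifest revisions against their delta parents, or
# every text touched by a single changeset.
#
#   $ hg perf::bdiff -m 1000 --count 10
#   $ hg perf::bdiff --alldata 1000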
3007 3007
3008 3008 @command(
3009 3009 b'perf::unbundle',
3010 3010 formatteropts,
3011 3011 b'BUNDLE_FILE',
3012 3012 )
3013 3013 def perf_unbundle(ui, repo, fname, **opts):
3014 3014 """benchmark application of a bundle in a repository.
3015 3015
3016 3016 This does not include the final transaction processing"""
3017 3017
3018 3018 from mercurial import exchange
3019 3019 from mercurial import bundle2
3020 3020 from mercurial import transaction
3021 3021
3022 3022 opts = _byteskwargs(opts)
3023 3023
3024 3024 ### some compatibility hotfix
3025 3025 #
3026 3026     # the data attribute is dropped in 63edc384d3b7, a changeset introducing a
3027 3027     # critical regression that breaks transaction rollback for files that are
3028 3028 # de-inlined.
3029 3029 method = transaction.transaction._addentry
3030 3030 pre_63edc384d3b7 = "data" in getargspec(method).args
3031 3031 # the `detailed_exit_code` attribute is introduced in 33c0c25d0b0f
3032 3032 # a changeset that is a close descendant of 18415fc918a1, the changeset
3033 3033     # that concludes the fix run for the bug introduced in 63edc384d3b7.
3034 3034 args = getargspec(error.Abort.__init__).args
3035 3035 post_18415fc918a1 = "detailed_exit_code" in args
3036 3036
3037 3037 old_max_inline = None
3038 3038 try:
3039 3039 if not (pre_63edc384d3b7 or post_18415fc918a1):
3040 3040 # disable inlining
3041 3041 old_max_inline = mercurial.revlog._maxinline
3042 3042 # large enough to never happen
3043 3043 mercurial.revlog._maxinline = 2 ** 50
3044 3044
3045 3045 with repo.lock():
3046 3046 bundle = [None, None]
3047 3047 orig_quiet = repo.ui.quiet
3048 3048 try:
3049 3049 repo.ui.quiet = True
3050 3050 with open(fname, mode="rb") as f:
3051 3051
3052 3052 def noop_report(*args, **kwargs):
3053 3053 pass
3054 3054
3055 3055 def setup():
3056 3056 gen, tr = bundle
3057 3057 if tr is not None:
3058 3058 tr.abort()
3059 3059 bundle[:] = [None, None]
3060 3060 f.seek(0)
3061 3061 bundle[0] = exchange.readbundle(ui, f, fname)
3062 3062 bundle[1] = repo.transaction(b'perf::unbundle')
3063 3063 # silence the transaction
3064 3064 bundle[1]._report = noop_report
3065 3065
3066 3066 def apply():
3067 3067 gen, tr = bundle
3068 3068 bundle2.applybundle(
3069 3069 repo,
3070 3070 gen,
3071 3071 tr,
3072 3072 source=b'perf::unbundle',
3073 3073 url=fname,
3074 3074 )
3075 3075
3076 3076 timer, fm = gettimer(ui, opts)
3077 3077 timer(apply, setup=setup)
3078 3078 fm.end()
3079 3079 finally:
3080 3080                 repo.ui.quiet = orig_quiet
3081 3081 gen, tr = bundle
3082 3082 if tr is not None:
3083 3083 tr.abort()
3084 3084 finally:
3085 3085 if old_max_inline is not None:
3086 3086 mercurial.revlog._maxinline = old_max_inline
3087 3087
3088 3088
3089 3089 @command(
3090 3090 b'perf::unidiff|perfunidiff',
3091 3091 revlogopts
3092 3092 + formatteropts
3093 3093 + [
3094 3094 (
3095 3095 b'',
3096 3096 b'count',
3097 3097 1,
3098 3098 b'number of revisions to test (when using --startrev)',
3099 3099 ),
3100 3100 (b'', b'alldata', False, b'test unidiffs for all associated revisions'),
3101 3101 ],
3102 3102 b'-c|-m|FILE REV',
3103 3103 )
3104 3104 def perfunidiff(ui, repo, file_, rev=None, count=None, **opts):
3105 3105 """benchmark a unified diff between revisions
3106 3106
3107 3107 This doesn't include any copy tracing - it's just a unified diff
3108 3108 of the texts.
3109 3109
3110 3110 By default, benchmark a diff between its delta parent and itself.
3111 3111
3112 3112 With ``--count``, benchmark diffs between delta parents and self for N
3113 3113 revisions starting at the specified revision.
3114 3114
3115 3115 With ``--alldata``, assume the requested revision is a changeset and
3116 3116 measure diffs for all changes related to that changeset (manifest
3117 3117 and filelogs).
3118 3118 """
3119 3119 opts = _byteskwargs(opts)
3120 3120 if opts[b'alldata']:
3121 3121 opts[b'changelog'] = True
3122 3122
3123 3123 if opts.get(b'changelog') or opts.get(b'manifest'):
3124 3124 file_, rev = None, file_
3125 3125 elif rev is None:
3126 3126 raise error.CommandError(b'perfunidiff', b'invalid arguments')
3127 3127
3128 3128 textpairs = []
3129 3129
3130 3130 r = cmdutil.openrevlog(repo, b'perfunidiff', file_, opts)
3131 3131
3132 3132 startrev = r.rev(r.lookup(rev))
3133 3133 for rev in range(startrev, min(startrev + count, len(r) - 1)):
3134 3134 if opts[b'alldata']:
3135 3135 # Load revisions associated with changeset.
3136 3136 ctx = repo[rev]
3137 3137 mtext = _manifestrevision(repo, ctx.manifestnode())
3138 3138 for pctx in ctx.parents():
3139 3139 pman = _manifestrevision(repo, pctx.manifestnode())
3140 3140 textpairs.append((pman, mtext))
3141 3141
3142 3142 # Load filelog revisions by iterating manifest delta.
3143 3143 man = ctx.manifest()
3144 3144 pman = ctx.p1().manifest()
3145 3145 for filename, change in pman.diff(man).items():
3146 3146 fctx = repo.file(filename)
3147 3147 f1 = fctx.revision(change[0][0] or -1)
3148 3148 f2 = fctx.revision(change[1][0] or -1)
3149 3149 textpairs.append((f1, f2))
3150 3150 else:
3151 3151 dp = r.deltaparent(rev)
3152 3152 textpairs.append((r.revision(dp), r.revision(rev)))
3153 3153
3154 3154 def d():
3155 3155 for left, right in textpairs:
3156 3156 # The date strings don't matter, so we pass empty strings.
3157 3157 headerlines, hunks = mdiff.unidiff(
3158 3158 left, b'', right, b'', b'left', b'right', binary=False
3159 3159 )
3160 3160 # consume iterators in roughly the way patch.py does
3161 3161 b'\n'.join(headerlines)
3162 3162 b''.join(sum((list(hlines) for hrange, hlines in hunks), []))
3163 3163
3164 3164 timer, fm = gettimer(ui, opts)
3165 3165 timer(d)
3166 3166 fm.end()
3167 3167
3168 3168
3169 3169 @command(b'perf::diffwd|perfdiffwd', formatteropts)
3170 3170 def perfdiffwd(ui, repo, **opts):
3171 3171 """Profile diff of working directory changes"""
3172 3172 opts = _byteskwargs(opts)
3173 3173 timer, fm = gettimer(ui, opts)
3174 3174 options = {
3175 3175 'w': 'ignore_all_space',
3176 3176 'b': 'ignore_space_change',
3177 3177 'B': 'ignore_blank_lines',
3178 3178 }
3179 3179
3180 3180 for diffopt in ('', 'w', 'b', 'B', 'wB'):
3181 3181 opts = {options[c]: b'1' for c in diffopt}
3182 3182
3183 3183 def d():
3184 3184 ui.pushbuffer()
3185 3185 commands.diff(ui, repo, **opts)
3186 3186 ui.popbuffer()
3187 3187
3188 3188 diffopt = diffopt.encode('ascii')
3189 3189 title = b'diffopts: %s' % (diffopt and (b'-' + diffopt) or b'none')
3190 3190 timer(d, title=title)
3191 3191 fm.end()
3192 3192
3193 3193
3194 3194 @command(
3195 3195 b'perf::revlogindex|perfrevlogindex',
3196 3196 revlogopts + formatteropts,
3197 3197 b'-c|-m|FILE',
3198 3198 )
3199 3199 def perfrevlogindex(ui, repo, file_=None, **opts):
3200 3200 """Benchmark operations against a revlog index.
3201 3201
3202 3202 This tests constructing a revlog instance, reading index data,
3203 3203 parsing index data, and performing various operations related to
3204 3204 index data.
3205 3205 """
3206 3206
3207 3207 opts = _byteskwargs(opts)
3208 3208
3209 3209 rl = cmdutil.openrevlog(repo, b'perfrevlogindex', file_, opts)
3210 3210
3211 3211 opener = getattr(rl, 'opener') # trick linter
3212 3212 # compat with hg <= 5.8
3213 3213 radix = getattr(rl, 'radix', None)
3214 3214 indexfile = getattr(rl, '_indexfile', None)
3215 3215 if indexfile is None:
3216 3216 # compatibility with <= hg-5.8
3217 3217 indexfile = getattr(rl, 'indexfile')
3218 3218 data = opener.read(indexfile)
3219 3219
3220 3220 header = struct.unpack(b'>I', data[0:4])[0]
3221 3221 version = header & 0xFFFF
3222 3222 if version == 1:
3223 3223 inline = header & (1 << 16)
3224 3224 else:
3225 3225 raise error.Abort(b'unsupported revlog version: %d' % version)
3226 3226
3227 3227 parse_index_v1 = getattr(mercurial.revlog, 'parse_index_v1', None)
3228 3228 if parse_index_v1 is None:
3229 3229 parse_index_v1 = mercurial.revlog.revlogio().parseindex
3230 3230
3231 3231 rllen = len(rl)
3232 3232
3233 3233 node0 = rl.node(0)
3234 3234 node25 = rl.node(rllen // 4)
3235 3235 node50 = rl.node(rllen // 2)
3236 3236 node75 = rl.node(rllen // 4 * 3)
3237 3237 node100 = rl.node(rllen - 1)
3238 3238
3239 3239 allrevs = range(rllen)
3240 3240 allrevsrev = list(reversed(allrevs))
3241 3241 allnodes = [rl.node(rev) for rev in range(rllen)]
3242 3242 allnodesrev = list(reversed(allnodes))
3243 3243
3244 3244 def constructor():
3245 3245 if radix is not None:
3246 3246 revlog(opener, radix=radix)
3247 3247 else:
3248 3248 # hg <= 5.8
3249 3249 revlog(opener, indexfile=indexfile)
3250 3250
3251 3251 def read():
3252 3252 with opener(indexfile) as fh:
3253 3253 fh.read()
3254 3254
3255 3255 def parseindex():
3256 3256 parse_index_v1(data, inline)
3257 3257
3258 3258 def getentry(revornode):
3259 3259 index = parse_index_v1(data, inline)[0]
3260 3260 index[revornode]
3261 3261
3262 3262 def getentries(revs, count=1):
3263 3263 index = parse_index_v1(data, inline)[0]
3264 3264
3265 3265 for i in range(count):
3266 3266 for rev in revs:
3267 3267 index[rev]
3268 3268
3269 3269 def resolvenode(node):
3270 3270 index = parse_index_v1(data, inline)[0]
3271 3271 rev = getattr(index, 'rev', None)
3272 3272 if rev is None:
3273 3273 nodemap = getattr(parse_index_v1(data, inline)[0], 'nodemap', None)
3274 3274 # This only works for the C code.
3275 3275 if nodemap is None:
3276 3276 return
3277 3277 rev = nodemap.__getitem__
3278 3278
3279 3279 try:
3280 3280 rev(node)
3281 3281 except error.RevlogError:
3282 3282 pass
3283 3283
3284 3284 def resolvenodes(nodes, count=1):
3285 3285 index = parse_index_v1(data, inline)[0]
3286 3286 rev = getattr(index, 'rev', None)
3287 3287 if rev is None:
3288 3288 nodemap = getattr(parse_index_v1(data, inline)[0], 'nodemap', None)
3289 3289 # This only works for the C code.
3290 3290 if nodemap is None:
3291 3291 return
3292 3292 rev = nodemap.__getitem__
3293 3293
3294 3294 for i in range(count):
3295 3295 for node in nodes:
3296 3296 try:
3297 3297 rev(node)
3298 3298 except error.RevlogError:
3299 3299 pass
3300 3300
3301 3301 benches = [
3302 3302 (constructor, b'revlog constructor'),
3303 3303 (read, b'read'),
3304 3304 (parseindex, b'create index object'),
3305 3305 (lambda: getentry(0), b'retrieve index entry for rev 0'),
3306 3306 (lambda: resolvenode(b'a' * 20), b'look up missing node'),
3307 3307 (lambda: resolvenode(node0), b'look up node at rev 0'),
3308 3308 (lambda: resolvenode(node25), b'look up node at 1/4 len'),
3309 3309 (lambda: resolvenode(node50), b'look up node at 1/2 len'),
3310 3310 (lambda: resolvenode(node75), b'look up node at 3/4 len'),
3311 3311 (lambda: resolvenode(node100), b'look up node at tip'),
3312 3312 # 2x variation is to measure caching impact.
3313 3313 (lambda: resolvenodes(allnodes), b'look up all nodes (forward)'),
3314 3314 (lambda: resolvenodes(allnodes, 2), b'look up all nodes 2x (forward)'),
3315 3315 (lambda: resolvenodes(allnodesrev), b'look up all nodes (reverse)'),
3316 3316 (
3317 3317 lambda: resolvenodes(allnodesrev, 2),
3318 3318 b'look up all nodes 2x (reverse)',
3319 3319 ),
3320 3320 (lambda: getentries(allrevs), b'retrieve all index entries (forward)'),
3321 3321 (
3322 3322 lambda: getentries(allrevs, 2),
3323 3323 b'retrieve all index entries 2x (forward)',
3324 3324 ),
3325 3325 (
3326 3326 lambda: getentries(allrevsrev),
3327 3327 b'retrieve all index entries (reverse)',
3328 3328 ),
3329 3329 (
3330 3330 lambda: getentries(allrevsrev, 2),
3331 3331 b'retrieve all index entries 2x (reverse)',
3332 3332 ),
3333 3333 ]
3334 3334
3335 3335 for fn, title in benches:
3336 3336 timer, fm = gettimer(ui, opts)
3337 3337 timer(fn, title=title)
3338 3338 fm.end()
3339 3339
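# Illustrative invocations of perf::revlogindex (a sketch only; the file path
# is a placeholder): run the index benchmarks against the changelog or
# against a single filelog.
#
#   $ hg perf::revlogindex -c
#   $ hg perf::revlogindex path/to/tracked-file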
3340 3340
3341 3341 @command(
3342 3342 b'perf::revlogrevisions|perfrevlogrevisions',
3343 3343 revlogopts
3344 3344 + formatteropts
3345 3345 + [
3346 3346 (b'd', b'dist', 100, b'distance between the revisions'),
3347 3347 (b's', b'startrev', 0, b'revision to start reading at'),
3348 3348 (b'', b'reverse', False, b'read in reverse'),
3349 3349 ],
3350 3350 b'-c|-m|FILE',
3351 3351 )
3352 3352 def perfrevlogrevisions(
3353 3353 ui, repo, file_=None, startrev=0, reverse=False, **opts
3354 3354 ):
3355 3355 """Benchmark reading a series of revisions from a revlog.
3356 3356
3357 3357 By default, we read every ``-d/--dist`` revision from 0 to tip of
3358 3358 the specified revlog.
3359 3359
3360 3360 The start revision can be defined via ``-s/--startrev``.
3361 3361 """
3362 3362 opts = _byteskwargs(opts)
3363 3363
3364 3364 rl = cmdutil.openrevlog(repo, b'perfrevlogrevisions', file_, opts)
3365 3365 rllen = getlen(ui)(rl)
3366 3366
3367 3367 if startrev < 0:
3368 3368 startrev = rllen + startrev
3369 3369
3370 3370 def d():
3371 3371 rl.clearcaches()
3372 3372
3373 3373 beginrev = startrev
3374 3374 endrev = rllen
3375 3375 dist = opts[b'dist']
3376 3376
3377 3377 if reverse:
3378 3378 beginrev, endrev = endrev - 1, beginrev - 1
3379 3379 dist = -1 * dist
3380 3380
3381 3381 for x in _xrange(beginrev, endrev, dist):
3382 3382             # Old Mercurial versions don't support passing an int here.
3383 3383 n = rl.node(x)
3384 3384 rl.revision(n)
3385 3385
3386 3386 timer, fm = gettimer(ui, opts)
3387 3387 timer(d)
3388 3388 fm.end()
3389 3389
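# Illustrative invocations of perf::revlogrevisions (a sketch only; the
# numbers are placeholders): read every 100th changelog revision, or walk the
# manifest revlog in reverse down to a start revision.
#
#   $ hg perf::revlogrevisions -c --dist 100
#   $ hg perf::revlogrevisions -m --startrev 10000 --reverse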
3390 3390
3391 3391 @command(
3392 3392 b'perf::revlogwrite|perfrevlogwrite',
3393 3393 revlogopts
3394 3394 + formatteropts
3395 3395 + [
3396 3396 (b's', b'startrev', 1000, b'revision to start writing at'),
3397 3397 (b'', b'stoprev', -1, b'last revision to write'),
3398 3398 (b'', b'count', 3, b'number of passes to perform'),
3399 3399 (b'', b'details', False, b'print timing for every revisions tested'),
3400 3400 (b'', b'source', b'full', b'the kind of data feed in the revlog'),
3401 3401 (b'', b'lazydeltabase', True, b'try the provided delta first'),
3402 3402 (b'', b'clear-caches', True, b'clear revlog cache between calls'),
3403 3403 ],
3404 3404 b'-c|-m|FILE',
3405 3405 )
3406 3406 def perfrevlogwrite(ui, repo, file_=None, startrev=1000, stoprev=-1, **opts):
3407 3407 """Benchmark writing a series of revisions to a revlog.
3408 3408
3409 3409 Possible source values are:
3410 3410 * `full`: add from a full text (default).
3411 3411 * `parent-1`: add from a delta to the first parent
3412 3412 * `parent-2`: add from a delta to the second parent if it exists
3413 3413 (use a delta from the first parent otherwise)
3414 3414 * `parent-smallest`: add from the smallest delta (either p1 or p2)
3415 3415 * `storage`: add from the existing precomputed deltas
3416 3416
3417 3417 Note: This performance command measures performance in a custom way. As a
3418 3418 result some of the global configuration of the 'perf' command does not
3419 3419 apply to it:
3420 3420
3421 3421 * ``pre-run``: disabled
3422 3422
3423 3423 * ``profile-benchmark``: disabled
3424 3424
3425 3425     * ``run-limits``: disabled, use --count instead
3426 3426 """
3427 3427 opts = _byteskwargs(opts)
3428 3428
3429 3429 rl = cmdutil.openrevlog(repo, b'perfrevlogwrite', file_, opts)
3430 3430 rllen = getlen(ui)(rl)
3431 3431 if startrev < 0:
3432 3432 startrev = rllen + startrev
3433 3433 if stoprev < 0:
3434 3434 stoprev = rllen + stoprev
3435 3435
3436 3436 lazydeltabase = opts['lazydeltabase']
3437 3437 source = opts['source']
3438 3438 clearcaches = opts['clear_caches']
3439 3439 validsource = (
3440 3440 b'full',
3441 3441 b'parent-1',
3442 3442 b'parent-2',
3443 3443 b'parent-smallest',
3444 3444 b'storage',
3445 3445 )
3446 3446 if source not in validsource:
3447 3447 raise error.Abort('invalid source type: %s' % source)
3448 3448
3449 3449 ### actually gather results
3450 3450 count = opts['count']
3451 3451 if count <= 0:
3452 3452         raise error.Abort('invalid run count: %d' % count)
3453 3453 allresults = []
3454 3454 for c in range(count):
3455 3455 timing = _timeonewrite(
3456 3456 ui,
3457 3457 rl,
3458 3458 source,
3459 3459 startrev,
3460 3460 stoprev,
3461 3461 c + 1,
3462 3462 lazydeltabase=lazydeltabase,
3463 3463 clearcaches=clearcaches,
3464 3464 )
3465 3465 allresults.append(timing)
3466 3466
3467 3467 ### consolidate the results in a single list
3468 3468 results = []
3469 3469 for idx, (rev, t) in enumerate(allresults[0]):
3470 3470 ts = [t]
3471 3471 for other in allresults[1:]:
3472 3472 orev, ot = other[idx]
3473 3473 assert orev == rev
3474 3474 ts.append(ot)
3475 3475 results.append((rev, ts))
3476 3476 resultcount = len(results)
3477 3477
3478 3478 ### Compute and display relevant statistics
3479 3479
3480 3480 # get a formatter
3481 3481 fm = ui.formatter(b'perf', opts)
3482 3482 displayall = ui.configbool(b"perf", b"all-timing", True)
3483 3483
3484 3484 # print individual details if requested
3485 3485 if opts['details']:
3486 3486 for idx, item in enumerate(results, 1):
3487 3487 rev, data = item
3488 3488 title = 'revisions #%d of %d, rev %d' % (idx, resultcount, rev)
3489 3489 formatone(fm, data, title=title, displayall=displayall)
3490 3490
3491 3491 # sorts results by median time
3492 3492 results.sort(key=lambda x: sorted(x[1])[len(x[1]) // 2])
3493 3493     # list of (name, index) to display
3494 3494 relevants = [
3495 3495 ("min", 0),
3496 3496 ("10%", resultcount * 10 // 100),
3497 3497 ("25%", resultcount * 25 // 100),
3498 3498         ("50%", resultcount * 50 // 100),
3499 3499 ("75%", resultcount * 75 // 100),
3500 3500 ("90%", resultcount * 90 // 100),
3501 3501 ("95%", resultcount * 95 // 100),
3502 3502 ("99%", resultcount * 99 // 100),
3503 3503 ("99.9%", resultcount * 999 // 1000),
3504 3504 ("99.99%", resultcount * 9999 // 10000),
3505 3505 ("99.999%", resultcount * 99999 // 100000),
3506 3506 ("max", -1),
3507 3507 ]
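    # each index above is resultcount * pct // 100 (a floor); with a small
    # number of runs several percentile entries may point at the same result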
3508 3508 if not ui.quiet:
3509 3509 for name, idx in relevants:
3510 3510 data = results[idx]
3511 3511 title = '%s of %d, rev %d' % (name, resultcount, data[0])
3512 3512 formatone(fm, data[1], title=title, displayall=displayall)
3513 3513
3514 3514 # XXX summing that many floats will not be very precise, we ignore this fact
3515 3515 # for now
3516 3516 totaltime = []
3517 3517 for item in allresults:
3518 3518 totaltime.append(
3519 3519 (
3520 3520 sum(x[1][0] for x in item),
3521 3521 sum(x[1][1] for x in item),
3522 3522 sum(x[1][2] for x in item),
3523 3523 )
3524 3524 )
3525 3525 formatone(
3526 3526 fm,
3527 3527 totaltime,
3528 3528 title="total time (%d revs)" % resultcount,
3529 3529 displayall=displayall,
3530 3530 )
3531 3531 fm.end()
3532 3532
3533 3533
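# Minimal transaction stand-in: the benchmark code paths only ever call
# `tr.add`, so a no-op implementation is sufficient here.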
3534 3534 class _faketr:
3535 3535 def add(s, x, y, z=None):
3536 3536 return None
3537 3537
3538 3538
3539 3539 def _timeonewrite(
3540 3540 ui,
3541 3541 orig,
3542 3542 source,
3543 3543 startrev,
3544 3544 stoprev,
3545 3545 runidx=None,
3546 3546 lazydeltabase=True,
3547 3547 clearcaches=True,
3548 3548 ):
3549 3549 timings = []
3550 3550 tr = _faketr()
3551 3551 with _temprevlog(ui, orig, startrev) as dest:
3552 3552 if hasattr(dest, "delta_config"):
3553 3553 dest.delta_config.lazy_delta_base = lazydeltabase
3554 3554 else:
3555 3555 dest._lazydeltabase = lazydeltabase
3556 3556 revs = list(orig.revs(startrev, stoprev))
3557 3557 total = len(revs)
3558 3558 topic = 'adding'
3559 3559 if runidx is not None:
3560 3560 topic += ' (run #%d)' % runidx
3561 3561 # Support both old and new progress API
3562 3562 if util.safehasattr(ui, 'makeprogress'):
3563 3563 progress = ui.makeprogress(topic, unit='revs', total=total)
3564 3564
3565 3565 def updateprogress(pos):
3566 3566 progress.update(pos)
3567 3567
3568 3568 def completeprogress():
3569 3569 progress.complete()
3570 3570
3571 3571 else:
3572 3572
3573 3573 def updateprogress(pos):
3574 3574 ui.progress(topic, pos, unit='revs', total=total)
3575 3575
3576 3576 def completeprogress():
3577 3577 ui.progress(topic, None, unit='revs', total=total)
3578 3578
3579 3579 for idx, rev in enumerate(revs):
3580 3580 updateprogress(idx)
3581 3581 addargs, addkwargs = _getrevisionseed(orig, rev, tr, source)
3582 3582 if clearcaches:
3583 3583 dest.index.clearcaches()
3584 3584 dest.clearcaches()
3585 3585 with timeone() as r:
3586 3586 dest.addrawrevision(*addargs, **addkwargs)
3587 3587 timings.append((rev, r[0]))
3588 3588 updateprogress(total)
3589 3589 completeprogress()
3590 3590 return timings
3591 3591
3592 3592
3593 3593 def _getrevisionseed(orig, rev, tr, source):
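    # Build the (args, kwargs) pair later passed to `addrawrevision`,
    # computing either a full text or a cached delta depending on `source`.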
3594 3594 from mercurial.node import nullid
3595 3595
3596 3596 linkrev = orig.linkrev(rev)
3597 3597 node = orig.node(rev)
3598 3598 p1, p2 = orig.parents(node)
3599 3599 flags = orig.flags(rev)
3600 3600 cachedelta = None
3601 3601 text = None
3602 3602
3603 3603 if source == b'full':
3604 3604 text = orig.revision(rev)
3605 3605 elif source == b'parent-1':
3606 3606 baserev = orig.rev(p1)
3607 3607 cachedelta = (baserev, orig.revdiff(p1, rev))
3608 3608 elif source == b'parent-2':
3609 3609 parent = p2
3610 3610 if p2 == nullid:
3611 3611 parent = p1
3612 3612 baserev = orig.rev(parent)
3613 3613 cachedelta = (baserev, orig.revdiff(parent, rev))
3614 3614 elif source == b'parent-smallest':
3615 3615 p1diff = orig.revdiff(p1, rev)
3616 3616 parent = p1
3617 3617 diff = p1diff
3618 3618 if p2 != nullid:
3619 3619 p2diff = orig.revdiff(p2, rev)
3620 3620 if len(p1diff) > len(p2diff):
3621 3621 parent = p2
3622 3622 diff = p2diff
3623 3623 baserev = orig.rev(parent)
3624 3624 cachedelta = (baserev, diff)
3625 3625 elif source == b'storage':
3626 3626 baserev = orig.deltaparent(rev)
3627 3627 cachedelta = (baserev, orig.revdiff(orig.node(baserev), rev))
3628 3628
3629 3629 return (
3630 3630 (text, tr, linkrev, p1, p2),
3631 3631 {'node': node, 'flags': flags, 'cachedelta': cachedelta},
3632 3632 )
3633 3633
3634 3634
3635 3635 @contextlib.contextmanager
3636 3636 def _temprevlog(ui, orig, truncaterev):
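    # Create a throw-away copy of `orig` in a temporary directory, truncated
    # to `truncaterev`, so that the remaining revisions can be re-added by
    # the benchmark.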
3637 3637 from mercurial import vfs as vfsmod
3638 3638
3639 3639 if orig._inline:
3640 3640 raise error.Abort('not supporting inline revlog (yet)')
3641 3641 revlogkwargs = {}
3642 3642 k = 'upperboundcomp'
3643 3643 if util.safehasattr(orig, k):
3644 3644 revlogkwargs[k] = getattr(orig, k)
3645 3645
3646 3646 indexfile = getattr(orig, '_indexfile', None)
3647 3647 if indexfile is None:
3648 3648 # compatibility with <= hg-5.8
3649 3649 indexfile = getattr(orig, 'indexfile')
3650 3650 origindexpath = orig.opener.join(indexfile)
3651 3651
3652 3652 datafile = getattr(orig, '_datafile', getattr(orig, 'datafile'))
3653 3653 origdatapath = orig.opener.join(datafile)
3654 3654 radix = b'revlog'
3655 3655 indexname = b'revlog.i'
3656 3656 dataname = b'revlog.d'
3657 3657
3658 3658 tmpdir = tempfile.mkdtemp(prefix='tmp-hgperf-')
3659 3659 try:
3660 3660 # copy the data file in a temporary directory
3661 3661 ui.debug('copying data in %s\n' % tmpdir)
3662 3662 destindexpath = os.path.join(tmpdir, 'revlog.i')
3663 3663 destdatapath = os.path.join(tmpdir, 'revlog.d')
3664 3664 shutil.copyfile(origindexpath, destindexpath)
3665 3665 shutil.copyfile(origdatapath, destdatapath)
3666 3666
3667 3667 # remove the data we want to add again
3668 3668 ui.debug('truncating data to be rewritten\n')
3669 3669 with open(destindexpath, 'ab') as index:
3670 3670 index.seek(0)
3671 3671 index.truncate(truncaterev * orig._io.size)
3672 3672 with open(destdatapath, 'ab') as data:
3673 3673 data.seek(0)
3674 3674 data.truncate(orig.start(truncaterev))
3675 3675
3676 3676 # instantiate a new revlog from the temporary copy
3677 3677 ui.debug('opening the truncated copy as a new revlog\n')
3678 3678 vfs = vfsmod.vfs(tmpdir)
3679 3679 vfs.options = getattr(orig.opener, 'options', None)
3680 3680
3681 3681 try:
3682 3682 dest = revlog(vfs, radix=radix, **revlogkwargs)
3683 3683 except TypeError:
3684 3684 dest = revlog(
3685 3685 vfs, indexfile=indexname, datafile=dataname, **revlogkwargs
3686 3686 )
3687 3687 if dest._inline:
3688 3688 raise error.Abort('not supporting inline revlog (yet)')
3689 3689 # make sure internals are initialized
3690 3690 dest.revision(len(dest) - 1)
3691 3691 yield dest
3692 3692 del dest, vfs
3693 3693 finally:
3694 3694 shutil.rmtree(tmpdir, True)
3695 3695
3696 3696
3697 3697 @command(
3698 3698 b'perf::revlogchunks|perfrevlogchunks',
3699 3699 revlogopts
3700 3700 + formatteropts
3701 3701 + [
3702 3702 (b'e', b'engines', b'', b'compression engines to use'),
3703 3703 (b's', b'startrev', 0, b'revision to start at'),
3704 3704 ],
3705 3705 b'-c|-m|FILE',
3706 3706 )
3707 3707 def perfrevlogchunks(ui, repo, file_=None, engines=None, startrev=0, **opts):
3708 3708 """Benchmark operations on revlog chunks.
3709 3709
3710 3710 Logically, each revlog is a collection of fulltext revisions. However,
3711 3711 stored within each revlog are "chunks" of possibly compressed data. This
3712 3712 data needs to be read and decompressed or compressed and written.
3713 3713
3714 3714 This command measures the time it takes to read+decompress and recompress
3715 3715 chunks in a revlog. It effectively isolates I/O and compression performance.
3716 3716 For measurements of higher-level operations like resolving revisions,
3717 3717 see ``perfrevlogrevisions`` and ``perfrevlogrevision``.
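
    Example (hypothetical invocation; available engines depend on the build):

      $ hg perfrevlogchunks -c --engines zlib,zstd --startrev 1000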
3718 3718 """
3719 3719 opts = _byteskwargs(opts)
3720 3720
3721 3721 rl = cmdutil.openrevlog(repo, b'perfrevlogchunks', file_, opts)
3722 3722
3723 3723 # - _chunkraw was renamed to _getsegmentforrevs
3724 3724 # - _getsegmentforrevs was moved onto the inner object
3725 3725 try:
3726 3726 segmentforrevs = rl._inner.get_segment_for_revs
3727 3727 except AttributeError:
3728 3728 try:
3729 3729 segmentforrevs = rl._getsegmentforrevs
3730 3730 except AttributeError:
3731 3731 segmentforrevs = rl._chunkraw
3732 3732
3733 3733 # Verify engines argument.
3734 3734 if engines:
3735 3735 engines = {e.strip() for e in engines.split(b',')}
3736 3736 for engine in engines:
3737 3737 try:
3738 3738 util.compressionengines[engine]
3739 3739 except KeyError:
3740 3740 raise error.Abort(b'unknown compression engine: %s' % engine)
3741 3741 else:
3742 3742 engines = []
3743 3743 for e in util.compengines:
3744 3744 engine = util.compengines[e]
3745 3745 try:
3746 3746 if engine.available():
3747 3747 engine.revlogcompressor().compress(b'dummy')
3748 3748 engines.append(e)
3749 3749 except NotImplementedError:
3750 3750 pass
3751 3751
3752 3752 revs = list(rl.revs(startrev, len(rl) - 1))
3753 3753
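    # Compatibility helpers: recent revlogs expose a `reading()` context
    # manager, while older versions need an explicit file handle on the
    # index (inline) or data file.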
3754 3754 @contextlib.contextmanager
3755 3755 def reading(rl):
3756 3756 if getattr(rl, 'reading', None) is not None:
3757 3757 with rl.reading():
3758 3758 yield None
3759 3759 elif rl._inline:
3760 3760 indexfile = getattr(rl, '_indexfile', None)
3761 3761 if indexfile is None:
3762 3762 # compatibility with <= hg-5.8
3763 3763 indexfile = getattr(rl, 'indexfile')
3764 3764 yield getsvfs(repo)(indexfile)
3765 3765 else:
3766 3766 datafile = getattr(rl, '_datafile', getattr(rl, 'datafile'))
3767 3767 yield getsvfs(repo)(datafile)
3768 3768
3769 3769 if getattr(rl, 'reading', None) is not None:
3770 3770
3771 3771 @contextlib.contextmanager
3772 3772 def lazy_reading(rl):
3773 3773 with rl.reading():
3774 3774 yield
3775 3775
3776 3776 else:
3777 3777
3778 3778 @contextlib.contextmanager
3779 3779 def lazy_reading(rl):
3780 3780 yield
3781 3781
3782 3782 def doread():
3783 3783 rl.clearcaches()
3784 3784 for rev in revs:
3785 3785 with lazy_reading(rl):
3786 3786 segmentforrevs(rev, rev)
3787 3787
3788 3788 def doreadcachedfh():
3789 3789 rl.clearcaches()
3790 3790 with reading(rl) as fh:
3791 3791 if fh is not None:
3792 3792 for rev in revs:
3793 3793 segmentforrevs(rev, rev, df=fh)
3794 3794 else:
3795 3795 for rev in revs:
3796 3796 segmentforrevs(rev, rev)
3797 3797
3798 3798 def doreadbatch():
3799 3799 rl.clearcaches()
3800 3800 with lazy_reading(rl):
3801 3801 segmentforrevs(revs[0], revs[-1])
3802 3802
3803 3803 def doreadbatchcachedfh():
3804 3804 rl.clearcaches()
3805 3805 with reading(rl) as fh:
3806 3806 if fh is not None:
3807 3807 segmentforrevs(revs[0], revs[-1], df=fh)
3808 3808 else:
3809 3809 segmentforrevs(revs[0], revs[-1])
3810 3810
3811 3811 def dochunk():
3812 3812 rl.clearcaches()
3813 # _chunk used to be available directly on the revlog
3814 _chunk = getattr(rl, '_inner', rl)._chunk
3813 3815 with reading(rl) as fh:
3814 3816 if fh is not None:
3815 3817 for rev in revs:
3816 rl._chunk(rev, df=fh)
3818 _chunk(rev, df=fh)
3817 3819 else:
3818 3820 for rev in revs:
3819 rl._chunk(rev)
3821 _chunk(rev)
3820 3822
3821 3823 chunks = [None]
3822 3824
3823 3825 def dochunkbatch():
3824 3826 rl.clearcaches()
3825 3827 with reading(rl) as fh:
3826 3828 if fh is not None:
3827 3829 # Save chunks as a side-effect.
3828 3830 chunks[0] = rl._chunks(revs, df=fh)
3829 3831 else:
3830 3832 # Save chunks as a side-effect.
3831 3833 chunks[0] = rl._chunks(revs)
3832 3834
3833 3835 def docompress(compressor):
3834 3836 rl.clearcaches()
3835 3837
3836 3838 compressor_holder = getattr(rl, '_inner', rl)
3837 3839
3838 3840 try:
3839 3841 # Swap in the requested compression engine.
3840 3842 oldcompressor = compressor_holder._compressor
3841 3843 compressor_holder._compressor = compressor
3842 3844 for chunk in chunks[0]:
3843 3845 rl.compress(chunk)
3844 3846 finally:
3845 3847 compressor_holder._compressor = oldcompressor
3846 3848
3847 3849 benches = [
3848 3850 (lambda: doread(), b'read'),
3849 3851 (lambda: doreadcachedfh(), b'read w/ reused fd'),
3850 3852 (lambda: doreadbatch(), b'read batch'),
3851 3853 (lambda: doreadbatchcachedfh(), b'read batch w/ reused fd'),
3852 3854 (lambda: dochunk(), b'chunk'),
3853 3855 (lambda: dochunkbatch(), b'chunk batch'),
3854 3856 ]
3855 3857
3856 3858 for engine in sorted(engines):
3857 3859 compressor = util.compengines[engine].revlogcompressor()
3858 3860 benches.append(
3859 3861 (
3860 3862 functools.partial(docompress, compressor),
3861 3863 b'compress w/ %s' % engine,
3862 3864 )
3863 3865 )
3864 3866
3865 3867 for fn, title in benches:
3866 3868 timer, fm = gettimer(ui, opts)
3867 3869 timer(fn, title=title)
3868 3870 fm.end()
3869 3871
3870 3872
3871 3873 @command(
3872 3874 b'perf::revlogrevision|perfrevlogrevision',
3873 3875 revlogopts
3874 3876 + formatteropts
3875 3877 + [(b'', b'cache', False, b'use caches instead of clearing')],
3876 3878 b'-c|-m|FILE REV',
3877 3879 )
3878 3880 def perfrevlogrevision(ui, repo, file_, rev=None, cache=None, **opts):
3879 3881 """Benchmark obtaining a revlog revision.
3880 3882
3881 3883 Obtaining a revlog revision consists of roughly the following steps:
3882 3884
3883 3885 1. Compute the delta chain
3884 3886 2. Slice the delta chain if applicable
3885 3887 3. Obtain the raw chunks for that delta chain
3886 3888 4. Decompress each raw chunk
3887 3889 5. Apply binary patches to obtain fulltext
3888 3890 6. Verify hash of fulltext
3889 3891
3890 3892 This command measures the time spent in each of these phases.
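
    Example (hypothetical invocation):

      $ hg perfrevlogrevision -m 4000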
3891 3893 """
3892 3894 opts = _byteskwargs(opts)
3893 3895
3894 3896 if opts.get(b'changelog') or opts.get(b'manifest'):
3895 3897 file_, rev = None, file_
3896 3898 elif rev is None:
3897 3899 raise error.CommandError(b'perfrevlogrevision', b'invalid arguments')
3898 3900
3899 3901 r = cmdutil.openrevlog(repo, b'perfrevlogrevision', file_, opts)
3900 3902
3901 3903 # _chunkraw was renamed to _getsegmentforrevs.
3902 3904 try:
3903 3905 segmentforrevs = r._inner.get_segment_for_revs
3904 3906 except AttributeError:
3905 3907 try:
3906 3908 segmentforrevs = r._getsegmentforrevs
3907 3909 except AttributeError:
3908 3910 segmentforrevs = r._chunkraw
3909 3911
3910 3912 node = r.lookup(rev)
3911 3913 rev = r.rev(node)
3912 3914
3913 3915 if getattr(r, 'reading', None) is not None:
3914 3916
3915 3917 @contextlib.contextmanager
3916 3918 def lazy_reading(r):
3917 3919 with r.reading():
3918 3920 yield
3919 3921
3920 3922 else:
3921 3923
3922 3924 @contextlib.contextmanager
3923 3925 def lazy_reading(r):
3924 3926 yield
3925 3927
3926 3928 def getrawchunks(data, chain):
3927 3929 start = r.start
3928 3930 length = r.length
3929 3931 inline = r._inline
3930 3932 try:
3931 3933 iosize = r.index.entry_size
3932 3934 except AttributeError:
3933 3935 iosize = r._io.size
3934 3936 buffer = util.buffer
3935 3937
3936 3938 chunks = []
3937 3939 ladd = chunks.append
3938 3940 for idx, item in enumerate(chain):
3939 3941 offset = start(item[0])
3940 3942 bits = data[idx]
3941 3943 for rev in item:
3942 3944 chunkstart = start(rev)
3943 3945 if inline:
3944 3946 chunkstart += (rev + 1) * iosize
3945 3947 chunklength = length(rev)
3946 3948 ladd(buffer(bits, chunkstart - offset, chunklength))
3947 3949
3948 3950 return chunks
3949 3951
3950 3952 def dodeltachain(rev):
3951 3953 if not cache:
3952 3954 r.clearcaches()
3953 3955 r._deltachain(rev)
3954 3956
3955 3957 def doread(chain):
3956 3958 if not cache:
3957 3959 r.clearcaches()
3958 3960 for item in slicedchain:
3959 3961 with lazy_reading(r):
3960 3962 segmentforrevs(item[0], item[-1])
3961 3963
3962 3964 def doslice(r, chain, size):
3963 3965 for s in slicechunk(r, chain, targetsize=size):
3964 3966 pass
3965 3967
3966 3968 def dorawchunks(data, chain):
3967 3969 if not cache:
3968 3970 r.clearcaches()
3969 3971 getrawchunks(data, chain)
3970 3972
3971 3973 def dodecompress(chunks):
3972 3974 decomp = r.decompress
3973 3975 for chunk in chunks:
3974 3976 decomp(chunk)
3975 3977
3976 3978 def dopatch(text, bins):
3977 3979 if not cache:
3978 3980 r.clearcaches()
3979 3981 mdiff.patches(text, bins)
3980 3982
3981 3983 def dohash(text):
3982 3984 if not cache:
3983 3985 r.clearcaches()
3984 3986 r.checkhash(text, node, rev=rev)
3985 3987
3986 3988 def dorevision():
3987 3989 if not cache:
3988 3990 r.clearcaches()
3989 3991 r.revision(node)
3990 3992
3991 3993 try:
3992 3994 from mercurial.revlogutils.deltas import slicechunk
3993 3995 except ImportError:
3994 3996 slicechunk = getattr(revlog, '_slicechunk', None)
3995 3997
3996 3998 size = r.length(rev)
3997 3999 chain = r._deltachain(rev)[0]
3998 4000
3999 4001 with_sparse_read = False
4000 4002 if hasattr(r, 'data_config'):
4001 4003 with_sparse_read = r.data_config.with_sparse_read
4002 4004 elif hasattr(r, '_withsparseread'):
4003 4005 with_sparse_read = r._withsparseread
4004 4006 if with_sparse_read:
4005 4007 slicedchain = (chain,)
4006 4008 else:
4007 4009 slicedchain = tuple(slicechunk(r, chain, targetsize=size))
4008 4010 data = [segmentforrevs(seg[0], seg[-1])[1] for seg in slicedchain]
4009 4011 rawchunks = getrawchunks(data, slicedchain)
4010 4012 bins = r._chunks(chain)
4011 4013 text = bytes(bins[0])
4012 4014 bins = bins[1:]
4013 4015 text = mdiff.patches(text, bins)
4014 4016
4015 4017 benches = [
4016 4018 (lambda: dorevision(), b'full'),
4017 4019 (lambda: dodeltachain(rev), b'deltachain'),
4018 4020 (lambda: doread(chain), b'read'),
4019 4021 ]
4020 4022
4021 4023 if with_sparse_read:
4022 4024 slicing = (lambda: doslice(r, chain, size), b'slice-sparse-chain')
4023 4025 benches.append(slicing)
4024 4026
4025 4027 benches.extend(
4026 4028 [
4027 4029 (lambda: dorawchunks(data, slicedchain), b'rawchunks'),
4028 4030 (lambda: dodecompress(rawchunks), b'decompress'),
4029 4031 (lambda: dopatch(text, bins), b'patch'),
4030 4032 (lambda: dohash(text), b'hash'),
4031 4033 ]
4032 4034 )
4033 4035
4034 4036 timer, fm = gettimer(ui, opts)
4035 4037 for fn, title in benches:
4036 4038 timer(fn, title=title)
4037 4039 fm.end()
4038 4040
4039 4041
4040 4042 @command(
4041 4043 b'perf::revset|perfrevset',
4042 4044 [
4043 4045 (b'C', b'clear', False, b'clear volatile cache between each call.'),
4044 4046 (b'', b'contexts', False, b'obtain changectx for each revision'),
4045 4047 ]
4046 4048 + formatteropts,
4047 4049 b"REVSET",
4048 4050 )
4049 4051 def perfrevset(ui, repo, expr, clear=False, contexts=False, **opts):
4050 4052 """benchmark the execution time of a revset
4051 4053
4052 4054 Use the --clear option if you need to evaluate the impact of building the
4053 4055 volatile revision set caches on revset execution. Volatile caches hold
4054 4056 filtering- and obsolescence-related data."""
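    # Example (hypothetical invocation):
    #   $ hg perfrevset 'draft() and ancestors(tip)' --contexts --clear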
4055 4057 opts = _byteskwargs(opts)
4056 4058
4057 4059 timer, fm = gettimer(ui, opts)
4058 4060
4059 4061 def d():
4060 4062 if clear:
4061 4063 repo.invalidatevolatilesets()
4062 4064 if contexts:
4063 4065 for ctx in repo.set(expr):
4064 4066 pass
4065 4067 else:
4066 4068 for r in repo.revs(expr):
4067 4069 pass
4068 4070
4069 4071 timer(d)
4070 4072 fm.end()
4071 4073
4072 4074
4073 4075 @command(
4074 4076 b'perf::volatilesets|perfvolatilesets',
4075 4077 [
4076 4078 (b'', b'clear-obsstore', False, b'drop obsstore between each call.'),
4077 4079 ]
4078 4080 + formatteropts,
4079 4081 )
4080 4082 def perfvolatilesets(ui, repo, *names, **opts):
4081 4083 """benchmark the computation of various volatile set
4082 4084
4083 4085 Volatile set computes element related to filtering and obsolescence."""
4084 4086 opts = _byteskwargs(opts)
4085 4087 timer, fm = gettimer(ui, opts)
4086 4088 repo = repo.unfiltered()
4087 4089
4088 4090 def getobs(name):
4089 4091 def d():
4090 4092 repo.invalidatevolatilesets()
4091 4093 if opts[b'clear_obsstore']:
4092 4094 clearfilecache(repo, b'obsstore')
4093 4095 obsolete.getrevs(repo, name)
4094 4096
4095 4097 return d
4096 4098
4097 4099 allobs = sorted(obsolete.cachefuncs)
4098 4100 if names:
4099 4101 allobs = [n for n in allobs if n in names]
4100 4102
4101 4103 for name in allobs:
4102 4104 timer(getobs(name), title=name)
4103 4105
4104 4106 def getfiltered(name):
4105 4107 def d():
4106 4108 repo.invalidatevolatilesets()
4107 4109 if opts[b'clear_obsstore']:
4108 4110 clearfilecache(repo, b'obsstore')
4109 4111 repoview.filterrevs(repo, name)
4110 4112
4111 4113 return d
4112 4114
4113 4115 allfilter = sorted(repoview.filtertable)
4114 4116 if names:
4115 4117 allfilter = [n for n in allfilter if n in names]
4116 4118
4117 4119 for name in allfilter:
4118 4120 timer(getfiltered(name), title=name)
4119 4121 fm.end()
4120 4122
4121 4123
4122 4124 @command(
4123 4125 b'perf::branchmap|perfbranchmap',
4124 4126 [
4125 4127 (b'f', b'full', False, b'Includes build time of subset'),
4126 4128 (
4127 4129 b'',
4128 4130 b'clear-revbranch',
4129 4131 False,
4130 4132 b'purge the revbranch cache between computation',
4131 4133 ),
4132 4134 ]
4133 4135 + formatteropts,
4134 4136 )
4135 4137 def perfbranchmap(ui, repo, *filternames, **opts):
4136 4138 """benchmark the update of a branchmap
4137 4139
4138 4140 This benchmarks the full repo.branchmap() call with read and write disabled
4139 4141 """
4140 4142 opts = _byteskwargs(opts)
4141 4143 full = opts.get(b"full", False)
4142 4144 clear_revbranch = opts.get(b"clear_revbranch", False)
4143 4145 timer, fm = gettimer(ui, opts)
4144 4146
4145 4147 def getbranchmap(filtername):
4146 4148 """generate a benchmark function for the filtername"""
4147 4149 if filtername is None:
4148 4150 view = repo
4149 4151 else:
4150 4152 view = repo.filtered(filtername)
4151 4153 if util.safehasattr(view._branchcaches, '_per_filter'):
4152 4154 filtered = view._branchcaches._per_filter
4153 4155 else:
4154 4156 # older versions
4155 4157 filtered = view._branchcaches
4156 4158
4157 4159 def d():
4158 4160 if clear_revbranch:
4159 4161 repo.revbranchcache()._clear()
4160 4162 if full:
4161 4163 view._branchcaches.clear()
4162 4164 else:
4163 4165 filtered.pop(filtername, None)
4164 4166 view.branchmap()
4165 4167
4166 4168 return d
4167 4169
4168 4170 # add filter in smaller subset to bigger subset
4169 4171 possiblefilters = set(repoview.filtertable)
4170 4172 if filternames:
4171 4173 possiblefilters &= set(filternames)
4172 4174 subsettable = getbranchmapsubsettable()
4173 4175 allfilters = []
4174 4176 while possiblefilters:
4175 4177 for name in possiblefilters:
4176 4178 subset = subsettable.get(name)
4177 4179 if subset not in possiblefilters:
4178 4180 break
4179 4181 else:
4180 4182 assert False, b'subset cycle %s!' % possiblefilters
4181 4183 allfilters.append(name)
4182 4184 possiblefilters.remove(name)
4183 4185
4184 4186 # warm the cache
4185 4187 if not full:
4186 4188 for name in allfilters:
4187 4189 repo.filtered(name).branchmap()
4188 4190 if not filternames or b'unfiltered' in filternames:
4189 4191 # add unfiltered
4190 4192 allfilters.append(None)
4191 4193
4192 4194 if util.safehasattr(branchmap.branchcache, 'fromfile'):
4193 4195 branchcacheread = safeattrsetter(branchmap.branchcache, b'fromfile')
4194 4196 branchcacheread.set(classmethod(lambda *args: None))
4195 4197 else:
4196 4198 # older versions
4197 4199 branchcacheread = safeattrsetter(branchmap, b'read')
4198 4200 branchcacheread.set(lambda *args: None)
4199 4201 branchcachewrite = safeattrsetter(branchmap.branchcache, b'write')
4200 4202 branchcachewrite.set(lambda *args: None)
4201 4203 try:
4202 4204 for name in allfilters:
4203 4205 printname = name
4204 4206 if name is None:
4205 4207 printname = b'unfiltered'
4206 4208 timer(getbranchmap(name), title=printname)
4207 4209 finally:
4208 4210 branchcacheread.restore()
4209 4211 branchcachewrite.restore()
4210 4212 fm.end()
4211 4213
4212 4214
4213 4215 @command(
4214 4216 b'perf::branchmapupdate|perfbranchmapupdate',
4215 4217 [
4216 4218 (b'', b'base', [], b'subset of revisions to start from'),
4217 4219 (b'', b'target', [], b'subset of revisions to end with'),
4218 4220 (b'', b'clear-caches', False, b'clear caches between each run'),
4219 4221 ]
4220 4222 + formatteropts,
4221 4223 )
4222 4224 def perfbranchmapupdate(ui, repo, base=(), target=(), **opts):
4223 4225 """benchmark branchmap update from for <base> revs to <target> revs
4224 4226
4225 4227 If `--clear-caches` is passed, the following items will be reset before
4226 4228 each update:
4227 4229 * the changelog instance and associated indexes
4228 4230 * the rev-branch-cache instance
4229 4231
4230 4232 Examples:
4231 4233
4232 4234 # update for the last revision only
4233 4235 $ hg perfbranchmapupdate --base 'not tip' --target 'tip'
4234 4236
4235 4237 # update for changes coming with a new branch
4236 4238 $ hg perfbranchmapupdate --base 'stable' --target 'default'
4237 4239 """
4238 4240 from mercurial import branchmap
4239 4241 from mercurial import repoview
4240 4242
4241 4243 opts = _byteskwargs(opts)
4242 4244 timer, fm = gettimer(ui, opts)
4243 4245 clearcaches = opts[b'clear_caches']
4244 4246 unfi = repo.unfiltered()
4245 4247 x = [None] # used to pass data between closures
4246 4248
4247 4249 # we use a `list` here to avoid possible side effect from smartset
4248 4250 baserevs = list(scmutil.revrange(repo, base))
4249 4251 targetrevs = list(scmutil.revrange(repo, target))
4250 4252 if not baserevs:
4251 4253 raise error.Abort(b'no revisions selected for --base')
4252 4254 if not targetrevs:
4253 4255 raise error.Abort(b'no revisions selected for --target')
4254 4256
4255 4257 # make sure the target branchmap also contains the one in the base
4256 4258 targetrevs = list(set(baserevs) | set(targetrevs))
4257 4259 targetrevs.sort()
4258 4260
4259 4261 cl = repo.changelog
4260 4262 allbaserevs = list(cl.ancestors(baserevs, inclusive=True))
4261 4263 allbaserevs.sort()
4262 4264 alltargetrevs = frozenset(cl.ancestors(targetrevs, inclusive=True))
4263 4265
4264 4266 newrevs = list(alltargetrevs.difference(allbaserevs))
4265 4267 newrevs.sort()
4266 4268
4267 4269 allrevs = frozenset(unfi.changelog.revs())
4268 4270 basefilterrevs = frozenset(allrevs.difference(allbaserevs))
4269 4271 targetfilterrevs = frozenset(allrevs.difference(alltargetrevs))
4270 4272
4271 4273 def basefilter(repo, visibilityexceptions=None):
4272 4274 return basefilterrevs
4273 4275
4274 4276 def targetfilter(repo, visibilityexceptions=None):
4275 4277 return targetfilterrevs
4276 4278
4277 4279 msg = b'benchmark of branchmap with %d revisions with %d new ones\n'
4278 4280 ui.status(msg % (len(allbaserevs), len(newrevs)))
4279 4281 if targetfilterrevs:
4280 4282 msg = b'(%d revisions still filtered)\n'
4281 4283 ui.status(msg % len(targetfilterrevs))
4282 4284
4283 4285 try:
4284 4286 repoview.filtertable[b'__perf_branchmap_update_base'] = basefilter
4285 4287 repoview.filtertable[b'__perf_branchmap_update_target'] = targetfilter
4286 4288
4287 4289 baserepo = repo.filtered(b'__perf_branchmap_update_base')
4288 4290 targetrepo = repo.filtered(b'__perf_branchmap_update_target')
4289 4291
4290 4292 # try to find an existing branchmap to reuse
4291 4293 subsettable = getbranchmapsubsettable()
4292 4294 candidatefilter = subsettable.get(None)
4293 4295 while candidatefilter is not None:
4294 4296 candidatebm = repo.filtered(candidatefilter).branchmap()
4295 4297 if candidatebm.validfor(baserepo):
4296 4298 filtered = repoview.filterrevs(repo, candidatefilter)
4297 4299 missing = [r for r in allbaserevs if r in filtered]
4298 4300 base = candidatebm.copy()
4299 4301 base.update(baserepo, missing)
4300 4302 break
4301 4303 candidatefilter = subsettable.get(candidatefilter)
4302 4304 else:
4303 4305 # no suitable subset was found
4304 4306 base = branchmap.branchcache()
4305 4307 base.update(baserepo, allbaserevs)
4306 4308
4307 4309 def setup():
4308 4310 x[0] = base.copy()
4309 4311 if clearcaches:
4310 4312 unfi._revbranchcache = None
4311 4313 clearchangelog(repo)
4312 4314
4313 4315 def bench():
4314 4316 x[0].update(targetrepo, newrevs)
4315 4317
4316 4318 timer(bench, setup=setup)
4317 4319 fm.end()
4318 4320 finally:
4319 4321 repoview.filtertable.pop(b'__perf_branchmap_update_base', None)
4320 4322 repoview.filtertable.pop(b'__perf_branchmap_update_target', None)
4321 4323
4322 4324
4323 4325 @command(
4324 4326 b'perf::branchmapload|perfbranchmapload',
4325 4327 [
4326 4328 (b'f', b'filter', b'', b'Specify repoview filter'),
4327 4329 (b'', b'list', False, b'List branchmap filter caches'),
4328 4330 (b'', b'clear-revlogs', False, b'refresh changelog and manifest'),
4329 4331 ]
4330 4332 + formatteropts,
4331 4333 )
4332 4334 def perfbranchmapload(ui, repo, filter=b'', list=False, **opts):
4333 4335 """benchmark reading the branchmap"""
4334 4336 opts = _byteskwargs(opts)
4335 4337 clearrevlogs = opts[b'clear_revlogs']
4336 4338
4337 4339 if list:
4338 4340 for name, kind, st in repo.cachevfs.readdir(stat=True):
4339 4341 if name.startswith(b'branch2'):
4340 4342 filtername = name.partition(b'-')[2] or b'unfiltered'
4341 4343 ui.status(
4342 4344 b'%s - %s\n' % (filtername, util.bytecount(st.st_size))
4343 4345 )
4344 4346 return
4345 4347 if not filter:
4346 4348 filter = None
4347 4349 subsettable = getbranchmapsubsettable()
4348 4350 if filter is None:
4349 4351 repo = repo.unfiltered()
4350 4352 else:
4351 4353 repo = repoview.repoview(repo, filter)
4352 4354
4353 4355 repo.branchmap() # make sure we have a relevant, up to date branchmap
4354 4356
4355 4357 try:
4356 4358 fromfile = branchmap.branchcache.fromfile
4357 4359 except AttributeError:
4358 4360 # older versions
4359 4361 fromfile = branchmap.read
4360 4362
4361 4363 currentfilter = filter
4362 4364 # try once without timer, the filter may not be cached
4363 4365 while fromfile(repo) is None:
4364 4366 currentfilter = subsettable.get(currentfilter)
4365 4367 if currentfilter is None:
4366 4368 raise error.Abort(
4367 4369 b'No branchmap cached for %s repo' % (filter or b'unfiltered')
4368 4370 )
4369 4371 repo = repo.filtered(currentfilter)
4370 4372 timer, fm = gettimer(ui, opts)
4371 4373
4372 4374 def setup():
4373 4375 if clearrevlogs:
4374 4376 clearchangelog(repo)
4375 4377
4376 4378 def bench():
4377 4379 fromfile(repo)
4378 4380
4379 4381 timer(bench, setup=setup)
4380 4382 fm.end()
4381 4383
4382 4384
4383 4385 @command(b'perf::loadmarkers|perfloadmarkers')
4384 4386 def perfloadmarkers(ui, repo):
4385 4387 """benchmark the time to parse the on-disk markers for a repo
4386 4388
4387 4389 Result is the number of markers in the repo."""
4388 4390 timer, fm = gettimer(ui)
4389 4391 svfs = getsvfs(repo)
4390 4392 timer(lambda: len(obsolete.obsstore(repo, svfs)))
4391 4393 fm.end()
4392 4394
4393 4395
4394 4396 @command(
4395 4397 b'perf::lrucachedict|perflrucachedict',
4396 4398 formatteropts
4397 4399 + [
4398 4400 (b'', b'costlimit', 0, b'maximum total cost of items in cache'),
4399 4401 (b'', b'mincost', 0, b'smallest cost of items in cache'),
4400 4402 (b'', b'maxcost', 100, b'maximum cost of items in cache'),
4401 4403 (b'', b'size', 4, b'size of cache'),
4402 4404 (b'', b'gets', 10000, b'number of key lookups'),
4403 4405 (b'', b'sets', 10000, b'number of key sets'),
4404 4406 (b'', b'mixed', 10000, b'number of mixed mode operations'),
4405 4407 (
4406 4408 b'',
4407 4409 b'mixedgetfreq',
4408 4410 50,
4409 4411 b'frequency of get vs set ops in mixed mode',
4410 4412 ),
4411 4413 ],
4412 4414 norepo=True,
4413 4415 )
4414 4416 def perflrucache(
4415 4417 ui,
4416 4418 mincost=0,
4417 4419 maxcost=100,
4418 4420 costlimit=0,
4419 4421 size=4,
4420 4422 gets=10000,
4421 4423 sets=10000,
4422 4424 mixed=10000,
4423 4425 mixedgetfreq=50,
4424 4426 **opts
4425 4427 ):
4426 4428 opts = _byteskwargs(opts)
4427 4429
4428 4430 def doinit():
4429 4431 for i in _xrange(10000):
4430 4432 util.lrucachedict(size)
4431 4433
4432 4434 costrange = list(range(mincost, maxcost + 1))
4433 4435
4434 4436 values = []
4435 4437 for i in _xrange(size):
4436 4438 values.append(random.randint(0, _maxint))
4437 4439
4438 4440 # Get mode fills the cache and tests raw lookup performance with no
4439 4441 # eviction.
4440 4442 getseq = []
4441 4443 for i in _xrange(gets):
4442 4444 getseq.append(random.choice(values))
4443 4445
4444 4446 def dogets():
4445 4447 d = util.lrucachedict(size)
4446 4448 for v in values:
4447 4449 d[v] = v
4448 4450 for key in getseq:
4449 4451 value = d[key]
4450 4452 value # silence pyflakes warning
4451 4453
4452 4454 def dogetscost():
4453 4455 d = util.lrucachedict(size, maxcost=costlimit)
4454 4456 for i, v in enumerate(values):
4455 4457 d.insert(v, v, cost=costs[i])
4456 4458 for key in getseq:
4457 4459 try:
4458 4460 value = d[key]
4459 4461 value # silence pyflakes warning
4460 4462 except KeyError:
4461 4463 pass
4462 4464
4463 4465 # Set mode tests insertion speed with cache eviction.
4464 4466 setseq = []
4465 4467 costs = []
4466 4468 for i in _xrange(sets):
4467 4469 setseq.append(random.randint(0, _maxint))
4468 4470 costs.append(random.choice(costrange))
4469 4471
4470 4472 def doinserts():
4471 4473 d = util.lrucachedict(size)
4472 4474 for v in setseq:
4473 4475 d.insert(v, v)
4474 4476
4475 4477 def doinsertscost():
4476 4478 d = util.lrucachedict(size, maxcost=costlimit)
4477 4479 for i, v in enumerate(setseq):
4478 4480 d.insert(v, v, cost=costs[i])
4479 4481
4480 4482 def dosets():
4481 4483 d = util.lrucachedict(size)
4482 4484 for v in setseq:
4483 4485 d[v] = v
4484 4486
4485 4487 # Mixed mode randomly performs gets and sets with eviction.
4486 4488 mixedops = []
4487 4489 for i in _xrange(mixed):
4488 4490 r = random.randint(0, 100)
4489 4491 if r < mixedgetfreq:
4490 4492 op = 0
4491 4493 else:
4492 4494 op = 1
4493 4495
4494 4496 mixedops.append(
4495 4497 (op, random.randint(0, size * 2), random.choice(costrange))
4496 4498 )
4497 4499
4498 4500 def domixed():
4499 4501 d = util.lrucachedict(size)
4500 4502
4501 4503 for op, v, cost in mixedops:
4502 4504 if op == 0:
4503 4505 try:
4504 4506 d[v]
4505 4507 except KeyError:
4506 4508 pass
4507 4509 else:
4508 4510 d[v] = v
4509 4511
4510 4512 def domixedcost():
4511 4513 d = util.lrucachedict(size, maxcost=costlimit)
4512 4514
4513 4515 for op, v, cost in mixedops:
4514 4516 if op == 0:
4515 4517 try:
4516 4518 d[v]
4517 4519 except KeyError:
4518 4520 pass
4519 4521 else:
4520 4522 d.insert(v, v, cost=cost)
4521 4523
4522 4524 benches = [
4523 4525 (doinit, b'init'),
4524 4526 ]
4525 4527
4526 4528 if costlimit:
4527 4529 benches.extend(
4528 4530 [
4529 4531 (dogetscost, b'gets w/ cost limit'),
4530 4532 (doinsertscost, b'inserts w/ cost limit'),
4531 4533 (domixedcost, b'mixed w/ cost limit'),
4532 4534 ]
4533 4535 )
4534 4536 else:
4535 4537 benches.extend(
4536 4538 [
4537 4539 (dogets, b'gets'),
4538 4540 (doinserts, b'inserts'),
4539 4541 (dosets, b'sets'),
4540 4542 (domixed, b'mixed'),
4541 4543 ]
4542 4544 )
4543 4545
4544 4546 for fn, title in benches:
4545 4547 timer, fm = gettimer(ui, opts)
4546 4548 timer(fn, title=title)
4547 4549 fm.end()
4548 4550
4549 4551
4550 4552 @command(
4551 4553 b'perf::write|perfwrite',
4552 4554 formatteropts
4553 4555 + [
4554 4556 (b'', b'write-method', b'write', b'ui write method'),
4555 4557 (b'', b'nlines', 100, b'number of lines'),
4556 4558 (b'', b'nitems', 100, b'number of items (per line)'),
4557 4559 (b'', b'item', b'x', b'item that is written'),
4558 4560 (b'', b'batch-line', None, b'pass whole line to write method at once'),
4559 4561 (b'', b'flush-line', None, b'flush after each line'),
4560 4562 ],
4561 4563 )
4562 4564 def perfwrite(ui, repo, **opts):
4563 4565 """microbenchmark ui.write (and others)"""
4564 4566 opts = _byteskwargs(opts)
4565 4567
4566 4568 write = getattr(ui, _sysstr(opts[b'write_method']))
4567 4569 nlines = int(opts[b'nlines'])
4568 4570 nitems = int(opts[b'nitems'])
4569 4571 item = opts[b'item']
4570 4572 batch_line = opts.get(b'batch_line')
4571 4573 flush_line = opts.get(b'flush_line')
4572 4574
4573 4575 if batch_line:
4574 4576 line = item * nitems + b'\n'
4575 4577
4576 4578 def benchmark():
4577 4579 for i in pycompat.xrange(nlines):
4578 4580 if batch_line:
4579 4581 write(line)
4580 4582 else:
4581 4583 for i in pycompat.xrange(nitems):
4582 4584 write(item)
4583 4585 write(b'\n')
4584 4586 if flush_line:
4585 4587 ui.flush()
4586 4588 ui.flush()
4587 4589
4588 4590 timer, fm = gettimer(ui, opts)
4589 4591 timer(benchmark)
4590 4592 fm.end()
4591 4593
4592 4594
4593 4595 def uisetup(ui):
4594 4596 if util.safehasattr(cmdutil, b'openrevlog') and not util.safehasattr(
4595 4597 commands, b'debugrevlogopts'
4596 4598 ):
4597 4599 # for "historical portability":
4598 4600 # In this case, Mercurial should be 1.9 (or a79fea6b3e77) -
4599 4601 # 3.7 (or 5606f7d0d063). Therefore, '--dir' option for
4600 4602 # openrevlog() should cause failure, because it has been
4601 4603 # available since 3.5 (or 49c583ca48c4).
4602 4604 def openrevlog(orig, repo, cmd, file_, opts):
4603 4605 if opts.get(b'dir') and not util.safehasattr(repo, b'dirlog'):
4604 4606 raise error.Abort(
4605 4607 b"This version doesn't support --dir option",
4606 4608 hint=b"use 3.5 or later",
4607 4609 )
4608 4610 return orig(repo, cmd, file_, opts)
4609 4611
4610 4612 name = _sysstr(b'openrevlog')
4611 4613 extensions.wrapfunction(cmdutil, name, openrevlog)
4612 4614
4613 4615
4614 4616 @command(
4615 4617 b'perf::progress|perfprogress',
4616 4618 formatteropts
4617 4619 + [
4618 4620 (b'', b'topic', b'topic', b'topic for progress messages'),
4619 4621 (b'c', b'total', 1000000, b'total value we are progressing to'),
4620 4622 ],
4621 4623 norepo=True,
4622 4624 )
4623 4625 def perfprogress(ui, topic=None, total=None, **opts):
4624 4626 """printing of progress bars"""
4625 4627 opts = _byteskwargs(opts)
4626 4628
4627 4629 timer, fm = gettimer(ui, opts)
4628 4630
4629 4631 def doprogress():
4630 4632 with ui.makeprogress(topic, total=total) as progress:
4631 4633 for i in _xrange(total):
4632 4634 progress.increment()
4633 4635
4634 4636 timer(doprogress)
4635 4637 fm.end()
@@ -1,3948 +1,3948 b''
1 1 # revlog.py - storage back-end for mercurial
2 2 # coding: utf8
3 3 #
4 4 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8
9 9 """Storage back-end for Mercurial.
10 10
11 11 This provides efficient delta storage with O(1) retrieve and append
12 12 and O(changes) merge between branches.
13 13 """
14 14
15 15
16 16 import binascii
17 17 import collections
18 18 import contextlib
19 19 import io
20 20 import os
21 21 import struct
22 22 import weakref
23 23 import zlib
24 24
25 25 # import stuff from node for others to import from revlog
26 26 from .node import (
27 27 bin,
28 28 hex,
29 29 nullrev,
30 30 sha1nodeconstants,
31 31 short,
32 32 wdirrev,
33 33 )
34 34 from .i18n import _
35 35 from .revlogutils.constants import (
36 36 ALL_KINDS,
37 37 CHANGELOGV2,
38 38 COMP_MODE_DEFAULT,
39 39 COMP_MODE_INLINE,
40 40 COMP_MODE_PLAIN,
41 41 DELTA_BASE_REUSE_NO,
42 42 DELTA_BASE_REUSE_TRY,
43 43 ENTRY_RANK,
44 44 FEATURES_BY_VERSION,
45 45 FLAG_GENERALDELTA,
46 46 FLAG_INLINE_DATA,
47 47 INDEX_HEADER,
48 48 KIND_CHANGELOG,
49 49 KIND_FILELOG,
50 50 RANK_UNKNOWN,
51 51 REVLOGV0,
52 52 REVLOGV1,
53 53 REVLOGV1_FLAGS,
54 54 REVLOGV2,
55 55 REVLOGV2_FLAGS,
56 56 REVLOG_DEFAULT_FLAGS,
57 57 REVLOG_DEFAULT_FORMAT,
58 58 REVLOG_DEFAULT_VERSION,
59 59 SUPPORTED_FLAGS,
60 60 )
61 61 from .revlogutils.flagutil import (
62 62 REVIDX_DEFAULT_FLAGS,
63 63 REVIDX_ELLIPSIS,
64 64 REVIDX_EXTSTORED,
65 65 REVIDX_FLAGS_ORDER,
66 66 REVIDX_HASCOPIESINFO,
67 67 REVIDX_ISCENSORED,
68 68 REVIDX_RAWTEXT_CHANGING_FLAGS,
69 69 )
70 70 from .thirdparty import attr
71 71 from . import (
72 72 ancestor,
73 73 dagop,
74 74 error,
75 75 mdiff,
76 76 policy,
77 77 pycompat,
78 78 revlogutils,
79 79 templatefilters,
80 80 util,
81 81 )
82 82 from .interfaces import (
83 83 repository,
84 84 util as interfaceutil,
85 85 )
86 86 from .revlogutils import (
87 87 deltas as deltautil,
88 88 docket as docketutil,
89 89 flagutil,
90 90 nodemap as nodemaputil,
91 91 randomaccessfile,
92 92 revlogv0,
93 93 rewrite,
94 94 sidedata as sidedatautil,
95 95 )
96 96 from .utils import (
97 97 storageutil,
98 98 stringutil,
99 99 )
100 100
101 101 # bare references to all the names to prevent pyflakes complaints
102 102 # We need these names available in the module for extensions.
103 103
104 104 REVLOGV0
105 105 REVLOGV1
106 106 REVLOGV2
107 107 CHANGELOGV2
108 108 FLAG_INLINE_DATA
109 109 FLAG_GENERALDELTA
110 110 REVLOG_DEFAULT_FLAGS
111 111 REVLOG_DEFAULT_FORMAT
112 112 REVLOG_DEFAULT_VERSION
113 113 REVLOGV1_FLAGS
114 114 REVLOGV2_FLAGS
115 115 REVIDX_ISCENSORED
116 116 REVIDX_ELLIPSIS
117 117 REVIDX_HASCOPIESINFO
118 118 REVIDX_EXTSTORED
119 119 REVIDX_DEFAULT_FLAGS
120 120 REVIDX_FLAGS_ORDER
121 121 REVIDX_RAWTEXT_CHANGING_FLAGS
122 122
123 123 parsers = policy.importmod('parsers')
124 124 rustancestor = policy.importrust('ancestor')
125 125 rustdagop = policy.importrust('dagop')
126 126 rustrevlog = policy.importrust('revlog')
127 127
128 128 # Aliased for performance.
129 129 _zlibdecompress = zlib.decompress
130 130
131 131 # max size of inline data embedded into a revlog
132 132 _maxinline = 131072
133 133
134 134 # Flag processors for REVIDX_ELLIPSIS.
135 135 def ellipsisreadprocessor(rl, text):
136 136 return text, False
137 137
138 138
139 139 def ellipsiswriteprocessor(rl, text):
140 140 return text, False
141 141
142 142
143 143 def ellipsisrawprocessor(rl, text):
144 144 return False
145 145
146 146
147 147 ellipsisprocessor = (
148 148 ellipsisreadprocessor,
149 149 ellipsiswriteprocessor,
150 150 ellipsisrawprocessor,
151 151 )
152 152
153 153
154 154 def _verify_revision(rl, skipflags, state, node):
155 155 """Verify the integrity of the given revlog ``node`` while providing a hook
156 156 point for extensions to influence the operation."""
157 157 if skipflags:
158 158 state[b'skipread'].add(node)
159 159 else:
160 160 # Side-effect: read content and verify hash.
161 161 rl.revision(node)
162 162
163 163
164 164 # True if a fast implementation for persistent-nodemap is available
165 165 #
166 166 # We also consider we have a "fast" implementation in "pure" python because
167 167 # people using pure don't really have performance considerations (and a
168 168 # wheelbarrow of other slowness sources)
169 169 HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or hasattr(
170 170 parsers, 'BaseIndexObject'
171 171 )
172 172
173 173
174 174 @interfaceutil.implementer(repository.irevisiondelta)
175 175 @attr.s(slots=True)
176 176 class revlogrevisiondelta:
177 177 node = attr.ib()
178 178 p1node = attr.ib()
179 179 p2node = attr.ib()
180 180 basenode = attr.ib()
181 181 flags = attr.ib()
182 182 baserevisionsize = attr.ib()
183 183 revision = attr.ib()
184 184 delta = attr.ib()
185 185 sidedata = attr.ib()
186 186 protocol_flags = attr.ib()
187 187 linknode = attr.ib(default=None)
188 188
189 189
190 190 @interfaceutil.implementer(repository.iverifyproblem)
191 191 @attr.s(frozen=True)
192 192 class revlogproblem:
193 193 warning = attr.ib(default=None)
194 194 error = attr.ib(default=None)
195 195 node = attr.ib(default=None)
196 196
197 197
198 198 def parse_index_v1(data, inline):
199 199 # call the C implementation to parse the index data
200 200 index, cache = parsers.parse_index2(data, inline)
201 201 return index, cache
202 202
203 203
204 204 def parse_index_v2(data, inline):
205 205 # call the C implementation to parse the index data
206 206 index, cache = parsers.parse_index2(data, inline, format=REVLOGV2)
207 207 return index, cache
208 208
209 209
210 210 def parse_index_cl_v2(data, inline):
211 211 # call the C implementation to parse the index data
212 212 index, cache = parsers.parse_index2(data, inline, format=CHANGELOGV2)
213 213 return index, cache
214 214
215 215
216 216 if hasattr(parsers, 'parse_index_devel_nodemap'):
217 217
218 218 def parse_index_v1_nodemap(data, inline):
219 219 index, cache = parsers.parse_index_devel_nodemap(data, inline)
220 220 return index, cache
221 221
222 222
223 223 else:
224 224 parse_index_v1_nodemap = None
225 225
226 226
227 227 def parse_index_v1_mixed(data, inline):
228 228 index, cache = parse_index_v1(data, inline)
229 229 return rustrevlog.MixedIndex(index), cache
230 230
231 231
232 232 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
233 233 # signed integer)
234 234 _maxentrysize = 0x7FFFFFFF
235 235
236 236 FILE_TOO_SHORT_MSG = _(
237 237 b'cannot read from revlog %s;'
238 238 b' expected %d bytes from offset %d, data size is %d'
239 239 )
240 240
241 241 hexdigits = b'0123456789abcdefABCDEF'
242 242
243 243
244 244 class _Config:
245 245 def copy(self):
246 246 return self.__class__(**self.__dict__)
247 247
248 248
249 249 @attr.s()
250 250 class FeatureConfig(_Config):
251 251 """Hold configuration values about the available revlog features"""
252 252
253 253 # the default compression engine
254 254 compression_engine = attr.ib(default=b'zlib')
255 255 # compression engines options
256 256 compression_engine_options = attr.ib(default=attr.Factory(dict))
257 257
258 258 # can we use censor on this revlog
259 259 censorable = attr.ib(default=False)
260 260 # does this revlog use the "side data" feature
261 261 has_side_data = attr.ib(default=False)
262 262 # might remove rank configuration once the computation has no impact
263 263 compute_rank = attr.ib(default=False)
264 264 # parent order is supposed to be semantically irrelevant, so we
265 265 # normally re-sort parents to ensure that the first parent is non-null,
266 266 # if there is a non-null parent at all.
267 267 # filelog abuses the parent order as flag to mark some instances of
268 268 # meta-encoded files, so allow it to disable this behavior.
269 269 canonical_parent_order = attr.ib(default=False)
270 270 # can ellipsis commit be used
271 271 enable_ellipsis = attr.ib(default=False)
272 272
273 273 def copy(self):
274 274 new = super().copy()
275 275 new.compression_engine_options = self.compression_engine_options.copy()
276 276 return new
277 277
278 278
279 279 @attr.s()
280 280 class DataConfig(_Config):
281 281 """Hold configuration value about how the revlog data are read"""
282 282
283 283 # should we try to open the "pending" version of the revlog
284 284 try_pending = attr.ib(default=False)
285 285 # should we try to open the "splitted" version of the revlog
286 286 try_split = attr.ib(default=False)
287 287 # When True, indexfile should be opened with checkambig=True at writing,
288 288 # to avoid file stat ambiguity.
289 289 check_ambig = attr.ib(default=False)
290 290
291 291 # If true, use mmap instead of reading to deal with large index
292 292 mmap_large_index = attr.ib(default=False)
293 293 # how much data is large
294 294 mmap_index_threshold = attr.ib(default=None)
295 295 # How much data to read and cache into the raw revlog data cache.
296 296 chunk_cache_size = attr.ib(default=65536)
297 297
298 298 # Allow sparse reading of the revlog data
299 299 with_sparse_read = attr.ib(default=False)
300 300 # minimal density of a sparse read chunk
301 301 sr_density_threshold = attr.ib(default=0.50)
302 302 # minimal size of data we skip when performing sparse read
303 303 sr_min_gap_size = attr.ib(default=262144)
304 304
305 305 # are delta encoded against arbitrary bases.
306 306 generaldelta = attr.ib(default=False)
307 307
308 308
309 309 @attr.s()
310 310 class DeltaConfig(_Config):
311 311 """Hold configuration value about how new delta are computed
312 312
313 313 Some attributes are duplicated from DataConfig to help havign each object
314 314 self contained.
315 315 """
316 316
317 317 # can delta be encoded against arbitrary bases.
318 318 general_delta = attr.ib(default=False)
319 319 # Allow sparse writing of the revlog data
320 320 sparse_revlog = attr.ib(default=False)
321 321 # maximum length of a delta chain
322 322 max_chain_len = attr.ib(default=None)
323 323 # Maximum distance between delta chain base start and end
324 324 max_deltachain_span = attr.ib(default=-1)
325 325 # If `upper_bound_comp` is not None, this is the expected maximal gain from
326 326 # compression for the data content.
327 327 upper_bound_comp = attr.ib(default=None)
329 329 # Should we try a delta against both parents
329 329 delta_both_parents = attr.ib(default=True)
330 330 # Test delta base candidate group by chunk of this maximal size.
331 331 candidate_group_chunk_size = attr.ib(default=0)
332 332 # Should we display debug information about delta computation
333 333 debug_delta = attr.ib(default=False)
334 334 # trust incoming delta by default
335 335 lazy_delta = attr.ib(default=True)
336 336 # trust the base of incoming delta by default
337 337 lazy_delta_base = attr.ib(default=False)
338 338
339 339
340 340 class _InnerRevlog:
341 341 """An inner layer of the revlog object
342 342
343 343 That layer exists to be able to delegate some operations to Rust; its
344 344 boundaries are arbitrary and based on what we can delegate to Rust.
345 345 """
346 346
347 347 def __init__(
348 348 self,
349 349 opener,
350 350 index,
351 351 index_file,
352 352 data_file,
353 353 sidedata_file,
354 354 inline,
355 355 data_config,
356 356 feature_config,
357 357 chunk_cache,
358 358 default_compression_header,
359 359 ):
360 360 self.opener = opener
361 361 self.index = index
362 362
363 363 self.__index_file = index_file
364 364 self.data_file = data_file
365 365 self.sidedata_file = sidedata_file
366 366 self.inline = inline
367 367 self.data_config = data_config
368 368 self.feature_config = feature_config
369 369
370 370 self._default_compression_header = default_compression_header
371 371
372 372 # index
373 373
374 374 # 3-tuple of file handles being used for active writing.
375 375 self._writinghandles = None
376 376
377 377 self._segmentfile = randomaccessfile.randomaccessfile(
378 378 self.opener,
379 379 (self.index_file if self.inline else self.data_file),
380 380 self.data_config.chunk_cache_size,
381 381 chunk_cache,
382 382 )
383 383 self._segmentfile_sidedata = randomaccessfile.randomaccessfile(
384 384 self.opener,
385 385 self.sidedata_file,
386 386 self.data_config.chunk_cache_size,
387 387 )
388 388
389 389 # revlog header -> revlog compressor
390 390 self._decompressors = {}
391 391
392 392 @property
393 393 def index_file(self):
394 394 return self.__index_file
395 395
396 396 @index_file.setter
397 397 def index_file(self, new_index_file):
398 398 self.__index_file = new_index_file
399 399 if self.inline:
400 400 self._segmentfile.filename = new_index_file
401 401
402 402 # Derived from index values.
403 403
404 404 def start(self, rev):
405 405 """the offset of the data chunk for this revision"""
406 406 return int(self.index[rev][0] >> 16)
407 407
408 408 def length(self, rev):
409 409 """the length of the data chunk for this revision"""
410 410 return self.index[rev][1]
411 411
412 412 def end(self, rev):
413 413 """the end of the data chunk for this revision"""
414 414 return self.start(rev) + self.length(rev)
415 415
416 416 @util.propertycache
417 417 def _compressor(self):
418 418 engine = util.compengines[self.feature_config.compression_engine]
419 419 return engine.revlogcompressor(
420 420 self.feature_config.compression_engine_options
421 421 )
422 422
423 423 @util.propertycache
424 424 def _decompressor(self):
425 425 """the default decompressor"""
426 426 if self._default_compression_header is None:
427 427 return None
428 428 t = self._default_compression_header
429 429 c = self._get_decompressor(t)
430 430 return c.decompress
431 431
432 432 def _get_decompressor(self, t):
433 433 try:
434 434 compressor = self._decompressors[t]
435 435 except KeyError:
436 436 try:
437 437 engine = util.compengines.forrevlogheader(t)
438 438 compressor = engine.revlogcompressor(
439 439 self.feature_config.compression_engine_options
440 440 )
441 441 self._decompressors[t] = compressor
442 442 except KeyError:
443 443 raise error.RevlogError(
444 444 _(b'unknown compression type %s') % binascii.hexlify(t)
445 445 )
446 446 return compressor
447 447
448 448 def compress(self, data):
449 449 """Generate a possibly-compressed representation of data."""
450 450 if not data:
451 451 return b'', data
452 452
453 453 compressed = self._compressor.compress(data)
454 454
455 455 if compressed:
456 456 # The revlog compressor added the header in the returned data.
457 457 return b'', compressed
458 458
459 459 if data[0:1] == b'\0':
460 460 return b'', data
461 461 return b'u', data
462 462
463 463 def decompress(self, data):
464 464 """Decompress a revlog chunk.
465 465
466 466 The chunk is expected to begin with a header identifying the
467 467 format type so it can be routed to an appropriate decompressor.
468 468 """
469 469 if not data:
470 470 return data
471 471
472 472 # Revlogs are read much more frequently than they are written and many
473 473 # chunks only take microseconds to decompress, so performance is
474 474 # important here.
475 475 #
476 476 # We can make a few assumptions about revlogs:
477 477 #
478 478 # 1) the majority of chunks will be compressed (as opposed to inline
479 479 # raw data).
481 481 # 2) decompressing *any* data will likely be at least 10x slower than
481 481 # returning raw inline data.
482 482 # 3) we want to prioritize common and officially supported compression
483 483 # engines
484 484 #
485 485 # It follows that we want to optimize for "decompress compressed data
486 486 # when encoded with common and officially supported compression engines"
487 487 # case over "raw data" and "data encoded by less common or non-official
488 488 # compression engines." That is why we have the inline lookup first
489 489 # followed by the compengines lookup.
490 490 #
491 491 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
492 492 # compressed chunks. And this matters for changelog and manifest reads.
493 493 t = data[0:1]
494 494
495 495 if t == b'x':
496 496 try:
497 497 return _zlibdecompress(data)
498 498 except zlib.error as e:
499 499 raise error.RevlogError(
500 500 _(b'revlog decompress error: %s')
501 501 % stringutil.forcebytestr(e)
502 502 )
503 503 # '\0' is more common than 'u' so it goes first.
504 504 elif t == b'\0':
505 505 return data
506 506 elif t == b'u':
507 507 return util.buffer(data, 1)
508 508
509 509 compressor = self._get_decompressor(t)
510 510
511 511 return compressor.decompress(data)
512 512
513 513 @contextlib.contextmanager
514 514 def reading(self):
515 515 """Context manager that keeps data and sidedata files open for reading"""
516 516 if len(self.index) == 0:
517 517 yield # nothing to be read
518 518 else:
519 519 with self._segmentfile.reading():
520 520 with self._segmentfile_sidedata.reading():
521 521 yield
522 522
523 523 @property
524 524 def is_writing(self):
525 525 """True is a writing context is open"""
526 526 return self._writinghandles is not None
527 527
528 528 @contextlib.contextmanager
529 529 def writing(self, transaction, data_end=None, sidedata_end=None):
530 530 """Open the revlog files for writing
531 531
532 532 Adding content to a revlog should be done within such a context.
533 533 """
534 534 if self.is_writing:
535 535 yield
536 536 else:
537 537 ifh = dfh = sdfh = None
538 538 try:
539 539 r = len(self.index)
540 540 # opening the data file.
541 541 dsize = 0
542 542 if r:
543 543 dsize = self.end(r - 1)
544 544 dfh = None
545 545 if not self.inline:
546 546 try:
547 547 dfh = self.opener(self.data_file, mode=b"r+")
548 548 if data_end is None:
549 549 dfh.seek(0, os.SEEK_END)
550 550 else:
551 551 dfh.seek(data_end, os.SEEK_SET)
552 552 except FileNotFoundError:
553 553 dfh = self.opener(self.data_file, mode=b"w+")
554 554 transaction.add(self.data_file, dsize)
555 555 if self.sidedata_file is not None:
556 556 assert sidedata_end is not None
557 557 # revlog-v2 does not inline, help Pytype
558 558 assert dfh is not None
559 559 try:
560 560 sdfh = self.opener(self.sidedata_file, mode=b"r+")
561 561 dfh.seek(sidedata_end, os.SEEK_SET)
562 562 except FileNotFoundError:
563 563 sdfh = self.opener(self.sidedata_file, mode=b"w+")
564 564 transaction.add(self.sidedata_file, sidedata_end)
565 565
566 566 # opening the index file.
567 567 isize = r * self.index.entry_size
568 568 ifh = self.__index_write_fp()
569 569 if self.inline:
570 570 transaction.add(self.index_file, dsize + isize)
571 571 else:
572 572 transaction.add(self.index_file, isize)
573 573 # exposing all file handle for writing.
574 574 self._writinghandles = (ifh, dfh, sdfh)
575 575 self._segmentfile.writing_handle = ifh if self.inline else dfh
576 576 self._segmentfile_sidedata.writing_handle = sdfh
577 577 yield
578 578 finally:
579 579 self._writinghandles = None
580 580 self._segmentfile.writing_handle = None
581 581 self._segmentfile_sidedata.writing_handle = None
582 582 if dfh is not None:
583 583 dfh.close()
584 584 if sdfh is not None:
585 585 sdfh.close()
586 586 # closing the index file last to avoid exposing references to
587 587 # potentially unflushed data content.
588 588 if ifh is not None:
589 589 ifh.close()
590 590
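# Illustrative sketch, not part of the original source: appending
# revisions is expected to happen inside this context, with the passed
# transaction recording file sizes so the append can be rolled back:
#
#     with inner.writing(tr):
#         ...  # append new revisions here
#
# `inner` and `tr` are assumed names for an inner revlog and an open
# transaction.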
591 591 def __index_write_fp(self, index_end=None):
592 592 """internal method to open the index file for writing
593 593
594 594 You should not use this directly; use `_writing` instead.
595 595 """
596 596 try:
597 597 f = self.opener(
598 598 self.index_file,
599 599 mode=b"r+",
600 600 checkambig=self.data_config.check_ambig,
601 601 )
602 602 if index_end is None:
603 603 f.seek(0, os.SEEK_END)
604 604 else:
605 605 f.seek(index_end, os.SEEK_SET)
606 606 return f
607 607 except FileNotFoundError:
608 608 return self.opener(
609 609 self.index_file,
610 610 mode=b"w+",
611 611 checkambig=self.data_config.check_ambig,
612 612 )
613 613
614 614 def __index_new_fp(self):
615 615 """internal method to create a new index file for writing
616 616
617 617 You should not use this unless you are upgrading from an inline revlog.
618 618 """
619 619 return self.opener(
620 620 self.index_file,
621 621 mode=b"w",
622 622 checkambig=self.data_config.check_ambig,
623 623 atomictemp=True,
624 624 )
625 625
626 626 def split_inline(self, tr, header, new_index_file_path=None):
627 627 """split the data of an inline revlog into an index and a data file"""
628 628 existing_handles = False
629 629 if self._writinghandles is not None:
630 630 existing_handles = True
631 631 fp = self._writinghandles[0]
632 632 fp.flush()
633 633 fp.close()
634 634 # We can't use the cached file handle after close(). So prevent
635 635 # its usage.
636 636 self._writinghandles = None
637 637 self._segmentfile.writing_handle = None
638 638 # No need to deal with sidedata writing handle as it is only
639 639 # relevant with revlog-v2, which is never inline and thus never
640 640 # reaches this code
641 641
642 642 new_dfh = self.opener(self.data_file, mode=b"w+")
643 643 new_dfh.truncate(0) # drop any potentially existing data
644 644 try:
645 645 with self.reading():
646 646 for r in range(len(self.index)):
647 647 new_dfh.write(self.get_segment_for_revs(r, r)[1])
648 648 new_dfh.flush()
649 649
650 650 if new_index_file_path is not None:
651 651 self.index_file = new_index_file_path
652 652 with self.__index_new_fp() as fp:
653 653 self.inline = False
654 654 for i in range(len(self.index)):
655 655 e = self.index.entry_binary(i)
656 656 if i == 0:
657 657 packed_header = self.index.pack_header(header)
658 658 e = packed_header + e
659 659 fp.write(e)
660 660
661 661 # If we don't use side-write, the temp file replaces the real
662 662 # index when we exit the context manager
663 663
664 664 self._segmentfile = randomaccessfile.randomaccessfile(
665 665 self.opener,
666 666 self.data_file,
667 667 self.data_config.chunk_cache_size,
668 668 )
669 669
670 670 if existing_handles:
671 671 # switched from inline to conventional; reopen the index
672 672 ifh = self.__index_write_fp()
673 673 self._writinghandles = (ifh, new_dfh, None)
674 674 self._segmentfile.writing_handle = new_dfh
675 675 new_dfh = None
676 676 # No need to deal with sidedata writing handle as it is only
677 677 # relevant with revlog-v2, which is never inline and thus never
678 678 # reaches this code
679 679 finally:
680 680 if new_dfh is not None:
681 681 new_dfh.close()
682 682 return self.index_file
683 683
684 684 def get_segment_for_revs(self, startrev, endrev):
685 685 """Obtain a segment of raw data corresponding to a range of revisions.
686 686
687 687 Accepts the start and end revisions. Reads may go through an
688 688 already-open file handle kept by the inner revlog, in which case its
689 689 seek position will not be preserved.
690 690
691 691 Requests for data may be satisfied by a cache.
692 692
693 693 Returns a 2-tuple of (offset, data) for the requested range of
694 694 revisions. Offset is the integer offset from the beginning of the
695 695 revlog and data is a str or buffer of the raw byte data.
696 696
697 697 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
698 698 to determine where each revision's data begins and ends.
699 699
700 700 API: we should consider making this a private part of the InnerRevlog
701 701 at some point.
702 702 """
703 703 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
704 704 # (functions are expensive).
705 705 index = self.index
706 706 istart = index[startrev]
707 707 start = int(istart[0] >> 16)
708 708 if startrev == endrev:
709 709 end = start + istart[1]
710 710 else:
711 711 iend = index[endrev]
712 712 end = int(iend[0] >> 16) + iend[1]
713 713
714 714 if self.inline:
715 715 start += (startrev + 1) * self.index.entry_size
716 716 end += (endrev + 1) * self.index.entry_size
717 717 length = end - start
718 718
719 719 return start, self._segmentfile.read_chunk(start, length)
720 720
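# Illustrative sketch, not part of the original source: a caller can
# carve individual revisions out of the returned segment using the
# per-revision start/length information, e.g. for a non-inline revlog:
#
#     offset, data = inner.get_segment_for_revs(a, b)
#     for rev in range(a, b + 1):
#         chunk = data[inner.start(rev) - offset:][:inner.length(rev)]
#
# (inline revlogs interleave index entries, which the offsets returned
# by get_segment_for_revs already account for).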
721 def _chunk(self, rev):
722 """Obtain a single decompressed chunk for a revision.
723
724 Accepts an integer revision to fetch. Reads may go through an
725 already-open file handle kept by the inner revlog, in which case its
726 seek position will not be preserved.
727
728 Returns a str holding uncompressed data for the requested revision.
729 """
730 compression_mode = self.index[rev][10]
731 data = self.get_segment_for_revs(rev, rev)[1]
732 if compression_mode == COMP_MODE_PLAIN:
733 return data
734 elif compression_mode == COMP_MODE_DEFAULT:
735 return self._decompressor(data)
736 elif compression_mode == COMP_MODE_INLINE:
737 return self.decompress(data)
738 else:
739 msg = b'unknown compression mode %d'
740 msg %= compression_mode
741 raise error.RevlogError(msg)
742
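# Illustrative note, not part of the original source: the compression
# mode read from the index entry above selects how the raw segment is
# interpreted; COMP_MODE_PLAIN keeps the bytes untouched,
# COMP_MODE_DEFAULT runs them through the revlog-wide default
# decompressor, and COMP_MODE_INLINE lets the chunk's own leading byte
# pick the engine via decompress().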
721 743
722 744 class revlog:
723 745 """
724 746 the underlying revision storage object
725 747
726 748 A revlog consists of two parts, an index and the revision data.
727 749
728 750 The index is a file with a fixed record size containing
729 751 information on each revision, including its nodeid (hash), the
730 752 nodeids of its parents, the position and offset of its data within
731 753 the data file, and the revision it's based on. Finally, each entry
732 754 contains a linkrev entry that can serve as a pointer to external
733 755 data.
734 756
735 757 The revision data itself is a linear collection of data chunks.
736 758 Each chunk represents a revision and is usually represented as a
737 759 delta against the previous chunk. To bound lookup time, runs of
738 760 deltas are limited to about 2 times the length of the original
739 761 version data. This makes retrieval of a version proportional to
740 762 its size, or O(1) relative to the number of revisions.
741 763
742 764 Both pieces of the revlog are written to in an append-only
743 765 fashion, which means we never need to rewrite a file to insert or
744 766 remove data, and can use some simple techniques to avoid the need
745 767 for locking while reading.
746 768
747 769 If checkambig, indexfile is opened with checkambig=True at
748 770 writing, to avoid file stat ambiguity.
749 771
750 772 If mmaplargeindex is True, and an mmapindexthreshold is set, the
751 773 index will be mmapped rather than read if it is larger than the
752 774 configured threshold.
753 775
754 776 If censorable is True, the revlog can have censored revisions.
755 777
756 778 If `upperboundcomp` is not None, this is the expected maximal gain from
757 779 compression for the data content.
758 780
759 781 `concurrencychecker` is an optional function that receives 3 arguments: a
760 782 file handle, a filename, and an expected position. It should check whether
761 783 the current position in the file handle is valid, and log/warn/fail (by
762 784 raising).
763 785
764 786 See mercurial/revlogutils/constants.py for details about the content of an
765 787 index entry.
766 788 """
767 789
768 790 _flagserrorclass = error.RevlogError
769 791
770 792 @staticmethod
771 793 def is_inline_index(header_bytes):
772 794 """Determine if a revlog is inline from the initial bytes of the index"""
773 795 header = INDEX_HEADER.unpack(header_bytes)[0]
774 796
775 797 _format_flags = header & ~0xFFFF
776 798 _format_version = header & 0xFFFF
777 799
778 800 features = FEATURES_BY_VERSION[_format_version]
779 801 return features[b'inline'](_format_flags)
780 802
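# Illustrative sketch, not part of the original source: the version
# number lives in the low 16 bits of the header and the feature flags in
# the high bits, so a classic inline revlogv1 header splits roughly as:
#
#     header = 0x00010001                # REVLOGV1 | FLAG_INLINE_DATA (assumed values)
#     format_flags = header & ~0xFFFF    # 0x00010000
#     format_version = header & 0xFFFF   # 1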
781 803 def __init__(
782 804 self,
783 805 opener,
784 806 target,
785 807 radix,
786 808 postfix=None, # only exist for `tmpcensored` now
787 809 checkambig=False,
788 810 mmaplargeindex=False,
789 811 censorable=False,
790 812 upperboundcomp=None,
791 813 persistentnodemap=False,
792 814 concurrencychecker=None,
793 815 trypending=False,
794 816 try_split=False,
795 817 canonical_parent_order=True,
796 818 ):
797 819 """
798 820 create a revlog object
799 821
800 822 opener is a function that abstracts the file opening operation
801 823 and can be used to implement COW semantics or the like.
802 824
803 825 `target`: a (KIND, ID) tuple that identifies the content stored in
804 826 this revlog. It helps the rest of the code understand what the revlog
805 827 is about without having to resort to heuristics or index filename
806 828 analysis. Note that this must reliably be set by normal code, but
807 829 test, debug, or performance measurement code might not set this to an
808 830 accurate value.
809 831 """
810 832
811 833 self.radix = radix
812 834
813 835 self._docket_file = None
814 836 self._indexfile = None
815 837 self._datafile = None
816 838 self._sidedatafile = None
817 839 self._nodemap_file = None
818 840 self.postfix = postfix
819 841 self._trypending = trypending
820 842 self._try_split = try_split
821 843 self.opener = opener
822 844 if persistentnodemap:
823 845 self._nodemap_file = nodemaputil.get_nodemap_file(self)
824 846
825 847 assert target[0] in ALL_KINDS
826 848 assert len(target) == 2
827 849 self.target = target
828 850 if b'feature-config' in self.opener.options:
829 851 self.feature_config = self.opener.options[b'feature-config'].copy()
830 852 else:
831 853 self.feature_config = FeatureConfig()
832 854 self.feature_config.censorable = censorable
833 855 self.feature_config.canonical_parent_order = canonical_parent_order
834 856 if b'data-config' in self.opener.options:
835 857 self.data_config = self.opener.options[b'data-config'].copy()
836 858 else:
837 859 self.data_config = DataConfig()
838 860 self.data_config.check_ambig = checkambig
839 861 self.data_config.mmap_large_index = mmaplargeindex
840 862 if b'delta-config' in self.opener.options:
841 863 self.delta_config = self.opener.options[b'delta-config'].copy()
842 864 else:
843 865 self.delta_config = DeltaConfig()
844 866 self.delta_config.upper_bound_comp = upperboundcomp
845 867
846 868 # 3-tuple of (node, rev, text) for a raw revision.
847 869 self._revisioncache = None
848 870 # Maps rev to chain base rev.
849 871 self._chainbasecache = util.lrucachedict(100)
850 872
851 873 self.index = None
852 874 self._docket = None
853 875 self._nodemap_docket = None
854 876 # Mapping of partial identifiers to full nodes.
855 877 self._pcache = {}
856 878
857 879 # other optional features
858 880
859 881 # Make copy of flag processors so each revlog instance can support
860 882 # custom flags.
861 883 self._flagprocessors = dict(flagutil.flagprocessors)
862 884 # prevent nesting of addgroup
863 885 self._adding_group = None
864 886
865 887 chunk_cache = self._loadindex()
866 888 self._load_inner(chunk_cache)
867 889
868 890 self._concurrencychecker = concurrencychecker
869 891
870 892 @property
871 893 def _generaldelta(self):
872 894 """temporary compatibility proxy"""
873 895 util.nouideprecwarn(
874 896 b"use revlog.delta_config.general_delta", b"6.6", stacklevel=2
875 897 )
876 898 return self.delta_config.general_delta
877 899
878 900 @property
879 901 def _checkambig(self):
880 902 """temporary compatibility proxy"""
881 903 util.nouideprecwarn(
882 904 b"use revlog.data_config.checkambig", b"6.6", stacklevel=2
883 905 )
884 906 return self.data_config.check_ambig
885 907
886 908 @property
887 909 def _mmaplargeindex(self):
888 910 """temporary compatibility proxy"""
889 911 util.nouideprecwarn(
890 912 b"use revlog.data_config.mmap_large_index", b"6.6", stacklevel=2
891 913 )
892 914 return self.data_config.mmap_large_index
893 915
894 916 @property
895 917 def _censorable(self):
896 918 """temporary compatibility proxy"""
897 919 util.nouideprecwarn(
898 920 b"use revlog.feature_config.censorable", b"6.6", stacklevel=2
899 921 )
900 922 return self.feature_config.censorable
901 923
902 924 @property
903 925 def _chunkcachesize(self):
904 926 """temporary compatibility proxy"""
905 927 util.nouideprecwarn(
906 928 b"use revlog.data_config.chunk_cache_size", b"6.6", stacklevel=2
907 929 )
908 930 return self.data_config.chunk_cache_size
909 931
910 932 @property
911 933 def _maxchainlen(self):
912 934 """temporary compatibility proxy"""
913 935 util.nouideprecwarn(
914 936 b"use revlog.delta_config.max_chain_len", b"6.6", stacklevel=2
915 937 )
916 938 return self.delta_config.max_chain_len
917 939
918 940 @property
919 941 def _deltabothparents(self):
920 942 """temporary compatibility proxy"""
921 943 util.nouideprecwarn(
922 944 b"use revlog.delta_config.delta_both_parents", b"6.6", stacklevel=2
923 945 )
924 946 return self.delta_config.delta_both_parents
925 947
926 948 @property
927 949 def _candidate_group_chunk_size(self):
928 950 """temporary compatibility proxy"""
929 951 util.nouideprecwarn(
930 952 b"use revlog.delta_config.candidate_group_chunk_size",
931 953 b"6.6",
932 954 stacklevel=2,
933 955 )
934 956 return self.delta_config.candidate_group_chunk_size
935 957
936 958 @property
937 959 def _debug_delta(self):
938 960 """temporary compatibility proxy"""
939 961 util.nouideprecwarn(
940 962 b"use revlog.delta_config.debug_delta", b"6.6", stacklevel=2
941 963 )
942 964 return self.delta_config.debug_delta
943 965
944 966 @property
945 967 def _compengine(self):
946 968 """temporary compatibility proxy"""
947 969 util.nouideprecwarn(
948 970 b"use revlog.feature_config.compression_engine",
949 971 b"6.6",
950 972 stacklevel=2,
951 973 )
952 974 return self.feature_config.compression_engine
953 975
954 976 @property
955 977 def upperboundcomp(self):
956 978 """temporary compatibility proxy"""
957 979 util.nouideprecwarn(
958 980 b"use revlog.delta_config.upper_bound_comp",
959 981 b"6.6",
960 982 stacklevel=2,
961 983 )
962 984 return self.delta_config.upper_bound_comp
963 985
964 986 @property
965 987 def _compengineopts(self):
966 988 """temporary compatibility proxy"""
967 989 util.nouideprecwarn(
968 990 b"use revlog.feature_config.compression_engine_options",
969 991 b"6.6",
970 992 stacklevel=2,
971 993 )
972 994 return self.feature_config.compression_engine_options
973 995
974 996 @property
975 997 def _maxdeltachainspan(self):
976 998 """temporary compatibility proxy"""
977 999 util.nouideprecwarn(
978 1000 b"use revlog.delta_config.max_deltachain_span", b"6.6", stacklevel=2
979 1001 )
980 1002 return self.delta_config.max_deltachain_span
981 1003
982 1004 @property
983 1005 def _withsparseread(self):
984 1006 """temporary compatibility proxy"""
985 1007 util.nouideprecwarn(
986 1008 b"use revlog.data_config.with_sparse_read", b"6.6", stacklevel=2
987 1009 )
988 1010 return self.data_config.with_sparse_read
989 1011
990 1012 @property
991 1013 def _sparserevlog(self):
992 1014 """temporary compatibility proxy"""
993 1015 util.nouideprecwarn(
994 1016 b"use revlog.delta_config.sparse_revlog", b"6.6", stacklevel=2
995 1017 )
996 1018 return self.delta_config.sparse_revlog
997 1019
998 1020 @property
999 1021 def hassidedata(self):
1000 1022 """temporary compatibility proxy"""
1001 1023 util.nouideprecwarn(
1002 1024 b"use revlog.feature_config.has_side_data", b"6.6", stacklevel=2
1003 1025 )
1004 1026 return self.feature_config.has_side_data
1005 1027
1006 1028 @property
1007 1029 def _srdensitythreshold(self):
1008 1030 """temporary compatibility proxy"""
1009 1031 util.nouideprecwarn(
1010 1032 b"use revlog.data_config.sr_density_threshold",
1011 1033 b"6.6",
1012 1034 stacklevel=2,
1013 1035 )
1014 1036 return self.data_config.sr_density_threshold
1015 1037
1016 1038 @property
1017 1039 def _srmingapsize(self):
1018 1040 """temporary compatibility proxy"""
1019 1041 util.nouideprecwarn(
1020 1042 b"use revlog.data_config.sr_min_gap_size", b"6.6", stacklevel=2
1021 1043 )
1022 1044 return self.data_config.sr_min_gap_size
1023 1045
1024 1046 @property
1025 1047 def _compute_rank(self):
1026 1048 """temporary compatibility proxy"""
1027 1049 util.nouideprecwarn(
1028 1050 b"use revlog.feature_config.compute_rank", b"6.6", stacklevel=2
1029 1051 )
1030 1052 return self.feature_config.compute_rank
1031 1053
1032 1054 @property
1033 1055 def canonical_parent_order(self):
1034 1056 """temporary compatibility proxy"""
1035 1057 util.nouideprecwarn(
1036 1058 b"use revlog.feature_config.canonical_parent_order",
1037 1059 b"6.6",
1038 1060 stacklevel=2,
1039 1061 )
1040 1062 return self.feature_config.canonical_parent_order
1041 1063
1042 1064 @property
1043 1065 def _lazydelta(self):
1044 1066 """temporary compatibility proxy"""
1045 1067 util.nouideprecwarn(
1046 1068 b"use revlog.delta_config.lazy_delta", b"6.6", stacklevel=2
1047 1069 )
1048 1070 return self.delta_config.lazy_delta
1049 1071
1050 1072 @property
1051 1073 def _lazydeltabase(self):
1052 1074 """temporary compatibility proxy"""
1053 1075 util.nouideprecwarn(
1054 1076 b"use revlog.delta_config.lazy_delta_base", b"6.6", stacklevel=2
1055 1077 )
1056 1078 return self.delta_config.lazy_delta_base
1057 1079
1058 1080 def _init_opts(self):
1059 1081 """process options (from above/config) to setup associated default revlog mode
1060 1082
1061 1083 These values might be affected when actually reading on disk information.
1062 1084
1063 1085 The relevant values are returned for use in _loadindex().
1064 1086
1065 1087 * newversionflags:
1066 1088 version header to use if we need to create a new revlog
1067 1089
1068 1090 * mmapindexthreshold:
1069 1091 minimal index size at which to start using mmap
1070 1092
1071 1093 * force_nodemap:
1072 1094 force the usage of a "development" version of the nodemap code
1073 1095 """
1074 1096 opts = self.opener.options
1075 1097
1076 1098 if b'changelogv2' in opts and self.revlog_kind == KIND_CHANGELOG:
1077 1099 new_header = CHANGELOGV2
1078 1100 compute_rank = opts.get(b'changelogv2.compute-rank', True)
1079 1101 self.feature_config.compute_rank = compute_rank
1080 1102 elif b'revlogv2' in opts:
1081 1103 new_header = REVLOGV2
1082 1104 elif b'revlogv1' in opts:
1083 1105 new_header = REVLOGV1 | FLAG_INLINE_DATA
1084 1106 if b'generaldelta' in opts:
1085 1107 new_header |= FLAG_GENERALDELTA
1086 1108 elif b'revlogv0' in self.opener.options:
1087 1109 new_header = REVLOGV0
1088 1110 else:
1089 1111 new_header = REVLOG_DEFAULT_VERSION
1090 1112
1091 1113 mmapindexthreshold = None
1092 1114 if self.data_config.mmap_large_index:
1093 1115 mmapindexthreshold = self.data_config.mmap_index_threshold
1094 1116 if self.feature_config.enable_ellipsis:
1095 1117 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
1096 1118
1097 1119 # revlog v0 doesn't have flag processors
1098 1120 for flag, processor in opts.get(b'flagprocessors', {}).items():
1099 1121 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
1100 1122
1101 1123 chunk_cache_size = self.data_config.chunk_cache_size
1102 1124 if chunk_cache_size <= 0:
1103 1125 raise error.RevlogError(
1104 1126 _(b'revlog chunk cache size %r is not greater than 0')
1105 1127 % chunk_cache_size
1106 1128 )
1107 1129 elif chunk_cache_size & (chunk_cache_size - 1):
1108 1130 raise error.RevlogError(
1109 1131 _(b'revlog chunk cache size %r is not a power of 2')
1110 1132 % chunk_cache_size
1111 1133 )
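# Illustrative note, not part of the original source: `x & (x - 1)`
# clears the lowest set bit, so it is zero exactly when x is a power
# of two, e.g. 65536 & 65535 == 0 while 65537 & 65536 != 0.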
1112 1134 force_nodemap = opts.get(b'devel-force-nodemap', False)
1113 1135 return new_header, mmapindexthreshold, force_nodemap
1114 1136
1115 1137 def _get_data(self, filepath, mmap_threshold, size=None):
1116 1138 """return a file content with or without mmap
1117 1139
1118 1140 If the file is missing return the empty string"""
1119 1141 try:
1120 1142 with self.opener(filepath) as fp:
1121 1143 if mmap_threshold is not None:
1122 1144 file_size = self.opener.fstat(fp).st_size
1123 1145 if file_size >= mmap_threshold:
1124 1146 if size is not None:
1125 1147 # avoid a potential mmap crash
1126 1148 size = min(file_size, size)
1127 1149 # TODO: should .close() to release resources without
1128 1150 # relying on Python GC
1129 1151 if size is None:
1130 1152 return util.buffer(util.mmapread(fp))
1131 1153 else:
1132 1154 return util.buffer(util.mmapread(fp, size))
1133 1155 if size is None:
1134 1156 return fp.read()
1135 1157 else:
1136 1158 return fp.read(size)
1137 1159 except FileNotFoundError:
1138 1160 return b''
1139 1161
1140 1162 def get_streams(self, max_linkrev, force_inline=False):
1141 1163 """return a list of streams that represent this revlog
1142 1164
1143 1165 This is used by stream-clone to do bytes to bytes copies of a repository.
1144 1166
1145 1167 This streams data for all revisions that refer to a changelog revision up
1146 1168 to `max_linkrev`.
1147 1169
1148 1170 If `force_inline` is set, it enforces that the stream will represent an inline revlog.
1149 1171
1150 1172 It returns a list of three-tuples:
1151 1173
1152 1174 [
1153 1175 (filename, bytes_stream, stream_size),
1154 1176 …
1155 1177 ]
1156 1178 """
1157 1179 n = len(self)
1158 1180 index = self.index
1159 1181 while n > 0:
1160 1182 linkrev = index[n - 1][4]
1161 1183 if linkrev < max_linkrev:
1162 1184 break
1163 1185 # note: this loop will rarely go through multiple iterations, since
1164 1186 # it only traverses commits created during the current streaming
1165 1187 # pull operation.
1166 1188 #
1167 1189 # If this becomes a problem, using a binary search should cap the
1168 1190 # runtime of this.
1169 1191 n = n - 1
1170 1192 if n == 0:
1171 1193 # no data to send
1172 1194 return []
1173 1195 index_size = n * index.entry_size
1174 1196 data_size = self.end(n - 1)
1175 1197
1176 1198 # XXX we might have been split (or stripped) since the object
1177 1199 # initialization. We need to close this race too, by having a way to
1178 1200 # pre-open the files we feed to the revlog and never closing them
1179 1201 # before we are done streaming.
1180 1202
1181 1203 if self._inline:
1182 1204
1183 1205 def get_stream():
1184 1206 with self.opener(self._indexfile, mode=b"r") as fp:
1185 1207 yield None
1186 1208 size = index_size + data_size
1187 1209 if size <= 65536:
1188 1210 yield fp.read(size)
1189 1211 else:
1190 1212 yield from util.filechunkiter(fp, limit=size)
1191 1213
1192 1214 inline_stream = get_stream()
1193 1215 next(inline_stream)
1194 1216 return [
1195 1217 (self._indexfile, inline_stream, index_size + data_size),
1196 1218 ]
1197 1219 elif force_inline:
1198 1220
1199 1221 def get_stream():
1200 1222 with self.reading():
1201 1223 yield None
1202 1224
1203 1225 for rev in range(n):
1204 1226 idx = self.index.entry_binary(rev)
1205 1227 if rev == 0 and self._docket is None:
1206 1228 # re-inject the inline flag
1207 1229 header = self._format_flags
1208 1230 header |= self._format_version
1209 1231 header |= FLAG_INLINE_DATA
1210 1232 header = self.index.pack_header(header)
1211 1233 idx = header + idx
1212 1234 yield idx
1213 1235 yield self._inner.get_segment_for_revs(rev, rev)[1]
1214 1236
1215 1237 inline_stream = get_stream()
1216 1238 next(inline_stream)
1217 1239 return [
1218 1240 (self._indexfile, inline_stream, index_size + data_size),
1219 1241 ]
1220 1242 else:
1221 1243
1222 1244 def get_index_stream():
1223 1245 with self.opener(self._indexfile, mode=b"r") as fp:
1224 1246 yield None
1225 1247 if index_size <= 65536:
1226 1248 yield fp.read(index_size)
1227 1249 else:
1228 1250 yield from util.filechunkiter(fp, limit=index_size)
1229 1251
1230 1252 def get_data_stream():
1231 1253 with self._datafp() as fp:
1232 1254 yield None
1233 1255 if data_size <= 65536:
1234 1256 yield fp.read(data_size)
1235 1257 else:
1236 1258 yield from util.filechunkiter(fp, limit=data_size)
1237 1259
1238 1260 index_stream = get_index_stream()
1239 1261 next(index_stream)
1240 1262 data_stream = get_data_stream()
1241 1263 next(data_stream)
1242 1264 return [
1243 1265 (self._datafile, data_stream, data_size),
1244 1266 (self._indexfile, index_stream, index_size),
1245 1267 ]
1246 1268
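# Illustrative sketch, not part of the original source: a stream-clone
# style consumer would drain each returned stream into a destination
# file of the same name, e.g.
#
#     for name, stream, size in rl.get_streams(max_linkrev):
#         with dest_opener(name, b'wb') as fp:
#             for chunk in stream:
#                 fp.write(chunk)
#
# `rl`, `dest_opener` and `max_linkrev` are assumed names for this
# sketch.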
1247 1269 def _loadindex(self, docket=None):
1248 1270
1249 1271 new_header, mmapindexthreshold, force_nodemap = self._init_opts()
1250 1272
1251 1273 if self.postfix is not None:
1252 1274 entry_point = b'%s.i.%s' % (self.radix, self.postfix)
1253 1275 elif self._trypending and self.opener.exists(b'%s.i.a' % self.radix):
1254 1276 entry_point = b'%s.i.a' % self.radix
1255 1277 elif self._try_split and self.opener.exists(self._split_index_file):
1256 1278 entry_point = self._split_index_file
1257 1279 else:
1258 1280 entry_point = b'%s.i' % self.radix
1259 1281
1260 1282 if docket is not None:
1261 1283 self._docket = docket
1262 1284 self._docket_file = entry_point
1263 1285 else:
1264 1286 self._initempty = True
1265 1287 entry_data = self._get_data(entry_point, mmapindexthreshold)
1266 1288 if len(entry_data) > 0:
1267 1289 header = INDEX_HEADER.unpack(entry_data[:4])[0]
1268 1290 self._initempty = False
1269 1291 else:
1270 1292 header = new_header
1271 1293
1272 1294 self._format_flags = header & ~0xFFFF
1273 1295 self._format_version = header & 0xFFFF
1274 1296
1275 1297 supported_flags = SUPPORTED_FLAGS.get(self._format_version)
1276 1298 if supported_flags is None:
1277 1299 msg = _(b'unknown version (%d) in revlog %s')
1278 1300 msg %= (self._format_version, self.display_id)
1279 1301 raise error.RevlogError(msg)
1280 1302 elif self._format_flags & ~supported_flags:
1281 1303 msg = _(b'unknown flags (%#04x) in version %d revlog %s')
1282 1304 display_flag = self._format_flags >> 16
1283 1305 msg %= (display_flag, self._format_version, self.display_id)
1284 1306 raise error.RevlogError(msg)
1285 1307
1286 1308 features = FEATURES_BY_VERSION[self._format_version]
1287 1309 self._inline = features[b'inline'](self._format_flags)
1288 1310 self.delta_config.general_delta = features[b'generaldelta'](
1289 1311 self._format_flags
1290 1312 )
1291 1313 self.feature_config.has_side_data = features[b'sidedata']
1292 1314
1293 1315 if not features[b'docket']:
1294 1316 self._indexfile = entry_point
1295 1317 index_data = entry_data
1296 1318 else:
1297 1319 self._docket_file = entry_point
1298 1320 if self._initempty:
1299 1321 self._docket = docketutil.default_docket(self, header)
1300 1322 else:
1301 1323 self._docket = docketutil.parse_docket(
1302 1324 self, entry_data, use_pending=self._trypending
1303 1325 )
1304 1326
1305 1327 if self._docket is not None:
1306 1328 self._indexfile = self._docket.index_filepath()
1307 1329 index_data = b''
1308 1330 index_size = self._docket.index_end
1309 1331 if index_size > 0:
1310 1332 index_data = self._get_data(
1311 1333 self._indexfile, mmapindexthreshold, size=index_size
1312 1334 )
1313 1335 if len(index_data) < index_size:
1314 1336 msg = _(b'too few index data for %s: got %d, expected %d')
1315 1337 msg %= (self.display_id, len(index_data), index_size)
1316 1338 raise error.RevlogError(msg)
1317 1339
1318 1340 self._inline = False
1319 1341 # generaldelta implied by version 2 revlogs.
1320 1342 self.delta_config.general_delta = True
1321 1343 # the logic for persistent nodemap will be dealt with within the
1322 1344 # main docket, so disable it for now.
1323 1345 self._nodemap_file = None
1324 1346
1325 1347 if self._docket is not None:
1326 1348 self._datafile = self._docket.data_filepath()
1327 1349 self._sidedatafile = self._docket.sidedata_filepath()
1328 1350 elif self.postfix is None:
1329 1351 self._datafile = b'%s.d' % self.radix
1330 1352 else:
1331 1353 self._datafile = b'%s.d.%s' % (self.radix, self.postfix)
1332 1354
1333 1355 self.nodeconstants = sha1nodeconstants
1334 1356 self.nullid = self.nodeconstants.nullid
1335 1357
1336 1358 # sparse-revlog can't be on without general-delta (issue6056)
1337 1359 if not self.delta_config.general_delta:
1338 1360 self.delta_config.sparse_revlog = False
1339 1361
1340 1362 self._storedeltachains = True
1341 1363
1342 1364 devel_nodemap = (
1343 1365 self._nodemap_file
1344 1366 and force_nodemap
1345 1367 and parse_index_v1_nodemap is not None
1346 1368 )
1347 1369
1348 1370 use_rust_index = False
1349 1371 if rustrevlog is not None:
1350 1372 if self._nodemap_file is not None:
1351 1373 use_rust_index = True
1352 1374 else:
1353 1375 use_rust_index = self.opener.options.get(b'rust.index')
1354 1376
1355 1377 self._parse_index = parse_index_v1
1356 1378 if self._format_version == REVLOGV0:
1357 1379 self._parse_index = revlogv0.parse_index_v0
1358 1380 elif self._format_version == REVLOGV2:
1359 1381 self._parse_index = parse_index_v2
1360 1382 elif self._format_version == CHANGELOGV2:
1361 1383 self._parse_index = parse_index_cl_v2
1362 1384 elif devel_nodemap:
1363 1385 self._parse_index = parse_index_v1_nodemap
1364 1386 elif use_rust_index:
1365 1387 self._parse_index = parse_index_v1_mixed
1366 1388 try:
1367 1389 d = self._parse_index(index_data, self._inline)
1368 1390 index, chunkcache = d
1369 1391 use_nodemap = (
1370 1392 not self._inline
1371 1393 and self._nodemap_file is not None
1372 1394 and hasattr(index, 'update_nodemap_data')
1373 1395 )
1374 1396 if use_nodemap:
1375 1397 nodemap_data = nodemaputil.persisted_data(self)
1376 1398 if nodemap_data is not None:
1377 1399 docket = nodemap_data[0]
1378 1400 if (
1379 1401 len(d[0]) > docket.tip_rev
1380 1402 and d[0][docket.tip_rev][7] == docket.tip_node
1381 1403 ):
1382 1404 # no changelog tampering
1383 1405 self._nodemap_docket = docket
1384 1406 index.update_nodemap_data(*nodemap_data)
1385 1407 except (ValueError, IndexError):
1386 1408 raise error.RevlogError(
1387 1409 _(b"index %s is corrupted") % self.display_id
1388 1410 )
1389 1411 self.index = index
1390 1412 # revnum -> (chain-length, sum-delta-length)
1391 1413 self._chaininfocache = util.lrucachedict(500)
1392 1414
1393 1415 return chunkcache
1394 1416
1395 1417 def _load_inner(self, chunk_cache):
1396 1418 if self._docket is None:
1397 1419 default_compression_header = None
1398 1420 else:
1399 1421 default_compression_header = self._docket.default_compression_header
1400 1422
1401 1423 self._inner = _InnerRevlog(
1402 1424 opener=self.opener,
1403 1425 index=self.index,
1404 1426 index_file=self._indexfile,
1405 1427 data_file=self._datafile,
1406 1428 sidedata_file=self._sidedatafile,
1407 1429 inline=self._inline,
1408 1430 data_config=self.data_config,
1409 1431 feature_config=self.feature_config,
1410 1432 chunk_cache=chunk_cache,
1411 1433 default_compression_header=default_compression_header,
1412 1434 )
1413 1435
1414 1436 def get_revlog(self):
1415 1437 """simple function to mirror API of other not-really-revlog API"""
1416 1438 return self
1417 1439
1418 1440 @util.propertycache
1419 1441 def revlog_kind(self):
1420 1442 return self.target[0]
1421 1443
1422 1444 @util.propertycache
1423 1445 def display_id(self):
1424 1446 """The public facing "ID" of the revlog that we use in message"""
1425 1447 if self.revlog_kind == KIND_FILELOG:
1426 1448 # Reference the file without the "data/" prefix, so it is familiar
1427 1449 # to the user.
1428 1450 return self.target[1]
1429 1451 else:
1430 1452 return self.radix
1431 1453
1432 1454 def _datafp(self, mode=b'r'):
1433 1455 """file object for the revlog's data file"""
1434 1456 return self.opener(self._datafile, mode=mode)
1435 1457
1436 1458 def tiprev(self):
1437 1459 return len(self.index) - 1
1438 1460
1439 1461 def tip(self):
1440 1462 return self.node(self.tiprev())
1441 1463
1442 1464 def __contains__(self, rev):
1443 1465 return 0 <= rev < len(self)
1444 1466
1445 1467 def __len__(self):
1446 1468 return len(self.index)
1447 1469
1448 1470 def __iter__(self):
1449 1471 return iter(range(len(self)))
1450 1472
1451 1473 def revs(self, start=0, stop=None):
1452 1474 """iterate over all rev in this revlog (from start to stop)"""
1453 1475 return storageutil.iterrevs(len(self), start=start, stop=stop)
1454 1476
1455 1477 def hasnode(self, node):
1456 1478 try:
1457 1479 self.rev(node)
1458 1480 return True
1459 1481 except KeyError:
1460 1482 return False
1461 1483
1462 1484 def _candelta(self, baserev, rev):
1463 1485 """whether two revisions (baserev, rev) can be delta-ed or not"""
1464 1486 # Disable delta if either rev requires a content-changing flag
1465 1487 # processor (ex. LFS). This is because such flag processor can alter
1466 1488 # the rawtext content that the delta will be based on, and two clients
1467 1489 # could have a same revlog node with different flags (i.e. different
1468 1490 # rawtext contents) and the delta could be incompatible.
1469 1491 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
1470 1492 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
1471 1493 ):
1472 1494 return False
1473 1495 return True
1474 1496
1475 1497 def update_caches(self, transaction):
1476 1498 """update on disk cache
1477 1499
1478 1500 If a transaction is passed, the update may be delayed to transaction
1479 1501 commit."""
1480 1502 if self._nodemap_file is not None:
1481 1503 if transaction is None:
1482 1504 nodemaputil.update_persistent_nodemap(self)
1483 1505 else:
1484 1506 nodemaputil.setup_persistent_nodemap(transaction, self)
1485 1507
1486 1508 def clearcaches(self):
1487 1509 """Clear in-memory caches"""
1488 1510 self._revisioncache = None
1489 1511 self._chainbasecache.clear()
1490 1512 self._inner._segmentfile.clear_cache()
1491 1513 self._inner._segmentfile_sidedata.clear_cache()
1492 1514 self._pcache = {}
1493 1515 self._nodemap_docket = None
1494 1516 self.index.clearcaches()
1495 1517 # The python code is the one responsible for validating the docket; we
1496 1518 # end up having to refresh it here.
1497 1519 use_nodemap = (
1498 1520 not self._inline
1499 1521 and self._nodemap_file is not None
1500 1522 and hasattr(self.index, 'update_nodemap_data')
1501 1523 )
1502 1524 if use_nodemap:
1503 1525 nodemap_data = nodemaputil.persisted_data(self)
1504 1526 if nodemap_data is not None:
1505 1527 self._nodemap_docket = nodemap_data[0]
1506 1528 self.index.update_nodemap_data(*nodemap_data)
1507 1529
1508 1530 def rev(self, node):
1509 1531 """return the revision number associated with a <nodeid>"""
1510 1532 try:
1511 1533 return self.index.rev(node)
1512 1534 except TypeError:
1513 1535 raise
1514 1536 except error.RevlogError:
1515 1537 # parsers.c radix tree lookup failed
1516 1538 if (
1517 1539 node == self.nodeconstants.wdirid
1518 1540 or node in self.nodeconstants.wdirfilenodeids
1519 1541 ):
1520 1542 raise error.WdirUnsupported
1521 1543 raise error.LookupError(node, self.display_id, _(b'no node'))
1522 1544
1523 1545 # Accessors for index entries.
1524 1546
1525 1547 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
1526 1548 # are flags.
1527 1549 def start(self, rev):
1528 1550 return int(self.index[rev][0] >> 16)
1529 1551
1530 1552 def sidedata_cut_off(self, rev):
1531 1553 sd_cut_off = self.index[rev][8]
1532 1554 if sd_cut_off != 0:
1533 1555 return sd_cut_off
1534 1556 # This is some annoying dance, because entries without sidedata
1535 1557 # currently use 0 as their offset. (instead of previous-offset +
1536 1558 # previous-size)
1537 1559 #
1538 1560 # We should reconsider this sidedata → 0 sidedata_offset policy.
1539 1561 # In the meantime, we need this.
1540 1562 while 0 <= rev:
1541 1563 e = self.index[rev]
1542 1564 if e[9] != 0:
1543 1565 return e[8] + e[9]
1544 1566 rev -= 1
1545 1567 return 0
1546 1568
1547 1569 def flags(self, rev):
1548 1570 return self.index[rev][0] & 0xFFFF
1549 1571
1550 1572 def length(self, rev):
1551 1573 return self.index[rev][1]
1552 1574
1553 1575 def sidedata_length(self, rev):
1554 1576 if not self.feature_config.has_side_data:
1555 1577 return 0
1556 1578 return self.index[rev][9]
1557 1579
1558 1580 def rawsize(self, rev):
1559 1581 """return the length of the uncompressed text for a given revision"""
1560 1582 l = self.index[rev][2]
1561 1583 if l >= 0:
1562 1584 return l
1563 1585
1564 1586 t = self.rawdata(rev)
1565 1587 return len(t)
1566 1588
1567 1589 def size(self, rev):
1568 1590 """length of non-raw text (processed by a "read" flag processor)"""
1569 1591 # fast path: if no "read" flag processor could change the content,
1570 1592 # size is rawsize. note: ELLIPSIS is known to not change the content.
1571 1593 flags = self.flags(rev)
1572 1594 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
1573 1595 return self.rawsize(rev)
1574 1596
1575 1597 return len(self.revision(rev))
1576 1598
1577 1599 def fast_rank(self, rev):
1578 1600 """Return the rank of a revision if already known, or None otherwise.
1579 1601
1580 1602 The rank of a revision is the size of the sub-graph it defines as a
1581 1603 head. Equivalently, the rank of a revision `r` is the size of the set
1582 1604 `ancestors(r)`, `r` included.
1583 1605
1584 1606 This method returns the rank retrieved from the revlog in constant
1585 1607 time. It makes no attempt at computing unknown values for versions of
1586 1608 the revlog which do not persist the rank.
1587 1609 """
1588 1610 rank = self.index[rev][ENTRY_RANK]
1589 1611 if self._format_version != CHANGELOGV2 or rank == RANK_UNKNOWN:
1590 1612 return None
1591 1613 if rev == nullrev:
1592 1614 return 0 # convention
1593 1615 return rank
1594 1616
1595 1617 def chainbase(self, rev):
1596 1618 base = self._chainbasecache.get(rev)
1597 1619 if base is not None:
1598 1620 return base
1599 1621
1600 1622 index = self.index
1601 1623 iterrev = rev
1602 1624 base = index[iterrev][3]
1603 1625 while base != iterrev:
1604 1626 iterrev = base
1605 1627 base = index[iterrev][3]
1606 1628
1607 1629 self._chainbasecache[rev] = base
1608 1630 return base
1609 1631
1610 1632 def linkrev(self, rev):
1611 1633 return self.index[rev][4]
1612 1634
1613 1635 def parentrevs(self, rev):
1614 1636 try:
1615 1637 entry = self.index[rev]
1616 1638 except IndexError:
1617 1639 if rev == wdirrev:
1618 1640 raise error.WdirUnsupported
1619 1641 raise
1620 1642
1621 1643 if self.feature_config.canonical_parent_order and entry[5] == nullrev:
1622 1644 return entry[6], entry[5]
1623 1645 else:
1624 1646 return entry[5], entry[6]
1625 1647
1626 1648 # fast parentrevs(rev) where rev isn't filtered
1627 1649 _uncheckedparentrevs = parentrevs
1628 1650
1629 1651 def node(self, rev):
1630 1652 try:
1631 1653 return self.index[rev][7]
1632 1654 except IndexError:
1633 1655 if rev == wdirrev:
1634 1656 raise error.WdirUnsupported
1635 1657 raise
1636 1658
1637 1659 # Derived from index values.
1638 1660
1639 1661 def end(self, rev):
1640 1662 return self.start(rev) + self.length(rev)
1641 1663
1642 1664 def parents(self, node):
1643 1665 i = self.index
1644 1666 d = i[self.rev(node)]
1645 1667 # inline node() to avoid function call overhead
1646 1668 if self.feature_config.canonical_parent_order and d[5] == self.nullid:
1647 1669 return i[d[6]][7], i[d[5]][7]
1648 1670 else:
1649 1671 return i[d[5]][7], i[d[6]][7]
1650 1672
1651 1673 def chainlen(self, rev):
1652 1674 return self._chaininfo(rev)[0]
1653 1675
1654 1676 def _chaininfo(self, rev):
1655 1677 chaininfocache = self._chaininfocache
1656 1678 if rev in chaininfocache:
1657 1679 return chaininfocache[rev]
1658 1680 index = self.index
1659 1681 generaldelta = self.delta_config.general_delta
1660 1682 iterrev = rev
1661 1683 e = index[iterrev]
1662 1684 clen = 0
1663 1685 compresseddeltalen = 0
1664 1686 while iterrev != e[3]:
1665 1687 clen += 1
1666 1688 compresseddeltalen += e[1]
1667 1689 if generaldelta:
1668 1690 iterrev = e[3]
1669 1691 else:
1670 1692 iterrev -= 1
1671 1693 if iterrev in chaininfocache:
1672 1694 t = chaininfocache[iterrev]
1673 1695 clen += t[0]
1674 1696 compresseddeltalen += t[1]
1675 1697 break
1676 1698 e = index[iterrev]
1677 1699 else:
1678 1700 # Add text length of base since decompressing that also takes
1679 1701 # work. For cache hits the length is already included.
1680 1702 compresseddeltalen += e[1]
1681 1703 r = (clen, compresseddeltalen)
1682 1704 chaininfocache[rev] = r
1683 1705 return r
1684 1706
1685 1707 def _deltachain(self, rev, stoprev=None):
1686 1708 """Obtain the delta chain for a revision.
1687 1709
1688 1710 ``stoprev`` specifies a revision to stop at. If not specified, we
1689 1711 stop at the base of the chain.
1690 1712
1691 1713 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
1692 1714 revs in ascending order and ``stopped`` is a bool indicating whether
1693 1715 ``stoprev`` was hit.
1694 1716 """
1695 1717 generaldelta = self.delta_config.general_delta
1696 1718 # Try C implementation.
1697 1719 try:
1698 1720 return self.index.deltachain(rev, stoprev, generaldelta)
1699 1721 except AttributeError:
1700 1722 pass
1701 1723
1702 1724 chain = []
1703 1725
1704 1726 # Alias to prevent attribute lookup in tight loop.
1705 1727 index = self.index
1706 1728
1707 1729 iterrev = rev
1708 1730 e = index[iterrev]
1709 1731 while iterrev != e[3] and iterrev != stoprev:
1710 1732 chain.append(iterrev)
1711 1733 if generaldelta:
1712 1734 iterrev = e[3]
1713 1735 else:
1714 1736 iterrev -= 1
1715 1737 e = index[iterrev]
1716 1738
1717 1739 if iterrev == stoprev:
1718 1740 stopped = True
1719 1741 else:
1720 1742 chain.append(iterrev)
1721 1743 stopped = False
1722 1744
1723 1745 chain.reverse()
1724 1746 return chain, stopped
1725 1747
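# Illustrative note, not part of the original source: with general delta
# each entry's e[3] can point at an arbitrary base, so a revision whose
# bases run 12 -> 9 -> 4 -> 4 (a full snapshot) yields
# chain == [4, 9, 12] and stopped == False; without general delta each
# revision deltas against its immediate predecessor, so the chain is a
# contiguous run of revision numbers ending at the snapshot.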
1726 1748 def ancestors(self, revs, stoprev=0, inclusive=False):
1727 1749 """Generate the ancestors of 'revs' in reverse revision order.
1728 1750 Does not generate revs lower than stoprev.
1729 1751
1730 1752 See the documentation for ancestor.lazyancestors for more details."""
1731 1753
1732 1754 # first, make sure start revisions aren't filtered
1733 1755 revs = list(revs)
1734 1756 checkrev = self.node
1735 1757 for r in revs:
1736 1758 checkrev(r)
1737 1759 # and we're sure ancestors aren't filtered as well
1738 1760
1739 1761 if rustancestor is not None and self.index.rust_ext_compat:
1740 1762 lazyancestors = rustancestor.LazyAncestors
1741 1763 arg = self.index
1742 1764 else:
1743 1765 lazyancestors = ancestor.lazyancestors
1744 1766 arg = self._uncheckedparentrevs
1745 1767 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
1746 1768
1747 1769 def descendants(self, revs):
1748 1770 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
1749 1771
1750 1772 def findcommonmissing(self, common=None, heads=None):
1751 1773 """Return a tuple of the ancestors of common and the ancestors of heads
1752 1774 that are not ancestors of common. In revset terminology, we return the
1753 1775 tuple:
1754 1776
1755 1777 ::common, (::heads) - (::common)
1756 1778
1757 1779 The list is sorted by revision number, meaning it is
1758 1780 topologically sorted.
1759 1781
1760 1782 'heads' and 'common' are both lists of node IDs. If heads is
1761 1783 not supplied, uses all of the revlog's heads. If common is not
1762 1784 supplied, uses nullid."""
1763 1785 if common is None:
1764 1786 common = [self.nullid]
1765 1787 if heads is None:
1766 1788 heads = self.heads()
1767 1789
1768 1790 common = [self.rev(n) for n in common]
1769 1791 heads = [self.rev(n) for n in heads]
1770 1792
1771 1793 # we want the ancestors, but inclusive
1772 1794 class lazyset:
1773 1795 def __init__(self, lazyvalues):
1774 1796 self.addedvalues = set()
1775 1797 self.lazyvalues = lazyvalues
1776 1798
1777 1799 def __contains__(self, value):
1778 1800 return value in self.addedvalues or value in self.lazyvalues
1779 1801
1780 1802 def __iter__(self):
1781 1803 added = self.addedvalues
1782 1804 for r in added:
1783 1805 yield r
1784 1806 for r in self.lazyvalues:
1785 1807 if not r in added:
1786 1808 yield r
1787 1809
1788 1810 def add(self, value):
1789 1811 self.addedvalues.add(value)
1790 1812
1791 1813 def update(self, values):
1792 1814 self.addedvalues.update(values)
1793 1815
1794 1816 has = lazyset(self.ancestors(common))
1795 1817 has.add(nullrev)
1796 1818 has.update(common)
1797 1819
1798 1820 # take all ancestors from heads that aren't in has
1799 1821 missing = set()
1800 1822 visit = collections.deque(r for r in heads if r not in has)
1801 1823 while visit:
1802 1824 r = visit.popleft()
1803 1825 if r in missing:
1804 1826 continue
1805 1827 else:
1806 1828 missing.add(r)
1807 1829 for p in self.parentrevs(r):
1808 1830 if p not in has:
1809 1831 visit.append(p)
1810 1832 missing = list(missing)
1811 1833 missing.sort()
1812 1834 return has, [self.node(miss) for miss in missing]
1813 1835
1814 1836 def incrementalmissingrevs(self, common=None):
1815 1837 """Return an object that can be used to incrementally compute the
1816 1838 revision numbers of the ancestors of arbitrary sets that are not
1817 1839 ancestors of common. This is an ancestor.incrementalmissingancestors
1818 1840 object.
1819 1841
1820 1842 'common' is a list of revision numbers. If common is not supplied, uses
1821 1843 nullrev.
1822 1844 """
1823 1845 if common is None:
1824 1846 common = [nullrev]
1825 1847
1826 1848 if rustancestor is not None and self.index.rust_ext_compat:
1827 1849 return rustancestor.MissingAncestors(self.index, common)
1828 1850 return ancestor.incrementalmissingancestors(self.parentrevs, common)
1829 1851
1830 1852 def findmissingrevs(self, common=None, heads=None):
1831 1853 """Return the revision numbers of the ancestors of heads that
1832 1854 are not ancestors of common.
1833 1855
1834 1856 More specifically, return a list of revision numbers corresponding to
1835 1857 nodes N such that every N satisfies the following constraints:
1836 1858
1837 1859 1. N is an ancestor of some node in 'heads'
1838 1860 2. N is not an ancestor of any node in 'common'
1839 1861
1840 1862 The list is sorted by revision number, meaning it is
1841 1863 topologically sorted.
1842 1864
1843 1865 'heads' and 'common' are both lists of revision numbers. If heads is
1844 1866 not supplied, uses all of the revlog's heads. If common is not
1845 1867 supplied, uses nullid."""
1846 1868 if common is None:
1847 1869 common = [nullrev]
1848 1870 if heads is None:
1849 1871 heads = self.headrevs()
1850 1872
1851 1873 inc = self.incrementalmissingrevs(common=common)
1852 1874 return inc.missingancestors(heads)
1853 1875
1854 1876 def findmissing(self, common=None, heads=None):
1855 1877 """Return the ancestors of heads that are not ancestors of common.
1856 1878
1857 1879 More specifically, return a list of nodes N such that every N
1858 1880 satisfies the following constraints:
1859 1881
1860 1882 1. N is an ancestor of some node in 'heads'
1861 1883 2. N is not an ancestor of any node in 'common'
1862 1884
1863 1885 The list is sorted by revision number, meaning it is
1864 1886 topologically sorted.
1865 1887
1866 1888 'heads' and 'common' are both lists of node IDs. If heads is
1867 1889 not supplied, uses all of the revlog's heads. If common is not
1868 1890 supplied, uses nullid."""
1869 1891 if common is None:
1870 1892 common = [self.nullid]
1871 1893 if heads is None:
1872 1894 heads = self.heads()
1873 1895
1874 1896 common = [self.rev(n) for n in common]
1875 1897 heads = [self.rev(n) for n in heads]
1876 1898
1877 1899 inc = self.incrementalmissingrevs(common=common)
1878 1900 return [self.node(r) for r in inc.missingancestors(heads)]
1879 1901
1880 1902 def nodesbetween(self, roots=None, heads=None):
1881 1903 """Return a topological path from 'roots' to 'heads'.
1882 1904
1883 1905 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1884 1906 topologically sorted list of all nodes N that satisfy both of
1885 1907 these constraints:
1886 1908
1887 1909 1. N is a descendant of some node in 'roots'
1888 1910 2. N is an ancestor of some node in 'heads'
1889 1911
1890 1912 Every node is considered to be both a descendant and an ancestor
1891 1913 of itself, so every reachable node in 'roots' and 'heads' will be
1892 1914 included in 'nodes'.
1893 1915
1894 1916 'outroots' is the list of reachable nodes in 'roots', i.e., the
1895 1917 subset of 'roots' that is returned in 'nodes'. Likewise,
1896 1918 'outheads' is the subset of 'heads' that is also in 'nodes'.
1897 1919
1898 1920 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1899 1921 unspecified, uses nullid as the only root. If 'heads' is
1900 1922 unspecified, uses list of all of the revlog's heads."""
1901 1923 nonodes = ([], [], [])
1902 1924 if roots is not None:
1903 1925 roots = list(roots)
1904 1926 if not roots:
1905 1927 return nonodes
1906 1928 lowestrev = min([self.rev(n) for n in roots])
1907 1929 else:
1908 1930 roots = [self.nullid] # Everybody's a descendant of nullid
1909 1931 lowestrev = nullrev
1910 1932 if (lowestrev == nullrev) and (heads is None):
1911 1933 # We want _all_ the nodes!
1912 1934 return (
1913 1935 [self.node(r) for r in self],
1914 1936 [self.nullid],
1915 1937 list(self.heads()),
1916 1938 )
1917 1939 if heads is None:
1918 1940 # All nodes are ancestors, so the latest ancestor is the last
1919 1941 # node.
1920 1942 highestrev = len(self) - 1
1921 1943 # Set ancestors to None to signal that every node is an ancestor.
1922 1944 ancestors = None
1923 1945 # Set heads to an empty dictionary for later discovery of heads
1924 1946 heads = {}
1925 1947 else:
1926 1948 heads = list(heads)
1927 1949 if not heads:
1928 1950 return nonodes
1929 1951 ancestors = set()
1930 1952 # Turn heads into a dictionary so we can remove 'fake' heads.
1931 1953 # Also, later we will be using it to filter out the heads we can't
1932 1954 # find from roots.
1933 1955 heads = dict.fromkeys(heads, False)
1934 1956 # Start at the top and keep marking parents until we're done.
1935 1957 nodestotag = set(heads)
1936 1958 # Remember where the top was so we can use it as a limit later.
1937 1959 highestrev = max([self.rev(n) for n in nodestotag])
1938 1960 while nodestotag:
1939 1961 # grab a node to tag
1940 1962 n = nodestotag.pop()
1941 1963 # Never tag nullid
1942 1964 if n == self.nullid:
1943 1965 continue
1944 1966 # A node's revision number represents its place in a
1945 1967 # topologically sorted list of nodes.
1946 1968 r = self.rev(n)
1947 1969 if r >= lowestrev:
1948 1970 if n not in ancestors:
1949 1971 # If we are possibly a descendant of one of the roots
1950 1972 # and we haven't already been marked as an ancestor
1951 1973 ancestors.add(n) # Mark as ancestor
1952 1974 # Add non-nullid parents to list of nodes to tag.
1953 1975 nodestotag.update(
1954 1976 [p for p in self.parents(n) if p != self.nullid]
1955 1977 )
1956 1978 elif n in heads: # We've seen it before, is it a fake head?
1957 1979 # So it is, real heads should not be the ancestors of
1958 1980 # any other heads.
1959 1981 heads.pop(n)
1960 1982 if not ancestors:
1961 1983 return nonodes
1962 1984 # Now that we have our set of ancestors, we want to remove any
1963 1985 # roots that are not ancestors.
1964 1986
1965 1987 # If one of the roots was nullid, everything is included anyway.
1966 1988 if lowestrev > nullrev:
1967 1989 # But, since we weren't, let's recompute the lowest rev to not
1968 1990 # include roots that aren't ancestors.
1969 1991
1970 1992 # Filter out roots that aren't ancestors of heads
1971 1993 roots = [root for root in roots if root in ancestors]
1972 1994 # Recompute the lowest revision
1973 1995 if roots:
1974 1996 lowestrev = min([self.rev(root) for root in roots])
1975 1997 else:
1976 1998 # No more roots? Return empty list
1977 1999 return nonodes
1978 2000 else:
1979 2001 # We are descending from nullid, and don't need to care about
1980 2002 # any other roots.
1981 2003 lowestrev = nullrev
1982 2004 roots = [self.nullid]
1983 2005 # Transform our roots list into a set.
1984 2006 descendants = set(roots)
1985 2007 # Also, keep the original roots so we can filter out roots that aren't
1986 2008 # 'real' roots (i.e. are descended from other roots).
1987 2009 roots = descendants.copy()
1988 2010 # Our topologically sorted list of output nodes.
1989 2011 orderedout = []
1990 2012 # Don't start at nullid since we don't want nullid in our output list,
1991 2013 # and if nullid shows up in descendants, empty parents will look like
1992 2014 # they're descendants.
1993 2015 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1994 2016 n = self.node(r)
1995 2017 isdescendant = False
1996 2018 if lowestrev == nullrev: # Everybody is a descendant of nullid
1997 2019 isdescendant = True
1998 2020 elif n in descendants:
1999 2021 # n is already a descendant
2000 2022 isdescendant = True
2001 2023 # This check only needs to be done here because all the roots
2002 2024 # will start being marked as descendants before the loop.
2003 2025 if n in roots:
2004 2026 # If n was a root, check if it's a 'real' root.
2005 2027 p = tuple(self.parents(n))
2006 2028 # If any of its parents are descendants, it's not a root.
2007 2029 if (p[0] in descendants) or (p[1] in descendants):
2008 2030 roots.remove(n)
2009 2031 else:
2010 2032 p = tuple(self.parents(n))
2011 2033 # A node is a descendant if either of its parents is a
2012 2034 # descendant. (We seeded the descendants set with the roots
2013 2035 # up there, remember?)
2014 2036 if (p[0] in descendants) or (p[1] in descendants):
2015 2037 descendants.add(n)
2016 2038 isdescendant = True
2017 2039 if isdescendant and ((ancestors is None) or (n in ancestors)):
2018 2040 # Only include nodes that are both descendants and ancestors.
2019 2041 orderedout.append(n)
2020 2042 if (ancestors is not None) and (n in heads):
2021 2043 # We're trying to figure out which heads are reachable
2022 2044 # from roots.
2023 2045 # Mark this head as having been reached
2024 2046 heads[n] = True
2025 2047 elif ancestors is None:
2026 2048 # Otherwise, we're trying to discover the heads.
2027 2049 # Assume this is a head because if it isn't, the next step
2028 2050 # will eventually remove it.
2029 2051 heads[n] = True
2030 2052 # But, obviously its parents aren't.
2031 2053 for p in self.parents(n):
2032 2054 heads.pop(p, None)
2033 2055 heads = [head for head, flag in heads.items() if flag]
2034 2056 roots = list(roots)
2035 2057 assert orderedout
2036 2058 assert roots
2037 2059 assert heads
2038 2060 return (orderedout, roots, heads)
2039 2061
2040 2062 def headrevs(self, revs=None):
2041 2063 if revs is None:
2042 2064 try:
2043 2065 return self.index.headrevs()
2044 2066 except AttributeError:
2045 2067 return self._headrevs()
2046 2068 if rustdagop is not None and self.index.rust_ext_compat:
2047 2069 return rustdagop.headrevs(self.index, revs)
2048 2070 return dagop.headrevs(revs, self._uncheckedparentrevs)
2049 2071
2050 2072 def computephases(self, roots):
2051 2073 return self.index.computephasesmapsets(roots)
2052 2074
2053 2075 def _headrevs(self):
2054 2076 count = len(self)
2055 2077 if not count:
2056 2078 return [nullrev]
2057 2079 # we won't iter over filtered rev so nobody is a head at start
2058 2080 ishead = [0] * (count + 1)
2059 2081 index = self.index
2060 2082 for r in self:
2061 2083 ishead[r] = 1 # I may be a head
2062 2084 e = index[r]
2063 2085 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
2064 2086 return [r for r, val in enumerate(ishead) if val]
2065 2087
2066 2088 def heads(self, start=None, stop=None):
2067 2089 """return the list of all nodes that have no children
2068 2090
2069 2091 if start is specified, only heads that are descendants of
2070 2092 start will be returned
2071 2093 if stop is specified, it will consider all the revs from stop
2072 2094 as if they had no children
2073 2095 """
2074 2096 if start is None and stop is None:
2075 2097 if not len(self):
2076 2098 return [self.nullid]
2077 2099 return [self.node(r) for r in self.headrevs()]
2078 2100
2079 2101 if start is None:
2080 2102 start = nullrev
2081 2103 else:
2082 2104 start = self.rev(start)
2083 2105
2084 2106 stoprevs = {self.rev(n) for n in stop or []}
2085 2107
2086 2108 revs = dagop.headrevssubset(
2087 2109 self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
2088 2110 )
2089 2111
2090 2112 return [self.node(rev) for rev in revs]
2091 2113
2092 2114 def children(self, node):
2093 2115 """find the children of a given node"""
2094 2116 c = []
2095 2117 p = self.rev(node)
2096 2118 for r in self.revs(start=p + 1):
2097 2119 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
2098 2120 if prevs:
2099 2121 for pr in prevs:
2100 2122 if pr == p:
2101 2123 c.append(self.node(r))
2102 2124 elif p == nullrev:
2103 2125 c.append(self.node(r))
2104 2126 return c
2105 2127
2106 2128 def commonancestorsheads(self, a, b):
2107 2129 """calculate all the heads of the common ancestors of nodes a and b"""
2108 2130 a, b = self.rev(a), self.rev(b)
2109 2131 ancs = self._commonancestorsheads(a, b)
2110 2132 return pycompat.maplist(self.node, ancs)
2111 2133
2112 2134 def _commonancestorsheads(self, *revs):
2113 2135 """calculate all the heads of the common ancestors of revs"""
2114 2136 try:
2115 2137 ancs = self.index.commonancestorsheads(*revs)
2116 2138 except (AttributeError, OverflowError): # C implementation failed
2117 2139 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
2118 2140 return ancs
2119 2141
2120 2142 def isancestor(self, a, b):
2121 2143 """return True if node a is an ancestor of node b
2122 2144
2123 2145 A revision is considered an ancestor of itself."""
2124 2146 a, b = self.rev(a), self.rev(b)
2125 2147 return self.isancestorrev(a, b)
2126 2148
2127 2149 def isancestorrev(self, a, b):
2128 2150 """return True if revision a is an ancestor of revision b
2129 2151
2130 2152 A revision is considered an ancestor of itself.
2131 2153
2132 2154 The implementation of this is trivial but the use of
2133 2155 reachableroots is not."""
2134 2156 if a == nullrev:
2135 2157 return True
2136 2158 elif a == b:
2137 2159 return True
2138 2160 elif a > b:
2139 2161 return False
2140 2162 return bool(self.reachableroots(a, [b], [a], includepath=False))
2141 2163
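As the docstring notes, the interesting part is delegating to reachableroots. A hedged sketch of the same ancestor test done naively, walking parent pointers from b back toward a over a hypothetical parent table (the real code avoids this walk for performance):

def is_ancestor_rev_sketch(parentrevs, a, b):
    # parentrevs[r] is a (p1, p2) pair; -1 stands for nullrev
    if a == -1 or a == b:
        return True
    if a > b:
        return False  # parents always have lower revision numbers
    stack, seen = [b], set()
    while stack:
        for p in parentrevs[stack.pop()]:
            if p == a:
                return True
            if p > a and p not in seen:  # nothing below a can lead back to a
                seen.add(p)
                stack.append(p)
    return False

parents = [(-1, -1), (0, -1), (1, -1), (1, -1)]
assert is_ancestor_rev_sketch(parents, 0, 3)      # 0 is an ancestor of 3
assert not is_ancestor_rev_sketch(parents, 2, 3)  # 2 and 3 are siblings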
2142 2164 def reachableroots(self, minroot, heads, roots, includepath=False):
2143 2165 """return (heads(::(<roots> and <roots>::<heads>)))
2144 2166
2145 2167 If includepath is True, return (<roots>::<heads>)."""
2146 2168 try:
2147 2169 return self.index.reachableroots2(
2148 2170 minroot, heads, roots, includepath
2149 2171 )
2150 2172 except AttributeError:
2151 2173 return dagop._reachablerootspure(
2152 2174 self.parentrevs, minroot, roots, heads, includepath
2153 2175 )
2154 2176
2155 2177 def ancestor(self, a, b):
2156 2178 """calculate the "best" common ancestor of nodes a and b"""
2157 2179
2158 2180 a, b = self.rev(a), self.rev(b)
2159 2181 try:
2160 2182 ancs = self.index.ancestors(a, b)
2161 2183 except (AttributeError, OverflowError):
2162 2184 ancs = ancestor.ancestors(self.parentrevs, a, b)
2163 2185 if ancs:
2164 2186 # choose a consistent winner when there's a tie
2165 2187 return min(map(self.node, ancs))
2166 2188 return self.nullid
2167 2189
2168 2190 def _match(self, id):
2169 2191 if isinstance(id, int):
2170 2192 # rev
2171 2193 return self.node(id)
2172 2194 if len(id) == self.nodeconstants.nodelen:
2173 2195 # possibly a binary node
2174 2196 # odds of a binary node being all hex in ASCII are 1 in 10**25
2175 2197 try:
2176 2198 node = id
2177 2199 self.rev(node) # quick search the index
2178 2200 return node
2179 2201 except error.LookupError:
2180 2202 pass # may be partial hex id
2181 2203 try:
2182 2204 # str(rev)
2183 2205 rev = int(id)
2184 2206 if b"%d" % rev != id:
2185 2207 raise ValueError
2186 2208 if rev < 0:
2187 2209 rev = len(self) + rev
2188 2210 if rev < 0 or rev >= len(self):
2189 2211 raise ValueError
2190 2212 return self.node(rev)
2191 2213 except (ValueError, OverflowError):
2192 2214 pass
2193 2215 if len(id) == 2 * self.nodeconstants.nodelen:
2194 2216 try:
2195 2217 # a full hex nodeid?
2196 2218 node = bin(id)
2197 2219 self.rev(node)
2198 2220 return node
2199 2221 except (binascii.Error, error.LookupError):
2200 2222 pass
2201 2223
2202 2224 def _partialmatch(self, id):
2203 2225 # we don't care about wdirfilenodeids as they should always be full hashes
2204 2226 maybewdir = self.nodeconstants.wdirhex.startswith(id)
2205 2227 ambiguous = False
2206 2228 try:
2207 2229 partial = self.index.partialmatch(id)
2208 2230 if partial and self.hasnode(partial):
2209 2231 if maybewdir:
2210 2232 # single 'ff...' match in radix tree, ambiguous with wdir
2211 2233 ambiguous = True
2212 2234 else:
2213 2235 return partial
2214 2236 elif maybewdir:
2215 2237 # no 'ff...' match in radix tree, wdir identified
2216 2238 raise error.WdirUnsupported
2217 2239 else:
2218 2240 return None
2219 2241 except error.RevlogError:
2220 2242 # parsers.c radix tree lookup gave multiple matches
2221 2243 # fast path: for unfiltered changelog, radix tree is accurate
2222 2244 if not getattr(self, 'filteredrevs', None):
2223 2245 ambiguous = True
2224 2246 # fall through to slow path that filters hidden revisions
2225 2247 except (AttributeError, ValueError):
2226 2248 # we are pure python, or key is not hex
2227 2249 pass
2228 2250 if ambiguous:
2229 2251 raise error.AmbiguousPrefixLookupError(
2230 2252 id, self.display_id, _(b'ambiguous identifier')
2231 2253 )
2232 2254
2233 2255 if id in self._pcache:
2234 2256 return self._pcache[id]
2235 2257
2236 2258 if len(id) <= 40:
2237 2259 # hex(node)[:...]
2238 2260 l = len(id) // 2 * 2 # grab an even number of digits
2239 2261 try:
2240 2262 # we're dropping the last digit, so let's check that it's hex,
2241 2263 # to avoid the expensive computation below if it's not
2242 2264 if len(id) % 2 > 0:
2243 2265 if not (id[-1] in hexdigits):
2244 2266 return None
2245 2267 prefix = bin(id[:l])
2246 2268 except binascii.Error:
2247 2269 pass
2248 2270 else:
2249 2271 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
2250 2272 nl = [
2251 2273 n for n in nl if hex(n).startswith(id) and self.hasnode(n)
2252 2274 ]
2253 2275 if self.nodeconstants.nullhex.startswith(id):
2254 2276 nl.append(self.nullid)
2255 2277 if len(nl) > 0:
2256 2278 if len(nl) == 1 and not maybewdir:
2257 2279 self._pcache[id] = nl[0]
2258 2280 return nl[0]
2259 2281 raise error.AmbiguousPrefixLookupError(
2260 2282 id, self.display_id, _(b'ambiguous identifier')
2261 2283 )
2262 2284 if maybewdir:
2263 2285 raise error.WdirUnsupported
2264 2286 return None
2265 2287
2266 2288 def lookup(self, id):
2267 2289 """locate a node based on:
2268 2290 - revision number or str(revision number)
2269 2291 - nodeid or subset of hex nodeid
2270 2292 """
2271 2293 n = self._match(id)
2272 2294 if n is not None:
2273 2295 return n
2274 2296 n = self._partialmatch(id)
2275 2297 if n:
2276 2298 return n
2277 2299
2278 2300 raise error.LookupError(id, self.display_id, _(b'no match found'))
2279 2301
2280 2302 def shortest(self, node, minlength=1):
2281 2303 """Find the shortest unambiguous prefix that matches node."""
2282 2304
2283 2305 def isvalid(prefix):
2284 2306 try:
2285 2307 matchednode = self._partialmatch(prefix)
2286 2308 except error.AmbiguousPrefixLookupError:
2287 2309 return False
2288 2310 except error.WdirUnsupported:
2289 2311 # single 'ff...' match
2290 2312 return True
2291 2313 if matchednode is None:
2292 2314 raise error.LookupError(node, self.display_id, _(b'no node'))
2293 2315 return True
2294 2316
2295 2317 def maybewdir(prefix):
2296 2318 return all(c == b'f' for c in pycompat.iterbytestr(prefix))
2297 2319
2298 2320 hexnode = hex(node)
2299 2321
2300 2322 def disambiguate(hexnode, minlength):
2301 2323 """Disambiguate against wdirid."""
2302 2324 for length in range(minlength, len(hexnode) + 1):
2303 2325 prefix = hexnode[:length]
2304 2326 if not maybewdir(prefix):
2305 2327 return prefix
2306 2328
2307 2329 if not getattr(self, 'filteredrevs', None):
2308 2330 try:
2309 2331 length = max(self.index.shortest(node), minlength)
2310 2332 return disambiguate(hexnode, length)
2311 2333 except error.RevlogError:
2312 2334 if node != self.nodeconstants.wdirid:
2313 2335 raise error.LookupError(
2314 2336 node, self.display_id, _(b'no node')
2315 2337 )
2316 2338 except AttributeError:
2317 2339 # Fall through to pure code
2318 2340 pass
2319 2341
2320 2342 if node == self.nodeconstants.wdirid:
2321 2343 for length in range(minlength, len(hexnode) + 1):
2322 2344 prefix = hexnode[:length]
2323 2345 if isvalid(prefix):
2324 2346 return prefix
2325 2347
2326 2348 for length in range(minlength, len(hexnode) + 1):
2327 2349 prefix = hexnode[:length]
2328 2350 if isvalid(prefix):
2329 2351 return disambiguate(hexnode, length)
2330 2352
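The prefix machinery above resolves possibly-abbreviated identifiers and computes the shortest prefix that stays unambiguous. A toy sketch over a plain list of hex node ids (all names here are hypothetical; the real code goes through the index/radix tree and also has to worry about the wdir pseudo-node):

def shortest_sketch(all_hexnodes, hexnode, minlength=1):
    # grow the prefix until exactly one known node matches it
    for length in range(minlength, len(hexnode) + 1):
        prefix = hexnode[:length]
        matches = [n for n in all_hexnodes if n.startswith(prefix)]
        if len(matches) == 1:
            return prefix
    return hexnode

nodes = ['a1b2c3d4', 'a1f90011', '0f334455']
assert shortest_sketch(nodes, '0f334455') == '0'
assert shortest_sketch(nodes, 'a1b2c3d4') == 'a1b'   # 'a1' is still ambiguous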
2331 2353 def cmp(self, node, text):
2332 2354 """compare text with a given file revision
2333 2355
2334 2356 returns True if text is different from what is stored.
2335 2357 """
2336 2358 p1, p2 = self.parents(node)
2337 2359 return storageutil.hashrevisionsha1(text, p1, p2) != node
2338 2360
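``cmp`` works by recomputing the node hash rather than fetching and comparing the stored text. A self-contained sketch of that check, assuming the usual sha1(sorted parents + text) node computation provided by ``storageutil.hashrevisionsha1``:

import hashlib

def hashrevision_sketch(text, p1, p2):
    # node = sha1(min(p1, p2) + max(p1, p2) + text)
    a, b = sorted([p1, p2])
    return hashlib.sha1(a + b + text).digest()

nullid = b'\0' * 20
stored_node = hashrevision_sketch(b'hello\n', nullid, nullid)
# cmp()-style answer: "is text different from what is stored?"
assert hashrevision_sketch(b'hello\n', nullid, nullid) == stored_node    # unchanged
assert hashrevision_sketch(b'changed\n', nullid, nullid) != stored_node  # different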
2339 def _chunk(self, rev):
2340 """Obtain a single decompressed chunk for a revision.
2341
2342 Accepts an integer revision and an optional already-open file handle
2343 to be used for reading. If used, the seek position of the file will not
2344 be preserved.
2345
2346 Returns a str holding uncompressed data for the requested revision.
2347 """
2348 compression_mode = self.index[rev][10]
2349 data = self._inner.get_segment_for_revs(rev, rev)[1]
2350 if compression_mode == COMP_MODE_PLAIN:
2351 return data
2352 elif compression_mode == COMP_MODE_DEFAULT:
2353 return self._inner._decompressor(data)
2354 elif compression_mode == COMP_MODE_INLINE:
2355 return self._inner.decompress(data)
2356 else:
2357 msg = b'unknown compression mode %d'
2358 msg %= compression_mode
2359 raise error.RevlogError(msg)
2360
2361 2361 def _chunks(self, revs, targetsize=None):
2362 2362 """Obtain decompressed chunks for the specified revisions.
2363 2363
2364 2364 Accepts an iterable of numeric revisions that are assumed to be in
2365 2365 ascending order. Reads are performed through the revlog's inner file
2366 2366 object, so no already-open file handle needs to be passed in for
2367 2367 reading.
2368 2368
2369 2369 This function is similar to calling ``self._inner._chunk()`` multiple times,
2370 2370 but is faster.
2371 2371
2372 2372 Returns a list with decompressed data for each requested revision.
2373 2373 """
2374 2374 if not revs:
2375 2375 return []
2376 2376 start = self.start
2377 2377 length = self.length
2378 2378 inline = self._inline
2379 2379 iosize = self.index.entry_size
2380 2380 buffer = util.buffer
2381 2381
2382 2382 l = []
2383 2383 ladd = l.append
2384 2384
2385 2385 if not self.data_config.with_sparse_read:
2386 2386 slicedchunks = (revs,)
2387 2387 else:
2388 2388 slicedchunks = deltautil.slicechunk(
2389 2389 self, revs, targetsize=targetsize
2390 2390 )
2391 2391
2392 2392 for revschunk in slicedchunks:
2393 2393 firstrev = revschunk[0]
2394 2394 # Skip trailing revisions with empty diff
2395 2395 for lastrev in revschunk[::-1]:
2396 2396 if length(lastrev) != 0:
2397 2397 break
2398 2398
2399 2399 try:
2400 2400 offset, data = self._inner.get_segment_for_revs(
2401 2401 firstrev,
2402 2402 lastrev,
2403 2403 )
2404 2404 except OverflowError:
2405 2405 # issue4215 - we can't cache a run of chunks greater than
2406 2406 # 2G on Windows
2407 return [self._chunk(rev) for rev in revschunk]
2407 return [self._inner._chunk(rev) for rev in revschunk]
2408 2408
2409 2409 decomp = self._inner.decompress
2410 2410 # self._decompressor might be None, but will not be used in that case
2411 2411 def_decomp = self._inner._decompressor
2412 2412 for rev in revschunk:
2413 2413 chunkstart = start(rev)
2414 2414 if inline:
2415 2415 chunkstart += (rev + 1) * iosize
2416 2416 chunklength = length(rev)
2417 2417 comp_mode = self.index[rev][10]
2418 2418 c = buffer(data, chunkstart - offset, chunklength)
2419 2419 if comp_mode == COMP_MODE_PLAIN:
2420 2420 ladd(c)
2421 2421 elif comp_mode == COMP_MODE_INLINE:
2422 2422 ladd(decomp(c))
2423 2423 elif comp_mode == COMP_MODE_DEFAULT:
2424 2424 ladd(def_decomp(c))
2425 2425 else:
2426 2426 msg = b'unknown compression mode %d'
2427 2427 msg %= comp_mode
2428 2428 raise error.RevlogError(msg)
2429 2429
2430 2430 return l
2431 2431
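Both ``_chunk`` and ``_chunks`` end up dispatching on the per-revision compression mode stored in the index entry. A hedged standalone sketch of that dispatch; the COMP_MODE_* values and the 'u' header handling below are illustrative stand-ins, with zlib playing the role of the configured decompressor:

import zlib

COMP_MODE_PLAIN, COMP_MODE_DEFAULT, COMP_MODE_INLINE = 0, 1, 2  # stand-ins

def decode_chunk_sketch(data, mode, default_decompress=zlib.decompress):
    if mode == COMP_MODE_PLAIN:
        return data                      # stored verbatim
    if mode == COMP_MODE_DEFAULT:
        return default_decompress(data)  # revlog-wide default compressor
    if mode == COMP_MODE_INLINE:
        # the chunk carries its own header; 'u' means "stored uncompressed"
        if data[:1] == b'u':
            return data[1:]
        return zlib.decompress(data)
    raise ValueError('unknown compression mode %d' % mode)

assert decode_chunk_sketch(b'abc', COMP_MODE_PLAIN) == b'abc'
assert decode_chunk_sketch(zlib.compress(b'abc'), COMP_MODE_DEFAULT) == b'abc'
assert decode_chunk_sketch(b'uabc', COMP_MODE_INLINE) == b'abc'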
2432 2432 def deltaparent(self, rev):
2433 2433 """return deltaparent of the given revision"""
2434 2434 base = self.index[rev][3]
2435 2435 if base == rev:
2436 2436 return nullrev
2437 2437 elif self.delta_config.general_delta:
2438 2438 return base
2439 2439 else:
2440 2440 return rev - 1
2441 2441
2442 2442 def issnapshot(self, rev):
2443 2443 """tells whether rev is a snapshot"""
2444 2444 if not self.delta_config.sparse_revlog:
2445 2445 return self.deltaparent(rev) == nullrev
2446 2446 elif hasattr(self.index, 'issnapshot'):
2447 2447 # directly assign the method to cache the testing and access
2448 2448 self.issnapshot = self.index.issnapshot
2449 2449 return self.issnapshot(rev)
2450 2450 if rev == nullrev:
2451 2451 return True
2452 2452 entry = self.index[rev]
2453 2453 base = entry[3]
2454 2454 if base == rev:
2455 2455 return True
2456 2456 if base == nullrev:
2457 2457 return True
2458 2458 p1 = entry[5]
2459 2459 while self.length(p1) == 0:
2460 2460 b = self.deltaparent(p1)
2461 2461 if b == p1:
2462 2462 break
2463 2463 p1 = b
2464 2464 p2 = entry[6]
2465 2465 while self.length(p2) == 0:
2466 2466 b = self.deltaparent(p2)
2467 2467 if b == p2:
2468 2468 break
2469 2469 p2 = b
2470 2470 if base == p1 or base == p2:
2471 2471 return False
2472 2472 return self.issnapshot(base)
2473 2473
2474 2474 def snapshotdepth(self, rev):
2475 2475 """number of snapshots in the chain before this one"""
2476 2476 if not self.issnapshot(rev):
2477 2477 raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
2478 2478 return len(self._deltachain(rev)[0]) - 1
2479 2479
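``snapshotdepth`` and the raw-text reconstruction further below both rely on walking a delta chain back to a base full text. A minimal sketch of that walk over a hypothetical ``deltaparent`` mapping, with -1 standing in for nullrev:

def delta_chain_sketch(deltaparent, rev):
    chain = []
    while rev != -1:          # stop once we hit a full-text base
        chain.append(rev)
        rev = deltaparent(rev)
    chain.reverse()           # oldest (the base) first, like _deltachain
    return chain

# revs 0 and 3 are stored as full texts, revs 1 and 2 as deltas
bases = {0: -1, 1: 0, 2: 1, 3: -1}
assert delta_chain_sketch(bases.get, 2) == [0, 1, 2]
assert delta_chain_sketch(bases.get, 3) == [3]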
2480 2480 def revdiff(self, rev1, rev2):
2481 2481 """return or calculate a delta between two revisions
2482 2482
2483 2483 The delta calculated is in binary form and is intended to be written to
2484 2484 revlog data directly. So this function needs raw revision data.
2485 2485 """
2486 2486 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
2487 return bytes(self._chunk(rev2))
2487 return bytes(self._inner._chunk(rev2))
2488 2488
2489 2489 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
2490 2490
2491 2491 def revision(self, nodeorrev):
2492 2492 """return an uncompressed revision of a given node or revision
2493 2493 number.
2494 2494 """
2495 2495 return self._revisiondata(nodeorrev)
2496 2496
2497 2497 def sidedata(self, nodeorrev):
2498 2498 """a map of extra data related to the changeset but not part of the hash
2499 2499
2500 2500 This function currently returns a dictionary. However, a more advanced
2501 2501 mapping object will likely be used in the future for more
2502 2502 efficient/lazy code.
2503 2503 """
2504 2504 # deal with <nodeorrev> argument type
2505 2505 if isinstance(nodeorrev, int):
2506 2506 rev = nodeorrev
2507 2507 else:
2508 2508 rev = self.rev(nodeorrev)
2509 2509 return self._sidedata(rev)
2510 2510
2511 2511 def _revisiondata(self, nodeorrev, raw=False):
2512 2512 # deal with <nodeorrev> argument type
2513 2513 if isinstance(nodeorrev, int):
2514 2514 rev = nodeorrev
2515 2515 node = self.node(rev)
2516 2516 else:
2517 2517 node = nodeorrev
2518 2518 rev = None
2519 2519
2520 2520 # fast path the special `nullid` rev
2521 2521 if node == self.nullid:
2522 2522 return b""
2523 2523
2524 2524 # ``rawtext`` is the text as stored inside the revlog. Might be the
2525 2525 # revision or might need to be processed to retrieve the revision.
2526 2526 rev, rawtext, validated = self._rawtext(node, rev)
2527 2527
2528 2528 if raw and validated:
2529 2529 # if we don't want to process the raw text and that raw
2530 2530 # text is cached, we can exit early.
2531 2531 return rawtext
2532 2532 if rev is None:
2533 2533 rev = self.rev(node)
2534 2534 # the revlog's flag for this revision
2535 2535 # (usually alter its state or content)
2536 2536 flags = self.flags(rev)
2537 2537
2538 2538 if validated and flags == REVIDX_DEFAULT_FLAGS:
2539 2539 # no extra flags set, no flag processor runs, text = rawtext
2540 2540 return rawtext
2541 2541
2542 2542 if raw:
2543 2543 validatehash = flagutil.processflagsraw(self, rawtext, flags)
2544 2544 text = rawtext
2545 2545 else:
2546 2546 r = flagutil.processflagsread(self, rawtext, flags)
2547 2547 text, validatehash = r
2548 2548 if validatehash:
2549 2549 self.checkhash(text, node, rev=rev)
2550 2550 if not validated:
2551 2551 self._revisioncache = (node, rev, rawtext)
2552 2552
2553 2553 return text
2554 2554
2555 2555 def _rawtext(self, node, rev):
2556 2556 """return the possibly unvalidated rawtext for a revision
2557 2557
2558 2558 returns (rev, rawtext, validated)
2559 2559 """
2560 2560
2561 2561 # revision in the cache (could be useful to apply delta)
2562 2562 cachedrev = None
2563 2563 # An intermediate text to apply deltas to
2564 2564 basetext = None
2565 2565
2566 2566 # Check if we have the entry in cache
2567 2567 # The cache entry looks like (node, rev, rawtext)
2568 2568 if self._revisioncache:
2569 2569 if self._revisioncache[0] == node:
2570 2570 return (rev, self._revisioncache[2], True)
2571 2571 cachedrev = self._revisioncache[1]
2572 2572
2573 2573 if rev is None:
2574 2574 rev = self.rev(node)
2575 2575
2576 2576 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
2577 2577 if stopped:
2578 2578 basetext = self._revisioncache[2]
2579 2579
2580 2580 # drop cache to save memory, the caller is expected to
2581 2581 # update self._revisioncache after validating the text
2582 2582 self._revisioncache = None
2583 2583
2584 2584 targetsize = None
2585 2585 rawsize = self.index[rev][2]
2586 2586 if 0 <= rawsize:
2587 2587 targetsize = 4 * rawsize
2588 2588
2589 2589 bins = self._chunks(chain, targetsize=targetsize)
2590 2590 if basetext is None:
2591 2591 basetext = bytes(bins[0])
2592 2592 bins = bins[1:]
2593 2593
2594 2594 rawtext = mdiff.patches(basetext, bins)
2595 2595 del basetext # let us have a chance to free memory early
2596 2596 return (rev, rawtext, False)
2597 2597
2598 2598 def _sidedata(self, rev):
2599 2599 """Return the sidedata for a given revision number."""
2600 2600 index_entry = self.index[rev]
2601 2601 sidedata_offset = index_entry[8]
2602 2602 sidedata_size = index_entry[9]
2603 2603
2604 2604 if self._inline:
2605 2605 sidedata_offset += self.index.entry_size * (1 + rev)
2606 2606 if sidedata_size == 0:
2607 2607 return {}
2608 2608
2609 2609 if self._docket.sidedata_end < sidedata_offset + sidedata_size:
2610 2610 filename = self._sidedatafile
2611 2611 end = self._docket.sidedata_end
2612 2612 offset = sidedata_offset
2613 2613 length = sidedata_size
2614 2614 m = FILE_TOO_SHORT_MSG % (filename, length, offset, end)
2615 2615 raise error.RevlogError(m)
2616 2616
2617 2617 comp_segment = self._inner._segmentfile_sidedata.read_chunk(
2618 2618 sidedata_offset, sidedata_size
2619 2619 )
2620 2620
2621 2621 comp = self.index[rev][11]
2622 2622 if comp == COMP_MODE_PLAIN:
2623 2623 segment = comp_segment
2624 2624 elif comp == COMP_MODE_DEFAULT:
2625 2625 segment = self._inner._decompressor(comp_segment)
2626 2626 elif comp == COMP_MODE_INLINE:
2627 2627 segment = self._inner.decompress(comp_segment)
2628 2628 else:
2629 2629 msg = b'unknown compression mode %d'
2630 2630 msg %= comp
2631 2631 raise error.RevlogError(msg)
2632 2632
2633 2633 sidedata = sidedatautil.deserialize_sidedata(segment)
2634 2634 return sidedata
2635 2635
2636 2636 def rawdata(self, nodeorrev):
2637 2637 """return the uncompressed raw data of a given node or revision number."""
2638 2638 return self._revisiondata(nodeorrev, raw=True)
2639 2639
2640 2640 def hash(self, text, p1, p2):
2641 2641 """Compute a node hash.
2642 2642
2643 2643 Available as a function so that subclasses can replace the hash
2644 2644 as needed.
2645 2645 """
2646 2646 return storageutil.hashrevisionsha1(text, p1, p2)
2647 2647
2648 2648 def checkhash(self, text, node, p1=None, p2=None, rev=None):
2649 2649 """Check node hash integrity.
2650 2650
2651 2651 Available as a function so that subclasses can extend hash mismatch
2652 2652 behaviors as needed.
2653 2653 """
2654 2654 try:
2655 2655 if p1 is None and p2 is None:
2656 2656 p1, p2 = self.parents(node)
2657 2657 if node != self.hash(text, p1, p2):
2658 2658 # Clear the revision cache on hash failure. The revision cache
2659 2659 # only stores the raw revision and clearing the cache does have
2660 2660 # the side-effect that we won't have a cache hit when the raw
2661 2661 # revision data is accessed. But this case should be rare and
2662 2662 # it is extra work to teach the cache about the hash
2663 2663 # verification state.
2664 2664 if self._revisioncache and self._revisioncache[0] == node:
2665 2665 self._revisioncache = None
2666 2666
2667 2667 revornode = rev
2668 2668 if revornode is None:
2669 2669 revornode = templatefilters.short(hex(node))
2670 2670 raise error.RevlogError(
2671 2671 _(b"integrity check failed on %s:%s")
2672 2672 % (self.display_id, pycompat.bytestr(revornode))
2673 2673 )
2674 2674 except error.RevlogError:
2675 2675 if self.feature_config.censorable and storageutil.iscensoredtext(
2676 2676 text
2677 2677 ):
2678 2678 raise error.CensoredNodeError(self.display_id, node, text)
2679 2679 raise
2680 2680
2681 2681 @property
2682 2682 def _split_index_file(self):
2683 2683 """the path where the index of an ongoing splitting operation is expected
2684 2684
2685 2685 The file will only exist if a splitting operation is in progress, but
2686 2686 it is always expected at the same location."""
2687 2687 parts = self.radix.split(b'/')
2688 2688 if len(parts) > 1:
2689 2689 # append a '-s' suffix to the ``data/`` or ``meta/`` base
2690 2690 head = parts[0] + b'-s'
2691 2691 mids = parts[1:-1]
2692 2692 tail = parts[-1] + b'.i'
2693 2693 pieces = [head] + mids + [tail]
2694 2694 return b'/'.join(pieces)
2695 2695 else:
2696 2696 # the revlog is stored at the root of the store (changelog or
2697 2697 # manifest), no risk of collision.
2698 2698 return self.radix + b'.i.s'
2699 2699
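A standalone sketch of the path computation above, for two hypothetical radix values (illustrative only, not taken from a real store):

def split_index_file_sketch(radix):
    parts = radix.split(b'/')
    if len(parts) > 1:
        head = parts[0] + b'-s'   # data/... -> data-s/..., meta/... -> meta-s/...
        return b'/'.join([head] + parts[1:-1] + [parts[-1] + b'.i'])
    # changelog/manifest live at the store root, so only the suffix changes
    return radix + b'.i.s'

assert split_index_file_sketch(b'data/foo') == b'data-s/foo.i'
assert split_index_file_sketch(b'00changelog') == b'00changelog.i.s'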
2700 2700 def _enforceinlinesize(self, tr, side_write=True):
2701 2701 """Check if the revlog is too big for inline and convert if so.
2702 2702
2703 2703 This should be called after revisions are added to the revlog. If the
2704 2704 revlog has grown too large to be an inline revlog, it will convert it
2705 2705 to use multiple index and data files.
2706 2706 """
2707 2707 tiprev = len(self) - 1
2708 2708 total_size = self.start(tiprev) + self.length(tiprev)
2709 2709 if not self._inline or total_size < _maxinline:
2710 2710 return
2711 2711
2712 2712 if self._docket is not None:
2713 2713 msg = b"inline revlog should not have a docket"
2714 2714 raise error.ProgrammingError(msg)
2715 2715
2716 2716 troffset = tr.findoffset(self._indexfile)
2717 2717 if troffset is None:
2718 2718 raise error.RevlogError(
2719 2719 _(b"%s not found in the transaction") % self._indexfile
2720 2720 )
2721 2721 if troffset:
2722 2722 tr.addbackup(self._indexfile, for_offset=True)
2723 2723 tr.add(self._datafile, 0)
2724 2724
2725 2725 new_index_file_path = None
2726 2726 if side_write:
2727 2727 old_index_file_path = self._indexfile
2728 2728 new_index_file_path = self._split_index_file
2729 2729 opener = self.opener
2730 2730 weak_self = weakref.ref(self)
2731 2731
2732 2732 # the "split" index replaces the real index when the transaction is
2733 2733 # finalized
2734 2734 def finalize_callback(tr):
2735 2735 opener.rename(
2736 2736 new_index_file_path,
2737 2737 old_index_file_path,
2738 2738 checkambig=True,
2739 2739 )
2740 2740 maybe_self = weak_self()
2741 2741 if maybe_self is not None:
2742 2742 maybe_self._indexfile = old_index_file_path
2743 2743 maybe_self._inner.index_file = maybe_self._indexfile
2744 2744
2745 2745 def abort_callback(tr):
2746 2746 maybe_self = weak_self()
2747 2747 if maybe_self is not None:
2748 2748 maybe_self._indexfile = old_index_file_path
2749 2749 maybe_self._inner.inline = True
2750 2750 maybe_self._inner.index_file = old_index_file_path
2751 2751
2752 2752 tr.registertmp(new_index_file_path)
2753 2753 if self.target[1] is not None:
2754 2754 callback_id = b'000-revlog-split-%d-%s' % self.target
2755 2755 else:
2756 2756 callback_id = b'000-revlog-split-%d' % self.target[0]
2757 2757 tr.addfinalize(callback_id, finalize_callback)
2758 2758 tr.addabort(callback_id, abort_callback)
2759 2759
2760 2760 self._format_flags &= ~FLAG_INLINE_DATA
2761 2761 self._inner.split_inline(
2762 2762 tr,
2763 2763 self._format_flags | self._format_version,
2764 2764 new_index_file_path=new_index_file_path,
2765 2765 )
2766 2766
2767 2767 self._inline = False
2768 2768 if new_index_file_path is not None:
2769 2769 self._indexfile = new_index_file_path
2770 2770
2771 2771 nodemaputil.setup_persistent_nodemap(tr, self)
2772 2772
2773 2773 def _nodeduplicatecallback(self, transaction, node):
2774 2774 """called when trying to add a node already stored."""
2775 2775
2776 2776 @contextlib.contextmanager
2777 2777 def reading(self):
2778 2778 with self._inner.reading():
2779 2779 yield
2780 2780
2781 2781 @contextlib.contextmanager
2782 2782 def _writing(self, transaction):
2783 2783 if self._trypending:
2784 2784 msg = b'try to write in a `trypending` revlog: %s'
2785 2785 msg %= self.display_id
2786 2786 raise error.ProgrammingError(msg)
2787 2787 if self._inner.is_writing:
2788 2788 yield
2789 2789 else:
2790 2790 data_end = None
2791 2791 sidedata_end = None
2792 2792 if self._docket is not None:
2793 2793 data_end = self._docket.data_end
2794 2794 sidedata_end = self._docket.sidedata_end
2795 2795 with self._inner.writing(
2796 2796 transaction,
2797 2797 data_end=data_end,
2798 2798 sidedata_end=sidedata_end,
2799 2799 ):
2800 2800 yield
2801 2801 if self._docket is not None:
2802 2802 self._write_docket(transaction)
2803 2803
2804 2804 def _write_docket(self, transaction):
2805 2805 """write the current docket on disk
2806 2806
2807 2807 Exists as a method to help the changelog implement transaction logic.
2808 2808
2809 2809 We could also imagine using the same transaction logic for all revlogs
2810 2810 since dockets are cheap."""
2811 2811 self._docket.write(transaction)
2812 2812
2813 2813 def addrevision(
2814 2814 self,
2815 2815 text,
2816 2816 transaction,
2817 2817 link,
2818 2818 p1,
2819 2819 p2,
2820 2820 cachedelta=None,
2821 2821 node=None,
2822 2822 flags=REVIDX_DEFAULT_FLAGS,
2823 2823 deltacomputer=None,
2824 2824 sidedata=None,
2825 2825 ):
2826 2826 """add a revision to the log
2827 2827
2828 2828 text - the revision data to add
2829 2829 transaction - the transaction object used for rollback
2830 2830 link - the linkrev data to add
2831 2831 p1, p2 - the parent nodeids of the revision
2832 2832 cachedelta - an optional precomputed delta
2833 2833 node - nodeid of revision; typically node is not specified, and it is
2834 2834 computed by default as hash(text, p1, p2); however, subclasses might
2835 2835 use a different hashing method (and override checkhash() in that case)
2836 2836 flags - the known flags to set on the revision
2837 2837 deltacomputer - an optional deltacomputer instance shared between
2838 2838 multiple calls
2839 2839 """
2840 2840 if link == nullrev:
2841 2841 raise error.RevlogError(
2842 2842 _(b"attempted to add linkrev -1 to %s") % self.display_id
2843 2843 )
2844 2844
2845 2845 if sidedata is None:
2846 2846 sidedata = {}
2847 2847 elif sidedata and not self.feature_config.has_side_data:
2848 2848 raise error.ProgrammingError(
2849 2849 _(b"trying to add sidedata to a revlog that does not support them")
2850 2850 )
2851 2851
2852 2852 if flags:
2853 2853 node = node or self.hash(text, p1, p2)
2854 2854
2855 2855 rawtext, validatehash = flagutil.processflagswrite(self, text, flags)
2856 2856
2857 2857 # If the flag processor modifies the revision data, ignore any provided
2858 2858 # cachedelta.
2859 2859 if rawtext != text:
2860 2860 cachedelta = None
2861 2861
2862 2862 if len(rawtext) > _maxentrysize:
2863 2863 raise error.RevlogError(
2864 2864 _(
2865 2865 b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
2866 2866 )
2867 2867 % (self.display_id, len(rawtext))
2868 2868 )
2869 2869
2870 2870 node = node or self.hash(rawtext, p1, p2)
2871 2871 rev = self.index.get_rev(node)
2872 2872 if rev is not None:
2873 2873 return rev
2874 2874
2875 2875 if validatehash:
2876 2876 self.checkhash(rawtext, node, p1=p1, p2=p2)
2877 2877
2878 2878 return self.addrawrevision(
2879 2879 rawtext,
2880 2880 transaction,
2881 2881 link,
2882 2882 p1,
2883 2883 p2,
2884 2884 node,
2885 2885 flags,
2886 2886 cachedelta=cachedelta,
2887 2887 deltacomputer=deltacomputer,
2888 2888 sidedata=sidedata,
2889 2889 )
2890 2890
2891 2891 def addrawrevision(
2892 2892 self,
2893 2893 rawtext,
2894 2894 transaction,
2895 2895 link,
2896 2896 p1,
2897 2897 p2,
2898 2898 node,
2899 2899 flags,
2900 2900 cachedelta=None,
2901 2901 deltacomputer=None,
2902 2902 sidedata=None,
2903 2903 ):
2904 2904 """add a raw revision with known flags, node and parents
2905 2905 useful when reusing a revision not stored in this revlog (ex: received
2906 2906 over wire, or read from an external bundle).
2907 2907 """
2908 2908 with self._writing(transaction):
2909 2909 return self._addrevision(
2910 2910 node,
2911 2911 rawtext,
2912 2912 transaction,
2913 2913 link,
2914 2914 p1,
2915 2915 p2,
2916 2916 flags,
2917 2917 cachedelta,
2918 2918 deltacomputer=deltacomputer,
2919 2919 sidedata=sidedata,
2920 2920 )
2921 2921
2922 2922 def compress(self, data):
2923 2923 return self._inner.compress(data)
2924 2924
2925 2925 def decompress(self, data):
2926 2926 return self._inner.decompress(data)
2927 2927
2928 2928 def _addrevision(
2929 2929 self,
2930 2930 node,
2931 2931 rawtext,
2932 2932 transaction,
2933 2933 link,
2934 2934 p1,
2935 2935 p2,
2936 2936 flags,
2937 2937 cachedelta,
2938 2938 alwayscache=False,
2939 2939 deltacomputer=None,
2940 2940 sidedata=None,
2941 2941 ):
2942 2942 """internal function to add revisions to the log
2943 2943
2944 2944 see addrevision for argument descriptions.
2945 2945
2946 2946 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2947 2947
2948 2948 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2949 2949 be used.
2950 2950
2951 2951 invariants:
2952 2952 - rawtext is optional (can be None); if not set, cachedelta must be set.
2953 2953 if both are set, they must correspond to each other.
2954 2954 """
2955 2955 if node == self.nullid:
2956 2956 raise error.RevlogError(
2957 2957 _(b"%s: attempt to add null revision") % self.display_id
2958 2958 )
2959 2959 if (
2960 2960 node == self.nodeconstants.wdirid
2961 2961 or node in self.nodeconstants.wdirfilenodeids
2962 2962 ):
2963 2963 raise error.RevlogError(
2964 2964 _(b"%s: attempt to add wdir revision") % self.display_id
2965 2965 )
2966 2966 if self._inner._writinghandles is None:
2967 2967 msg = b'adding revision outside `revlog._writing` context'
2968 2968 raise error.ProgrammingError(msg)
2969 2969
2970 2970 btext = [rawtext]
2971 2971
2972 2972 curr = len(self)
2973 2973 prev = curr - 1
2974 2974
2975 2975 offset = self._get_data_offset(prev)
2976 2976
2977 2977 if self._concurrencychecker:
2978 2978 ifh, dfh, sdfh = self._inner._writinghandles
2979 2979 # XXX no checking for the sidedata file
2980 2980 if self._inline:
2981 2981 # offset is "as if" it were in the .d file, so we need to add on
2982 2982 # the size of the entry metadata.
2983 2983 self._concurrencychecker(
2984 2984 ifh, self._indexfile, offset + curr * self.index.entry_size
2985 2985 )
2986 2986 else:
2987 2987 # Entries in the .i are a consistent size.
2988 2988 self._concurrencychecker(
2989 2989 ifh, self._indexfile, curr * self.index.entry_size
2990 2990 )
2991 2991 self._concurrencychecker(dfh, self._datafile, offset)
2992 2992
2993 2993 p1r, p2r = self.rev(p1), self.rev(p2)
2994 2994
2995 2995 # full versions are inserted when the needed deltas
2996 2996 # become comparable to the uncompressed text
2997 2997 if rawtext is None:
2998 2998 # need rawtext size, before changed by flag processors, which is
2999 2999 # the non-raw size. use revlog explicitly to avoid filelog's extra
3000 3000 # logic that might remove metadata size.
3001 3001 textlen = mdiff.patchedsize(
3002 3002 revlog.size(self, cachedelta[0]), cachedelta[1]
3003 3003 )
3004 3004 else:
3005 3005 textlen = len(rawtext)
3006 3006
3007 3007 if deltacomputer is None:
3008 3008 write_debug = None
3009 3009 if self.delta_config.debug_delta:
3010 3010 write_debug = transaction._report
3011 3011 deltacomputer = deltautil.deltacomputer(
3012 3012 self, write_debug=write_debug
3013 3013 )
3014 3014
3015 3015 if cachedelta is not None and len(cachedelta) == 2:
3016 3016 # If the cached delta has no information about how it should be
3017 3017 # reused, add the default reuse instruction according to the
3018 3018 # revlog's configuration.
3019 3019 if (
3020 3020 self.delta_config.general_delta
3021 3021 and self.delta_config.lazy_delta_base
3022 3022 ):
3023 3023 delta_base_reuse = DELTA_BASE_REUSE_TRY
3024 3024 else:
3025 3025 delta_base_reuse = DELTA_BASE_REUSE_NO
3026 3026 cachedelta = (cachedelta[0], cachedelta[1], delta_base_reuse)
3027 3027
3028 3028 revinfo = revlogutils.revisioninfo(
3029 3029 node,
3030 3030 p1,
3031 3031 p2,
3032 3032 btext,
3033 3033 textlen,
3034 3034 cachedelta,
3035 3035 flags,
3036 3036 )
3037 3037
3038 3038 deltainfo = deltacomputer.finddeltainfo(revinfo)
3039 3039
3040 3040 compression_mode = COMP_MODE_INLINE
3041 3041 if self._docket is not None:
3042 3042 default_comp = self._docket.default_compression_header
3043 3043 r = deltautil.delta_compression(default_comp, deltainfo)
3044 3044 compression_mode, deltainfo = r
3045 3045
3046 3046 sidedata_compression_mode = COMP_MODE_INLINE
3047 3047 if sidedata and self.feature_config.has_side_data:
3048 3048 sidedata_compression_mode = COMP_MODE_PLAIN
3049 3049 serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
3050 3050 sidedata_offset = self._docket.sidedata_end
3051 3051 h, comp_sidedata = self._inner.compress(serialized_sidedata)
3052 3052 if (
3053 3053 h != b'u'
3054 3054 and comp_sidedata[0:1] != b'\0'
3055 3055 and len(comp_sidedata) < len(serialized_sidedata)
3056 3056 ):
3057 3057 assert not h
3058 3058 if (
3059 3059 comp_sidedata[0:1]
3060 3060 == self._docket.default_compression_header
3061 3061 ):
3062 3062 sidedata_compression_mode = COMP_MODE_DEFAULT
3063 3063 serialized_sidedata = comp_sidedata
3064 3064 else:
3065 3065 sidedata_compression_mode = COMP_MODE_INLINE
3066 3066 serialized_sidedata = comp_sidedata
3067 3067 else:
3068 3068 serialized_sidedata = b""
3069 3069 # Don't store the offset if the sidedata is empty, that way
3070 3070 # we can easily detect empty sidedata, and it will be no different
3071 3071 # from the ones we add manually.
3072 3072 sidedata_offset = 0
3073 3073
3074 3074 rank = RANK_UNKNOWN
3075 3075 if self.feature_config.compute_rank:
3076 3076 if (p1r, p2r) == (nullrev, nullrev):
3077 3077 rank = 1
3078 3078 elif p1r != nullrev and p2r == nullrev:
3079 3079 rank = 1 + self.fast_rank(p1r)
3080 3080 elif p1r == nullrev and p2r != nullrev:
3081 3081 rank = 1 + self.fast_rank(p2r)
3082 3082 else: # merge node
3083 3083 if rustdagop is not None and self.index.rust_ext_compat:
3084 3084 rank = rustdagop.rank(self.index, p1r, p2r)
3085 3085 else:
3086 3086 pmin, pmax = sorted((p1r, p2r))
3087 3087 rank = 1 + self.fast_rank(pmax)
3088 3088 rank += sum(1 for _ in self.findmissingrevs([pmax], [pmin]))
3089 3089
3090 3090 e = revlogutils.entry(
3091 3091 flags=flags,
3092 3092 data_offset=offset,
3093 3093 data_compressed_length=deltainfo.deltalen,
3094 3094 data_uncompressed_length=textlen,
3095 3095 data_compression_mode=compression_mode,
3096 3096 data_delta_base=deltainfo.base,
3097 3097 link_rev=link,
3098 3098 parent_rev_1=p1r,
3099 3099 parent_rev_2=p2r,
3100 3100 node_id=node,
3101 3101 sidedata_offset=sidedata_offset,
3102 3102 sidedata_compressed_length=len(serialized_sidedata),
3103 3103 sidedata_compression_mode=sidedata_compression_mode,
3104 3104 rank=rank,
3105 3105 )
3106 3106
3107 3107 self.index.append(e)
3108 3108 entry = self.index.entry_binary(curr)
3109 3109 if curr == 0 and self._docket is None:
3110 3110 header = self._format_flags | self._format_version
3111 3111 header = self.index.pack_header(header)
3112 3112 entry = header + entry
3113 3113 self._writeentry(
3114 3114 transaction,
3115 3115 entry,
3116 3116 deltainfo.data,
3117 3117 link,
3118 3118 offset,
3119 3119 serialized_sidedata,
3120 3120 sidedata_offset,
3121 3121 )
3122 3122
3123 3123 rawtext = btext[0]
3124 3124
3125 3125 if alwayscache and rawtext is None:
3126 3126 rawtext = deltacomputer.buildtext(revinfo)
3127 3127
3128 3128 if type(rawtext) == bytes: # only accept immutable objects
3129 3129 self._revisioncache = (node, curr, rawtext)
3130 3130 self._chainbasecache[curr] = deltainfo.chainbase
3131 3131 return curr
3132 3132
3133 3133 def _get_data_offset(self, prev):
3134 3134 """Returns the current offset in the (in-transaction) data file.
3135 3135 Versions < 2 of the revlog can get this in O(1); revlog v2 needs a docket
3136 3136 file to store that information: since sidedata can be rewritten to the
3137 3137 end of the data file within a transaction, you can have cases where, for
3138 3138 example, rev `n` does not have sidedata while rev `n - 1` does, leading
3139 3139 to `n - 1`'s sidedata being written after `n`'s data.
3140 3140
3141 3141 TODO cache this in a docket file before getting out of experimental."""
3142 3142 if self._docket is None:
3143 3143 return self.end(prev)
3144 3144 else:
3145 3145 return self._docket.data_end
3146 3146
3147 3147 def _writeentry(
3148 3148 self, transaction, entry, data, link, offset, sidedata, sidedata_offset
3149 3149 ):
3150 3150 # Files opened in a+ mode have inconsistent behavior on various
3151 3151 # platforms. Windows requires that a file positioning call be made
3152 3152 # when the file handle transitions between reads and writes. See
3153 3153 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
3154 3154 # platforms, Python or the platform itself can be buggy. Some versions
3155 3155 # of Solaris have been observed to not append at the end of the file
3156 3156 # if the file was seeked to before the end. See issue4943 for more.
3157 3157 #
3158 3158 # We work around this issue by inserting a seek() before writing.
3159 3159 # Note: This is likely not necessary on Python 3. However, because
3160 3160 # the file handle is reused for reads and may be seeked there, we need
3161 3161 # to be careful before changing this.
3162 3162 if self._inner._writinghandles is None:
3163 3163 msg = b'adding revision outside `revlog._writing` context'
3164 3164 raise error.ProgrammingError(msg)
3165 3165 ifh, dfh, sdfh = self._inner._writinghandles
3166 3166 if self._docket is None:
3167 3167 ifh.seek(0, os.SEEK_END)
3168 3168 else:
3169 3169 ifh.seek(self._docket.index_end, os.SEEK_SET)
3170 3170 if dfh:
3171 3171 if self._docket is None:
3172 3172 dfh.seek(0, os.SEEK_END)
3173 3173 else:
3174 3174 dfh.seek(self._docket.data_end, os.SEEK_SET)
3175 3175 if sdfh:
3176 3176 sdfh.seek(self._docket.sidedata_end, os.SEEK_SET)
3177 3177
3178 3178 curr = len(self) - 1
3179 3179 if not self._inline:
3180 3180 transaction.add(self._datafile, offset)
3181 3181 if self._sidedatafile:
3182 3182 transaction.add(self._sidedatafile, sidedata_offset)
3183 3183 transaction.add(self._indexfile, curr * len(entry))
3184 3184 if data[0]:
3185 3185 dfh.write(data[0])
3186 3186 dfh.write(data[1])
3187 3187 if sidedata:
3188 3188 sdfh.write(sidedata)
3189 3189 ifh.write(entry)
3190 3190 else:
3191 3191 offset += curr * self.index.entry_size
3192 3192 transaction.add(self._indexfile, offset)
3193 3193 ifh.write(entry)
3194 3194 ifh.write(data[0])
3195 3195 ifh.write(data[1])
3196 3196 assert not sidedata
3197 3197 self._enforceinlinesize(transaction)
3198 3198 if self._docket is not None:
3199 3199 # revlog-v2 always has 3 writing handles, help Pytype
3200 3200 wh1 = self._inner._writinghandles[0]
3201 3201 wh2 = self._inner._writinghandles[1]
3202 3202 wh3 = self._inner._writinghandles[2]
3203 3203 assert wh1 is not None
3204 3204 assert wh2 is not None
3205 3205 assert wh3 is not None
3206 3206 self._docket.index_end = wh1.tell()
3207 3207 self._docket.data_end = wh2.tell()
3208 3208 self._docket.sidedata_end = wh3.tell()
3209 3209
3210 3210 nodemaputil.setup_persistent_nodemap(transaction, self)
3211 3211
3212 3212 def addgroup(
3213 3213 self,
3214 3214 deltas,
3215 3215 linkmapper,
3216 3216 transaction,
3217 3217 alwayscache=False,
3218 3218 addrevisioncb=None,
3219 3219 duplicaterevisioncb=None,
3220 3220 debug_info=None,
3221 3221 delta_base_reuse_policy=None,
3222 3222 ):
3223 3223 """
3224 3224 add a delta group
3225 3225
3226 3226 given a set of deltas, add them to the revision log. the
3227 3227 first delta is against its parent, which should be in our
3228 3228 log, the rest are against the previous delta.
3229 3229
3230 3230 If ``addrevisioncb`` is defined, it will be called with arguments of
3231 3231 this revlog and the node that was added.
3232 3232 """
3233 3233
3234 3234 if self._adding_group:
3235 3235 raise error.ProgrammingError(b'cannot nest addgroup() calls')
3236 3236
3237 3237 # read the default delta-base reuse policy from revlog config if the
3238 3238 # group did not specify one.
3239 3239 if delta_base_reuse_policy is None:
3240 3240 if (
3241 3241 self.delta_config.general_delta
3242 3242 and self.delta_config.lazy_delta_base
3243 3243 ):
3244 3244 delta_base_reuse_policy = DELTA_BASE_REUSE_TRY
3245 3245 else:
3246 3246 delta_base_reuse_policy = DELTA_BASE_REUSE_NO
3247 3247
3248 3248 self._adding_group = True
3249 3249 empty = True
3250 3250 try:
3251 3251 with self._writing(transaction):
3252 3252 write_debug = None
3253 3253 if self.delta_config.debug_delta:
3254 3254 write_debug = transaction._report
3255 3255 deltacomputer = deltautil.deltacomputer(
3256 3256 self,
3257 3257 write_debug=write_debug,
3258 3258 debug_info=debug_info,
3259 3259 )
3260 3260 # loop through our set of deltas
3261 3261 for data in deltas:
3262 3262 (
3263 3263 node,
3264 3264 p1,
3265 3265 p2,
3266 3266 linknode,
3267 3267 deltabase,
3268 3268 delta,
3269 3269 flags,
3270 3270 sidedata,
3271 3271 ) = data
3272 3272 link = linkmapper(linknode)
3273 3273 flags = flags or REVIDX_DEFAULT_FLAGS
3274 3274
3275 3275 rev = self.index.get_rev(node)
3276 3276 if rev is not None:
3277 3277 # this can happen if two branches make the same change
3278 3278 self._nodeduplicatecallback(transaction, rev)
3279 3279 if duplicaterevisioncb:
3280 3280 duplicaterevisioncb(self, rev)
3281 3281 empty = False
3282 3282 continue
3283 3283
3284 3284 for p in (p1, p2):
3285 3285 if not self.index.has_node(p):
3286 3286 raise error.LookupError(
3287 3287 p, self.radix, _(b'unknown parent')
3288 3288 )
3289 3289
3290 3290 if not self.index.has_node(deltabase):
3291 3291 raise error.LookupError(
3292 3292 deltabase, self.display_id, _(b'unknown delta base')
3293 3293 )
3294 3294
3295 3295 baserev = self.rev(deltabase)
3296 3296
3297 3297 if baserev != nullrev and self.iscensored(baserev):
3298 3298 # if base is censored, delta must be full replacement in a
3299 3299 # single patch operation
3300 3300 hlen = struct.calcsize(b">lll")
3301 3301 oldlen = self.rawsize(baserev)
3302 3302 newlen = len(delta) - hlen
3303 3303 if delta[:hlen] != mdiff.replacediffheader(
3304 3304 oldlen, newlen
3305 3305 ):
3306 3306 raise error.CensoredBaseError(
3307 3307 self.display_id, self.node(baserev)
3308 3308 )
3309 3309
3310 3310 if not flags and self._peek_iscensored(baserev, delta):
3311 3311 flags |= REVIDX_ISCENSORED
3312 3312
3313 3313 # We assume consumers of addrevisioncb will want to retrieve
3314 3314 # the added revision, which will require a call to
3315 3315 # revision(). revision() will fast path if there is a cache
3316 3316 # hit. So, we tell _addrevision() to always cache in this case.
3317 3317 # We're only using addgroup() in the context of changegroup
3318 3318 # generation so the revision data can always be handled as raw
3319 3319 # by the flagprocessor.
3320 3320 rev = self._addrevision(
3321 3321 node,
3322 3322 None,
3323 3323 transaction,
3324 3324 link,
3325 3325 p1,
3326 3326 p2,
3327 3327 flags,
3328 3328 (baserev, delta, delta_base_reuse_policy),
3329 3329 alwayscache=alwayscache,
3330 3330 deltacomputer=deltacomputer,
3331 3331 sidedata=sidedata,
3332 3332 )
3333 3333
3334 3334 if addrevisioncb:
3335 3335 addrevisioncb(self, rev)
3336 3336 empty = False
3337 3337 finally:
3338 3338 self._adding_group = False
3339 3339 return not empty
3340 3340
3341 3341 def iscensored(self, rev):
3342 3342 """Check if a file revision is censored."""
3343 3343 if not self.feature_config.censorable:
3344 3344 return False
3345 3345
3346 3346 return self.flags(rev) & REVIDX_ISCENSORED
3347 3347
3348 3348 def _peek_iscensored(self, baserev, delta):
3349 3349 """Quickly check if a delta produces a censored revision."""
3350 3350 if not self.feature_config.censorable:
3351 3351 return False
3352 3352
3353 3353 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
3354 3354
3355 3355 def getstrippoint(self, minlink):
3356 3356 """find the minimum rev that must be stripped to strip the linkrev
3357 3357
3358 3358 Returns a tuple containing the minimum rev and a set of all revs that
3359 3359 have linkrevs that will be broken by this strip.
3360 3360 """
3361 3361 return storageutil.resolvestripinfo(
3362 3362 minlink,
3363 3363 len(self) - 1,
3364 3364 self.headrevs(),
3365 3365 self.linkrev,
3366 3366 self.parentrevs,
3367 3367 )
3368 3368
3369 3369 def strip(self, minlink, transaction):
3370 3370 """truncate the revlog on the first revision with a linkrev >= minlink
3371 3371
3372 3372 This function is called when we're stripping revision minlink and
3373 3373 its descendants from the repository.
3374 3374
3375 3375 We have to remove all revisions with linkrev >= minlink, because
3376 3376 the equivalent changelog revisions will be renumbered after the
3377 3377 strip.
3378 3378
3379 3379 So we truncate the revlog on the first of these revisions, and
3380 3380 trust that the caller has saved the revisions that shouldn't be
3381 3381 removed and that it'll re-add them after this truncation.
3382 3382 """
3383 3383 if len(self) == 0:
3384 3384 return
3385 3385
3386 3386 rev, _ = self.getstrippoint(minlink)
3387 3387 if rev == len(self):
3388 3388 return
3389 3389
3390 3390 # first truncate the files on disk
3391 3391 data_end = self.start(rev)
3392 3392 if not self._inline:
3393 3393 transaction.add(self._datafile, data_end)
3394 3394 end = rev * self.index.entry_size
3395 3395 else:
3396 3396 end = data_end + (rev * self.index.entry_size)
3397 3397
3398 3398 if self._sidedatafile:
3399 3399 sidedata_end = self.sidedata_cut_off(rev)
3400 3400 transaction.add(self._sidedatafile, sidedata_end)
3401 3401
3402 3402 transaction.add(self._indexfile, end)
3403 3403 if self._docket is not None:
3404 3404 # XXX we could leverage the docket while stripping. However, it is
3405 3405 # not powerful enough at the time of this comment
3406 3406 self._docket.index_end = end
3407 3407 self._docket.data_end = data_end
3408 3408 self._docket.sidedata_end = sidedata_end
3409 3409 self._docket.write(transaction, stripping=True)
3410 3410
3411 3411 # then reset internal state in memory to forget those revisions
3412 3412 self._revisioncache = None
3413 3413 self._chaininfocache = util.lrucachedict(500)
3414 3414 self._inner._segmentfile.clear_cache()
3415 3415 self._inner._segmentfile_sidedata.clear_cache()
3416 3416
3417 3417 del self.index[rev:-1]
3418 3418
3419 3419 def checksize(self):
3420 3420 """Check size of index and data files
3421 3421
3422 3422 return a (dd, di) tuple.
3423 3423 - dd: extra bytes for the "data" file
3424 3424 - di: extra bytes for the "index" file
3425 3425
3426 3426 A healthy revlog will return (0, 0).
3427 3427 """
3428 3428 expected = 0
3429 3429 if len(self):
3430 3430 expected = max(0, self.end(len(self) - 1))
3431 3431
3432 3432 try:
3433 3433 with self._datafp() as f:
3434 3434 f.seek(0, io.SEEK_END)
3435 3435 actual = f.tell()
3436 3436 dd = actual - expected
3437 3437 except FileNotFoundError:
3438 3438 dd = 0
3439 3439
3440 3440 try:
3441 3441 f = self.opener(self._indexfile)
3442 3442 f.seek(0, io.SEEK_END)
3443 3443 actual = f.tell()
3444 3444 f.close()
3445 3445 s = self.index.entry_size
3446 3446 i = max(0, actual // s)
3447 3447 di = actual - (i * s)
3448 3448 if self._inline:
3449 3449 databytes = 0
3450 3450 for r in self:
3451 3451 databytes += max(0, self.length(r))
3452 3452 dd = 0
3453 3453 di = actual - len(self) * s - databytes
3454 3454 except FileNotFoundError:
3455 3455 di = 0
3456 3456
3457 3457 return (dd, di)
3458 3458
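A small sketch of how the (dd, di) pair can be read: dd is how many unexpected bytes sit at the end of the data file, di how many trailing bytes of the index do not form a whole entry. The sizes below are made up for illustration:

def checksize_sketch(index_size, data_size, entry_size, expected_data_size):
    dd = data_size - expected_data_size           # extra bytes in the data file
    whole_entries = index_size // entry_size
    di = index_size - whole_entries * entry_size  # truncated trailing index entry
    return dd, di

# a healthy revlog: index is a whole number of entries, data matches the index
assert checksize_sketch(192, 1000, 64, 1000) == (0, 0)
# a damaged one: 10 stray data bytes and 8 bytes of a torn index entry
assert checksize_sketch(200, 1010, 64, 1000) == (10, 8)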
3459 3459 def files(self):
3460 3460 """return list of files that compose this revlog"""
3461 3461 res = [self._indexfile]
3462 3462 if self._docket_file is None:
3463 3463 if not self._inline:
3464 3464 res.append(self._datafile)
3465 3465 else:
3466 3466 res.append(self._docket_file)
3467 3467 res.extend(self._docket.old_index_filepaths(include_empty=False))
3468 3468 if self._docket.data_end:
3469 3469 res.append(self._datafile)
3470 3470 res.extend(self._docket.old_data_filepaths(include_empty=False))
3471 3471 if self._docket.sidedata_end:
3472 3472 res.append(self._sidedatafile)
3473 3473 res.extend(self._docket.old_sidedata_filepaths(include_empty=False))
3474 3474 return res
3475 3475
3476 3476 def emitrevisions(
3477 3477 self,
3478 3478 nodes,
3479 3479 nodesorder=None,
3480 3480 revisiondata=False,
3481 3481 assumehaveparentrevisions=False,
3482 3482 deltamode=repository.CG_DELTAMODE_STD,
3483 3483 sidedata_helpers=None,
3484 3484 debug_info=None,
3485 3485 ):
3486 3486 if nodesorder not in (b'nodes', b'storage', b'linear', None):
3487 3487 raise error.ProgrammingError(
3488 3488 b'unhandled value for nodesorder: %s' % nodesorder
3489 3489 )
3490 3490
3491 3491 if nodesorder is None and not self.delta_config.general_delta:
3492 3492 nodesorder = b'storage'
3493 3493
3494 3494 if (
3495 3495 not self._storedeltachains
3496 3496 and deltamode != repository.CG_DELTAMODE_PREV
3497 3497 ):
3498 3498 deltamode = repository.CG_DELTAMODE_FULL
3499 3499
3500 3500 return storageutil.emitrevisions(
3501 3501 self,
3502 3502 nodes,
3503 3503 nodesorder,
3504 3504 revlogrevisiondelta,
3505 3505 deltaparentfn=self.deltaparent,
3506 3506 candeltafn=self._candelta,
3507 3507 rawsizefn=self.rawsize,
3508 3508 revdifffn=self.revdiff,
3509 3509 flagsfn=self.flags,
3510 3510 deltamode=deltamode,
3511 3511 revisiondata=revisiondata,
3512 3512 assumehaveparentrevisions=assumehaveparentrevisions,
3513 3513 sidedata_helpers=sidedata_helpers,
3514 3514 debug_info=debug_info,
3515 3515 )
3516 3516
3517 3517 DELTAREUSEALWAYS = b'always'
3518 3518 DELTAREUSESAMEREVS = b'samerevs'
3519 3519 DELTAREUSENEVER = b'never'
3520 3520
3521 3521 DELTAREUSEFULLADD = b'fulladd'
3522 3522
3523 3523 DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}
3524 3524
3525 3525 def clone(
3526 3526 self,
3527 3527 tr,
3528 3528 destrevlog,
3529 3529 addrevisioncb=None,
3530 3530 deltareuse=DELTAREUSESAMEREVS,
3531 3531 forcedeltabothparents=None,
3532 3532 sidedata_helpers=None,
3533 3533 ):
3534 3534 """Copy this revlog to another, possibly with format changes.
3535 3535
3536 3536 The destination revlog will contain the same revisions and nodes.
3537 3537 However, it may not be bit-for-bit identical due to e.g. delta encoding
3538 3538 differences.
3539 3539
3540 3540 The ``deltareuse`` argument controls how deltas from the existing revlog
3541 3541 are preserved in the destination revlog. The argument can have the
3542 3542 following values:
3543 3543
3544 3544 DELTAREUSEALWAYS
3545 3545 Deltas will always be reused (if possible), even if the destination
3546 3546 revlog would not select the same revisions for the delta. This is the
3547 3547 fastest mode of operation.
3548 3548 DELTAREUSESAMEREVS
3549 3549 Deltas will be reused if the destination revlog would pick the same
3550 3550 revisions for the delta. This mode strikes a balance between speed
3551 3551 and optimization.
3552 3552 DELTAREUSENEVER
3553 3553 Deltas will never be reused. This is the slowest mode of execution.
3554 3554 This mode can be used to recompute deltas (e.g. if the diff/delta
3555 3555 algorithm changes).
3556 3556 DELTAREUSEFULLADD
3557 3557 Revisions will be re-added as if they were new content. This is
3558 3558 slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
3559 3559 e.g. large file detection and handling.
3560 3560
3561 3561 Delta computation can be slow, so the choice of delta reuse policy can
3562 3562 significantly affect run time.
3563 3563
3564 3564 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
3565 3565 two extremes. Deltas will be reused if they are appropriate. But if the
3566 3566 delta could choose a better revision, it will do so. This means if you
3567 3567 are converting a non-generaldelta revlog to a generaldelta revlog,
3568 3568 deltas will be recomputed if the delta's parent isn't a parent of the
3569 3569 revision.
3570 3570
3571 3571 In addition to the delta policy, the ``forcedeltabothparents``
3572 3572 argument controls whether to force computing deltas against both parents
3573 3573 for merges. If not set, the destination revlog's existing setting is used.
3574 3574
3575 3575 See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
3576 3576 `sidedata_helpers`.
3577 3577 """
3578 3578 if deltareuse not in self.DELTAREUSEALL:
3579 3579 raise ValueError(
3580 3580 _(b'value for deltareuse invalid: %s') % deltareuse
3581 3581 )
3582 3582
3583 3583 if len(destrevlog):
3584 3584 raise ValueError(_(b'destination revlog is not empty'))
3585 3585
3586 3586 if getattr(self, 'filteredrevs', None):
3587 3587 raise ValueError(_(b'source revlog has filtered revisions'))
3588 3588 if getattr(destrevlog, 'filteredrevs', None):
3589 3589 raise ValueError(_(b'destination revlog has filtered revisions'))
3590 3590
3591 3591 # lazydelta and lazydeltabase controls whether to reuse a cached delta,
3592 3592 # if possible.
3593 3593 old_delta_config = destrevlog.delta_config
3594 3594 destrevlog.delta_config = destrevlog.delta_config.copy()
3595 3595
3596 3596 try:
3597 3597 if deltareuse == self.DELTAREUSEALWAYS:
3598 3598 destrevlog.delta_config.lazy_delta_base = True
3599 3599 destrevlog.delta_config.lazy_delta = True
3600 3600 elif deltareuse == self.DELTAREUSESAMEREVS:
3601 3601 destrevlog.delta_config.lazy_delta_base = False
3602 3602 destrevlog.delta_config.lazy_delta = True
3603 3603 elif deltareuse == self.DELTAREUSENEVER:
3604 3604 destrevlog.delta_config.lazy_delta_base = False
3605 3605 destrevlog.delta_config.lazy_delta = False
3606 3606
3607 3607 delta_both_parents = (
3608 3608 forcedeltabothparents or old_delta_config.delta_both_parents
3609 3609 )
3610 3610 destrevlog.delta_config.delta_both_parents = delta_both_parents
3611 3611
3612 3612 with self.reading(), destrevlog._writing(tr):
3613 3613 self._clone(
3614 3614 tr,
3615 3615 destrevlog,
3616 3616 addrevisioncb,
3617 3617 deltareuse,
3618 3618 forcedeltabothparents,
3619 3619 sidedata_helpers,
3620 3620 )
3621 3621
3622 3622 finally:
3623 3623 destrevlog.delta_config = old_delta_config
3624 3624
3625 3625 def _clone(
3626 3626 self,
3627 3627 tr,
3628 3628 destrevlog,
3629 3629 addrevisioncb,
3630 3630 deltareuse,
3631 3631 forcedeltabothparents,
3632 3632 sidedata_helpers,
3633 3633 ):
3634 3634 """perform the core duty of `revlog.clone` after parameter processing"""
3635 3635 write_debug = None
3636 3636 if self.delta_config.debug_delta:
3637 3637 write_debug = tr._report
3638 3638 deltacomputer = deltautil.deltacomputer(
3639 3639 destrevlog,
3640 3640 write_debug=write_debug,
3641 3641 )
3642 3642 index = self.index
3643 3643 for rev in self:
3644 3644 entry = index[rev]
3645 3645
3646 3646 # Some classes override linkrev to take filtered revs into
3647 3647 # account. Use raw entry from index.
3648 3648 flags = entry[0] & 0xFFFF
3649 3649 linkrev = entry[4]
3650 3650 p1 = index[entry[5]][7]
3651 3651 p2 = index[entry[6]][7]
3652 3652 node = entry[7]
3653 3653
3654 3654 # (Possibly) reuse the delta from the revlog if allowed and
3655 3655 # the revlog chunk is a delta.
3656 3656 cachedelta = None
3657 3657 rawtext = None
3658 3658 if deltareuse == self.DELTAREUSEFULLADD:
3659 3659 text = self._revisiondata(rev)
3660 3660 sidedata = self.sidedata(rev)
3661 3661
3662 3662 if sidedata_helpers is not None:
3663 3663 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3664 3664 self, sidedata_helpers, sidedata, rev
3665 3665 )
3666 3666 flags = flags | new_flags[0] & ~new_flags[1]
3667 3667
3668 3668 destrevlog.addrevision(
3669 3669 text,
3670 3670 tr,
3671 3671 linkrev,
3672 3672 p1,
3673 3673 p2,
3674 3674 cachedelta=cachedelta,
3675 3675 node=node,
3676 3676 flags=flags,
3677 3677 deltacomputer=deltacomputer,
3678 3678 sidedata=sidedata,
3679 3679 )
3680 3680 else:
3681 3681 if destrevlog.delta_config.lazy_delta:
3682 3682 dp = self.deltaparent(rev)
3683 3683 if dp != nullrev:
3684 cachedelta = (dp, bytes(self._chunk(rev)))
3684 cachedelta = (dp, bytes(self._inner._chunk(rev)))
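# cachedelta is a (delta_parent_rev, raw_delta_bytes) pair taken from the
# source revlog's stored chunk; handing it to _addrevision below lets the
# destination reuse that delta instead of recomputing one from full texts
# (lazy_delta / lazy_delta_base control how far that reuse can go).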
3685 3685
3686 3686 sidedata = None
3687 3687 if not cachedelta:
3688 3688 try:
3689 3689 rawtext = self._revisiondata(rev)
3690 3690 except error.CensoredNodeError as censored:
3691 3691 assert flags & REVIDX_ISCENSORED
3692 3692 rawtext = censored.tombstone
3693 3693 sidedata = self.sidedata(rev)
3694 3694 if sidedata is None:
3695 3695 sidedata = self.sidedata(rev)
3696 3696
3697 3697 if sidedata_helpers is not None:
3698 3698 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3699 3699 self, sidedata_helpers, sidedata, rev
3700 3700 )
3701 3701 flags = flags | new_flags[0] & ~new_flags[1]
3702 3702
3703 3703 destrevlog._addrevision(
3704 3704 node,
3705 3705 rawtext,
3706 3706 tr,
3707 3707 linkrev,
3708 3708 p1,
3709 3709 p2,
3710 3710 flags,
3711 3711 cachedelta,
3712 3712 deltacomputer=deltacomputer,
3713 3713 sidedata=sidedata,
3714 3714 )
3715 3715
3716 3716 if addrevisioncb:
3717 3717 addrevisioncb(self, rev, node)
3718 3718
3719 3719 def censorrevision(self, tr, censornode, tombstone=b''):
3720 3720 if self._format_version == REVLOGV0:
3721 3721 raise error.RevlogError(
3722 3722 _(b'cannot censor with version %d revlogs')
3723 3723 % self._format_version
3724 3724 )
3725 3725 elif self._format_version == REVLOGV1:
3726 3726 rewrite.v1_censor(self, tr, censornode, tombstone)
3727 3727 else:
3728 3728 rewrite.v2_censor(self, tr, censornode, tombstone)
3729 3729
3730 3730 def verifyintegrity(self, state):
3731 3731 """Verifies the integrity of the revlog.
3732 3732
3733 3733 Yields ``revlogproblem`` instances describing problems that are
3734 3734 found.
3735 3735 """
3736 3736 dd, di = self.checksize()
3737 3737 if dd:
3738 3738 yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
3739 3739 if di:
3740 3740 yield revlogproblem(error=_(b'index contains %d extra bytes') % di)
3741 3741
3742 3742 version = self._format_version
3743 3743
3744 3744 # The verifier tells us what version revlog we should be.
3745 3745 if version != state[b'expectedversion']:
3746 3746 yield revlogproblem(
3747 3747 warning=_(b"warning: '%s' uses revlog format %d; expected %d")
3748 3748 % (self.display_id, version, state[b'expectedversion'])
3749 3749 )
3750 3750
3751 3751 state[b'skipread'] = set()
3752 3752 state[b'safe_renamed'] = set()
3753 3753
3754 3754 for rev in self:
3755 3755 node = self.node(rev)
3756 3756
3757 3757 # Verify contents. 4 cases to care about:
3758 3758 #
3759 3759 # common: the most common case
3760 3760 # rename: with a rename
3761 3761 # meta: file content starts with b'\1\n', the metadata
3762 3762 # header defined in filelog.py, but without a rename
3763 3763 # ext: content stored externally
3764 3764 #
3765 3765 # More formally, their differences are shown below:
3766 3766 #
3767 3767 # | common | rename | meta | ext
3768 3768 # -------------------------------------------------------
3769 3769 # flags() | 0 | 0 | 0 | not 0
3770 3770 # renamed() | False | True | False | ?
3771 3771 # rawtext[0:2]=='\1\n'| False | True | True | ?
3772 3772 #
3773 3773 # "rawtext" means the raw text stored in revlog data, which
3774 3774 # could be retrieved by "rawdata(rev)". "text"
3775 3775 # mentioned below is "revision(rev)".
3776 3776 #
3777 3777 # There are 3 different lengths stored physically:
3778 3778 # 1. L1: rawsize, stored in revlog index
3779 3779 # 2. L2: len(rawtext), stored in revlog data
3780 3780 # 3. L3: len(text), stored in revlog data if flags==0, or
3781 3781 # possibly somewhere else if flags!=0
3782 3782 #
3783 3783 # L1 should be equal to L2. L3 could be different from them.
3784 3784 # "text" may or may not affect commit hash depending on flag
3785 3785 # processors (see flagutil.addflagprocessor).
3786 3786 #
3787 3787 # | common | rename | meta | ext
3788 3788 # -------------------------------------------------
3789 3789 # rawsize() | L1 | L1 | L1 | L1
3790 3790 # size() | L1 | L2-LM | L1(*) | L1 (?)
3791 3791 # len(rawtext) | L2 | L2 | L2 | L2
3792 3792 # len(text) | L2 | L2 | L2 | L3
3793 3793 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
3794 3794 #
3795 3795 # LM: length of metadata, depending on rawtext
3796 3796 # (*): not ideal, see comment in filelog.size
3797 3797 # (?): could be "- len(meta)" if the resolved content has
3798 3798 # rename metadata
3799 3799 #
3800 3800 # Checks needed to be done:
3801 3801 # 1. length check: L1 == L2, in all cases.
3802 3802 # 2. hash check: depending on flag processor, we may need to
3803 3803 # use either "text" (external), or "rawtext" (in revlog).
3804 3804
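# A concrete illustration of the "meta"/"rename" cases above (made-up
# values, not part of this change): a rawtext carrying the filelog
# metadata header looks like
#   b'\x01\ncopy: a.txt\ncopyrev: <40 hex digits>\n\x01\nactual content\n'
# Here L1 == L2 == len(rawtext), LM is the length of the b'\x01\n...\x01\n'
# envelope, and read() returns only the trailing content (L2 - LM bytes).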
3805 3805 try:
3806 3806 skipflags = state.get(b'skipflags', 0)
3807 3807 if skipflags:
3808 3808 skipflags &= self.flags(rev)
3809 3809
3810 3810 _verify_revision(self, skipflags, state, node)
3811 3811
3812 3812 l1 = self.rawsize(rev)
3813 3813 l2 = len(self.rawdata(node))
3814 3814
3815 3815 if l1 != l2:
3816 3816 yield revlogproblem(
3817 3817 error=_(b'unpacked size is %d, %d expected') % (l2, l1),
3818 3818 node=node,
3819 3819 )
3820 3820
3821 3821 except error.CensoredNodeError:
3822 3822 if state[b'erroroncensored']:
3823 3823 yield revlogproblem(
3824 3824 error=_(b'censored file data'), node=node
3825 3825 )
3826 3826 state[b'skipread'].add(node)
3827 3827 except Exception as e:
3828 3828 yield revlogproblem(
3829 3829 error=_(b'unpacking %s: %s')
3830 3830 % (short(node), stringutil.forcebytestr(e)),
3831 3831 node=node,
3832 3832 )
3833 3833 state[b'skipread'].add(node)
3834 3834
3835 3835 def storageinfo(
3836 3836 self,
3837 3837 exclusivefiles=False,
3838 3838 sharedfiles=False,
3839 3839 revisionscount=False,
3840 3840 trackedsize=False,
3841 3841 storedsize=False,
3842 3842 ):
3843 3843 d = {}
3844 3844
3845 3845 if exclusivefiles:
3846 3846 d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
3847 3847 if not self._inline:
3848 3848 d[b'exclusivefiles'].append((self.opener, self._datafile))
3849 3849
3850 3850 if sharedfiles:
3851 3851 d[b'sharedfiles'] = []
3852 3852
3853 3853 if revisionscount:
3854 3854 d[b'revisionscount'] = len(self)
3855 3855
3856 3856 if trackedsize:
3857 3857 d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))
3858 3858
3859 3859 if storedsize:
3860 3860 d[b'storedsize'] = sum(
3861 3861 self.opener.stat(path).st_size for path in self.files()
3862 3862 )
3863 3863
3864 3864 return d
3865 3865
3866 3866 def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
3867 3867 if not self.feature_config.has_side_data:
3868 3868 return
3869 3869 # revlog formats with sidedata support do not support inline
3870 3870 assert not self._inline
3871 3871 if not helpers[1] and not helpers[2]:
3872 3872 # Nothing to generate or remove
3873 3873 return
3874 3874
3875 3875 new_entries = []
3876 3876 # append the new sidedata
3877 3877 with self._writing(transaction):
3878 3878 ifh, dfh, sdfh = self._inner._writinghandles
3879 3879 dfh.seek(self._docket.sidedata_end, os.SEEK_SET)
3880 3880
3881 3881 current_offset = sdfh.tell()
3882 3882 for rev in range(startrev, endrev + 1):
3883 3883 entry = self.index[rev]
3884 3884 new_sidedata, flags = sidedatautil.run_sidedata_helpers(
3885 3885 store=self,
3886 3886 sidedata_helpers=helpers,
3887 3887 sidedata={},
3888 3888 rev=rev,
3889 3889 )
3890 3890
3891 3891 serialized_sidedata = sidedatautil.serialize_sidedata(
3892 3892 new_sidedata
3893 3893 )
3894 3894
3895 3895 sidedata_compression_mode = COMP_MODE_INLINE
3896 3896 if serialized_sidedata and self.feature_config.has_side_data:
3897 3897 sidedata_compression_mode = COMP_MODE_PLAIN
3898 3898 h, comp_sidedata = self._inner.compress(serialized_sidedata)
3899 3899 if (
3900 3900 h != b'u'
3901 3901 and comp_sidedata[0] != b'\0'
3902 3902 and len(comp_sidedata) < len(serialized_sidedata)
3903 3903 ):
3904 3904 assert not h
3905 3905 if (
3906 3906 comp_sidedata[0]
3907 3907 == self._docket.default_compression_header
3908 3908 ):
3909 3909 sidedata_compression_mode = COMP_MODE_DEFAULT
3910 3910 serialized_sidedata = comp_sidedata
3911 3911 else:
3912 3912 sidedata_compression_mode = COMP_MODE_INLINE
3913 3913 serialized_sidedata = comp_sidedata
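# Net effect of the block above: non-empty sidedata stays uncompressed
# (COMP_MODE_PLAIN) unless compression actually shrinks it; when it does,
# it is stored as COMP_MODE_DEFAULT if the compressed data starts with the
# docket's default compression header, and as COMP_MODE_INLINE otherwise.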
3914 3914 if entry[8] != 0 or entry[9] != 0:
3915 3915 # rewriting entries that already have sidedata is not
3916 3916 # supported yet, because it introduces garbage data in the
3917 3917 # revlog.
3918 3918 msg = b"rewriting existing sidedata is not supported yet"
3919 3919 raise error.Abort(msg)
3920 3920
3921 3921 # Apply (potential) flags to add and to remove after running
3922 3922 # the sidedata helpers
3923 3923 new_offset_flags = entry[0] | flags[0] & ~flags[1]
3924 3924 entry_update = (
3925 3925 current_offset,
3926 3926 len(serialized_sidedata),
3927 3927 new_offset_flags,
3928 3928 sidedata_compression_mode,
3929 3929 )
3930 3930
3931 3931 # the sidedata computation might have moved the file cursors around
3932 3932 sdfh.seek(current_offset, os.SEEK_SET)
3933 3933 sdfh.write(serialized_sidedata)
3934 3934 new_entries.append(entry_update)
3935 3935 current_offset += len(serialized_sidedata)
3936 3936 self._docket.sidedata_end = sdfh.tell()
3937 3937
3938 3938 # rewrite the new index entries
3939 3939 ifh.seek(startrev * self.index.entry_size)
3940 3940 for i, e in enumerate(new_entries):
3941 3941 rev = startrev + i
3942 3942 self.index.replace_sidedata_info(rev, *e)
3943 3943 packed = self.index.entry_binary(rev)
3944 3944 if rev == 0 and self._docket is None:
3945 3945 header = self._format_flags | self._format_version
3946 3946 header = self.index.pack_header(header)
3947 3947 packed = header + packed
3948 3948 ifh.write(packed)
@@ -1,876 +1,876 b''
1 1 # censor code related to censoring revision
2 2 # coding: utf8
3 3 #
4 4 # Copyright 2021 Pierre-Yves David <pierre-yves.david@octobus.net>
5 5 # Copyright 2015 Google, Inc <martinvonz@google.com>
6 6 #
7 7 # This software may be used and distributed according to the terms of the
8 8 # GNU General Public License version 2 or any later version.
9 9
10 10 import binascii
11 11 import contextlib
12 12 import os
13 13 import struct
14 14
15 15 from ..node import (
16 16 nullrev,
17 17 )
18 18 from .constants import (
19 19 COMP_MODE_PLAIN,
20 20 ENTRY_DATA_COMPRESSED_LENGTH,
21 21 ENTRY_DATA_COMPRESSION_MODE,
22 22 ENTRY_DATA_OFFSET,
23 23 ENTRY_DATA_UNCOMPRESSED_LENGTH,
24 24 ENTRY_DELTA_BASE,
25 25 ENTRY_LINK_REV,
26 26 ENTRY_NODE_ID,
27 27 ENTRY_PARENT_1,
28 28 ENTRY_PARENT_2,
29 29 ENTRY_SIDEDATA_COMPRESSED_LENGTH,
30 30 ENTRY_SIDEDATA_COMPRESSION_MODE,
31 31 ENTRY_SIDEDATA_OFFSET,
32 32 REVIDX_ISCENSORED,
33 33 REVLOGV0,
34 34 REVLOGV1,
35 35 )
36 36 from ..i18n import _
37 37
38 38 from .. import (
39 39 error,
40 40 mdiff,
41 41 pycompat,
42 42 revlogutils,
43 43 util,
44 44 )
45 45 from ..utils import (
46 46 storageutil,
47 47 )
48 48 from . import (
49 49 constants,
50 50 deltas,
51 51 )
52 52
53 53
54 54 def v1_censor(rl, tr, censornode, tombstone=b''):
55 55 """censors a revision in a "version 1" revlog"""
56 56 assert rl._format_version == constants.REVLOGV1, rl._format_version
57 57
58 58 # avoid cycle
59 59 from .. import revlog
60 60
61 61 censorrev = rl.rev(censornode)
62 62 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
63 63
64 64 # Rewriting the revlog in place is hard. Our strategy for censoring is
65 65 # to create a new revlog, copy all revisions to it, then replace the
66 66 # revlogs on transaction close.
67 67 #
68 68 # This is a bit dangerous. We could easily have a mismatch of state.
69 69 newrl = revlog.revlog(
70 70 rl.opener,
71 71 target=rl.target,
72 72 radix=rl.radix,
73 73 postfix=b'tmpcensored',
74 74 censorable=True,
75 75 )
76 76 newrl._format_version = rl._format_version
77 77 newrl._format_flags = rl._format_flags
78 78 newrl.delta_config.general_delta = rl.delta_config.general_delta
79 79 newrl._parse_index = rl._parse_index
80 80
81 81 for rev in rl.revs():
82 82 node = rl.node(rev)
83 83 p1, p2 = rl.parents(node)
84 84
85 85 if rev == censorrev:
86 86 newrl.addrawrevision(
87 87 tombstone,
88 88 tr,
89 89 rl.linkrev(censorrev),
90 90 p1,
91 91 p2,
92 92 censornode,
93 93 constants.REVIDX_ISCENSORED,
94 94 )
95 95
96 96 if newrl.deltaparent(rev) != nullrev:
97 97 m = _(b'censored revision stored as delta; cannot censor')
98 98 h = _(
99 99 b'censoring of revlogs is not fully implemented;'
100 100 b' please report this bug'
101 101 )
102 102 raise error.Abort(m, hint=h)
103 103 continue
104 104
105 105 if rl.iscensored(rev):
106 106 if rl.deltaparent(rev) != nullrev:
107 107 m = _(
108 108 b'cannot censor due to censored '
109 109 b'revision having delta stored'
110 110 )
111 111 raise error.Abort(m)
112 rawtext = rl._chunk(rev)
112 rawtext = rl._inner._chunk(rev)
113 113 else:
114 114 rawtext = rl.rawdata(rev)
115 115
116 116 newrl.addrawrevision(
117 117 rawtext, tr, rl.linkrev(rev), p1, p2, node, rl.flags(rev)
118 118 )
119 119
120 120 tr.addbackup(rl._indexfile, location=b'store')
121 121 if not rl._inline:
122 122 tr.addbackup(rl._datafile, location=b'store')
123 123
124 124 rl.opener.rename(newrl._indexfile, rl._indexfile)
125 125 if not rl._inline:
126 126 rl.opener.rename(newrl._datafile, rl._datafile)
127 127
128 128 rl.clearcaches()
129 129 chunk_cache = rl._loadindex()
130 130 rl._load_inner(chunk_cache)
131 131
132 132
133 133 def v2_censor(revlog, tr, censornode, tombstone=b''):
134 134 """censors a revision in a "version 2" revlog"""
135 135 assert revlog._format_version != REVLOGV0, revlog._format_version
136 136 assert revlog._format_version != REVLOGV1, revlog._format_version
137 137
138 138 censor_revs = {revlog.rev(censornode)}
139 139 _rewrite_v2(revlog, tr, censor_revs, tombstone)
140 140
141 141
142 142 def _rewrite_v2(revlog, tr, censor_revs, tombstone=b''):
143 143 """rewrite a revlog to censor some of its content
144 144
145 145 General principle
146 146
147 147 We create new revlog files (index/data/sidedata) to copy the content of
148 148 the existing data without the censored data.
149 149
150 150 We need to recompute a new delta for any revision that used the censored
151 151 revision as its delta base. As the cumulative size of the new deltas may be
152 152 large, we store them in a temporary file until they are stored in their
153 153 final destination.
154 154
155 155 All data before the censored data can be blindly copied. The rest needs
156 156 to be copied as we go and the associated index entry needs adjustment.
157 157 """
158 158 assert revlog._format_version != REVLOGV0, revlog._format_version
159 159 assert revlog._format_version != REVLOGV1, revlog._format_version
160 160
161 161 old_index = revlog.index
162 162 docket = revlog._docket
163 163
164 164 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
165 165
166 166 first_excl_rev = min(censor_revs)
167 167
168 168 first_excl_entry = revlog.index[first_excl_rev]
169 169 index_cutoff = revlog.index.entry_size * first_excl_rev
170 170 data_cutoff = first_excl_entry[ENTRY_DATA_OFFSET] >> 16
171 171 sidedata_cutoff = revlog.sidedata_cut_off(first_excl_rev)
172 172
173 173 with pycompat.unnamedtempfile(mode=b"w+b") as tmp_storage:
174 174 # rev → (new_base, data_start, data_end, compression_mode)
175 175 rewritten_entries = _precompute_rewritten_delta(
176 176 revlog,
177 177 old_index,
178 178 censor_revs,
179 179 tmp_storage,
180 180 )
181 181
182 182 all_files = _setup_new_files(
183 183 revlog,
184 184 index_cutoff,
185 185 data_cutoff,
186 186 sidedata_cutoff,
187 187 )
188 188
189 189 # we don't need to open the old index file since its content already
190 190 # exists in a usable form in `old_index`.
191 191 with all_files() as open_files:
192 192 (
193 193 old_data_file,
194 194 old_sidedata_file,
195 195 new_index_file,
196 196 new_data_file,
197 197 new_sidedata_file,
198 198 ) = open_files
199 199
200 200 # writing the censored revision
201 201
202 202 # Writing all subsequent revisions
203 203 for rev in range(first_excl_rev, len(old_index)):
204 204 if rev in censor_revs:
205 205 _rewrite_censor(
206 206 revlog,
207 207 old_index,
208 208 open_files,
209 209 rev,
210 210 tombstone,
211 211 )
212 212 else:
213 213 _rewrite_simple(
214 214 revlog,
215 215 old_index,
216 216 open_files,
217 217 rev,
218 218 rewritten_entries,
219 219 tmp_storage,
220 220 )
221 221 docket.write(transaction=None, stripping=True)
222 222
223 223
224 224 def _precompute_rewritten_delta(
225 225 revlog,
226 226 old_index,
227 227 excluded_revs,
228 228 tmp_storage,
229 229 ):
230 230 """Compute new delta for revisions whose delta is based on revision that
231 231 will not survive as is.
232 232
233 233 Return a mapping: {rev → (new_base, data_start, data_end, compression_mode)}
234 234 """
235 235 dc = deltas.deltacomputer(revlog)
236 236 rewritten_entries = {}
237 237 first_excl_rev = min(excluded_revs)
238 238 with revlog.reading():
239 239 for rev in range(first_excl_rev, len(old_index)):
240 240 if rev in excluded_revs:
241 241 # this revision will be preserved as is, so we don't need to
242 242 # consider recomputing a delta.
243 243 continue
244 244 entry = old_index[rev]
245 245 if entry[ENTRY_DELTA_BASE] not in excluded_revs:
246 246 continue
247 247 # This is a revision that uses the censored revision as the base
248 248 # for its delta. We need to compute a new delta for it.
249 249 if entry[ENTRY_DATA_UNCOMPRESSED_LENGTH] == 0:
250 250 # this revision is empty, we can delta against nullrev
251 251 rewritten_entries[rev] = (nullrev, 0, 0, COMP_MODE_PLAIN)
252 252 else:
253 253
254 254 text = revlog.rawdata(rev)
255 255 info = revlogutils.revisioninfo(
256 256 node=entry[ENTRY_NODE_ID],
257 257 p1=revlog.node(entry[ENTRY_PARENT_1]),
258 258 p2=revlog.node(entry[ENTRY_PARENT_2]),
259 259 btext=[text],
260 260 textlen=len(text),
261 261 cachedelta=None,
262 262 flags=entry[ENTRY_DATA_OFFSET] & 0xFFFF,
263 263 )
264 264 d = dc.finddeltainfo(
265 265 info, excluded_bases=excluded_revs, target_rev=rev
266 266 )
267 267 default_comp = revlog._docket.default_compression_header
268 268 comp_mode, d = deltas.delta_compression(default_comp, d)
269 269 # using `tell` is a bit lazy, but we are not here for speed
270 270 start = tmp_storage.tell()
271 271 tmp_storage.write(d.data[1])
272 272 end = tmp_storage.tell()
273 273 rewritten_entries[rev] = (d.base, start, end, comp_mode)
274 274 return rewritten_entries
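# Shape of the returned mapping, with made-up numbers for illustration:
#   {4: (nullrev, 0, 0, COMP_MODE_PLAIN),   # empty revision, delta vs nullrev
#    7: (2, 0, 1337, COMP_MODE_DEFAULT)}    # new delta stored in tmp_storage[0:1337]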
275 275
276 276
277 277 def _setup_new_files(
278 278 revlog,
279 279 index_cutoff,
280 280 data_cutoff,
281 281 sidedata_cutoff,
282 282 ):
283 283 """
284 284
285 285 Return a context manager to open all the relevant files:
286 286 - old_data_file,
287 287 - old_sidedata_file,
288 288 - new_index_file,
289 289 - new_data_file,
290 290 - new_sidedata_file,
291 291
292 292 The old_index_file is not here because it is accessed through the
293 293 `old_index` object of the calling function.
294 294 """
295 295 docket = revlog._docket
296 296 old_index_filepath = revlog.opener.join(docket.index_filepath())
297 297 old_data_filepath = revlog.opener.join(docket.data_filepath())
298 298 old_sidedata_filepath = revlog.opener.join(docket.sidedata_filepath())
299 299
300 300 new_index_filepath = revlog.opener.join(docket.new_index_file())
301 301 new_data_filepath = revlog.opener.join(docket.new_data_file())
302 302 new_sidedata_filepath = revlog.opener.join(docket.new_sidedata_file())
303 303
304 304 util.copyfile(old_index_filepath, new_index_filepath, nb_bytes=index_cutoff)
305 305 util.copyfile(old_data_filepath, new_data_filepath, nb_bytes=data_cutoff)
306 306 util.copyfile(
307 307 old_sidedata_filepath,
308 308 new_sidedata_filepath,
309 309 nb_bytes=sidedata_cutoff,
310 310 )
311 311 revlog.opener.register_file(docket.index_filepath())
312 312 revlog.opener.register_file(docket.data_filepath())
313 313 revlog.opener.register_file(docket.sidedata_filepath())
314 314
315 315 docket.index_end = index_cutoff
316 316 docket.data_end = data_cutoff
317 317 docket.sidedata_end = sidedata_cutoff
318 318
319 319 # reload the revlog internal information
320 320 revlog.clearcaches()
321 321 revlog._loadindex(docket=docket)
322 322
323 323 @contextlib.contextmanager
324 324 def all_files_opener():
325 325 # hide the opening in a helper function to please check-code, black
326 326 # and various Python versions at the same time
327 327 with open(old_data_filepath, 'rb') as old_data_file:
328 328 with open(old_sidedata_filepath, 'rb') as old_sidedata_file:
329 329 with open(new_index_filepath, 'r+b') as new_index_file:
330 330 with open(new_data_filepath, 'r+b') as new_data_file:
331 331 with open(
332 332 new_sidedata_filepath, 'r+b'
333 333 ) as new_sidedata_file:
334 334 new_index_file.seek(0, os.SEEK_END)
335 335 assert new_index_file.tell() == index_cutoff
336 336 new_data_file.seek(0, os.SEEK_END)
337 337 assert new_data_file.tell() == data_cutoff
338 338 new_sidedata_file.seek(0, os.SEEK_END)
339 339 assert new_sidedata_file.tell() == sidedata_cutoff
340 340 yield (
341 341 old_data_file,
342 342 old_sidedata_file,
343 343 new_index_file,
344 344 new_data_file,
345 345 new_sidedata_file,
346 346 )
347 347
348 348 return all_files_opener
349 349
350 350
351 351 def _rewrite_simple(
352 352 revlog,
353 353 old_index,
354 354 all_files,
355 355 rev,
356 356 rewritten_entries,
357 357 tmp_storage,
358 358 ):
359 359 """append a normal revision to the index after the rewritten one(s)"""
360 360 (
361 361 old_data_file,
362 362 old_sidedata_file,
363 363 new_index_file,
364 364 new_data_file,
365 365 new_sidedata_file,
366 366 ) = all_files
367 367 entry = old_index[rev]
368 368 flags = entry[ENTRY_DATA_OFFSET] & 0xFFFF
369 369 old_data_offset = entry[ENTRY_DATA_OFFSET] >> 16
370 370
371 371 if rev not in rewritten_entries:
372 372 old_data_file.seek(old_data_offset)
373 373 new_data_size = entry[ENTRY_DATA_COMPRESSED_LENGTH]
374 374 new_data = old_data_file.read(new_data_size)
375 375 data_delta_base = entry[ENTRY_DELTA_BASE]
376 376 d_comp_mode = entry[ENTRY_DATA_COMPRESSION_MODE]
377 377 else:
378 378 (
379 379 data_delta_base,
380 380 start,
381 381 end,
382 382 d_comp_mode,
383 383 ) = rewritten_entries[rev]
384 384 new_data_size = end - start
385 385 tmp_storage.seek(start)
386 386 new_data = tmp_storage.read(new_data_size)
387 387
388 388 # It might be faster to group contiguous read/write operations;
389 389 # however, this is censor, an operation that is not focused
390 390 # on stellar performance. So I have not written this
391 391 # optimisation yet.
392 392 new_data_offset = new_data_file.tell()
393 393 new_data_file.write(new_data)
394 394
395 395 sidedata_size = entry[ENTRY_SIDEDATA_COMPRESSED_LENGTH]
396 396 new_sidedata_offset = new_sidedata_file.tell()
397 397 if 0 < sidedata_size:
398 398 old_sidedata_offset = entry[ENTRY_SIDEDATA_OFFSET]
399 399 old_sidedata_file.seek(old_sidedata_offset)
400 400 new_sidedata = old_sidedata_file.read(sidedata_size)
401 401 new_sidedata_file.write(new_sidedata)
402 402
403 403 data_uncompressed_length = entry[ENTRY_DATA_UNCOMPRESSED_LENGTH]
404 404 sd_com_mode = entry[ENTRY_SIDEDATA_COMPRESSION_MODE]
405 405 assert data_delta_base <= rev, (data_delta_base, rev)
406 406
407 407 new_entry = revlogutils.entry(
408 408 flags=flags,
409 409 data_offset=new_data_offset,
410 410 data_compressed_length=new_data_size,
411 411 data_uncompressed_length=data_uncompressed_length,
412 412 data_delta_base=data_delta_base,
413 413 link_rev=entry[ENTRY_LINK_REV],
414 414 parent_rev_1=entry[ENTRY_PARENT_1],
415 415 parent_rev_2=entry[ENTRY_PARENT_2],
416 416 node_id=entry[ENTRY_NODE_ID],
417 417 sidedata_offset=new_sidedata_offset,
418 418 sidedata_compressed_length=sidedata_size,
419 419 data_compression_mode=d_comp_mode,
420 420 sidedata_compression_mode=sd_com_mode,
421 421 )
422 422 revlog.index.append(new_entry)
423 423 entry_bin = revlog.index.entry_binary(rev)
424 424 new_index_file.write(entry_bin)
425 425
426 426 revlog._docket.index_end = new_index_file.tell()
427 427 revlog._docket.data_end = new_data_file.tell()
428 428 revlog._docket.sidedata_end = new_sidedata_file.tell()
429 429
430 430
431 431 def _rewrite_censor(
432 432 revlog,
433 433 old_index,
434 434 all_files,
435 435 rev,
436 436 tombstone,
437 437 ):
438 438 """rewrite and append a censored revision"""
439 439 (
440 440 old_data_file,
441 441 old_sidedata_file,
442 442 new_index_file,
443 443 new_data_file,
444 444 new_sidedata_file,
445 445 ) = all_files
446 446 entry = old_index[rev]
447 447
448 448 # XXX consider trying the default compression too
449 449 new_data_size = len(tombstone)
450 450 new_data_offset = new_data_file.tell()
451 451 new_data_file.write(tombstone)
452 452
453 453 # we are not adding any sidedata as they might leak info about the censored version
454 454
455 455 link_rev = entry[ENTRY_LINK_REV]
456 456
457 457 p1 = entry[ENTRY_PARENT_1]
458 458 p2 = entry[ENTRY_PARENT_2]
459 459
460 460 new_entry = revlogutils.entry(
461 461 flags=constants.REVIDX_ISCENSORED,
462 462 data_offset=new_data_offset,
463 463 data_compressed_length=new_data_size,
464 464 data_uncompressed_length=new_data_size,
465 465 data_delta_base=rev,
466 466 link_rev=link_rev,
467 467 parent_rev_1=p1,
468 468 parent_rev_2=p2,
469 469 node_id=entry[ENTRY_NODE_ID],
470 470 sidedata_offset=0,
471 471 sidedata_compressed_length=0,
472 472 data_compression_mode=COMP_MODE_PLAIN,
473 473 sidedata_compression_mode=COMP_MODE_PLAIN,
474 474 )
475 475 revlog.index.append(new_entry)
476 476 entry_bin = revlog.index.entry_binary(rev)
477 477 new_index_file.write(entry_bin)
478 478 revlog._docket.index_end = new_index_file.tell()
479 479 revlog._docket.data_end = new_data_file.tell()
480 480
481 481
482 482 def _get_filename_from_filelog_index(path):
483 483 # Drop the extension and the `data/` prefix
484 484 path_part = path.rsplit(b'.', 1)[0].split(b'/', 1)
485 485 if len(path_part) < 2:
486 486 msg = _(b"cannot recognize filelog from filename: '%s'")
487 487 msg %= path
488 488 raise error.Abort(msg)
489 489
490 490 return path_part[1]
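# A minimal usage sketch (made-up paths, not part of this change):
assert _get_filename_from_filelog_index(b'data/foo/bar.txt.i') == b'foo/bar.txt'
assert _get_filename_from_filelog_index(b'data/baz.d') == b'baz'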
491 491
492 492
493 493 def _filelog_from_filename(repo, path):
494 494 """Returns the filelog for the given `path`. Stolen from `engine.py`"""
495 495
496 496 from .. import filelog # avoid cycle
497 497
498 498 fl = filelog.filelog(repo.svfs, path)
499 499 return fl
500 500
501 501
502 502 def _write_swapped_parents(repo, rl, rev, offset, fp):
503 503 """Swaps p1 and p2 and overwrites the revlog entry for `rev` in `fp`"""
504 504 from ..pure import parsers # avoid cycle
505 505
506 506 if repo._currentlock(repo._lockref) is None:
507 507 # Let's be paranoid about it
508 508 msg = "repo needs to be locked to rewrite parents"
509 509 raise error.ProgrammingError(msg)
510 510
511 511 index_format = parsers.IndexObject.index_format
512 512 entry = rl.index[rev]
513 513 new_entry = list(entry)
514 514 new_entry[5], new_entry[6] = entry[6], entry[5]
515 515 packed = index_format.pack(*new_entry[:8])
516 516 fp.seek(offset)
517 517 fp.write(packed)
518 518
519 519
520 520 def _reorder_filelog_parents(repo, fl, to_fix):
521 521 """
522 522 Swaps p1 and p2 for all `to_fix` revisions of filelog `fl` and writes the
523 523 new version to disk, overwriting the old one with a rename.
524 524 """
525 525 from ..pure import parsers # avoid cycle
526 526
527 527 ui = repo.ui
528 528 assert len(to_fix) > 0
529 529 rl = fl._revlog
530 530 if rl._format_version != constants.REVLOGV1:
531 531 msg = "expected version 1 revlog, got version '%d'" % rl._format_version
532 532 raise error.ProgrammingError(msg)
533 533
534 534 index_file = rl._indexfile
535 535 new_file_path = index_file + b'.tmp-parents-fix'
536 536 repaired_msg = _(b"repaired revision %d of 'filelog %s'\n")
537 537
538 538 with ui.uninterruptible():
539 539 try:
540 540 util.copyfile(
541 541 rl.opener.join(index_file),
542 542 rl.opener.join(new_file_path),
543 543 checkambig=rl.data_config.check_ambig,
544 544 )
545 545
546 546 with rl.opener(new_file_path, mode=b"r+") as fp:
547 547 if rl._inline:
548 548 index = parsers.InlinedIndexObject(fp.read())
549 549 for rev in fl.revs():
550 550 if rev in to_fix:
551 551 offset = index._calculate_index(rev)
552 552 _write_swapped_parents(repo, rl, rev, offset, fp)
553 553 ui.write(repaired_msg % (rev, index_file))
554 554 else:
555 555 index_format = parsers.IndexObject.index_format
556 556 for rev in to_fix:
557 557 offset = rev * index_format.size
558 558 _write_swapped_parents(repo, rl, rev, offset, fp)
559 559 ui.write(repaired_msg % (rev, index_file))
560 560
561 561 rl.opener.rename(new_file_path, index_file)
562 562 rl.clearcaches()
563 563 rl._loadindex()
564 564 finally:
565 565 util.tryunlink(new_file_path)
566 566
567 567
568 568 def _is_revision_affected(fl, filerev, metadata_cache=None):
569 569 full_text = lambda: fl._revlog.rawdata(filerev)
570 570 parent_revs = lambda: fl._revlog.parentrevs(filerev)
571 571 return _is_revision_affected_inner(
572 572 full_text, parent_revs, filerev, metadata_cache
573 573 )
574 574
575 575
576 576 def _is_revision_affected_inner(
577 577 full_text,
578 578 parents_revs,
579 579 filerev,
580 580 metadata_cache=None,
581 581 ):
582 582 """Mercurial currently (5.9rc0) uses `p1 == nullrev and p2 != nullrev` as a
583 583 special meaning compared to the reverse in the context of filelog-based
584 584 copytracing. issue6528 exists because new code assumed that parent ordering
585 585 didn't matter, so this detects if the revision contains metadata (since
586 586 it's only used for filelog-based copytracing) and its parents are in the
587 587 "wrong" order."""
588 588 try:
589 589 raw_text = full_text()
590 590 except error.CensoredNodeError:
591 591 # We don't care about censored nodes as they never carry metadata
592 592 return False
593 593
594 594 # raw text can be a `memoryview`, which doesn't implement `startswith`
595 595 has_meta = bytes(raw_text[:2]) == b'\x01\n'
596 596 if metadata_cache is not None:
597 597 metadata_cache[filerev] = has_meta
598 598 if has_meta:
599 599 (p1, p2) = parents_revs()
600 600 if p1 != nullrev and p2 == nullrev:
601 601 return True
602 602 return False
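# A minimal sketch of how the check above classifies a revision; the
# lambdas and values below are invented for illustration only:
assert _is_revision_affected_inner(
    full_text=lambda: b'\x01\ncopy: a\ncopyrev: ' + b'0' * 40 + b'\n\x01\ndata',
    parents_revs=lambda: (5, nullrev),  # p1 set, p2 null: the suspicious order
    filerev=7,
    metadata_cache={},
)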
603 603
604 604
605 605 def _is_revision_affected_fast(repo, fl, filerev, metadata_cache):
606 606 rl = fl._revlog
607 607 is_censored = lambda: rl.iscensored(filerev)
608 608 delta_base = lambda: rl.deltaparent(filerev)
609 609 delta = lambda: rl._chunk(filerev)
610 610 full_text = lambda: rl.rawdata(filerev)
611 611 parent_revs = lambda: rl.parentrevs(filerev)
612 612 return _is_revision_affected_fast_inner(
613 613 is_censored,
614 614 delta_base,
615 615 delta,
616 616 full_text,
617 617 parent_revs,
618 618 filerev,
619 619 metadata_cache,
620 620 )
621 621
622 622
623 623 def _is_revision_affected_fast_inner(
624 624 is_censored,
625 625 delta_base,
626 626 delta,
627 627 full_text,
628 628 parent_revs,
629 629 filerev,
630 630 metadata_cache,
631 631 ):
632 632 """Optimization fast-path for `_is_revision_affected`.
633 633
634 634 `metadata_cache` is a dict of `{rev: has_metadata}` which allows any
635 635 revision to check whether its delta base has metadata, saving computation
636 636 of the full text by looking only at the current delta instead.
637 637
638 638 This optimization only works if the revisions are looked at in order."""
639 639
640 640 if is_censored():
641 641 # Censored revisions don't contain metadata, so they cannot be affected
642 642 metadata_cache[filerev] = False
643 643 return False
644 644
645 645 p1, p2 = parent_revs()
646 646 if p1 == nullrev or p2 != nullrev:
647 647 return False
648 648
649 649 delta_parent = delta_base()
650 650 parent_has_metadata = metadata_cache.get(delta_parent)
651 651 if parent_has_metadata is None:
652 652 return _is_revision_affected_inner(
653 653 full_text,
654 654 parent_revs,
655 655 filerev,
656 656 metadata_cache,
657 657 )
658 658
659 659 chunk = delta()
660 660 if not len(chunk):
661 661 # No diff for this revision
662 662 return parent_has_metadata
663 663
664 664 header_length = 12
665 665 if len(chunk) < header_length:
666 666 raise error.Abort(_(b"patch cannot be decoded"))
667 667
668 668 start, _end, _length = struct.unpack(b">lll", chunk[:header_length])
669 669
670 670 if start < 2: # len(b'\x01\n') == 2
671 671 # This delta does *something* to the metadata marker (if any).
672 672 # Check it the slow way
673 673 is_affected = _is_revision_affected_inner(
674 674 full_text,
675 675 parent_revs,
676 676 filerev,
677 677 metadata_cache,
678 678 )
679 679 return is_affected
680 680
681 681 # The diff did not remove or add the metadata header, so this revision is
682 682 # in the same situation as its parent
683 683 metadata_cache[filerev] = parent_has_metadata
684 684 return parent_has_metadata
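# A minimal sketch of the 12-byte hunk header read by the fast path above;
# the numbers are made up (`struct` is already imported in this module):
hunk = struct.pack(b">lll", 0, 2, 4) + b'ab\x01\n'
start, end, length = struct.unpack(b">lll", hunk[:12])
assert (start, end, length) == (0, 2, 4)
# start < 2, so such a hunk touches the first two bytes where a b'\x01\n'
# metadata marker would live, and the slow path has to be taken.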
685 685
686 686
687 687 def _from_report(ui, repo, context, from_report, dry_run):
688 688 """
689 689 Fix the revisions given in the `from_report` file, but still check that the
690 690 revisions are indeed affected, to prevent an unfortunate cyclic situation
691 691 where we'd swap well-ordered parents again.
692 692
693 693 See the doc for `debug_fix_issue6528` for the format documentation.
694 694 """
695 695 ui.write(_(b"loading report file '%s'\n") % from_report)
696 696
697 697 with context(), open(from_report, mode='rb') as f:
698 698 for line in f.read().split(b'\n'):
699 699 if not line:
700 700 continue
701 701 filenodes, filename = line.split(b' ', 1)
702 702 fl = _filelog_from_filename(repo, filename)
703 703 to_fix = set(
704 704 fl.rev(binascii.unhexlify(n)) for n in filenodes.split(b',')
705 705 )
706 706 excluded = set()
707 707
708 708 for filerev in to_fix:
709 709 if _is_revision_affected(fl, filerev):
710 710 msg = b"found affected revision %d for filelog '%s'\n"
711 711 ui.warn(msg % (filerev, filename))
712 712 else:
713 713 msg = _(b"revision %s of file '%s' is not affected\n")
714 714 msg %= (binascii.hexlify(fl.node(filerev)), filename)
715 715 ui.warn(msg)
716 716 excluded.add(filerev)
717 717
718 718 to_fix = to_fix - excluded
719 719 if not to_fix:
720 720 msg = _(b"no affected revisions were found for '%s'\n")
721 721 ui.write(msg % filename)
722 722 continue
723 723 if not dry_run:
724 724 _reorder_filelog_parents(repo, fl, sorted(to_fix))
725 725
726 726
727 727 def filter_delta_issue6528(revlog, deltas_iter):
728 728 """filter incomind deltas to repaire issue 6528 on the fly"""
729 729 metadata_cache = {}
730 730
731 731 deltacomputer = deltas.deltacomputer(revlog)
732 732
733 733 for rev, d in enumerate(deltas_iter, len(revlog)):
734 734 (
735 735 node,
736 736 p1_node,
737 737 p2_node,
738 738 linknode,
739 739 deltabase,
740 740 delta,
741 741 flags,
742 742 sidedata,
743 743 ) = d
744 744
745 745 if not revlog.index.has_node(deltabase):
746 746 raise error.LookupError(
747 747 deltabase, revlog.radix, _(b'unknown parent')
748 748 )
749 749 base_rev = revlog.rev(deltabase)
750 750 if not revlog.index.has_node(p1_node):
751 751 raise error.LookupError(p1_node, revlog.radix, _(b'unknown parent'))
752 752 p1_rev = revlog.rev(p1_node)
753 753 if not revlog.index.has_node(p2_node):
754 754 raise error.LookupError(p2_node, revlog.radix, _(b'unknown parent'))
755 755 p2_rev = revlog.rev(p2_node)
756 756
757 757 is_censored = lambda: bool(flags & REVIDX_ISCENSORED)
758 758 delta_base = lambda: revlog.rev(delta_base)
759 759 delta_base = lambda: base_rev
760 760 parent_revs = lambda: (p1_rev, p2_rev)
761 761
762 762 def full_text():
763 763 # note: being able to reuse the full text computation in the
764 764 # underlying addrevision would be useful, however this is a bit too
765 765 # intrusive for the "quick" issue6528 fix we are writing before the
766 766 # 5.8 release
767 767 textlen = mdiff.patchedsize(revlog.size(base_rev), delta)
768 768
769 769 revinfo = revlogutils.revisioninfo(
770 770 node,
771 771 p1_node,
772 772 p2_node,
773 773 [None],
774 774 textlen,
775 775 (base_rev, delta),
776 776 flags,
777 777 )
778 778 return deltacomputer.buildtext(revinfo)
779 779
780 780 is_affected = _is_revision_affected_fast_inner(
781 781 is_censored,
782 782 delta_base,
783 783 lambda: delta,
784 784 full_text,
785 785 parent_revs,
786 786 rev,
787 787 metadata_cache,
788 788 )
789 789 if is_affected:
790 790 d = (
791 791 node,
792 792 p2_node,
793 793 p1_node,
794 794 linknode,
795 795 deltabase,
796 796 delta,
797 797 flags,
798 798 sidedata,
799 799 )
800 800 yield d
801 801
802 802
803 803 def repair_issue6528(
804 804 ui, repo, dry_run=False, to_report=None, from_report=None, paranoid=False
805 805 ):
806 806 @contextlib.contextmanager
807 807 def context():
808 808 if dry_run or to_report: # No need for locking
809 809 yield
810 810 else:
811 811 with repo.wlock(), repo.lock():
812 812 yield
813 813
814 814 if from_report:
815 815 return _from_report(ui, repo, context, from_report, dry_run)
816 816
817 817 report_entries = []
818 818
819 819 with context():
820 820 files = list(
821 821 entry
822 822 for entry in repo.store.data_entries()
823 823 if entry.is_revlog and entry.is_filelog
824 824 )
825 825
826 826 progress = ui.makeprogress(
827 827 _(b"looking for affected revisions"),
828 828 unit=_(b"filelogs"),
829 829 total=len(files),
830 830 )
831 831 found_nothing = True
832 832
833 833 for entry in files:
834 834 progress.increment()
835 835 filename = entry.target_id
836 836 fl = _filelog_from_filename(repo, entry.target_id)
837 837
838 838 # Set of filerevs (or hex filenodes if `to_report`) that need fixing
839 839 to_fix = set()
840 840 metadata_cache = {}
841 841 for filerev in fl.revs():
842 842 affected = _is_revision_affected_fast(
843 843 repo, fl, filerev, metadata_cache
844 844 )
845 845 if paranoid:
846 846 slow = _is_revision_affected(fl, filerev)
847 847 if slow != affected:
848 848 msg = _(b"paranoid check failed for '%s' at node %s")
849 849 node = binascii.hexlify(fl.node(filerev))
850 850 raise error.Abort(msg % (filename, node))
851 851 if affected:
852 852 msg = b"found affected revision %d for file '%s'\n"
853 853 ui.warn(msg % (filerev, filename))
854 854 found_nothing = False
855 855 if not dry_run:
856 856 if to_report:
857 857 to_fix.add(binascii.hexlify(fl.node(filerev)))
858 858 else:
859 859 to_fix.add(filerev)
860 860
861 861 if to_fix:
862 862 to_fix = sorted(to_fix)
863 863 if to_report:
864 864 report_entries.append((filename, to_fix))
865 865 else:
866 866 _reorder_filelog_parents(repo, fl, to_fix)
867 867
868 868 if found_nothing:
869 869 ui.write(_(b"no affected revisions were found\n"))
870 870
871 871 if to_report and report_entries:
872 872 with open(to_report, mode="wb") as f:
873 873 for path, to_fix in report_entries:
874 874 f.write(b"%s %s\n" % (b",".join(to_fix), path))
875 875
876 876 progress.complete()