revlog: replace revlog._io.size with a new revlog.index.entry_size...
marmoute - r47736:3c920870 (branch: default)
@@ -1,3925 +1,3928 @@
1 1 # perf.py - performance test routines
2 2 '''helper extension to measure performance
3 3
4 4 Configurations
5 5 ==============
6 6
7 7 ``perf``
8 8 --------
9 9
10 10 ``all-timing``
11 11 When set, additional statistics will be reported for each benchmark: best,
12 12 worst, median, and average. If not set, only the best timing is reported
13 13 (default: off).
14 14
15 15 ``presleep``
16 16 number of seconds to wait before any group of runs (default: 1)
17 17
18 18 ``pre-run``
19 19 number of runs to perform before starting measurement.
20 20
21 21 ``profile-benchmark``
22 22 Enable profiling for the benchmarked section.
23 23 (Only the first iteration is profiled.)
24 24
25 25 ``run-limits``
26 26 Control the number of runs each benchmark will perform. The option value
27 27 should be a list of `<time>-<numberofrun>` pairs. After each run the
28 28 conditions are considered in order with the following logic:
29 29
30 30 If the benchmark has been running for <time> seconds and we have performed
31 31 <numberofrun> iterations, stop the benchmark.
32 32
33 33 The default value is: `3.0-100, 10.0-3`
34 34
35 35 ``stub``
36 36 When set, benchmarks will only be run once, which is useful for testing
37 37 (default: off)
38 38 '''
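# Illustrative configuration (hypothetical values, not shipped defaults):
# with the settings below a benchmark keeps running until it has lasted
# 3 seconds and completed 100 iterations, or lasted 10 seconds and
# completed 3 iterations, whichever stop condition is met first:
#
#   [perf]
#   all-timing = yes
#   run-limits = 3.0-100, 10.0-3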
39 39
40 40 # "historical portability" policy of perf.py:
41 41 #
42 42 # We have to do:
43 43 # - make perf.py "loadable" with as wide Mercurial version as possible
44 44 # This doesn't mean that perf commands work correctly with that Mercurial.
45 45 # BTW, perf.py itself has been available since 1.1 (or eb240755386d).
46 46 # - make historical perf command work correctly with as wide Mercurial
47 47 # version as possible
48 48 #
49 49 # We have to do, if possible with reasonable cost:
50 50 # - make recent perf command for historical feature work correctly
51 51 # with early Mercurial
52 52 #
53 53 # We don't have to do:
54 54 # - make perf command for recent feature work correctly with early
55 55 # Mercurial
56 56
57 57 from __future__ import absolute_import
58 58 import contextlib
59 59 import functools
60 60 import gc
61 61 import os
62 62 import random
63 63 import shutil
64 64 import struct
65 65 import sys
66 66 import tempfile
67 67 import threading
68 68 import time
69 69 from mercurial import (
70 70 changegroup,
71 71 cmdutil,
72 72 commands,
73 73 copies,
74 74 error,
75 75 extensions,
76 76 hg,
77 77 mdiff,
78 78 merge,
79 79 revlog,
80 80 util,
81 81 )
82 82
83 83 # for "historical portability":
84 84 # try to import modules separately (in dict order), and ignore
85 85 # failure, because these aren't available with early Mercurial
86 86 try:
87 87 from mercurial import branchmap # since 2.5 (or bcee63733aad)
88 88 except ImportError:
89 89 pass
90 90 try:
91 91 from mercurial import obsolete # since 2.3 (or ad0d6c2b3279)
92 92 except ImportError:
93 93 pass
94 94 try:
95 95 from mercurial import registrar # since 3.7 (or 37d50250b696)
96 96
97 97 dir(registrar) # forcibly load it
98 98 except ImportError:
99 99 registrar = None
100 100 try:
101 101 from mercurial import repoview # since 2.5 (or 3a6ddacb7198)
102 102 except ImportError:
103 103 pass
104 104 try:
105 105 from mercurial.utils import repoviewutil # since 5.0
106 106 except ImportError:
107 107 repoviewutil = None
108 108 try:
109 109 from mercurial import scmutil # since 1.9 (or 8b252e826c68)
110 110 except ImportError:
111 111 pass
112 112 try:
113 113 from mercurial import setdiscovery # since 1.9 (or cb98fed52495)
114 114 except ImportError:
115 115 pass
116 116
117 117 try:
118 118 from mercurial import profiling
119 119 except ImportError:
120 120 profiling = None
121 121
122 122
123 123 def identity(a):
124 124 return a
125 125
126 126
127 127 try:
128 128 from mercurial import pycompat
129 129
130 130 getargspec = pycompat.getargspec # added to module after 4.5
131 131 _byteskwargs = pycompat.byteskwargs # since 4.1 (or fbc3f73dc802)
132 132 _sysstr = pycompat.sysstr # since 4.0 (or 2219f4f82ede)
133 133 _bytestr = pycompat.bytestr # since 4.2 (or b70407bd84d5)
134 134 _xrange = pycompat.xrange # since 4.8 (or 7eba8f83129b)
135 135 fsencode = pycompat.fsencode # since 3.9 (or f4a5e0e86a7e)
136 136 if pycompat.ispy3:
137 137 _maxint = sys.maxsize # per py3 docs for replacing maxint
138 138 else:
139 139 _maxint = sys.maxint
140 140 except (NameError, ImportError, AttributeError):
141 141 import inspect
142 142
143 143 getargspec = inspect.getargspec
144 144 _byteskwargs = identity
145 145 _bytestr = str
146 146 fsencode = identity # no py3 support
147 147 _maxint = sys.maxint # no py3 support
148 148 _sysstr = lambda x: x # no py3 support
149 149 _xrange = xrange
150 150
151 151 try:
152 152 # 4.7+
153 153 queue = pycompat.queue.Queue
154 154 except (NameError, AttributeError, ImportError):
155 155 # <4.7.
156 156 try:
157 157 queue = pycompat.queue
158 158 except (NameError, AttributeError, ImportError):
159 159 import Queue as queue
160 160
161 161 try:
162 162 from mercurial import logcmdutil
163 163
164 164 makelogtemplater = logcmdutil.maketemplater
165 165 except (AttributeError, ImportError):
166 166 try:
167 167 makelogtemplater = cmdutil.makelogtemplater
168 168 except (AttributeError, ImportError):
169 169 makelogtemplater = None
170 170
171 171 # for "historical portability":
172 172 # define util.safehasattr forcibly, because util.safehasattr has been
173 173 # available since 1.9.3 (or 94b200a11cf7)
174 174 _undefined = object()
175 175
176 176
177 177 def safehasattr(thing, attr):
178 178 return getattr(thing, _sysstr(attr), _undefined) is not _undefined
179 179
180 180
181 181 setattr(util, 'safehasattr', safehasattr)
182 182
183 183 # for "historical portability":
184 184 # define util.timer forcibly, because util.timer has been available
185 185 # since ae5d60bb70c9
186 186 if safehasattr(time, 'perf_counter'):
187 187 util.timer = time.perf_counter
188 188 elif os.name == b'nt':
189 189 util.timer = time.clock
190 190 else:
191 191 util.timer = time.time
192 192
193 193 # for "historical portability":
194 194 # use locally defined empty option list, if formatteropts isn't
195 195 # available, because commands.formatteropts has been available since
196 196 # 3.2 (or 7a7eed5176a4), even though formatting itself has been
197 197 # available since 2.2 (or ae5f92e154d3)
198 198 formatteropts = getattr(
199 199 cmdutil, "formatteropts", getattr(commands, "formatteropts", [])
200 200 )
201 201
202 202 # for "historical portability":
203 203 # use locally defined option list, if debugrevlogopts isn't available,
204 204 # because commands.debugrevlogopts has been available since 3.7 (or
205 205 # 5606f7d0d063), even though cmdutil.openrevlog() has been available
206 206 # since 1.9 (or a79fea6b3e77).
207 207 revlogopts = getattr(
208 208 cmdutil,
209 209 "debugrevlogopts",
210 210 getattr(
211 211 commands,
212 212 "debugrevlogopts",
213 213 [
214 214 (b'c', b'changelog', False, b'open changelog'),
215 215 (b'm', b'manifest', False, b'open manifest'),
216 216 (b'', b'dir', False, b'open directory manifest'),
217 217 ],
218 218 ),
219 219 )
220 220
221 221 cmdtable = {}
222 222
223 223 # for "historical portability":
224 224 # define parsealiases locally, because cmdutil.parsealiases has been
225 225 # available since 1.5 (or 6252852b4332)
226 226 def parsealiases(cmd):
227 227 return cmd.split(b"|")
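# For example, parsealiases(b'perf::tags|perftags') returns
# [b'perf::tags', b'perftags'].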
228 228
229 229
230 230 if safehasattr(registrar, 'command'):
231 231 command = registrar.command(cmdtable)
232 232 elif safehasattr(cmdutil, 'command'):
233 233 command = cmdutil.command(cmdtable)
234 234 if 'norepo' not in getargspec(command).args:
235 235 # for "historical portability":
236 236 # wrap original cmdutil.command, because "norepo" option has
237 237 # been available since 3.1 (or 75a96326cecb)
238 238 _command = command
239 239
240 240 def command(name, options=(), synopsis=None, norepo=False):
241 241 if norepo:
242 242 commands.norepo += b' %s' % b' '.join(parsealiases(name))
243 243 return _command(name, list(options), synopsis)
244 244
245 245
246 246 else:
247 247 # for "historical portability":
248 248 # define "@command" annotation locally, because cmdutil.command
249 249 # has been available since 1.9 (or 2daa5179e73f)
250 250 def command(name, options=(), synopsis=None, norepo=False):
251 251 def decorator(func):
252 252 if synopsis:
253 253 cmdtable[name] = func, list(options), synopsis
254 254 else:
255 255 cmdtable[name] = func, list(options)
256 256 if norepo:
257 257 commands.norepo += b' %s' % b' '.join(parsealiases(name))
258 258 return func
259 259
260 260 return decorator
261 261
262 262
263 263 try:
264 264 import mercurial.registrar
265 265 import mercurial.configitems
266 266
267 267 configtable = {}
268 268 configitem = mercurial.registrar.configitem(configtable)
269 269 configitem(
270 270 b'perf',
271 271 b'presleep',
272 272 default=mercurial.configitems.dynamicdefault,
273 273 experimental=True,
274 274 )
275 275 configitem(
276 276 b'perf',
277 277 b'stub',
278 278 default=mercurial.configitems.dynamicdefault,
279 279 experimental=True,
280 280 )
281 281 configitem(
282 282 b'perf',
283 283 b'parentscount',
284 284 default=mercurial.configitems.dynamicdefault,
285 285 experimental=True,
286 286 )
287 287 configitem(
288 288 b'perf',
289 289 b'all-timing',
290 290 default=mercurial.configitems.dynamicdefault,
291 291 experimental=True,
292 292 )
293 293 configitem(
294 294 b'perf',
295 295 b'pre-run',
296 296 default=mercurial.configitems.dynamicdefault,
297 297 )
298 298 configitem(
299 299 b'perf',
300 300 b'profile-benchmark',
301 301 default=mercurial.configitems.dynamicdefault,
302 302 )
303 303 configitem(
304 304 b'perf',
305 305 b'run-limits',
306 306 default=mercurial.configitems.dynamicdefault,
307 307 experimental=True,
308 308 )
309 309 except (ImportError, AttributeError):
310 310 pass
311 311 except TypeError:
312 312 # compatibility fix for a11fd395e83f
313 313 # hg version: 5.2
314 314 configitem(
315 315 b'perf',
316 316 b'presleep',
317 317 default=mercurial.configitems.dynamicdefault,
318 318 )
319 319 configitem(
320 320 b'perf',
321 321 b'stub',
322 322 default=mercurial.configitems.dynamicdefault,
323 323 )
324 324 configitem(
325 325 b'perf',
326 326 b'parentscount',
327 327 default=mercurial.configitems.dynamicdefault,
328 328 )
329 329 configitem(
330 330 b'perf',
331 331 b'all-timing',
332 332 default=mercurial.configitems.dynamicdefault,
333 333 )
334 334 configitem(
335 335 b'perf',
336 336 b'pre-run',
337 337 default=mercurial.configitems.dynamicdefault,
338 338 )
339 339 configitem(
340 340 b'perf',
341 341 b'profile-benchmark',
342 342 default=mercurial.configitems.dynamicdefault,
343 343 )
344 344 configitem(
345 345 b'perf',
346 346 b'run-limits',
347 347 default=mercurial.configitems.dynamicdefault,
348 348 )
349 349
350 350
351 351 def getlen(ui):
352 352 if ui.configbool(b"perf", b"stub", False):
353 353 return lambda x: 1
354 354 return len
355 355
356 356
357 357 class noop(object):
358 358 """dummy context manager"""
359 359
360 360 def __enter__(self):
361 361 pass
362 362
363 363 def __exit__(self, *args):
364 364 pass
365 365
366 366
367 367 NOOPCTX = noop()
368 368
369 369
370 370 def gettimer(ui, opts=None):
371 371 """return a timer function and formatter: (timer, formatter)
372 372
373 373 This function exists to gather the creation of the formatter in a single
374 374 place instead of duplicating it in all performance commands."""
375 375
376 376 # enforce an idle period before execution to counteract power management
377 377 # experimental config: perf.presleep
378 378 time.sleep(getint(ui, b"perf", b"presleep", 1))
379 379
380 380 if opts is None:
381 381 opts = {}
382 382 # redirect all to stderr unless buffer api is in use
383 383 if not ui._buffers:
384 384 ui = ui.copy()
385 385 uifout = safeattrsetter(ui, b'fout', ignoremissing=True)
386 386 if uifout:
387 387 # for "historical portability":
388 388 # ui.fout/ferr have been available since 1.9 (or 4e1ccd4c2b6d)
389 389 uifout.set(ui.ferr)
390 390
391 391 # get a formatter
392 392 uiformatter = getattr(ui, 'formatter', None)
393 393 if uiformatter:
394 394 fm = uiformatter(b'perf', opts)
395 395 else:
396 396 # for "historical portability":
397 397 # define formatter locally, because ui.formatter has been
398 398 # available since 2.2 (or ae5f92e154d3)
399 399 from mercurial import node
400 400
401 401 class defaultformatter(object):
402 402 """Minimized composition of baseformatter and plainformatter"""
403 403
404 404 def __init__(self, ui, topic, opts):
405 405 self._ui = ui
406 406 if ui.debugflag:
407 407 self.hexfunc = node.hex
408 408 else:
409 409 self.hexfunc = node.short
410 410
411 411 def __nonzero__(self):
412 412 return False
413 413
414 414 __bool__ = __nonzero__
415 415
416 416 def startitem(self):
417 417 pass
418 418
419 419 def data(self, **data):
420 420 pass
421 421
422 422 def write(self, fields, deftext, *fielddata, **opts):
423 423 self._ui.write(deftext % fielddata, **opts)
424 424
425 425 def condwrite(self, cond, fields, deftext, *fielddata, **opts):
426 426 if cond:
427 427 self._ui.write(deftext % fielddata, **opts)
428 428
429 429 def plain(self, text, **opts):
430 430 self._ui.write(text, **opts)
431 431
432 432 def end(self):
433 433 pass
434 434
435 435 fm = defaultformatter(ui, b'perf', opts)
436 436
437 437 # stub function, runs code only once instead of in a loop
438 438 # experimental config: perf.stub
439 439 if ui.configbool(b"perf", b"stub", False):
440 440 return functools.partial(stub_timer, fm), fm
441 441
442 442 # experimental config: perf.all-timing
443 443 displayall = ui.configbool(b"perf", b"all-timing", False)
444 444
445 445 # experimental config: perf.run-limits
446 446 limitspec = ui.configlist(b"perf", b"run-limits", [])
447 447 limits = []
448 448 for item in limitspec:
449 449 parts = item.split(b'-', 1)
450 450 if len(parts) < 2:
451 451 ui.warn((b'malformed run limit entry, missing "-": %s\n' % item))
452 452 continue
453 453 try:
454 454 time_limit = float(_sysstr(parts[0]))
455 455 except ValueError as e:
456 456 ui.warn(
457 457 (
458 458 b'malformed run limit entry, %s: %s\n'
459 459 % (_bytestr(e), item)
460 460 )
461 461 )
462 462 continue
463 463 try:
464 464 run_limit = int(_sysstr(parts[1]))
465 465 except ValueError as e:
466 466 ui.warn(
467 467 (
468 468 b'malformed run limit entry, %s: %s\n'
469 469 % (_bytestr(e), item)
470 470 )
471 471 )
472 472 continue
473 473 limits.append((time_limit, run_limit))
474 474 if not limits:
475 475 limits = DEFAULTLIMITS
476 476
477 477 profiler = None
478 478 if profiling is not None:
479 479 if ui.configbool(b"perf", b"profile-benchmark", False):
480 480 profiler = profiling.profile(ui)
481 481
482 482 prerun = getint(ui, b"perf", b"pre-run", 0)
483 483 t = functools.partial(
484 484 _timer,
485 485 fm,
486 486 displayall=displayall,
487 487 limits=limits,
488 488 prerun=prerun,
489 489 profiler=profiler,
490 490 )
491 491 return t, fm
492 492
493 493
494 494 def stub_timer(fm, func, setup=None, title=None):
495 495 if setup is not None:
496 496 setup()
497 497 func()
498 498
499 499
500 500 @contextlib.contextmanager
501 501 def timeone():
502 502 r = []
503 503 ostart = os.times()
504 504 cstart = util.timer()
505 505 yield r
506 506 cstop = util.timer()
507 507 ostop = os.times()
508 508 a, b = ostart, ostop
509 509 r.append((cstop - cstart, b[0] - a[0], b[1] - a[1]))
510 510
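# Usage sketch for timeone() (illustrative): the context manager appends a
# single (wall, user, sys) sample to the list it yields.
#
#   with timeone() as sample:
#       benchmarked_call()  # hypothetical workload
#   wall, user, sys_time = sample[0]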
511 511
512 512 # list of stop conditions (elapsed time, minimal run count)
513 513 DEFAULTLIMITS = (
514 514 (3.0, 100),
515 515 (10.0, 3),
516 516 )
517 517
518 518
519 519 def _timer(
520 520 fm,
521 521 func,
522 522 setup=None,
523 523 title=None,
524 524 displayall=False,
525 525 limits=DEFAULTLIMITS,
526 526 prerun=0,
527 527 profiler=None,
528 528 ):
529 529 gc.collect()
530 530 results = []
531 531 begin = util.timer()
532 532 count = 0
533 533 if profiler is None:
534 534 profiler = NOOPCTX
535 535 for i in range(prerun):
536 536 if setup is not None:
537 537 setup()
538 538 func()
539 539 keepgoing = True
540 540 while keepgoing:
541 541 if setup is not None:
542 542 setup()
543 543 with profiler:
544 544 with timeone() as item:
545 545 r = func()
546 546 profiler = NOOPCTX
547 547 count += 1
548 548 results.append(item[0])
549 549 cstop = util.timer()
550 550 # Look for a stop condition.
551 551 elapsed = cstop - begin
552 552 for t, mincount in limits:
553 553 if elapsed >= t and count >= mincount:
554 554 keepgoing = False
555 555 break
556 556
557 557 formatone(fm, results, title=title, result=r, displayall=displayall)
558 558
559 559
560 560 def formatone(fm, timings, title=None, result=None, displayall=False):
561 561
562 562 count = len(timings)
563 563
564 564 fm.startitem()
565 565
566 566 if title:
567 567 fm.write(b'title', b'! %s\n', title)
568 568 if result:
569 569 fm.write(b'result', b'! result: %s\n', result)
570 570
571 571 def display(role, entry):
572 572 prefix = b''
573 573 if role != b'best':
574 574 prefix = b'%s.' % role
575 575 fm.plain(b'!')
576 576 fm.write(prefix + b'wall', b' wall %f', entry[0])
577 577 fm.write(prefix + b'comb', b' comb %f', entry[1] + entry[2])
578 578 fm.write(prefix + b'user', b' user %f', entry[1])
579 579 fm.write(prefix + b'sys', b' sys %f', entry[2])
580 580 fm.write(prefix + b'count', b' (%s of %%d)' % role, count)
581 581 fm.plain(b'\n')
582 582
583 583 timings.sort()
584 584 min_val = timings[0]
585 585 display(b'best', min_val)
586 586 if displayall:
587 587 max_val = timings[-1]
588 588 display(b'max', max_val)
589 589 avg = tuple([sum(x) / count for x in zip(*timings)])
590 590 display(b'avg', avg)
591 591 median = timings[len(timings) // 2]
592 592 display(b'median', median)
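# A typical "best" line emitted by the writes above looks like this
# (illustrative numbers):
#
#   ! wall 0.000123 comb 0.010000 user 0.010000 sys 0.000000 (best of 100)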
593 593
594 594
595 595 # utilities for historical portability
596 596
597 597
598 598 def getint(ui, section, name, default):
599 599 # for "historical portability":
600 600 # ui.configint has been available since 1.9 (or fa2b596db182)
601 601 v = ui.config(section, name, None)
602 602 if v is None:
603 603 return default
604 604 try:
605 605 return int(v)
606 606 except ValueError:
607 607 raise error.ConfigError(
608 608 b"%s.%s is not an integer ('%s')" % (section, name, v)
609 609 )
610 610
611 611
612 612 def safeattrsetter(obj, name, ignoremissing=False):
613 613 """Ensure that 'obj' has 'name' attribute before subsequent setattr
614 614
615 615 This function is aborted, if 'obj' doesn't have 'name' attribute
616 616 at runtime. This avoids overlooking removal of an attribute, which
617 617 breaks assumption of performance measurement, in the future.
618 618
619 619 This function returns the object to (1) assign a new value, and
620 620 (2) restore an original value to the attribute.
621 621
622 622 If 'ignoremissing' is true, missing 'name' attribute doesn't cause
623 623 abortion, and this function returns None. This is useful to
624 624 examine an attribute, which isn't ensured in all Mercurial
625 625 versions.
626 626 """
627 627 if not util.safehasattr(obj, name):
628 628 if ignoremissing:
629 629 return None
630 630 raise error.Abort(
631 631 (
632 632 b"missing attribute %s of %s might break assumption"
633 633 b" of performance measurement"
634 634 )
635 635 % (name, obj)
636 636 )
637 637
638 638 origvalue = getattr(obj, _sysstr(name))
639 639
640 640 class attrutil(object):
641 641 def set(self, newvalue):
642 642 setattr(obj, _sysstr(name), newvalue)
643 643
644 644 def restore(self):
645 645 setattr(obj, _sysstr(name), origvalue)
646 646
647 647 return attrutil()
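# Usage sketch, mirroring how gettimer() redirects output: fetch a setter,
# override the attribute for the benchmark, then restore it if needed.
#
#   uifout = safeattrsetter(ui, b'fout', ignoremissing=True)
#   if uifout:
#       uifout.set(ui.ferr)
#       ...
#       uifout.restore()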
648 648
649 649
650 650 # utilities to examine internal API changes
651 651
652 652
653 653 def getbranchmapsubsettable():
654 654 # for "historical portability":
655 655 # subsettable is defined in:
656 656 # - branchmap since 2.9 (or 175c6fd8cacc)
657 657 # - repoview since 2.5 (or 59a9f18d4587)
658 658 # - repoviewutil since 5.0
659 659 for mod in (branchmap, repoview, repoviewutil):
660 660 subsettable = getattr(mod, 'subsettable', None)
661 661 if subsettable:
662 662 return subsettable
663 663
664 664 # bisecting in bcee63733aad::59a9f18d4587 can reach here (both
665 665 # branchmap and repoview modules exist, but subsettable attribute
666 666 # doesn't)
667 667 raise error.Abort(
668 668 b"perfbranchmap not available with this Mercurial",
669 669 hint=b"use 2.5 or later",
670 670 )
671 671
672 672
673 673 def getsvfs(repo):
674 674 """Return appropriate object to access files under .hg/store"""
675 675 # for "historical portability":
676 676 # repo.svfs has been available since 2.3 (or 7034365089bf)
677 677 svfs = getattr(repo, 'svfs', None)
678 678 if svfs:
679 679 return svfs
680 680 else:
681 681 return getattr(repo, 'sopener')
682 682
683 683
684 684 def getvfs(repo):
685 685 """Return appropriate object to access files under .hg"""
686 686 # for "historical portability":
687 687 # repo.vfs has been available since 2.3 (or 7034365089bf)
688 688 vfs = getattr(repo, 'vfs', None)
689 689 if vfs:
690 690 return vfs
691 691 else:
692 692 return getattr(repo, 'opener')
693 693
694 694
695 695 def repocleartagscachefunc(repo):
696 696 """Return the function to clear tags cache according to repo internal API"""
697 697 if util.safehasattr(repo, b'_tagscache'): # since 2.0 (or 9dca7653b525)
698 698 # in this case, setattr(repo, '_tagscache', None) or the like isn't
699 699 # the correct way to clear the tags cache, because existing code paths
700 700 # expect _tagscache to be a structured object.
701 701 def clearcache():
702 702 # _tagscache has been filteredpropertycache since 2.5 (or
703 703 # 98c867ac1330), and delattr() can't work in such a case
704 704 if '_tagscache' in vars(repo):
705 705 del repo.__dict__['_tagscache']
706 706
707 707 return clearcache
708 708
709 709 repotags = safeattrsetter(repo, b'_tags', ignoremissing=True)
710 710 if repotags: # since 1.4 (or 5614a628d173)
711 711 return lambda: repotags.set(None)
712 712
713 713 repotagscache = safeattrsetter(repo, b'tagscache', ignoremissing=True)
714 714 if repotagscache: # since 0.6 (or d7df759d0e97)
715 715 return lambda: repotagscache.set(None)
716 716
717 717 # Mercurial earlier than 0.6 (or d7df759d0e97) logically reaches
718 718 # this point, but it isn't so problematic, because:
719 719 # - repo.tags of such a Mercurial isn't "callable", and repo.tags()
720 720 # in perftags() fails quickly
721 721 # - perf.py itself has been available since 1.1 (or eb240755386d)
722 722 raise error.Abort(b"tags API of this hg command is unknown")
723 723
724 724
725 725 # utilities to clear cache
726 726
727 727
728 728 def clearfilecache(obj, attrname):
729 729 unfiltered = getattr(obj, 'unfiltered', None)
730 730 if unfiltered is not None:
731 731 obj = obj.unfiltered()
732 732 if attrname in vars(obj):
733 733 delattr(obj, attrname)
734 734 obj._filecache.pop(attrname, None)
735 735
736 736
737 737 def clearchangelog(repo):
738 738 if repo is not repo.unfiltered():
739 739 object.__setattr__(repo, '_clcachekey', None)
740 740 object.__setattr__(repo, '_clcache', None)
741 741 clearfilecache(repo.unfiltered(), 'changelog')
742 742
743 743
744 744 # perf commands
745 745
746 746
747 747 @command(b'perf::walk|perfwalk', formatteropts)
748 748 def perfwalk(ui, repo, *pats, **opts):
749 749 opts = _byteskwargs(opts)
750 750 timer, fm = gettimer(ui, opts)
751 751 m = scmutil.match(repo[None], pats, {})
752 752 timer(
753 753 lambda: len(
754 754 list(
755 755 repo.dirstate.walk(m, subrepos=[], unknown=True, ignored=False)
756 756 )
757 757 )
758 758 )
759 759 fm.end()
760 760
761 761
762 762 @command(b'perf::annotate|perfannotate', formatteropts)
763 763 def perfannotate(ui, repo, f, **opts):
764 764 opts = _byteskwargs(opts)
765 765 timer, fm = gettimer(ui, opts)
766 766 fc = repo[b'.'][f]
767 767 timer(lambda: len(fc.annotate(True)))
768 768 fm.end()
769 769
770 770
771 771 @command(
772 772 b'perf::status|perfstatus',
773 773 [
774 774 (b'u', b'unknown', False, b'ask status to look for unknown files'),
775 775 (b'', b'dirstate', False, b'benchmark the internal dirstate call'),
776 776 ]
777 777 + formatteropts,
778 778 )
779 779 def perfstatus(ui, repo, **opts):
780 780 """benchmark the performance of a single status call
781 781
782 782 The repository data are preserved between calls.
783 783
784 784 By default, only the status of the tracked files is requested. If
785 785 `--unknown` is passed, the "unknown" files are also considered.
786 786 """
787 787 opts = _byteskwargs(opts)
788 788 # m = match.always(repo.root, repo.getcwd())
789 789 # timer(lambda: sum(map(len, repo.dirstate.status(m, [], False, False,
790 790 # False))))
791 791 timer, fm = gettimer(ui, opts)
792 792 if opts[b'dirstate']:
793 793 dirstate = repo.dirstate
794 794 m = scmutil.matchall(repo)
795 795 unknown = opts[b'unknown']
796 796
797 797 def status_dirstate():
798 798 s = dirstate.status(
799 799 m, subrepos=[], ignored=False, clean=False, unknown=unknown
800 800 )
801 801 sum(map(bool, s))
802 802
803 803 timer(status_dirstate)
804 804 else:
805 805 timer(lambda: sum(map(len, repo.status(unknown=opts[b'unknown']))))
806 806 fm.end()
807 807
808 808
809 809 @command(b'perf::addremove|perfaddremove', formatteropts)
810 810 def perfaddremove(ui, repo, **opts):
811 811 opts = _byteskwargs(opts)
812 812 timer, fm = gettimer(ui, opts)
813 813 try:
814 814 oldquiet = repo.ui.quiet
815 815 repo.ui.quiet = True
816 816 matcher = scmutil.match(repo[None])
817 817 opts[b'dry_run'] = True
818 818 if 'uipathfn' in getargspec(scmutil.addremove).args:
819 819 uipathfn = scmutil.getuipathfn(repo)
820 820 timer(lambda: scmutil.addremove(repo, matcher, b"", uipathfn, opts))
821 821 else:
822 822 timer(lambda: scmutil.addremove(repo, matcher, b"", opts))
823 823 finally:
824 824 repo.ui.quiet = oldquiet
825 825 fm.end()
826 826
827 827
828 828 def clearcaches(cl):
829 829 # behave somewhat consistently across internal API changes
830 830 if util.safehasattr(cl, b'clearcaches'):
831 831 cl.clearcaches()
832 832 elif util.safehasattr(cl, b'_nodecache'):
833 833 # <= hg-5.2
834 834 from mercurial.node import nullid, nullrev
835 835
836 836 cl._nodecache = {nullid: nullrev}
837 837 cl._nodepos = None
838 838
839 839
840 840 @command(b'perf::heads|perfheads', formatteropts)
841 841 def perfheads(ui, repo, **opts):
842 842 """benchmark the computation of a changelog heads"""
843 843 opts = _byteskwargs(opts)
844 844 timer, fm = gettimer(ui, opts)
845 845 cl = repo.changelog
846 846
847 847 def s():
848 848 clearcaches(cl)
849 849
850 850 def d():
851 851 len(cl.headrevs())
852 852
853 853 timer(d, setup=s)
854 854 fm.end()
855 855
856 856
857 857 @command(
858 858 b'perf::tags|perftags',
859 859 formatteropts
860 860 + [
861 861 (b'', b'clear-revlogs', False, b'refresh changelog and manifest'),
862 862 ],
863 863 )
864 864 def perftags(ui, repo, **opts):
865 865 opts = _byteskwargs(opts)
866 866 timer, fm = gettimer(ui, opts)
867 867 repocleartagscache = repocleartagscachefunc(repo)
868 868 clearrevlogs = opts[b'clear_revlogs']
869 869
870 870 def s():
871 871 if clearrevlogs:
872 872 clearchangelog(repo)
873 873 clearfilecache(repo.unfiltered(), 'manifest')
874 874 repocleartagscache()
875 875
876 876 def t():
877 877 return len(repo.tags())
878 878
879 879 timer(t, setup=s)
880 880 fm.end()
881 881
882 882
883 883 @command(b'perf::ancestors|perfancestors', formatteropts)
884 884 def perfancestors(ui, repo, **opts):
885 885 opts = _byteskwargs(opts)
886 886 timer, fm = gettimer(ui, opts)
887 887 heads = repo.changelog.headrevs()
888 888
889 889 def d():
890 890 for a in repo.changelog.ancestors(heads):
891 891 pass
892 892
893 893 timer(d)
894 894 fm.end()
895 895
896 896
897 897 @command(b'perf::ancestorset|perfancestorset', formatteropts)
898 898 def perfancestorset(ui, repo, revset, **opts):
899 899 opts = _byteskwargs(opts)
900 900 timer, fm = gettimer(ui, opts)
901 901 revs = repo.revs(revset)
902 902 heads = repo.changelog.headrevs()
903 903
904 904 def d():
905 905 s = repo.changelog.ancestors(heads)
906 906 for rev in revs:
907 907 rev in s
908 908
909 909 timer(d)
910 910 fm.end()
911 911
912 912
913 913 @command(b'perf::discovery|perfdiscovery', formatteropts, b'PATH')
914 914 def perfdiscovery(ui, repo, path, **opts):
915 915 """benchmark discovery between local repo and the peer at given path"""
916 916 repos = [repo, None]
917 917 timer, fm = gettimer(ui, opts)
918 918
919 919 try:
920 920 from mercurial.utils.urlutil import get_unique_pull_path
921 921
922 922 path = get_unique_pull_path(b'perfdiscovery', repo, ui, path)[0]
923 923 except ImportError:
924 924 path = ui.expandpath(path)
925 925
926 926 def s():
927 927 repos[1] = hg.peer(ui, opts, path)
928 928
929 929 def d():
930 930 setdiscovery.findcommonheads(ui, *repos)
931 931
932 932 timer(d, setup=s)
933 933 fm.end()
934 934
935 935
936 936 @command(
937 937 b'perf::bookmarks|perfbookmarks',
938 938 formatteropts
939 939 + [
940 940 (b'', b'clear-revlogs', False, b'refresh changelog and manifest'),
941 941 ],
942 942 )
943 943 def perfbookmarks(ui, repo, **opts):
944 944 """benchmark parsing bookmarks from disk to memory"""
945 945 opts = _byteskwargs(opts)
946 946 timer, fm = gettimer(ui, opts)
947 947
948 948 clearrevlogs = opts[b'clear_revlogs']
949 949
950 950 def s():
951 951 if clearrevlogs:
952 952 clearchangelog(repo)
953 953 clearfilecache(repo, b'_bookmarks')
954 954
955 955 def d():
956 956 repo._bookmarks
957 957
958 958 timer(d, setup=s)
959 959 fm.end()
960 960
961 961
962 962 @command(b'perf::bundleread|perfbundleread', formatteropts, b'BUNDLE')
963 963 def perfbundleread(ui, repo, bundlepath, **opts):
964 964 """Benchmark reading of bundle files.
965 965
966 966 This command is meant to isolate the I/O part of bundle reading as
967 967 much as possible.
968 968 """
969 969 from mercurial import (
970 970 bundle2,
971 971 exchange,
972 972 streamclone,
973 973 )
974 974
975 975 opts = _byteskwargs(opts)
976 976
977 977 def makebench(fn):
978 978 def run():
979 979 with open(bundlepath, b'rb') as fh:
980 980 bundle = exchange.readbundle(ui, fh, bundlepath)
981 981 fn(bundle)
982 982
983 983 return run
984 984
985 985 def makereadnbytes(size):
986 986 def run():
987 987 with open(bundlepath, b'rb') as fh:
988 988 bundle = exchange.readbundle(ui, fh, bundlepath)
989 989 while bundle.read(size):
990 990 pass
991 991
992 992 return run
993 993
994 994 def makestdioread(size):
995 995 def run():
996 996 with open(bundlepath, b'rb') as fh:
997 997 while fh.read(size):
998 998 pass
999 999
1000 1000 return run
1001 1001
1002 1002 # bundle1
1003 1003
1004 1004 def deltaiter(bundle):
1005 1005 for delta in bundle.deltaiter():
1006 1006 pass
1007 1007
1008 1008 def iterchunks(bundle):
1009 1009 for chunk in bundle.getchunks():
1010 1010 pass
1011 1011
1012 1012 # bundle2
1013 1013
1014 1014 def forwardchunks(bundle):
1015 1015 for chunk in bundle._forwardchunks():
1016 1016 pass
1017 1017
1018 1018 def iterparts(bundle):
1019 1019 for part in bundle.iterparts():
1020 1020 pass
1021 1021
1022 1022 def iterpartsseekable(bundle):
1023 1023 for part in bundle.iterparts(seekable=True):
1024 1024 pass
1025 1025
1026 1026 def seek(bundle):
1027 1027 for part in bundle.iterparts(seekable=True):
1028 1028 part.seek(0, os.SEEK_END)
1029 1029
1030 1030 def makepartreadnbytes(size):
1031 1031 def run():
1032 1032 with open(bundlepath, b'rb') as fh:
1033 1033 bundle = exchange.readbundle(ui, fh, bundlepath)
1034 1034 for part in bundle.iterparts():
1035 1035 while part.read(size):
1036 1036 pass
1037 1037
1038 1038 return run
1039 1039
1040 1040 benches = [
1041 1041 (makestdioread(8192), b'read(8k)'),
1042 1042 (makestdioread(16384), b'read(16k)'),
1043 1043 (makestdioread(32768), b'read(32k)'),
1044 1044 (makestdioread(131072), b'read(128k)'),
1045 1045 ]
1046 1046
1047 1047 with open(bundlepath, b'rb') as fh:
1048 1048 bundle = exchange.readbundle(ui, fh, bundlepath)
1049 1049
1050 1050 if isinstance(bundle, changegroup.cg1unpacker):
1051 1051 benches.extend(
1052 1052 [
1053 1053 (makebench(deltaiter), b'cg1 deltaiter()'),
1054 1054 (makebench(iterchunks), b'cg1 getchunks()'),
1055 1055 (makereadnbytes(8192), b'cg1 read(8k)'),
1056 1056 (makereadnbytes(16384), b'cg1 read(16k)'),
1057 1057 (makereadnbytes(32768), b'cg1 read(32k)'),
1058 1058 (makereadnbytes(131072), b'cg1 read(128k)'),
1059 1059 ]
1060 1060 )
1061 1061 elif isinstance(bundle, bundle2.unbundle20):
1062 1062 benches.extend(
1063 1063 [
1064 1064 (makebench(forwardchunks), b'bundle2 forwardchunks()'),
1065 1065 (makebench(iterparts), b'bundle2 iterparts()'),
1066 1066 (
1067 1067 makebench(iterpartsseekable),
1068 1068 b'bundle2 iterparts() seekable',
1069 1069 ),
1070 1070 (makebench(seek), b'bundle2 part seek()'),
1071 1071 (makepartreadnbytes(8192), b'bundle2 part read(8k)'),
1072 1072 (makepartreadnbytes(16384), b'bundle2 part read(16k)'),
1073 1073 (makepartreadnbytes(32768), b'bundle2 part read(32k)'),
1074 1074 (makepartreadnbytes(131072), b'bundle2 part read(128k)'),
1075 1075 ]
1076 1076 )
1077 1077 elif isinstance(bundle, streamclone.streamcloneapplier):
1078 1078 raise error.Abort(b'stream clone bundles not supported')
1079 1079 else:
1080 1080 raise error.Abort(b'unhandled bundle type: %s' % type(bundle))
1081 1081
1082 1082 for fn, title in benches:
1083 1083 timer, fm = gettimer(ui, opts)
1084 1084 timer(fn, title=title)
1085 1085 fm.end()
1086 1086
1087 1087
1088 1088 @command(
1089 1089 b'perf::changegroupchangelog|perfchangegroupchangelog',
1090 1090 formatteropts
1091 1091 + [
1092 1092 (b'', b'cgversion', b'02', b'changegroup version'),
1093 1093 (b'r', b'rev', b'', b'revisions to add to changegroup'),
1094 1094 ],
1095 1095 )
1096 1096 def perfchangegroupchangelog(ui, repo, cgversion=b'02', rev=None, **opts):
1097 1097 """Benchmark producing a changelog group for a changegroup.
1098 1098
1099 1099 This measures the time spent processing the changelog during a
1100 1100 bundle operation. This occurs during `hg bundle` and on a server
1101 1101 processing a `getbundle` wire protocol request (which handles clones
1102 1102 and pulls).
1103 1103
1104 1104 By default, all revisions are added to the changegroup.
1105 1105 """
1106 1106 opts = _byteskwargs(opts)
1107 1107 cl = repo.changelog
1108 1108 nodes = [cl.lookup(r) for r in repo.revs(rev or b'all()')]
1109 1109 bundler = changegroup.getbundler(cgversion, repo)
1110 1110
1111 1111 def d():
1112 1112 state, chunks = bundler._generatechangelog(cl, nodes)
1113 1113 for chunk in chunks:
1114 1114 pass
1115 1115
1116 1116 timer, fm = gettimer(ui, opts)
1117 1117
1118 1118 # Terminal printing can interfere with timing. So disable it.
1119 1119 with ui.configoverride({(b'progress', b'disable'): True}):
1120 1120 timer(d)
1121 1121
1122 1122 fm.end()
1123 1123
1124 1124
1125 1125 @command(b'perf::dirs|perfdirs', formatteropts)
1126 1126 def perfdirs(ui, repo, **opts):
1127 1127 opts = _byteskwargs(opts)
1128 1128 timer, fm = gettimer(ui, opts)
1129 1129 dirstate = repo.dirstate
1130 1130 b'a' in dirstate
1131 1131
1132 1132 def d():
1133 1133 dirstate.hasdir(b'a')
1134 1134 del dirstate._map._dirs
1135 1135
1136 1136 timer(d)
1137 1137 fm.end()
1138 1138
1139 1139
1140 1140 @command(
1141 1141 b'perf::dirstate|perfdirstate',
1142 1142 [
1143 1143 (
1144 1144 b'',
1145 1145 b'iteration',
1146 1146 None,
1147 1147 b'benchmark a full iteration for the dirstate',
1148 1148 ),
1149 1149 (
1150 1150 b'',
1151 1151 b'contains',
1152 1152 None,
1153 1153 b'benchmark a large amount of `nf in dirstate` calls',
1154 1154 ),
1155 1155 ]
1156 1156 + formatteropts,
1157 1157 )
1158 1158 def perfdirstate(ui, repo, **opts):
1159 1159 """benchmap the time of various distate operations
1160 1160
1161 1161 By default benchmark the time necessary to load a dirstate from scratch.
1162 1162 The dirstate is loaded to the point were a "contains" request can be
1163 1163 answered.
1164 1164 """
1165 1165 opts = _byteskwargs(opts)
1166 1166 timer, fm = gettimer(ui, opts)
1167 1167 b"a" in repo.dirstate
1168 1168
1169 1169 if opts[b'iteration'] and opts[b'contains']:
1170 1170 msg = b'only specify one of --iteration or --contains'
1171 1171 raise error.Abort(msg)
1172 1172
1173 1173 if opts[b'iteration']:
1174 1174 setup = None
1175 1175 dirstate = repo.dirstate
1176 1176
1177 1177 def d():
1178 1178 for f in dirstate:
1179 1179 pass
1180 1180
1181 1181 elif opts[b'contains']:
1182 1182 setup = None
1183 1183 dirstate = repo.dirstate
1184 1184 allfiles = list(dirstate)
1185 1185 # also add file path that will be "missing" from the dirstate
1186 1186 allfiles.extend([f[::-1] for f in allfiles])
1187 1187
1188 1188 def d():
1189 1189 for f in allfiles:
1190 1190 f in dirstate
1191 1191
1192 1192 else:
1193 1193
1194 1194 def setup():
1195 1195 repo.dirstate.invalidate()
1196 1196
1197 1197 def d():
1198 1198 b"a" in repo.dirstate
1199 1199
1200 1200 timer(d, setup=setup)
1201 1201 fm.end()
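# Example invocations for the three modes benchmarked above (illustrative):
#
#   $ hg perfdirstate              # load a dirstate from scratch
#   $ hg perfdirstate --iteration  # full iteration over the dirstate
#   $ hg perfdirstate --contains   # many `nf in dirstate` checks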
1202 1202
1203 1203
1204 1204 @command(b'perf::dirstatedirs|perfdirstatedirs', formatteropts)
1205 1205 def perfdirstatedirs(ui, repo, **opts):
1206 1206 """benchmap a 'dirstate.hasdir' call from an empty `dirs` cache"""
1207 1207 opts = _byteskwargs(opts)
1208 1208 timer, fm = gettimer(ui, opts)
1209 1209 repo.dirstate.hasdir(b"a")
1210 1210
1211 1211 def setup():
1212 1212 del repo.dirstate._map._dirs
1213 1213
1214 1214 def d():
1215 1215 repo.dirstate.hasdir(b"a")
1216 1216
1217 1217 timer(d, setup=setup)
1218 1218 fm.end()
1219 1219
1220 1220
1221 1221 @command(b'perf::dirstatefoldmap|perfdirstatefoldmap', formatteropts)
1222 1222 def perfdirstatefoldmap(ui, repo, **opts):
1223 1223 """benchmap a `dirstate._map.filefoldmap.get()` request
1224 1224
1225 1225 The dirstate filefoldmap cache is dropped between every request.
1226 1226 """
1227 1227 opts = _byteskwargs(opts)
1228 1228 timer, fm = gettimer(ui, opts)
1229 1229 dirstate = repo.dirstate
1230 1230 dirstate._map.filefoldmap.get(b'a')
1231 1231
1232 1232 def setup():
1233 1233 del dirstate._map.filefoldmap
1234 1234
1235 1235 def d():
1236 1236 dirstate._map.filefoldmap.get(b'a')
1237 1237
1238 1238 timer(d, setup=setup)
1239 1239 fm.end()
1240 1240
1241 1241
1242 1242 @command(b'perf::dirfoldmap|perfdirfoldmap', formatteropts)
1243 1243 def perfdirfoldmap(ui, repo, **opts):
1244 1244 """benchmap a `dirstate._map.dirfoldmap.get()` request
1245 1245
1246 1246 The dirstate dirfoldmap cache is dropped between every request.
1247 1247 """
1248 1248 opts = _byteskwargs(opts)
1249 1249 timer, fm = gettimer(ui, opts)
1250 1250 dirstate = repo.dirstate
1251 1251 dirstate._map.dirfoldmap.get(b'a')
1252 1252
1253 1253 def setup():
1254 1254 del dirstate._map.dirfoldmap
1255 1255 del dirstate._map._dirs
1256 1256
1257 1257 def d():
1258 1258 dirstate._map.dirfoldmap.get(b'a')
1259 1259
1260 1260 timer(d, setup=setup)
1261 1261 fm.end()
1262 1262
1263 1263
1264 1264 @command(b'perf::dirstatewrite|perfdirstatewrite', formatteropts)
1265 1265 def perfdirstatewrite(ui, repo, **opts):
1266 1266 """benchmap the time it take to write a dirstate on disk"""
1267 1267 opts = _byteskwargs(opts)
1268 1268 timer, fm = gettimer(ui, opts)
1269 1269 ds = repo.dirstate
1270 1270 b"a" in ds
1271 1271
1272 1272 def setup():
1273 1273 ds._dirty = True
1274 1274
1275 1275 def d():
1276 1276 ds.write(repo.currenttransaction())
1277 1277
1278 1278 timer(d, setup=setup)
1279 1279 fm.end()
1280 1280
1281 1281
1282 1282 def _getmergerevs(repo, opts):
1283 1283 """parse command argument to return rev involved in merge
1284 1284
1285 1285 input: options dictionnary with `rev`, `from` and `bse`
1286 1286 output: (localctx, otherctx, basectx)
1287 1287 """
1288 1288 if opts[b'from']:
1289 1289 fromrev = scmutil.revsingle(repo, opts[b'from'])
1290 1290 wctx = repo[fromrev]
1291 1291 else:
1292 1292 wctx = repo[None]
1293 1293 # we don't want working dir files to be stat'd in the benchmark, so
1294 1294 # prime that cache
1295 1295 wctx.dirty()
1296 1296 rctx = scmutil.revsingle(repo, opts[b'rev'], opts[b'rev'])
1297 1297 if opts[b'base']:
1298 1298 fromrev = scmutil.revsingle(repo, opts[b'base'])
1299 1299 ancestor = repo[fromrev]
1300 1300 else:
1301 1301 ancestor = wctx.ancestor(rctx)
1302 1302 return (wctx, rctx, ancestor)
1303 1303
1304 1304
1305 1305 @command(
1306 1306 b'perf::mergecalculate|perfmergecalculate',
1307 1307 [
1308 1308 (b'r', b'rev', b'.', b'rev to merge against'),
1309 1309 (b'', b'from', b'', b'rev to merge from'),
1310 1310 (b'', b'base', b'', b'the revision to use as base'),
1311 1311 ]
1312 1312 + formatteropts,
1313 1313 )
1314 1314 def perfmergecalculate(ui, repo, **opts):
1315 1315 opts = _byteskwargs(opts)
1316 1316 timer, fm = gettimer(ui, opts)
1317 1317
1318 1318 wctx, rctx, ancestor = _getmergerevs(repo, opts)
1319 1319
1320 1320 def d():
1321 1321 # acceptremote is True because we don't want prompts in the middle of
1322 1322 # our benchmark
1323 1323 merge.calculateupdates(
1324 1324 repo,
1325 1325 wctx,
1326 1326 rctx,
1327 1327 [ancestor],
1328 1328 branchmerge=False,
1329 1329 force=False,
1330 1330 acceptremote=True,
1331 1331 followcopies=True,
1332 1332 )
1333 1333
1334 1334 timer(d)
1335 1335 fm.end()
1336 1336
1337 1337
1338 1338 @command(
1339 1339 b'perf::mergecopies|perfmergecopies',
1340 1340 [
1341 1341 (b'r', b'rev', b'.', b'rev to merge against'),
1342 1342 (b'', b'from', b'', b'rev to merge from'),
1343 1343 (b'', b'base', b'', b'the revision to use as base'),
1344 1344 ]
1345 1345 + formatteropts,
1346 1346 )
1347 1347 def perfmergecopies(ui, repo, **opts):
1348 1348 """measure runtime of `copies.mergecopies`"""
1349 1349 opts = _byteskwargs(opts)
1350 1350 timer, fm = gettimer(ui, opts)
1351 1351 wctx, rctx, ancestor = _getmergerevs(repo, opts)
1352 1352
1353 1353 def d():
1354 1354 # acceptremote is True because we don't want prompts in the middle of
1355 1355 # our benchmark
1356 1356 copies.mergecopies(repo, wctx, rctx, ancestor)
1357 1357
1358 1358 timer(d)
1359 1359 fm.end()
1360 1360
1361 1361
1362 1362 @command(b'perf::pathcopies|perfpathcopies', [], b"REV REV")
1363 1363 def perfpathcopies(ui, repo, rev1, rev2, **opts):
1364 1364 """benchmark the copy tracing logic"""
1365 1365 opts = _byteskwargs(opts)
1366 1366 timer, fm = gettimer(ui, opts)
1367 1367 ctx1 = scmutil.revsingle(repo, rev1, rev1)
1368 1368 ctx2 = scmutil.revsingle(repo, rev2, rev2)
1369 1369
1370 1370 def d():
1371 1371 copies.pathcopies(ctx1, ctx2)
1372 1372
1373 1373 timer(d)
1374 1374 fm.end()
1375 1375
1376 1376
1377 1377 @command(
1378 1378 b'perf::phases|perfphases',
1379 1379 [
1380 1380 (b'', b'full', False, b'include file reading time too'),
1381 1381 ],
1382 1382 b"",
1383 1383 )
1384 1384 def perfphases(ui, repo, **opts):
1385 1385 """benchmark phasesets computation"""
1386 1386 opts = _byteskwargs(opts)
1387 1387 timer, fm = gettimer(ui, opts)
1388 1388 _phases = repo._phasecache
1389 1389 full = opts.get(b'full')
1390 1390
1391 1391 def d():
1392 1392 phases = _phases
1393 1393 if full:
1394 1394 clearfilecache(repo, b'_phasecache')
1395 1395 phases = repo._phasecache
1396 1396 phases.invalidate()
1397 1397 phases.loadphaserevs(repo)
1398 1398
1399 1399 timer(d)
1400 1400 fm.end()
1401 1401
1402 1402
1403 1403 @command(b'perf::phasesremote|perfphasesremote', [], b"[DEST]")
1404 1404 def perfphasesremote(ui, repo, dest=None, **opts):
1405 1405 """benchmark time needed to analyse phases of the remote server"""
1406 1406 from mercurial.node import bin
1407 1407 from mercurial import (
1408 1408 exchange,
1409 1409 hg,
1410 1410 phases,
1411 1411 )
1412 1412
1413 1413 opts = _byteskwargs(opts)
1414 1414 timer, fm = gettimer(ui, opts)
1415 1415
1416 1416 path = ui.getpath(dest, default=(b'default-push', b'default'))
1417 1417 if not path:
1418 1418 raise error.Abort(
1419 1419 b'default repository not configured!',
1420 1420 hint=b"see 'hg help config.paths'",
1421 1421 )
1422 1422 dest = path.pushloc or path.loc
1423 1423 ui.statusnoi18n(b'analysing phase of %s\n' % util.hidepassword(dest))
1424 1424 other = hg.peer(repo, opts, dest)
1425 1425
1426 1426 # easier to perform discovery through the operation
1427 1427 op = exchange.pushoperation(repo, other)
1428 1428 exchange._pushdiscoverychangeset(op)
1429 1429
1430 1430 remotesubset = op.fallbackheads
1431 1431
1432 1432 with other.commandexecutor() as e:
1433 1433 remotephases = e.callcommand(
1434 1434 b'listkeys', {b'namespace': b'phases'}
1435 1435 ).result()
1436 1436 del other
1437 1437 publishing = remotephases.get(b'publishing', False)
1438 1438 if publishing:
1439 1439 ui.statusnoi18n(b'publishing: yes\n')
1440 1440 else:
1441 1441 ui.statusnoi18n(b'publishing: no\n')
1442 1442
1443 1443 has_node = getattr(repo.changelog.index, 'has_node', None)
1444 1444 if has_node is None:
1445 1445 has_node = repo.changelog.nodemap.__contains__
1446 1446 nonpublishroots = 0
1447 1447 for nhex, phase in remotephases.iteritems():
1448 1448 if nhex == b'publishing': # ignore data related to publish option
1449 1449 continue
1450 1450 node = bin(nhex)
1451 1451 if has_node(node) and int(phase):
1452 1452 nonpublishroots += 1
1453 1453 ui.statusnoi18n(b'number of roots: %d\n' % len(remotephases))
1454 1454 ui.statusnoi18n(b'number of known non public roots: %d\n' % nonpublishroots)
1455 1455
1456 1456 def d():
1457 1457 phases.remotephasessummary(repo, remotesubset, remotephases)
1458 1458
1459 1459 timer(d)
1460 1460 fm.end()
1461 1461
1462 1462
1463 1463 @command(
1464 1464 b'perf::manifest|perfmanifest',
1465 1465 [
1466 1466 (b'm', b'manifest-rev', False, b'Look up a manifest node revision'),
1467 1467 (b'', b'clear-disk', False, b'clear on-disk caches too'),
1468 1468 ]
1469 1469 + formatteropts,
1470 1470 b'REV|NODE',
1471 1471 )
1472 1472 def perfmanifest(ui, repo, rev, manifest_rev=False, clear_disk=False, **opts):
1473 1473 """benchmark the time to read a manifest from disk and return a usable
1474 1474 dict-like object
1475 1475
1476 1476 Manifest caches are cleared before retrieval."""
1477 1477 opts = _byteskwargs(opts)
1478 1478 timer, fm = gettimer(ui, opts)
1479 1479 if not manifest_rev:
1480 1480 ctx = scmutil.revsingle(repo, rev, rev)
1481 1481 t = ctx.manifestnode()
1482 1482 else:
1483 1483 from mercurial.node import bin
1484 1484
1485 1485 if len(rev) == 40:
1486 1486 t = bin(rev)
1487 1487 else:
1488 1488 try:
1489 1489 rev = int(rev)
1490 1490
1491 1491 if util.safehasattr(repo.manifestlog, b'getstorage'):
1492 1492 t = repo.manifestlog.getstorage(b'').node(rev)
1493 1493 else:
1494 1494 t = repo.manifestlog._revlog.lookup(rev)
1495 1495 except ValueError:
1496 1496 raise error.Abort(
1497 1497 b'manifest revision must be integer or full node'
1498 1498 )
1499 1499
1500 1500 def d():
1501 1501 repo.manifestlog.clearcaches(clear_persisted_data=clear_disk)
1502 1502 repo.manifestlog[t].read()
1503 1503
1504 1504 timer(d)
1505 1505 fm.end()
1506 1506
1507 1507
1508 1508 @command(b'perf::changeset|perfchangeset', formatteropts)
1509 1509 def perfchangeset(ui, repo, rev, **opts):
1510 1510 opts = _byteskwargs(opts)
1511 1511 timer, fm = gettimer(ui, opts)
1512 1512 n = scmutil.revsingle(repo, rev).node()
1513 1513
1514 1514 def d():
1515 1515 repo.changelog.read(n)
1516 1516 # repo.changelog._cache = None
1517 1517
1518 1518 timer(d)
1519 1519 fm.end()
1520 1520
1521 1521
1522 1522 @command(b'perf::ignore|perfignore', formatteropts)
1523 1523 def perfignore(ui, repo, **opts):
1524 1524 """benchmark operation related to computing ignore"""
1525 1525 opts = _byteskwargs(opts)
1526 1526 timer, fm = gettimer(ui, opts)
1527 1527 dirstate = repo.dirstate
1528 1528
1529 1529 def setupone():
1530 1530 dirstate.invalidate()
1531 1531 clearfilecache(dirstate, b'_ignore')
1532 1532
1533 1533 def runone():
1534 1534 dirstate._ignore
1535 1535
1536 1536 timer(runone, setup=setupone, title=b"load")
1537 1537 fm.end()
1538 1538
1539 1539
1540 1540 @command(
1541 1541 b'perf::index|perfindex',
1542 1542 [
1543 1543 (b'', b'rev', [], b'revision to be looked up (default tip)'),
1544 1544 (b'', b'no-lookup', None, b'do not revision lookup post creation'),
1545 1545 ]
1546 1546 + formatteropts,
1547 1547 )
1548 1548 def perfindex(ui, repo, **opts):
1549 1549 """benchmark index creation time followed by a lookup
1550 1550
1551 1551 The default is to look up `tip`. Depending on the index implementation,
1552 1552 the revision looked up can matter. For example, an implementation
1553 1553 scanning the index will have a faster lookup time for `--rev tip` than for
1554 1554 `--rev 0`. The number of revisions looked up and their order can also
1555 1555 matter.
1556 1556
1557 1557 Examples of useful sets to test:
1558 1558
1559 1559 * tip
1560 1560 * 0
1561 1561 * -10:
1562 1562 * :10
1563 1563 * -10: + :10
1564 1564 * :10: + -10:
1565 1565 * -10000:
1566 1566 * -10000: + 0
1567 1567
1568 1568 It is not currently possible to check for lookup of a missing node. For
1569 1569 deeper lookup benchmarking, check out the `perfnodemap` command."""
1570 1570 import mercurial.revlog
1571 1571
1572 1572 opts = _byteskwargs(opts)
1573 1573 timer, fm = gettimer(ui, opts)
1574 1574 mercurial.revlog._prereadsize = 2 ** 24 # disable lazy parser in old hg
1575 1575 if opts[b'no_lookup']:
1576 1576 if opts['rev']:
1577 1577 raise error.Abort('--no-lookup and --rev are mutually exclusive')
1578 1578 nodes = []
1579 1579 elif not opts[b'rev']:
1580 1580 nodes = [repo[b"tip"].node()]
1581 1581 else:
1582 1582 revs = scmutil.revrange(repo, opts[b'rev'])
1583 1583 cl = repo.changelog
1584 1584 nodes = [cl.node(r) for r in revs]
1585 1585
1586 1586 unfi = repo.unfiltered()
1587 1587 # find the filecache func directly
1588 1588 # This avoids polluting the benchmark with the filecache logic
1589 1589 makecl = unfi.__class__.changelog.func
1590 1590
1591 1591 def setup():
1592 1592 # probably not necessary, but for good measure
1593 1593 clearchangelog(unfi)
1594 1594
1595 1595 def d():
1596 1596 cl = makecl(unfi)
1597 1597 for n in nodes:
1598 1598 cl.rev(n)
1599 1599
1600 1600 timer(d, setup=setup)
1601 1601 fm.end()
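# Example invocations exercising some of the revsets suggested in the
# docstring (illustrative):
#
#   $ hg perfindex
#   $ hg perfindex --rev '-10000:' --rev '0'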
1602 1602
1603 1603
1604 1604 @command(
1605 1605 b'perf::nodemap|perfnodemap',
1606 1606 [
1607 1607 (b'', b'rev', [], b'revision to be looked up (default tip)'),
1608 1608 (b'', b'clear-caches', True, b'clear revlog cache between calls'),
1609 1609 ]
1610 1610 + formatteropts,
1611 1611 )
1612 1612 def perfnodemap(ui, repo, **opts):
1613 1613 """benchmark the time necessary to look up revision from a cold nodemap
1614 1614
1615 1615 Depending on the implementation, the amount and order of revision we look
1616 1616 up can varies. Example of useful set to test:
1617 1617 * tip
1618 1618 * 0
1619 1619 * -10:
1620 1620 * :10
1621 1621 * -10: + :10
1622 1622 * :10: + -10:
1623 1623 * -10000:
1624 1624 * -10000: + 0
1625 1625
1626 1626 The command currently focuses on valid binary lookups. Benchmarking
1627 1627 hex lookup, prefix lookup, and missing lookup would also be valuable.
1628 1628 """
1629 1629 import mercurial.revlog
1630 1630
1631 1631 opts = _byteskwargs(opts)
1632 1632 timer, fm = gettimer(ui, opts)
1633 1633 mercurial.revlog._prereadsize = 2 ** 24 # disable lazy parser in old hg
1634 1634
1635 1635 unfi = repo.unfiltered()
1636 1636 clearcaches = opts[b'clear_caches']
1637 1637 # find the filecache func directly
1638 1638 # This avoids polluting the benchmark with the filecache logic
1639 1639 makecl = unfi.__class__.changelog.func
1640 1640 if not opts[b'rev']:
1641 1641 raise error.Abort(b'use --rev to specify revisions to look up')
1642 1642 revs = scmutil.revrange(repo, opts[b'rev'])
1643 1643 cl = repo.changelog
1644 1644 nodes = [cl.node(r) for r in revs]
1645 1645
1646 1646 # use a list to pass a reference to the nodemap from one closure to the next
1647 1647 nodeget = [None]
1648 1648
1649 1649 def setnodeget():
1650 1650 # probably not necessary, but for good measure
1651 1651 clearchangelog(unfi)
1652 1652 cl = makecl(unfi)
1653 1653 if util.safehasattr(cl.index, 'get_rev'):
1654 1654 nodeget[0] = cl.index.get_rev
1655 1655 else:
1656 1656 nodeget[0] = cl.nodemap.get
1657 1657
1658 1658 def d():
1659 1659 get = nodeget[0]
1660 1660 for n in nodes:
1661 1661 get(n)
1662 1662
1663 1663 setup = None
1664 1664 if clearcaches:
1665 1665
1666 1666 def setup():
1667 1667 setnodeget()
1668 1668
1669 1669 else:
1670 1670 setnodeget()
1671 1671 d() # prewarm the data structure
1672 1672 timer(d, setup=setup)
1673 1673 fm.end()
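# Example invocation (illustrative); unlike perfindex, perfnodemap
# requires at least one --rev:
#
#   $ hg perfnodemap --rev 'tip' --rev '0'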
1674 1674
1675 1675
1676 1676 @command(b'perf::startup|perfstartup', formatteropts)
1677 1677 def perfstartup(ui, repo, **opts):
1678 1678 opts = _byteskwargs(opts)
1679 1679 timer, fm = gettimer(ui, opts)
1680 1680
1681 1681 def d():
1682 1682 if os.name != 'nt':
1683 1683 os.system(
1684 1684 b"HGRCPATH= %s version -q > /dev/null" % fsencode(sys.argv[0])
1685 1685 )
1686 1686 else:
1687 1687 os.environ['HGRCPATH'] = r' '
1688 1688 os.system("%s version -q > NUL" % sys.argv[0])
1689 1689
1690 1690 timer(d)
1691 1691 fm.end()
1692 1692
1693 1693
1694 1694 @command(b'perf::parents|perfparents', formatteropts)
1695 1695 def perfparents(ui, repo, **opts):
1696 1696 """benchmark the time necessary to fetch one changeset's parents.
1697 1697
1698 1698 The fetch is done using the `node identifier`, traversing all object layers
1699 1699 from the repository object. The first N revisions will be used for this
1700 1700 benchmark. N is controlled by the ``perf.parentscount`` config option
1701 1701 (default: 1000).
1702 1702 """
1703 1703 opts = _byteskwargs(opts)
1704 1704 timer, fm = gettimer(ui, opts)
1705 1705 # control the number of commits perfparents iterates over
1706 1706 # experimental config: perf.parentscount
1707 1707 count = getint(ui, b"perf", b"parentscount", 1000)
1708 1708 if len(repo.changelog) < count:
1709 1709 raise error.Abort(b"repo needs %d commits for this test" % count)
1710 1710 repo = repo.unfiltered()
1711 1711 nl = [repo.changelog.node(i) for i in _xrange(count)]
1712 1712
1713 1713 def d():
1714 1714 for n in nl:
1715 1715 repo.changelog.parents(n)
1716 1716
1717 1717 timer(d)
1718 1718 fm.end()
1719 1719
1720 1720
1721 1721 @command(b'perf::ctxfiles|perfctxfiles', formatteropts)
1722 1722 def perfctxfiles(ui, repo, x, **opts):
1723 1723 opts = _byteskwargs(opts)
1724 1724 x = int(x)
1725 1725 timer, fm = gettimer(ui, opts)
1726 1726
1727 1727 def d():
1728 1728 len(repo[x].files())
1729 1729
1730 1730 timer(d)
1731 1731 fm.end()
1732 1732
1733 1733
1734 1734 @command(b'perf::rawfiles|perfrawfiles', formatteropts)
1735 1735 def perfrawfiles(ui, repo, x, **opts):
1736 1736 opts = _byteskwargs(opts)
1737 1737 x = int(x)
1738 1738 timer, fm = gettimer(ui, opts)
1739 1739 cl = repo.changelog
1740 1740
1741 1741 def d():
1742 1742 len(cl.read(x)[3])
1743 1743
1744 1744 timer(d)
1745 1745 fm.end()
1746 1746
1747 1747
1748 1748 @command(b'perf::lookup|perflookup', formatteropts)
1749 1749 def perflookup(ui, repo, rev, **opts):
1750 1750 opts = _byteskwargs(opts)
1751 1751 timer, fm = gettimer(ui, opts)
1752 1752 timer(lambda: len(repo.lookup(rev)))
1753 1753 fm.end()
1754 1754
1755 1755
1756 1756 @command(
1757 1757 b'perf::linelogedits|perflinelogedits',
1758 1758 [
1759 1759 (b'n', b'edits', 10000, b'number of edits'),
1760 1760 (b'', b'max-hunk-lines', 10, b'max lines in a hunk'),
1761 1761 ],
1762 1762 norepo=True,
1763 1763 )
1764 1764 def perflinelogedits(ui, **opts):
1765 1765 from mercurial import linelog
1766 1766
1767 1767 opts = _byteskwargs(opts)
1768 1768
1769 1769 edits = opts[b'edits']
1770 1770 maxhunklines = opts[b'max_hunk_lines']
1771 1771
1772 1772 maxb1 = 100000
1773 1773 random.seed(0)
1774 1774 randint = random.randint
1775 1775 currentlines = 0
1776 1776 arglist = []
1777 1777 for rev in _xrange(edits):
1778 1778 a1 = randint(0, currentlines)
1779 1779 a2 = randint(a1, min(currentlines, a1 + maxhunklines))
1780 1780 b1 = randint(0, maxb1)
1781 1781 b2 = randint(b1, b1 + maxhunklines)
1782 1782 currentlines += (b2 - b1) - (a2 - a1)
1783 1783 arglist.append((rev, a1, a2, b1, b2))
1784 1784
1785 1785 def d():
1786 1786 ll = linelog.linelog()
1787 1787 for args in arglist:
1788 1788 ll.replacelines(*args)
1789 1789
1790 1790 timer, fm = gettimer(ui, opts)
1791 1791 timer(d)
1792 1792 fm.end()
1793 1793
1794 1794
1795 1795 @command(b'perf::revrange|perfrevrange', formatteropts)
1796 1796 def perfrevrange(ui, repo, *specs, **opts):
1797 1797 opts = _byteskwargs(opts)
1798 1798 timer, fm = gettimer(ui, opts)
1799 1799 revrange = scmutil.revrange
1800 1800 timer(lambda: len(revrange(repo, specs)))
1801 1801 fm.end()
1802 1802
1803 1803
1804 1804 @command(b'perf::nodelookup|perfnodelookup', formatteropts)
1805 1805 def perfnodelookup(ui, repo, rev, **opts):
1806 1806 opts = _byteskwargs(opts)
1807 1807 timer, fm = gettimer(ui, opts)
1808 1808 import mercurial.revlog
1809 1809
1810 1810 mercurial.revlog._prereadsize = 2 ** 24 # disable lazy parser in old hg
1811 1811 n = scmutil.revsingle(repo, rev).node()
1812 1812 cl = mercurial.revlog.revlog(getsvfs(repo), b"00changelog.i")
1813 1813
1814 1814 def d():
1815 1815 cl.rev(n)
1816 1816 clearcaches(cl)
1817 1817
1818 1818 timer(d)
1819 1819 fm.end()
1820 1820
1821 1821
1822 1822 @command(
1823 1823 b'perf::log|perflog',
1824 1824 [(b'', b'rename', False, b'ask log to follow renames')] + formatteropts,
1825 1825 )
1826 1826 def perflog(ui, repo, rev=None, **opts):
1827 1827 opts = _byteskwargs(opts)
1828 1828 if rev is None:
1829 1829 rev = []
1830 1830 timer, fm = gettimer(ui, opts)
1831 1831 ui.pushbuffer()
1832 1832 timer(
1833 1833 lambda: commands.log(
1834 1834 ui, repo, rev=rev, date=b'', user=b'', copies=opts.get(b'rename')
1835 1835 )
1836 1836 )
1837 1837 ui.popbuffer()
1838 1838 fm.end()
1839 1839
1840 1840
1841 1841 @command(b'perf::moonwalk|perfmoonwalk', formatteropts)
1842 1842 def perfmoonwalk(ui, repo, **opts):
1843 1843 """benchmark walking the changelog backwards
1844 1844
1845 1845 This also loads the changelog data for each revision in the changelog.
1846 1846 """
1847 1847 opts = _byteskwargs(opts)
1848 1848 timer, fm = gettimer(ui, opts)
1849 1849
1850 1850 def moonwalk():
1851 1851 for i in repo.changelog.revs(start=(len(repo) - 1), stop=-1):
1852 1852 ctx = repo[i]
1853 1853 ctx.branch() # read changelog data (in addition to the index)
1854 1854
1855 1855 timer(moonwalk)
1856 1856 fm.end()
1857 1857
1858 1858
1859 1859 @command(
1860 1860 b'perf::templating|perftemplating',
1861 1861 [
1862 1862 (b'r', b'rev', [], b'revisions to run the template on'),
1863 1863 ]
1864 1864 + formatteropts,
1865 1865 )
1866 1866 def perftemplating(ui, repo, testedtemplate=None, **opts):
1867 1867 """test the rendering time of a given template"""
1868 1868 if makelogtemplater is None:
1869 1869 raise error.Abort(
1870 1870 b"perftemplating not available with this Mercurial",
1871 1871 hint=b"use 4.3 or later",
1872 1872 )
1873 1873
1874 1874 opts = _byteskwargs(opts)
1875 1875
1876 1876 nullui = ui.copy()
1877 1877 nullui.fout = open(os.devnull, 'wb')
1878 1878 nullui.disablepager()
1879 1879 revs = opts.get(b'rev')
1880 1880 if not revs:
1881 1881 revs = [b'all()']
1882 1882 revs = list(scmutil.revrange(repo, revs))
1883 1883
1884 1884 defaulttemplate = (
1885 1885 b'{date|shortdate} [{rev}:{node|short}]'
1886 1886 b' {author|person}: {desc|firstline}\n'
1887 1887 )
1888 1888 if testedtemplate is None:
1889 1889 testedtemplate = defaulttemplate
1890 1890 displayer = makelogtemplater(nullui, repo, testedtemplate)
1891 1891
1892 1892 def format():
1893 1893 for r in revs:
1894 1894 ctx = repo[r]
1895 1895 displayer.show(ctx)
1896 1896 displayer.flush(ctx)
1897 1897
1898 1898 timer, fm = gettimer(ui, opts)
1899 1899 timer(format)
1900 1900 fm.end()
1901 1901
1902 1902
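# `data` maps each key listed in `entries` to a list of tuples whose
# first element is the measured value (the remaining elements identify
# the item the value was measured on)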
1903 1903 def _displaystats(ui, opts, entries, data):
1904 1904 # use a second formatter because the data are quite different, not sure
1905 1905 # how it flies with the templater.
1906 1906 fm = ui.formatter(b'perf-stats', opts)
1907 1907 for key, title in entries:
1908 1908 values = data[key]
1909 1909 nbvalues = len(values)
1910 1910 values.sort()
1911 1911 stats = {
1912 1912 'key': key,
1913 1913 'title': title,
1914 1914 'nbitems': len(values),
1915 1915 'min': values[0][0],
1916 1916 '10%': values[(nbvalues * 10) // 100][0],
1917 1917 '25%': values[(nbvalues * 25) // 100][0],
1918 1918 '50%': values[(nbvalues * 50) // 100][0],
1919 1919 '75%': values[(nbvalues * 75) // 100][0],
1920 1920 '80%': values[(nbvalues * 80) // 100][0],
1921 1921 '85%': values[(nbvalues * 85) // 100][0],
1922 1922 '90%': values[(nbvalues * 90) // 100][0],
1923 1923 '95%': values[(nbvalues * 95) // 100][0],
1924 1924 '99%': values[(nbvalues * 99) // 100][0],
1925 1925 'max': values[-1][0],
1926 1926 }
1927 1927 fm.startitem()
1928 1928 fm.data(**stats)
1929 1929 # make node pretty for the human output
1930 1930 fm.plain('### %s (%d items)\n' % (title, len(values)))
1931 1931 lines = [
1932 1932 'min',
1933 1933 '10%',
1934 1934 '25%',
1935 1935 '50%',
1936 1936 '75%',
1937 1937 '80%',
1938 1938 '85%',
1939 1939 '90%',
1940 1940 '95%',
1941 1941 '99%',
1942 1942 'max',
1943 1943 ]
1944 1944 for l in lines:
1945 1945 fm.plain('%s: %s\n' % (l, stats[l]))
1946 1946 fm.end()
1947 1947
1948 1948
1949 1949 @command(
1950 1950 b'perf::helper-mergecopies|perfhelper-mergecopies',
1951 1951 formatteropts
1952 1952 + [
1953 1953 (b'r', b'revs', [], b'restrict search to these revisions'),
1954 1954 (b'', b'timing', False, b'provides extra data (costly)'),
1955 1955 (b'', b'stats', False, b'provides statistic about the measured data'),
1956 1956 ],
1957 1957 )
1958 1958 def perfhelpermergecopies(ui, repo, revs=[], **opts):
1959 1959 """find statistics about potential parameters for `perfmergecopies`
1960 1960
1961 1961 This command finds (base, p1, p2) triplets relevant for copy-tracing
1962 1962 benchmarking in the context of a merge. It reports values for some of the
1963 1963 parameters that impact copy tracing time during a merge.
1964 1964
1965 1965 If `--timing` is set, rename detection is run and the associated timing
1966 1966 will be reported. The extra details come at the cost of slower command
1967 1967 execution.
1968 1968
1969 1969 Since rename detection is only run once, other factors might easily
1970 1970 affect the precision of the timing. However, it should give a good
1971 1971 approximation of which revision triplets are very costly.
1972 1972 """
1973 1973 opts = _byteskwargs(opts)
1974 1974 fm = ui.formatter(b'perf', opts)
1975 1975 dotiming = opts[b'timing']
1976 1976 dostats = opts[b'stats']
1977 1977
1978 1978 output_template = [
1979 1979 ("base", "%(base)12s"),
1980 1980 ("p1", "%(p1.node)12s"),
1981 1981 ("p2", "%(p2.node)12s"),
1982 1982 ("p1.nb-revs", "%(p1.nbrevs)12d"),
1983 1983 ("p1.nb-files", "%(p1.nbmissingfiles)12d"),
1984 1984 ("p1.renames", "%(p1.renamedfiles)12d"),
1985 1985 ("p1.time", "%(p1.time)12.3f"),
1986 1986 ("p2.nb-revs", "%(p2.nbrevs)12d"),
1987 1987 ("p2.nb-files", "%(p2.nbmissingfiles)12d"),
1988 1988 ("p2.renames", "%(p2.renamedfiles)12d"),
1989 1989 ("p2.time", "%(p2.time)12.3f"),
1990 1990 ("renames", "%(nbrenamedfiles)12d"),
1991 1991 ("total.time", "%(time)12.3f"),
1992 1992 ]
1993 1993 if not dotiming:
1994 1994 output_template = [
1995 1995 i
1996 1996 for i in output_template
1997 1997 if not ('time' in i[0] or 'renames' in i[0])
1998 1998 ]
1999 1999 header_names = [h for (h, v) in output_template]
2000 2000 output = ' '.join([v for (h, v) in output_template]) + '\n'
2001 2001 header = ' '.join(['%12s'] * len(header_names)) + '\n'
2002 2002 fm.plain(header % tuple(header_names))
2003 2003
2004 2004 if not revs:
2005 2005 revs = ['all()']
2006 2006 revs = scmutil.revrange(repo, revs)
2007 2007
2008 2008 if dostats:
2009 2009 alldata = {
2010 2010 'nbrevs': [],
2011 2011 'nbmissingfiles': [],
2012 2012 }
2013 2013 if dotiming:
2014 2014 alldata['parentnbrenames'] = []
2015 2015 alldata['totalnbrenames'] = []
2016 2016 alldata['parenttime'] = []
2017 2017 alldata['totaltime'] = []
2018 2018
2019 2019 roi = repo.revs('merge() and %ld', revs)
2020 2020 for r in roi:
2021 2021 ctx = repo[r]
2022 2022 p1 = ctx.p1()
2023 2023 p2 = ctx.p2()
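# the common-ancestor heads of the two parents are the candidate merge
# bases; each (base, p1, p2) triplet below is one copy-tracing scenario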
2024 2024 bases = repo.changelog._commonancestorsheads(p1.rev(), p2.rev())
2025 2025 for b in bases:
2026 2026 b = repo[b]
2027 2027 p1missing = copies._computeforwardmissing(b, p1)
2028 2028 p2missing = copies._computeforwardmissing(b, p2)
2029 2029 data = {
2030 2030 b'base': b.hex(),
2031 2031 b'p1.node': p1.hex(),
2032 2032 b'p1.nbrevs': len(repo.revs('only(%d, %d)', p1.rev(), b.rev())),
2033 2033 b'p1.nbmissingfiles': len(p1missing),
2034 2034 b'p2.node': p2.hex(),
2035 2035 b'p2.nbrevs': len(repo.revs('only(%d, %d)', p2.rev(), b.rev())),
2036 2036 b'p2.nbmissingfiles': len(p2missing),
2037 2037 }
2038 2038 if dostats:
2039 2039 if p1missing:
2040 2040 alldata['nbrevs'].append(
2041 2041 (data['p1.nbrevs'], b.hex(), p1.hex())
2042 2042 )
2043 2043 alldata['nbmissingfiles'].append(
2044 2044 (data['p1.nbmissingfiles'], b.hex(), p1.hex())
2045 2045 )
2046 2046 if p2missing:
2047 2047 alldata['nbrevs'].append(
2048 2048 (data['p2.nbrevs'], b.hex(), p2.hex())
2049 2049 )
2050 2050 alldata['nbmissingfiles'].append(
2051 2051 (data['p2.nbmissingfiles'], b.hex(), p2.hex())
2052 2052 )
2053 2053 if dotiming:
2054 2054 begin = util.timer()
2055 2055 mergedata = copies.mergecopies(repo, p1, p2, b)
2056 2056 end = util.timer()
2057 2057 # not very stable timing since we did only one run
2058 2058 data['time'] = end - begin
2059 2059 # mergedata contains five dicts: "copy", "movewithdir",
2060 2060 # "diverge", "renamedelete" and "dirmove".
2061 2061 # The first 4 are about renamed files, so let's count them.
2062 2062 renames = len(mergedata[0])
2063 2063 renames += len(mergedata[1])
2064 2064 renames += len(mergedata[2])
2065 2065 renames += len(mergedata[3])
2066 2066 data['nbrenamedfiles'] = renames
2067 2067 begin = util.timer()
2068 2068 p1renames = copies.pathcopies(b, p1)
2069 2069 end = util.timer()
2070 2070 data['p1.time'] = end - begin
2071 2071 begin = util.timer()
2072 2072 p2renames = copies.pathcopies(b, p2)
2073 2073 end = util.timer()
2074 2074 data['p2.time'] = end - begin
2075 2075 data['p1.renamedfiles'] = len(p1renames)
2076 2076 data['p2.renamedfiles'] = len(p2renames)
2077 2077
2078 2078 if dostats:
2079 2079 if p1missing:
2080 2080 alldata['parentnbrenames'].append(
2081 2081 (data['p1.renamedfiles'], b.hex(), p1.hex())
2082 2082 )
2083 2083 alldata['parenttime'].append(
2084 2084 (data['p1.time'], b.hex(), p1.hex())
2085 2085 )
2086 2086 if p2missing:
2087 2087 alldata['parentnbrenames'].append(
2088 2088 (data['p2.renamedfiles'], b.hex(), p2.hex())
2089 2089 )
2090 2090 alldata['parenttime'].append(
2091 2091 (data['p2.time'], b.hex(), p2.hex())
2092 2092 )
2093 2093 if p1missing or p2missing:
2094 2094 alldata['totalnbrenames'].append(
2095 2095 (
2096 2096 data['nbrenamedfiles'],
2097 2097 b.hex(),
2098 2098 p1.hex(),
2099 2099 p2.hex(),
2100 2100 )
2101 2101 )
2102 2102 alldata['totaltime'].append(
2103 2103 (data['time'], b.hex(), p1.hex(), p2.hex())
2104 2104 )
2105 2105 fm.startitem()
2106 2106 fm.data(**data)
2107 2107 # make node pretty for the human output
2108 2108 out = data.copy()
2109 2109 out['base'] = fm.hexfunc(b.node())
2110 2110 out['p1.node'] = fm.hexfunc(p1.node())
2111 2111 out['p2.node'] = fm.hexfunc(p2.node())
2112 2112 fm.plain(output % out)
2113 2113
2114 2114 fm.end()
2115 2115 if dostats:
2116 2116 # use a second formatter because the data are quite different, not sure
2117 2117 # how it flies with the templater.
2118 2118 entries = [
2119 2119 ('nbrevs', 'number of revisions covered'),
2120 2120 ('nbmissingfiles', 'number of missing files at head'),
2121 2121 ]
2122 2122 if dotiming:
2123 2123 entries.append(
2124 2124 ('parentnbrenames', 'renames from base to one parent')
2125 2125 )
2126 2126 entries.append(('totalnbrenames', 'total number of renames'))
2127 2127 entries.append(('parenttime', 'time for one parent'))
2128 2128 entries.append(('totaltime', 'time for both parents'))
2129 2129 _displaystats(ui, opts, entries, alldata)
2130 2130
2131 2131
2132 2132 @command(
2133 2133 b'perf::helper-pathcopies|perfhelper-pathcopies',
2134 2134 formatteropts
2135 2135 + [
2136 2136 (b'r', b'revs', [], b'restrict search to these revisions'),
2137 2137 (b'', b'timing', False, b'provides extra data (costly)'),
2138 2138 (b'', b'stats', False, b'provides statistic about the measured data'),
2139 2139 ],
2140 2140 )
2141 2141 def perfhelperpathcopies(ui, repo, revs=[], **opts):
2142 2142 """find statistic about potential parameters for the `perftracecopies`
2143 2143
2144 2144 This command find source-destination pair relevant for copytracing testing.
2145 2145 It report value for some of the parameters that impact copy tracing time.
2146 2146
2147 2147 If `--timing` is set, rename detection is run and the associated timing
2148 2148 will be reported. The extra details comes at the cost of a slower command
2149 2149 execution.
2150 2150
2151 2151 Since the rename detection is only run once, other factors might easily
2152 2152 affect the precision of the timing. However, it should give a good
2153 2153 approximation of which revision pairs are very costly.
2154 2154 """
2155 2155 opts = _byteskwargs(opts)
2156 2156 fm = ui.formatter(b'perf', opts)
2157 2157 dotiming = opts[b'timing']
2158 2158 dostats = opts[b'stats']
2159 2159
2160 2160 if dotiming:
2161 2161 header = '%12s %12s %12s %12s %12s %12s\n'
2162 2162 output = (
2163 2163 "%(source)12s %(destination)12s "
2164 2164 "%(nbrevs)12d %(nbmissingfiles)12d "
2165 2165 "%(nbrenamedfiles)12d %(time)18.5f\n"
2166 2166 )
2167 2167 header_names = (
2168 2168 "source",
2169 2169 "destination",
2170 2170 "nb-revs",
2171 2171 "nb-files",
2172 2172 "nb-renames",
2173 2173 "time",
2174 2174 )
2175 2175 fm.plain(header % header_names)
2176 2176 else:
2177 2177 header = '%12s %12s %12s %12s\n'
2178 2178 output = (
2179 2179 "%(source)12s %(destination)12s "
2180 2180 "%(nbrevs)12d %(nbmissingfiles)12d\n"
2181 2181 )
2182 2182 fm.plain(header % ("source", "destination", "nb-revs", "nb-files"))
2183 2183
2184 2184 if not revs:
2185 2185 revs = ['all()']
2186 2186 revs = scmutil.revrange(repo, revs)
2187 2187
2188 2188 if dostats:
2189 2189 alldata = {
2190 2190 'nbrevs': [],
2191 2191 'nbmissingfiles': [],
2192 2192 }
2193 2193 if dotiming:
2194 2194 alldata['nbrenames'] = []
2195 2195 alldata['time'] = []
2196 2196
2197 2197 roi = repo.revs('merge() and %ld', revs)
2198 2198 for r in roi:
2199 2199 ctx = repo[r]
2200 2200 p1 = ctx.p1().rev()
2201 2201 p2 = ctx.p2().rev()
2202 2202 bases = repo.changelog._commonancestorsheads(p1, p2)
2203 2203 for p in (p1, p2):
2204 2204 for b in bases:
2205 2205 base = repo[b]
2206 2206 parent = repo[p]
2207 2207 missing = copies._computeforwardmissing(base, parent)
2208 2208 if not missing:
2209 2209 continue
2210 2210 data = {
2211 2211 b'source': base.hex(),
2212 2212 b'destination': parent.hex(),
2213 2213 b'nbrevs': len(repo.revs('only(%d, %d)', p, b)),
2214 2214 b'nbmissingfiles': len(missing),
2215 2215 }
2216 2216 if dostats:
2217 2217 alldata['nbrevs'].append(
2218 2218 (
2219 2219 data['nbrevs'],
2220 2220 base.hex(),
2221 2221 parent.hex(),
2222 2222 )
2223 2223 )
2224 2224 alldata['nbmissingfiles'].append(
2225 2225 (
2226 2226 data['nbmissingfiles'],
2227 2227 base.hex(),
2228 2228 parent.hex(),
2229 2229 )
2230 2230 )
2231 2231 if dotiming:
2232 2232 begin = util.timer()
2233 2233 renames = copies.pathcopies(base, parent)
2234 2234 end = util.timer()
2235 2235 # not very stable timing since we did only one run
2236 2236 data['time'] = end - begin
2237 2237 data['nbrenamedfiles'] = len(renames)
2238 2238 if dostats:
2239 2239 alldata['time'].append(
2240 2240 (
2241 2241 data['time'],
2242 2242 base.hex(),
2243 2243 parent.hex(),
2244 2244 )
2245 2245 )
2246 2246 alldata['nbrenames'].append(
2247 2247 (
2248 2248 data['nbrenamedfiles'],
2249 2249 base.hex(),
2250 2250 parent.hex(),
2251 2251 )
2252 2252 )
2253 2253 fm.startitem()
2254 2254 fm.data(**data)
2255 2255 out = data.copy()
2256 2256 out['source'] = fm.hexfunc(base.node())
2257 2257 out['destination'] = fm.hexfunc(parent.node())
2258 2258 fm.plain(output % out)
2259 2259
2260 2260 fm.end()
2261 2261 if dostats:
2262 2262 entries = [
2263 2263 ('nbrevs', 'number of revisions covered'),
2264 2264 ('nbmissingfiles', 'number of missing files at head'),
2265 2265 ]
2266 2266 if dotiming:
2267 2267 entries.append(('nbrenames', 'renamed files'))
2268 2268 entries.append(('time', 'time'))
2269 2269 _displaystats(ui, opts, entries, alldata)
2270 2270
2271 2271
2272 2272 @command(b'perf::cca|perfcca', formatteropts)
2273 2273 def perfcca(ui, repo, **opts):
2274 2274 opts = _byteskwargs(opts)
2275 2275 timer, fm = gettimer(ui, opts)
2276 2276 timer(lambda: scmutil.casecollisionauditor(ui, False, repo.dirstate))
2277 2277 fm.end()
2278 2278
2279 2279
2280 2280 @command(b'perf::fncacheload|perffncacheload', formatteropts)
2281 2281 def perffncacheload(ui, repo, **opts):
2282 2282 opts = _byteskwargs(opts)
2283 2283 timer, fm = gettimer(ui, opts)
2284 2284 s = repo.store
2285 2285
2286 2286 def d():
2287 2287 s.fncache._load()
2288 2288
2289 2289 timer(d)
2290 2290 fm.end()
2291 2291
2292 2292
2293 2293 @command(b'perf::fncachewrite|perffncachewrite', formatteropts)
2294 2294 def perffncachewrite(ui, repo, **opts):
2295 2295 opts = _byteskwargs(opts)
2296 2296 timer, fm = gettimer(ui, opts)
2297 2297 s = repo.store
2298 2298 lock = repo.lock()
2299 2299 s.fncache._load()
2300 2300 tr = repo.transaction(b'perffncachewrite')
2301 2301 tr.addbackup(b'fncache')
2302 2302
2303 2303 def d():
2304 2304 s.fncache._dirty = True
2305 2305 s.fncache.write(tr)
2306 2306
2307 2307 timer(d)
2308 2308 tr.close()
2309 2309 lock.release()
2310 2310 fm.end()
2311 2311
2312 2312
2313 2313 @command(b'perf::fncacheencode|perffncacheencode', formatteropts)
2314 2314 def perffncacheencode(ui, repo, **opts):
2315 2315 opts = _byteskwargs(opts)
2316 2316 timer, fm = gettimer(ui, opts)
2317 2317 s = repo.store
2318 2318 s.fncache._load()
2319 2319
2320 2320 def d():
2321 2321 for p in s.fncache.entries:
2322 2322 s.encode(p)
2323 2323
2324 2324 timer(d)
2325 2325 fm.end()
2326 2326
2327 2327
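# worker loop for the threaded bdiff benchmark: diff text pairs pulled
# from `q` until a None sentinel shows up, then park on the `ready`
# condition until the next timed run (or until `done` is set)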
2328 2328 def _bdiffworker(q, blocks, xdiff, ready, done):
2329 2329 while not done.is_set():
2330 2330 pair = q.get()
2331 2331 while pair is not None:
2332 2332 if xdiff:
2333 2333 mdiff.bdiff.xdiffblocks(*pair)
2334 2334 elif blocks:
2335 2335 mdiff.bdiff.blocks(*pair)
2336 2336 else:
2337 2337 mdiff.textdiff(*pair)
2338 2338 q.task_done()
2339 2339 pair = q.get()
2340 2340 q.task_done() # for the None one
2341 2341 with ready:
2342 2342 ready.wait()
2343 2343
2344 2344
2345 2345 def _manifestrevision(repo, mnode):
2346 2346 ml = repo.manifestlog
2347 2347
2348 2348 if util.safehasattr(ml, b'getstorage'):
2349 2349 store = ml.getstorage(b'')
2350 2350 else:
2351 2351 store = ml._revlog
2352 2352
2353 2353 return store.revision(mnode)
2354 2354
2355 2355
2356 2356 @command(
2357 2357 b'perf::bdiff|perfbdiff',
2358 2358 revlogopts
2359 2359 + formatteropts
2360 2360 + [
2361 2361 (
2362 2362 b'',
2363 2363 b'count',
2364 2364 1,
2365 2365 b'number of revisions to test (when using --startrev)',
2366 2366 ),
2367 2367 (b'', b'alldata', False, b'test bdiffs for all associated revisions'),
2368 2368 (b'', b'threads', 0, b'number of threads to use (disable with 0)'),
2369 2369 (b'', b'blocks', False, b'test computing diffs into blocks'),
2370 2370 (b'', b'xdiff', False, b'use xdiff algorithm'),
2371 2371 ],
2372 2372 b'-c|-m|FILE REV',
2373 2373 )
2374 2374 def perfbdiff(ui, repo, file_, rev=None, count=None, threads=0, **opts):
2375 2375 """benchmark a bdiff between revisions
2376 2376
2377 2377 By default, benchmark a bdiff between its delta parent and itself.
2378 2378
2379 2379 With ``--count``, benchmark bdiffs between delta parents and self for N
2380 2380 revisions starting at the specified revision.
2381 2381
2382 2382 With ``--alldata``, assume the requested revision is a changeset and
2383 2383 measure bdiffs for all changes related to that changeset (manifest
2384 2384 and filelogs).
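
Example:

# xdiff-based block diffs over 100 manifest revisions starting at rev 0
$ hg perfbdiff -m 0 --count 100 --blocks --xdiff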
2385 2385 """
2386 2386 opts = _byteskwargs(opts)
2387 2387
2388 2388 if opts[b'xdiff'] and not opts[b'blocks']:
2389 2389 raise error.CommandError(b'perfbdiff', b'--xdiff requires --blocks')
2390 2390
2391 2391 if opts[b'alldata']:
2392 2392 opts[b'changelog'] = True
2393 2393
2394 2394 if opts.get(b'changelog') or opts.get(b'manifest'):
2395 2395 file_, rev = None, file_
2396 2396 elif rev is None:
2397 2397 raise error.CommandError(b'perfbdiff', b'invalid arguments')
2398 2398
2399 2399 blocks = opts[b'blocks']
2400 2400 xdiff = opts[b'xdiff']
2401 2401 textpairs = []
2402 2402
2403 2403 r = cmdutil.openrevlog(repo, b'perfbdiff', file_, opts)
2404 2404
2405 2405 startrev = r.rev(r.lookup(rev))
2406 2406 for rev in range(startrev, min(startrev + count, len(r) - 1)):
2407 2407 if opts[b'alldata']:
2408 2408 # Load revisions associated with changeset.
2409 2409 ctx = repo[rev]
2410 2410 mtext = _manifestrevision(repo, ctx.manifestnode())
2411 2411 for pctx in ctx.parents():
2412 2412 pman = _manifestrevision(repo, pctx.manifestnode())
2413 2413 textpairs.append((pman, mtext))
2414 2414
2415 2415 # Load filelog revisions by iterating manifest delta.
2416 2416 man = ctx.manifest()
2417 2417 pman = ctx.p1().manifest()
2418 2418 for filename, change in pman.diff(man).items():
2419 2419 fctx = repo.file(filename)
2420 2420 f1 = fctx.revision(change[0][0] or -1)
2421 2421 f2 = fctx.revision(change[1][0] or -1)
2422 2422 textpairs.append((f1, f2))
2423 2423 else:
2424 2424 dp = r.deltaparent(rev)
2425 2425 textpairs.append((r.revision(dp), r.revision(rev)))
2426 2426
2427 2427 withthreads = threads > 0
2428 2428 if not withthreads:
2429 2429
2430 2430 def d():
2431 2431 for pair in textpairs:
2432 2432 if xdiff:
2433 2433 mdiff.bdiff.xdiffblocks(*pair)
2434 2434 elif blocks:
2435 2435 mdiff.bdiff.blocks(*pair)
2436 2436 else:
2437 2437 mdiff.textdiff(*pair)
2438 2438
2439 2439 else:
2440 2440 q = queue()
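# prime each worker with one None sentinel so that it completes a first
# loop iteration and parks on the `ready` condition before timing starts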
2441 2441 for i in _xrange(threads):
2442 2442 q.put(None)
2443 2443 ready = threading.Condition()
2444 2444 done = threading.Event()
2445 2445 for i in _xrange(threads):
2446 2446 threading.Thread(
2447 2447 target=_bdiffworker, args=(q, blocks, xdiff, ready, done)
2448 2448 ).start()
2449 2449 q.join()
2450 2450
2451 2451 def d():
2452 2452 for pair in textpairs:
2453 2453 q.put(pair)
2454 2454 for i in _xrange(threads):
2455 2455 q.put(None)
2456 2456 with ready:
2457 2457 ready.notify_all()
2458 2458 q.join()
2459 2459
2460 2460 timer, fm = gettimer(ui, opts)
2461 2461 timer(d)
2462 2462 fm.end()
2463 2463
2464 2464 if withthreads:
2465 2465 done.set()
2466 2466 for i in _xrange(threads):
2467 2467 q.put(None)
2468 2468 with ready:
2469 2469 ready.notify_all()
2470 2470
2471 2471
2472 2472 @command(
2473 2473 b'perf::unidiff|perfunidiff',
2474 2474 revlogopts
2475 2475 + formatteropts
2476 2476 + [
2477 2477 (
2478 2478 b'',
2479 2479 b'count',
2480 2480 1,
2481 2481 b'number of revisions to test (when using --startrev)',
2482 2482 ),
2483 2483 (b'', b'alldata', False, b'test unidiffs for all associated revisions'),
2484 2484 ],
2485 2485 b'-c|-m|FILE REV',
2486 2486 )
2487 2487 def perfunidiff(ui, repo, file_, rev=None, count=None, **opts):
2488 2488 """benchmark a unified diff between revisions
2489 2489
2490 2490 This doesn't include any copy tracing - it's just a unified diff
2491 2491 of the texts.
2492 2492
2493 2493 By default, benchmark a diff between its delta parent and itself.
2494 2494
2495 2495 With ``--count``, benchmark diffs between delta parents and self for N
2496 2496 revisions starting at the specified revision.
2497 2497
2498 2498 With ``--alldata``, assume the requested revision is a changeset and
2499 2499 measure diffs for all changes related to that changeset (manifest
2500 2500 and filelogs).
2501 2501 """
2502 2502 opts = _byteskwargs(opts)
2503 2503 if opts[b'alldata']:
2504 2504 opts[b'changelog'] = True
2505 2505
2506 2506 if opts.get(b'changelog') or opts.get(b'manifest'):
2507 2507 file_, rev = None, file_
2508 2508 elif rev is None:
2509 2509 raise error.CommandError(b'perfunidiff', b'invalid arguments')
2510 2510
2511 2511 textpairs = []
2512 2512
2513 2513 r = cmdutil.openrevlog(repo, b'perfunidiff', file_, opts)
2514 2514
2515 2515 startrev = r.rev(r.lookup(rev))
2516 2516 for rev in range(startrev, min(startrev + count, len(r) - 1)):
2517 2517 if opts[b'alldata']:
2518 2518 # Load revisions associated with changeset.
2519 2519 ctx = repo[rev]
2520 2520 mtext = _manifestrevision(repo, ctx.manifestnode())
2521 2521 for pctx in ctx.parents():
2522 2522 pman = _manifestrevision(repo, pctx.manifestnode())
2523 2523 textpairs.append((pman, mtext))
2524 2524
2525 2525 # Load filelog revisions by iterating manifest delta.
2526 2526 man = ctx.manifest()
2527 2527 pman = ctx.p1().manifest()
2528 2528 for filename, change in pman.diff(man).items():
2529 2529 fctx = repo.file(filename)
2530 2530 f1 = fctx.revision(change[0][0] or -1)
2531 2531 f2 = fctx.revision(change[1][0] or -1)
2532 2532 textpairs.append((f1, f2))
2533 2533 else:
2534 2534 dp = r.deltaparent(rev)
2535 2535 textpairs.append((r.revision(dp), r.revision(rev)))
2536 2536
2537 2537 def d():
2538 2538 for left, right in textpairs:
2539 2539 # The date strings don't matter, so we pass empty strings.
2540 2540 headerlines, hunks = mdiff.unidiff(
2541 2541 left, b'', right, b'', b'left', b'right', binary=False
2542 2542 )
2543 2543 # consume iterators in roughly the way patch.py does
2544 2544 b'\n'.join(headerlines)
2545 2545 b''.join(sum((list(hlines) for hrange, hlines in hunks), []))
2546 2546
2547 2547 timer, fm = gettimer(ui, opts)
2548 2548 timer(d)
2549 2549 fm.end()
2550 2550
2551 2551
2552 2552 @command(b'perf::diffwd|perfdiffwd', formatteropts)
2553 2553 def perfdiffwd(ui, repo, **opts):
2554 2554 """Profile diff of working directory changes"""
2555 2555 opts = _byteskwargs(opts)
2556 2556 timer, fm = gettimer(ui, opts)
2557 2557 options = {
2558 2558 'w': 'ignore_all_space',
2559 2559 'b': 'ignore_space_change',
2560 2560 'B': 'ignore_blank_lines',
2561 2561 }
2562 2562
2563 2563 for diffopt in ('', 'w', 'b', 'B', 'wB'):
2564 2564 opts = {options[c]: b'1' for c in diffopt}
2565 2565
2566 2566 def d():
2567 2567 ui.pushbuffer()
2568 2568 commands.diff(ui, repo, **opts)
2569 2569 ui.popbuffer()
2570 2570
2571 2571 diffopt = diffopt.encode('ascii')
2572 2572 title = b'diffopts: %s' % (diffopt and (b'-' + diffopt) or b'none')
2573 2573 timer(d, title=title)
2574 2574 fm.end()
2575 2575
2576 2576
2577 2577 @command(
2578 2578 b'perf::revlogindex|perfrevlogindex',
2579 2579 revlogopts + formatteropts,
2580 2580 b'-c|-m|FILE',
2581 2581 )
2582 2582 def perfrevlogindex(ui, repo, file_=None, **opts):
2583 2583 """Benchmark operations against a revlog index.
2584 2584
2585 2585 This tests constructing a revlog instance, reading index data,
2586 2586 parsing index data, and performing various operations related to
2587 2587 index data.
2588 2588 """
2589 2589
2590 2590 opts = _byteskwargs(opts)
2591 2591
2592 2592 rl = cmdutil.openrevlog(repo, b'perfrevlogindex', file_, opts)
2593 2593
2594 2594 opener = getattr(rl, 'opener') # trick linter
2595 2595 indexfile = rl.indexfile
2596 2596 data = opener.read(indexfile)
2597 2597
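# the first four bytes of a revlog hold the flags and the version: the
# low 16 bits are the format version, bit 16 is the inline-data flag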
2598 2598 header = struct.unpack(b'>I', data[0:4])[0]
2599 2599 version = header & 0xFFFF
2600 2600 if version == 1:
2601 2601 revlogio = revlog.revlogio()
2602 2602 inline = header & (1 << 16)
2603 2603 else:
2604 2604 raise error.Abort(b'unsupported revlog version: %d' % version)
2605 2605
2606 2606 rllen = len(rl)
2607 2607
2608 2608 node0 = rl.node(0)
2609 2609 node25 = rl.node(rllen // 4)
2610 2610 node50 = rl.node(rllen // 2)
2611 2611 node75 = rl.node(rllen // 4 * 3)
2612 2612 node100 = rl.node(rllen - 1)
2613 2613
2614 2614 allrevs = range(rllen)
2615 2615 allrevsrev = list(reversed(allrevs))
2616 2616 allnodes = [rl.node(rev) for rev in range(rllen)]
2617 2617 allnodesrev = list(reversed(allnodes))
2618 2618
2619 2619 def constructor():
2620 2620 revlog.revlog(opener, indexfile)
2621 2621
2622 2622 def read():
2623 2623 with opener(indexfile) as fh:
2624 2624 fh.read()
2625 2625
2626 2626 def parseindex():
2627 2627 revlogio.parseindex(data, inline)
2628 2628
2629 2629 def getentry(revornode):
2630 2630 index = revlogio.parseindex(data, inline)[0]
2631 2631 index[revornode]
2632 2632
2633 2633 def getentries(revs, count=1):
2634 2634 index = revlogio.parseindex(data, inline)[0]
2635 2635
2636 2636 for i in range(count):
2637 2637 for rev in revs:
2638 2638 index[rev]
2639 2639
2640 2640 def resolvenode(node):
2641 2641 index = revlogio.parseindex(data, inline)[0]
2642 2642 rev = getattr(index, 'rev', None)
2643 2643 if rev is None:
2644 2644 nodemap = getattr(
2645 2645 revlogio.parseindex(data, inline)[0], 'nodemap', None
2646 2646 )
2647 2647 # This only works for the C code.
2648 2648 if nodemap is None:
2649 2649 return
2650 2650 rev = nodemap.__getitem__
2651 2651
2652 2652 try:
2653 2653 rev(node)
2654 2654 except error.RevlogError:
2655 2655 pass
2656 2656
2657 2657 def resolvenodes(nodes, count=1):
2658 2658 index = revlogio.parseindex(data, inline)[0]
2659 2659 rev = getattr(index, 'rev', None)
2660 2660 if rev is None:
2661 2661 nodemap = getattr(
2662 2662 revlogio.parseindex(data, inline)[0], 'nodemap', None
2663 2663 )
2664 2664 # This only works for the C code.
2665 2665 if nodemap is None:
2666 2666 return
2667 2667 rev = nodemap.__getitem__
2668 2668
2669 2669 for i in range(count):
2670 2670 for node in nodes:
2671 2671 try:
2672 2672 rev(node)
2673 2673 except error.RevlogError:
2674 2674 pass
2675 2675
2676 2676 benches = [
2677 2677 (constructor, b'revlog constructor'),
2678 2678 (read, b'read'),
2679 2679 (parseindex, b'create index object'),
2680 2680 (lambda: getentry(0), b'retrieve index entry for rev 0'),
2681 2681 (lambda: resolvenode(b'a' * 20), b'look up missing node'),
2682 2682 (lambda: resolvenode(node0), b'look up node at rev 0'),
2683 2683 (lambda: resolvenode(node25), b'look up node at 1/4 len'),
2684 2684 (lambda: resolvenode(node50), b'look up node at 1/2 len'),
2685 2685 (lambda: resolvenode(node75), b'look up node at 3/4 len'),
2686 2686 (lambda: resolvenode(node100), b'look up node at tip'),
2687 2687 # 2x variation is to measure caching impact.
2688 2688 (lambda: resolvenodes(allnodes), b'look up all nodes (forward)'),
2689 2689 (lambda: resolvenodes(allnodes, 2), b'look up all nodes 2x (forward)'),
2690 2690 (lambda: resolvenodes(allnodesrev), b'look up all nodes (reverse)'),
2691 2691 (
2692 2692 lambda: resolvenodes(allnodesrev, 2),
2693 2693 b'look up all nodes 2x (reverse)',
2694 2694 ),
2695 2695 (lambda: getentries(allrevs), b'retrieve all index entries (forward)'),
2696 2696 (
2697 2697 lambda: getentries(allrevs, 2),
2698 2698 b'retrieve all index entries 2x (forward)',
2699 2699 ),
2700 2700 (
2701 2701 lambda: getentries(allrevsrev),
2702 2702 b'retrieve all index entries (reverse)',
2703 2703 ),
2704 2704 (
2705 2705 lambda: getentries(allrevsrev, 2),
2706 2706 b'retrieve all index entries 2x (reverse)',
2707 2707 ),
2708 2708 ]
2709 2709
2710 2710 for fn, title in benches:
2711 2711 timer, fm = gettimer(ui, opts)
2712 2712 timer(fn, title=title)
2713 2713 fm.end()
2714 2714
2715 2715
2716 2716 @command(
2717 2717 b'perf::revlogrevisions|perfrevlogrevisions',
2718 2718 revlogopts
2719 2719 + formatteropts
2720 2720 + [
2721 2721 (b'd', b'dist', 100, b'distance between the revisions'),
2722 2722 (b's', b'startrev', 0, b'revision to start reading at'),
2723 2723 (b'', b'reverse', False, b'read in reverse'),
2724 2724 ],
2725 2725 b'-c|-m|FILE',
2726 2726 )
2727 2727 def perfrevlogrevisions(
2728 2728 ui, repo, file_=None, startrev=0, reverse=False, **opts
2729 2729 ):
2730 2730 """Benchmark reading a series of revisions from a revlog.
2731 2731
2732 2732 By default, we read every ``-d/--dist`` revision from 0 to tip of
2733 2733 the specified revlog.
2734 2734
2735 2735 The start revision can be defined via ``-s/--startrev``.
2736 2736 """
2737 2737 opts = _byteskwargs(opts)
2738 2738
2739 2739 rl = cmdutil.openrevlog(repo, b'perfrevlogrevisions', file_, opts)
2740 2740 rllen = getlen(ui)(rl)
2741 2741
2742 2742 if startrev < 0:
2743 2743 startrev = rllen + startrev
2744 2744
2745 2745 def d():
2746 2746 rl.clearcaches()
2747 2747
2748 2748 beginrev = startrev
2749 2749 endrev = rllen
2750 2750 dist = opts[b'dist']
2751 2751
2752 2752 if reverse:
2753 2753 beginrev, endrev = endrev - 1, beginrev - 1
2754 2754 dist = -1 * dist
2755 2755
2756 2756 for x in _xrange(beginrev, endrev, dist):
2757 2757 # Old revlog versions don't support passing an int to revision().
2758 2758 n = rl.node(x)
2759 2759 rl.revision(n)
2760 2760
2761 2761 timer, fm = gettimer(ui, opts)
2762 2762 timer(d)
2763 2763 fm.end()
2764 2764
2765 2765
2766 2766 @command(
2767 2767 b'perf::revlogwrite|perfrevlogwrite',
2768 2768 revlogopts
2769 2769 + formatteropts
2770 2770 + [
2771 2771 (b's', b'startrev', 1000, b'revision to start writing at'),
2772 2772 (b'', b'stoprev', -1, b'last revision to write'),
2773 2773 (b'', b'count', 3, b'number of passes to perform'),
2774 2774 (b'', b'details', False, b'print timing for every revision tested'),
2775 2775 (b'', b'source', b'full', b'the kind of data feed in the revlog'),
2776 2776 (b'', b'lazydeltabase', True, b'try the provided delta first'),
2777 2777 (b'', b'clear-caches', True, b'clear revlog cache between calls'),
2778 2778 ],
2779 2779 b'-c|-m|FILE',
2780 2780 )
2781 2781 def perfrevlogwrite(ui, repo, file_=None, startrev=1000, stoprev=-1, **opts):
2782 2782 """Benchmark writing a series of revisions to a revlog.
2783 2783
2784 2784 Possible source values are:
2785 2785 * `full`: add from a full text (default).
2786 2786 * `parent-1`: add from a delta to the first parent
2787 2787 * `parent-2`: add from a delta to the second parent if it exists
2788 2788 (use a delta from the first parent otherwise)
2789 2789 * `parent-smallest`: add from the smallest delta (either p1 or p2)
2790 2790 * `storage`: add from the existing precomputed deltas
2791 2791
2792 2792 Note: This command measures performance in a custom way. As a
2793 2793 result some of the global configuration of the 'perf' command does not
2794 2794 apply to it:
2795 2795
2796 2796 * ``pre-run``: disabled
2797 2797
2798 2798 * ``profile-benchmark``: disabled
2799 2799
2800 2800 * ``run-limits``: disabled, use ``--count`` instead
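
Example:

# re-add manifest revisions 1000..tip from their smallest parent delta,
# timing two passes (assumes the manifest has more than 1000 revisions)
$ hg perfrevlogwrite -m --source parent-smallest --count 2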
2801 2801 """
2802 2802 opts = _byteskwargs(opts)
2803 2803
2804 2804 rl = cmdutil.openrevlog(repo, b'perfrevlogwrite', file_, opts)
2805 2805 rllen = getlen(ui)(rl)
2806 2806 if startrev < 0:
2807 2807 startrev = rllen + startrev
2808 2808 if stoprev < 0:
2809 2809 stoprev = rllen + stoprev
2810 2810
2811 2811 lazydeltabase = opts['lazydeltabase']
2812 2812 source = opts['source']
2813 2813 clearcaches = opts['clear_caches']
2814 2814 validsource = (
2815 2815 b'full',
2816 2816 b'parent-1',
2817 2817 b'parent-2',
2818 2818 b'parent-smallest',
2819 2819 b'storage',
2820 2820 )
2821 2821 if source not in validsource:
2822 2822 raise error.Abort('invalid source type: %s' % source)
2823 2823
2824 2824 ### actually gather results
2825 2825 count = opts['count']
2826 2826 if count <= 0:
2827 2827 raise error.Abort('invalid run count: %d' % count)
2828 2828 allresults = []
2829 2829 for c in range(count):
2830 2830 timing = _timeonewrite(
2831 2831 ui,
2832 2832 rl,
2833 2833 source,
2834 2834 startrev,
2835 2835 stoprev,
2836 2836 c + 1,
2837 2837 lazydeltabase=lazydeltabase,
2838 2838 clearcaches=clearcaches,
2839 2839 )
2840 2840 allresults.append(timing)
2841 2841
2842 2842 ### consolidate the results in a single list
2843 2843 results = []
2844 2844 for idx, (rev, t) in enumerate(allresults[0]):
2845 2845 ts = [t]
2846 2846 for other in allresults[1:]:
2847 2847 orev, ot = other[idx]
2848 2848 assert orev == rev
2849 2849 ts.append(ot)
2850 2850 results.append((rev, ts))
2851 2851 resultcount = len(results)
2852 2852
2853 2853 ### Compute and display relevant statistics
2854 2854
2855 2855 # get a formatter
2856 2856 fm = ui.formatter(b'perf', opts)
2857 2857 displayall = ui.configbool(b"perf", b"all-timing", False)
2858 2858
2859 2859 # print individual details if requested
2860 2860 if opts['details']:
2861 2861 for idx, item in enumerate(results, 1):
2862 2862 rev, data = item
2863 2863 title = 'revision #%d of %d, rev %d' % (idx, resultcount, rev)
2864 2864 formatone(fm, data, title=title, displayall=displayall)
2865 2865
2866 2866 # sorts results by median time
2867 2867 results.sort(key=lambda x: sorted(x[1])[len(x[1]) // 2])
2868 2868 # list of (name, index) to display
2869 2869 relevants = [
2870 2870 ("min", 0),
2871 2871 ("10%", resultcount * 10 // 100),
2872 2872 ("25%", resultcount * 25 // 100),
2873 2873 ("50%", resultcount * 70 // 100),
2874 2874 ("75%", resultcount * 75 // 100),
2875 2875 ("90%", resultcount * 90 // 100),
2876 2876 ("95%", resultcount * 95 // 100),
2877 2877 ("99%", resultcount * 99 // 100),
2878 2878 ("99.9%", resultcount * 999 // 1000),
2879 2879 ("99.99%", resultcount * 9999 // 10000),
2880 2880 ("99.999%", resultcount * 99999 // 100000),
2881 2881 ("max", -1),
2882 2882 ]
2883 2883 if not ui.quiet:
2884 2884 for name, idx in relevants:
2885 2885 data = results[idx]
2886 2886 title = '%s of %d, rev %d' % (name, resultcount, data[0])
2887 2887 formatone(fm, data[1], title=title, displayall=displayall)
2888 2888
2889 2889 # XXX summing that many floats will not be very precise; we ignore this fact
2890 2890 # for now
2891 2891 totaltime = []
2892 2892 for item in allresults:
2893 2893 totaltime.append(
2894 2894 (
2895 2895 sum(x[1][0] for x in item),
2896 2896 sum(x[1][1] for x in item),
2897 2897 sum(x[1][2] for x in item),
2898 2898 )
2899 2899 )
2900 2900 formatone(
2901 2901 fm,
2902 2902 totaltime,
2903 2903 title="total time (%d revs)" % resultcount,
2904 2904 displayall=displayall,
2905 2905 )
2906 2906 fm.end()
2907 2907
2908 2908
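# minimal transaction stub: addrawrevision() only needs an object with
# an add() method, and this benchmark does not care about journaling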
2909 2909 class _faketr(object):
2910 2910 def add(s, x, y, z=None):
2911 2911 return None
2912 2912
2913 2913
2914 2914 def _timeonewrite(
2915 2915 ui,
2916 2916 orig,
2917 2917 source,
2918 2918 startrev,
2919 2919 stoprev,
2920 2920 runidx=None,
2921 2921 lazydeltabase=True,
2922 2922 clearcaches=True,
2923 2923 ):
2924 2924 timings = []
2925 2925 tr = _faketr()
2926 2926 with _temprevlog(ui, orig, startrev) as dest:
2927 2927 dest._lazydeltabase = lazydeltabase
2928 2928 revs = list(orig.revs(startrev, stoprev))
2929 2929 total = len(revs)
2930 2930 topic = 'adding'
2931 2931 if runidx is not None:
2932 2932 topic += ' (run #%d)' % runidx
2933 2933 # Support both old and new progress API
2934 2934 if util.safehasattr(ui, 'makeprogress'):
2935 2935 progress = ui.makeprogress(topic, unit='revs', total=total)
2936 2936
2937 2937 def updateprogress(pos):
2938 2938 progress.update(pos)
2939 2939
2940 2940 def completeprogress():
2941 2941 progress.complete()
2942 2942
2943 2943 else:
2944 2944
2945 2945 def updateprogress(pos):
2946 2946 ui.progress(topic, pos, unit='revs', total=total)
2947 2947
2948 2948 def completeprogress():
2949 2949 ui.progress(topic, None, unit='revs', total=total)
2950 2950
2951 2951 for idx, rev in enumerate(revs):
2952 2952 updateprogress(idx)
2953 2953 addargs, addkwargs = _getrevisionseed(orig, rev, tr, source)
2954 2954 if clearcaches:
2955 2955 dest.index.clearcaches()
2956 2956 dest.clearcaches()
2957 2957 with timeone() as r:
2958 2958 dest.addrawrevision(*addargs, **addkwargs)
2959 2959 timings.append((rev, r[0]))
2960 2960 updateprogress(total)
2961 2961 completeprogress()
2962 2962 return timings
2963 2963
2964 2964
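# build the (args, kwargs) for dest.addrawrevision() that recreate
# revision `rev` of `orig`, feeding either a full text or a cached
# delta depending on the requested `source` mode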
2965 2965 def _getrevisionseed(orig, rev, tr, source):
2966 2966 from mercurial.node import nullid
2967 2967
2968 2968 linkrev = orig.linkrev(rev)
2969 2969 node = orig.node(rev)
2970 2970 p1, p2 = orig.parents(node)
2971 2971 flags = orig.flags(rev)
2972 2972 cachedelta = None
2973 2973 text = None
2974 2974
2975 2975 if source == b'full':
2976 2976 text = orig.revision(rev)
2977 2977 elif source == b'parent-1':
2978 2978 baserev = orig.rev(p1)
2979 2979 cachedelta = (baserev, orig.revdiff(p1, rev))
2980 2980 elif source == b'parent-2':
2981 2981 parent = p2
2982 2982 if p2 == nullid:
2983 2983 parent = p1
2984 2984 baserev = orig.rev(parent)
2985 2985 cachedelta = (baserev, orig.revdiff(parent, rev))
2986 2986 elif source == b'parent-smallest':
2987 2987 p1diff = orig.revdiff(p1, rev)
2988 2988 parent = p1
2989 2989 diff = p1diff
2990 2990 if p2 != nullid:
2991 2991 p2diff = orig.revdiff(p2, rev)
2992 2992 if len(p1diff) > len(p2diff):
2993 2993 parent = p2
2994 2994 diff = p2diff
2995 2995 baserev = orig.rev(parent)
2996 2996 cachedelta = (baserev, diff)
2997 2997 elif source == b'storage':
2998 2998 baserev = orig.deltaparent(rev)
2999 2999 cachedelta = (baserev, orig.revdiff(orig.node(baserev), rev))
3000 3000
3001 3001 return (
3002 3002 (text, tr, linkrev, p1, p2),
3003 3003 {'node': node, 'flags': flags, 'cachedelta': cachedelta},
3004 3004 )
3005 3005
3006 3006
3007 3007 @contextlib.contextmanager
3008 3008 def _temprevlog(ui, orig, truncaterev):
3009 3009 from mercurial import vfs as vfsmod
3010 3010
3011 3011 if orig._inline:
3012 3012 raise error.Abort('not supporting inline revlog (yet)')
3013 3013 revlogkwargs = {}
3014 3014 k = 'upperboundcomp'
3015 3015 if util.safehasattr(orig, k):
3016 3016 revlogkwargs[k] = getattr(orig, k)
3017 3017
3018 3018 origindexpath = orig.opener.join(orig.indexfile)
3019 3019 origdatapath = orig.opener.join(orig.datafile)
3020 3020 indexname = 'revlog.i'
3021 3021 dataname = 'revlog.d'
3022 3022
3023 3023 tmpdir = tempfile.mkdtemp(prefix='tmp-hgperf-')
3024 3024 try:
3025 3025 # copy the data file in a temporary directory
3026 3026 ui.debug('copying data in %s\n' % tmpdir)
3027 3027 destindexpath = os.path.join(tmpdir, 'revlog.i')
3028 3028 destdatapath = os.path.join(tmpdir, 'revlog.d')
3029 3029 shutil.copyfile(origindexpath, destindexpath)
3030 3030 shutil.copyfile(origdatapath, destdatapath)
3031 3031
3032 3032 # remove the data we want to add again
3033 3033 ui.debug('truncating data to be rewritten\n')
3034 3034 with open(destindexpath, 'ab') as index:
3035 3035 index.seek(0)
3036 3036 index.truncate(truncaterev * orig._io.size)
3037 3037 with open(destdatapath, 'ab') as data:
3038 3038 data.seek(0)
3039 3039 data.truncate(orig.start(truncaterev))
3040 3040
3041 3041 # instantiate a new revlog from the temporary copy
3042 3042 ui.debug('instantiating new revlog from the truncated copy\n')
3043 3043 vfs = vfsmod.vfs(tmpdir)
3044 3044 vfs.options = getattr(orig.opener, 'options', None)
3045 3045
3046 3046 dest = revlog.revlog(
3047 3047 vfs, indexfile=indexname, datafile=dataname, **revlogkwargs
3048 3048 )
3049 3049 if dest._inline:
3050 3050 raise error.Abort('not supporting inline revlog (yet)')
3051 3051 # make sure internals are initialized
3052 3052 dest.revision(len(dest) - 1)
3053 3053 yield dest
3054 3054 del dest, vfs
3055 3055 finally:
3056 3056 shutil.rmtree(tmpdir, True)
3057 3057
3058 3058
3059 3059 @command(
3060 3060 b'perf::revlogchunks|perfrevlogchunks',
3061 3061 revlogopts
3062 3062 + formatteropts
3063 3063 + [
3064 3064 (b'e', b'engines', b'', b'compression engines to use'),
3065 3065 (b's', b'startrev', 0, b'revision to start at'),
3066 3066 ],
3067 3067 b'-c|-m|FILE',
3068 3068 )
3069 3069 def perfrevlogchunks(ui, repo, file_=None, engines=None, startrev=0, **opts):
3070 3070 """Benchmark operations on revlog chunks.
3071 3071
3072 3072 Logically, each revlog is a collection of fulltext revisions. However,
3073 3073 stored within each revlog are "chunks" of possibly compressed data. This
3074 3074 data needs to be read and decompressed or compressed and written.
3075 3075
3076 3076 This command measures the time it takes to read+decompress and recompress
3077 3077 chunks in a revlog. It effectively isolates I/O and compression performance.
3078 3078 For measurements of higher-level operations like resolving revisions,
3079 3079 see ``perfrevlogrevisions`` and ``perfrevlogrevision``.
3080 3080 """
3081 3081 opts = _byteskwargs(opts)
3082 3082
3083 3083 rl = cmdutil.openrevlog(repo, b'perfrevlogchunks', file_, opts)
3084 3084
3085 3085 # _chunkraw was renamed to _getsegmentforrevs.
3086 3086 try:
3087 3087 segmentforrevs = rl._getsegmentforrevs
3088 3088 except AttributeError:
3089 3089 segmentforrevs = rl._chunkraw
3090 3090
3091 3091 # Verify engines argument.
3092 3092 if engines:
3093 3093 engines = {e.strip() for e in engines.split(b',')}
3094 3094 for engine in engines:
3095 3095 try:
3096 3096 util.compressionengines[engine]
3097 3097 except KeyError:
3098 3098 raise error.Abort(b'unknown compression engine: %s' % engine)
3099 3099 else:
3100 3100 engines = []
3101 3101 for e in util.compengines:
3102 3102 engine = util.compengines[e]
3103 3103 try:
3104 3104 if engine.available():
3105 3105 engine.revlogcompressor().compress(b'dummy')
3106 3106 engines.append(e)
3107 3107 except NotImplementedError:
3108 3108 pass
3109 3109
3110 3110 revs = list(rl.revs(startrev, len(rl) - 1))
3111 3111
3112 3112 def rlfh(rl):
3113 3113 if rl._inline:
3114 3114 return getsvfs(repo)(rl.indexfile)
3115 3115 else:
3116 3116 return getsvfs(repo)(rl.datafile)
3117 3117
3118 3118 def doread():
3119 3119 rl.clearcaches()
3120 3120 for rev in revs:
3121 3121 segmentforrevs(rev, rev)
3122 3122
3123 3123 def doreadcachedfh():
3124 3124 rl.clearcaches()
3125 3125 fh = rlfh(rl)
3126 3126 for rev in revs:
3127 3127 segmentforrevs(rev, rev, df=fh)
3128 3128
3129 3129 def doreadbatch():
3130 3130 rl.clearcaches()
3131 3131 segmentforrevs(revs[0], revs[-1])
3132 3132
3133 3133 def doreadbatchcachedfh():
3134 3134 rl.clearcaches()
3135 3135 fh = rlfh(rl)
3136 3136 segmentforrevs(revs[0], revs[-1], df=fh)
3137 3137
3138 3138 def dochunk():
3139 3139 rl.clearcaches()
3140 3140 fh = rlfh(rl)
3141 3141 for rev in revs:
3142 3142 rl._chunk(rev, df=fh)
3143 3143
3144 3144 chunks = [None]
3145 3145
3146 3146 def dochunkbatch():
3147 3147 rl.clearcaches()
3148 3148 fh = rlfh(rl)
3149 3149 # Save chunks as a side-effect.
3150 3150 chunks[0] = rl._chunks(revs, df=fh)
3151 3151
3152 3152 def docompress(compressor):
3153 3153 rl.clearcaches()
3154 3154
3155 3155 try:
3156 3156 # Swap in the requested compression engine.
3157 3157 oldcompressor = rl._compressor
3158 3158 rl._compressor = compressor
3159 3159 for chunk in chunks[0]:
3160 3160 rl.compress(chunk)
3161 3161 finally:
3162 3162 rl._compressor = oldcompressor
3163 3163
3164 3164 benches = [
3165 3165 (lambda: doread(), b'read'),
3166 3166 (lambda: doreadcachedfh(), b'read w/ reused fd'),
3167 3167 (lambda: doreadbatch(), b'read batch'),
3168 3168 (lambda: doreadbatchcachedfh(), b'read batch w/ reused fd'),
3169 3169 (lambda: dochunk(), b'chunk'),
3170 3170 (lambda: dochunkbatch(), b'chunk batch'),
3171 3171 ]
3172 3172
3173 3173 for engine in sorted(engines):
3174 3174 compressor = util.compengines[engine].revlogcompressor()
3175 3175 benches.append(
3176 3176 (
3177 3177 functools.partial(docompress, compressor),
3178 3178 b'compress w/ %s' % engine,
3179 3179 )
3180 3180 )
3181 3181
3182 3182 for fn, title in benches:
3183 3183 timer, fm = gettimer(ui, opts)
3184 3184 timer(fn, title=title)
3185 3185 fm.end()
3186 3186
3187 3187
3188 3188 @command(
3189 3189 b'perf::revlogrevision|perfrevlogrevision',
3190 3190 revlogopts
3191 3191 + formatteropts
3192 3192 + [(b'', b'cache', False, b'use caches instead of clearing')],
3193 3193 b'-c|-m|FILE REV',
3194 3194 )
3195 3195 def perfrevlogrevision(ui, repo, file_, rev=None, cache=None, **opts):
3196 3196 """Benchmark obtaining a revlog revision.
3197 3197
3198 3198 Obtaining a revlog revision consists of roughly the following steps:
3199 3199
3200 3200 1. Compute the delta chain
3201 3201 2. Slice the delta chain if applicable
3202 3202 3. Obtain the raw chunks for that delta chain
3203 3203 4. Decompress each raw chunk
3204 3204 5. Apply binary patches to obtain fulltext
3205 3205 6. Verify hash of fulltext
3206 3206
3207 3207 This command measures the time spent in each of these phases.
3208 3208 """
3209 3209 opts = _byteskwargs(opts)
3210 3210
3211 3211 if opts.get(b'changelog') or opts.get(b'manifest'):
3212 3212 file_, rev = None, file_
3213 3213 elif rev is None:
3214 3214 raise error.CommandError(b'perfrevlogrevision', b'invalid arguments')
3215 3215
3216 3216 r = cmdutil.openrevlog(repo, b'perfrevlogrevision', file_, opts)
3217 3217
3218 3218 # _chunkraw was renamed to _getsegmentforrevs.
3219 3219 try:
3220 3220 segmentforrevs = r._getsegmentforrevs
3221 3221 except AttributeError:
3222 3222 segmentforrevs = r._chunkraw
3223 3223
3224 3224 node = r.lookup(rev)
3225 3225 rev = r.rev(node)
3226 3226
3227 3227 def getrawchunks(data, chain):
3228 3228 start = r.start
3229 3229 length = r.length
3230 3230 inline = r._inline
3231 iosize = r._io.size
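# prefer the modern index.entry_size attribute; fall back to the
# legacy _io.size on Mercurial versions that predate it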
3231 try:
3232 iosize = r.index.entry_size
3233 except AttributeError:
3234 iosize = r._io.size
3232 3235 buffer = util.buffer
3233 3236
3234 3237 chunks = []
3235 3238 ladd = chunks.append
3236 3239 for idx, item in enumerate(chain):
3237 3240 offset = start(item[0])
3238 3241 bits = data[idx]
3239 3242 for rev in item:
3240 3243 chunkstart = start(rev)
3241 3244 if inline:
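# in an inline revlog each data chunk is preceded by its index
# entry, so the data of `rev` sits (rev + 1) index entries further in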
3242 3245 chunkstart += (rev + 1) * iosize
3243 3246 chunklength = length(rev)
3244 3247 ladd(buffer(bits, chunkstart - offset, chunklength))
3245 3248
3246 3249 return chunks
3247 3250
3248 3251 def dodeltachain(rev):
3249 3252 if not cache:
3250 3253 r.clearcaches()
3251 3254 r._deltachain(rev)
3252 3255
3253 3256 def doread(chain):
3254 3257 if not cache:
3255 3258 r.clearcaches()
3256 3259 for item in slicedchain:
3257 3260 segmentforrevs(item[0], item[-1])
3258 3261
3259 3262 def doslice(r, chain, size):
3260 3263 for s in slicechunk(r, chain, targetsize=size):
3261 3264 pass
3262 3265
3263 3266 def dorawchunks(data, chain):
3264 3267 if not cache:
3265 3268 r.clearcaches()
3266 3269 getrawchunks(data, chain)
3267 3270
3268 3271 def dodecompress(chunks):
3269 3272 decomp = r.decompress
3270 3273 for chunk in chunks:
3271 3274 decomp(chunk)
3272 3275
3273 3276 def dopatch(text, bins):
3274 3277 if not cache:
3275 3278 r.clearcaches()
3276 3279 mdiff.patches(text, bins)
3277 3280
3278 3281 def dohash(text):
3279 3282 if not cache:
3280 3283 r.clearcaches()
3281 3284 r.checkhash(text, node, rev=rev)
3282 3285
3283 3286 def dorevision():
3284 3287 if not cache:
3285 3288 r.clearcaches()
3286 3289 r.revision(node)
3287 3290
3288 3291 try:
3289 3292 from mercurial.revlogutils.deltas import slicechunk
3290 3293 except ImportError:
3291 3294 slicechunk = getattr(revlog, '_slicechunk', None)
3292 3295
3293 3296 size = r.length(rev)
3294 3297 chain = r._deltachain(rev)[0]
3295 3298 if not getattr(r, '_withsparseread', False):
3296 3299 slicedchain = (chain,)
3297 3300 else:
3298 3301 slicedchain = tuple(slicechunk(r, chain, targetsize=size))
3299 3302 data = [segmentforrevs(seg[0], seg[-1])[1] for seg in slicedchain]
3300 3303 rawchunks = getrawchunks(data, slicedchain)
3301 3304 bins = r._chunks(chain)
3302 3305 text = bytes(bins[0])
3303 3306 bins = bins[1:]
3304 3307 text = mdiff.patches(text, bins)
3305 3308
3306 3309 benches = [
3307 3310 (lambda: dorevision(), b'full'),
3308 3311 (lambda: dodeltachain(rev), b'deltachain'),
3309 3312 (lambda: doread(chain), b'read'),
3310 3313 ]
3311 3314
3312 3315 if getattr(r, '_withsparseread', False):
3313 3316 slicing = (lambda: doslice(r, chain, size), b'slice-sparse-chain')
3314 3317 benches.append(slicing)
3315 3318
3316 3319 benches.extend(
3317 3320 [
3318 3321 (lambda: dorawchunks(data, slicedchain), b'rawchunks'),
3319 3322 (lambda: dodecompress(rawchunks), b'decompress'),
3320 3323 (lambda: dopatch(text, bins), b'patch'),
3321 3324 (lambda: dohash(text), b'hash'),
3322 3325 ]
3323 3326 )
3324 3327
3325 3328 timer, fm = gettimer(ui, opts)
3326 3329 for fn, title in benches:
3327 3330 timer(fn, title=title)
3328 3331 fm.end()
3329 3332
3330 3333
3331 3334 @command(
3332 3335 b'perf::revset|perfrevset',
3333 3336 [
3334 3337 (b'C', b'clear', False, b'clear volatile cache between each call.'),
3335 3338 (b'', b'contexts', False, b'obtain changectx for each revision'),
3336 3339 ]
3337 3340 + formatteropts,
3338 3341 b"REVSET",
3339 3342 )
3340 3343 def perfrevset(ui, repo, expr, clear=False, contexts=False, **opts):
3341 3344 """benchmark the execution time of a revset
3342 3345
3343 3346 Use the --clear option if you need to evaluate the impact of building the
3344 3347 volatile revision set caches on revset execution. Volatile caches hold
3345 3348 filtering- and obsolescence-related data."""
3346 3349 opts = _byteskwargs(opts)
3347 3350
3348 3351 timer, fm = gettimer(ui, opts)
3349 3352
3350 3353 def d():
3351 3354 if clear:
3352 3355 repo.invalidatevolatilesets()
3353 3356 if contexts:
3354 3357 for ctx in repo.set(expr):
3355 3358 pass
3356 3359 else:
3357 3360 for r in repo.revs(expr):
3358 3361 pass
3359 3362
3360 3363 timer(d)
3361 3364 fm.end()
3362 3365
3363 3366
3364 3367 @command(
3365 3368 b'perf::volatilesets|perfvolatilesets',
3366 3369 [
3367 3370 (b'', b'clear-obsstore', False, b'drop obsstore between each call.'),
3368 3371 ]
3369 3372 + formatteropts,
3370 3373 )
3371 3374 def perfvolatilesets(ui, repo, *names, **opts):
3372 3375 """benchmark the computation of various volatile set
3373 3376
3374 3377 Volatile set computes element related to filtering and obsolescence."""
3375 3378 opts = _byteskwargs(opts)
3376 3379 timer, fm = gettimer(ui, opts)
3377 3380 repo = repo.unfiltered()
3378 3381
3379 3382 def getobs(name):
3380 3383 def d():
3381 3384 repo.invalidatevolatilesets()
3382 3385 if opts[b'clear_obsstore']:
3383 3386 clearfilecache(repo, b'obsstore')
3384 3387 obsolete.getrevs(repo, name)
3385 3388
3386 3389 return d
3387 3390
3388 3391 allobs = sorted(obsolete.cachefuncs)
3389 3392 if names:
3390 3393 allobs = [n for n in allobs if n in names]
3391 3394
3392 3395 for name in allobs:
3393 3396 timer(getobs(name), title=name)
3394 3397
3395 3398 def getfiltered(name):
3396 3399 def d():
3397 3400 repo.invalidatevolatilesets()
3398 3401 if opts[b'clear_obsstore']:
3399 3402 clearfilecache(repo, b'obsstore')
3400 3403 repoview.filterrevs(repo, name)
3401 3404
3402 3405 return d
3403 3406
3404 3407 allfilter = sorted(repoview.filtertable)
3405 3408 if names:
3406 3409 allfilter = [n for n in allfilter if n in names]
3407 3410
3408 3411 for name in allfilter:
3409 3412 timer(getfiltered(name), title=name)
3410 3413 fm.end()
3411 3414
3412 3415
3413 3416 @command(
3414 3417 b'perf::branchmap|perfbranchmap',
3415 3418 [
3416 3419 (b'f', b'full', False, b'Includes build time of subset'),
3417 3420 (
3418 3421 b'',
3419 3422 b'clear-revbranch',
3420 3423 False,
3421 3424 b'purge the revbranch cache between computation',
3422 3425 ),
3423 3426 ]
3424 3427 + formatteropts,
3425 3428 )
3426 3429 def perfbranchmap(ui, repo, *filternames, **opts):
3427 3430 """benchmark the update of a branchmap
3428 3431
3429 3432 This benchmarks the full repo.branchmap() call with read and write disabled
3430 3433 """
3431 3434 opts = _byteskwargs(opts)
3432 3435 full = opts.get(b"full", False)
3433 3436 clear_revbranch = opts.get(b"clear_revbranch", False)
3434 3437 timer, fm = gettimer(ui, opts)
3435 3438
3436 3439 def getbranchmap(filtername):
3437 3440 """generate a benchmark function for the filtername"""
3438 3441 if filtername is None:
3439 3442 view = repo
3440 3443 else:
3441 3444 view = repo.filtered(filtername)
3442 3445 if util.safehasattr(view._branchcaches, '_per_filter'):
3443 3446 filtered = view._branchcaches._per_filter
3444 3447 else:
3445 3448 # older versions
3446 3449 filtered = view._branchcaches
3447 3450
3448 3451 def d():
3449 3452 if clear_revbranch:
3450 3453 repo.revbranchcache()._clear()
3451 3454 if full:
3452 3455 view._branchcaches.clear()
3453 3456 else:
3454 3457 filtered.pop(filtername, None)
3455 3458 view.branchmap()
3456 3459
3457 3460 return d
3458 3461
3459 3462 # process filters from the smallest subset up to the biggest
3460 3463 possiblefilters = set(repoview.filtertable)
3461 3464 if filternames:
3462 3465 possiblefilters &= set(filternames)
3463 3466 subsettable = getbranchmapsubsettable()
3464 3467 allfilters = []
3465 3468 while possiblefilters:
3466 3469 for name in possiblefilters:
3467 3470 subset = subsettable.get(name)
3468 3471 if subset not in possiblefilters:
3469 3472 break
3470 3473 else:
3471 3474 assert False, b'subset cycle %s!' % possiblefilters
3472 3475 allfilters.append(name)
3473 3476 possiblefilters.remove(name)
3474 3477
3475 3478 # warm the cache
3476 3479 if not full:
3477 3480 for name in allfilters:
3478 3481 repo.filtered(name).branchmap()
3479 3482 if not filternames or b'unfiltered' in filternames:
3480 3483 # add unfiltered
3481 3484 allfilters.append(None)
3482 3485
3483 3486 if util.safehasattr(branchmap.branchcache, 'fromfile'):
3484 3487 branchcacheread = safeattrsetter(branchmap.branchcache, b'fromfile')
3485 3488 branchcacheread.set(classmethod(lambda *args: None))
3486 3489 else:
3487 3490 # older versions
3488 3491 branchcacheread = safeattrsetter(branchmap, b'read')
3489 3492 branchcacheread.set(lambda *args: None)
3490 3493 branchcachewrite = safeattrsetter(branchmap.branchcache, b'write')
3491 3494 branchcachewrite.set(lambda *args: None)
3492 3495 try:
3493 3496 for name in allfilters:
3494 3497 printname = name
3495 3498 if name is None:
3496 3499 printname = b'unfiltered'
3497 3500 timer(getbranchmap(name), title=printname)
3498 3501 finally:
3499 3502 branchcacheread.restore()
3500 3503 branchcachewrite.restore()
3501 3504 fm.end()
3502 3505
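The ordering loop in perfbranchmap above leans on `subsettable`, which maps each repoview filter to the smaller subset it is computed from: a filter is only benchmarked once that subset has been handled, and a cycle is an assertion failure. A minimal standalone sketch of the same ordering (the `order_filters` name is hypothetical):

    def order_filters(possible, subsettable):
        ordered = []
        remaining = set(possible)
        while remaining:
            for name in remaining:
                # ready once the subset this filter builds on is done
                if subsettable.get(name) not in remaining:
                    break
            else:
                raise AssertionError('subset cycle %r' % remaining)
            ordered.append(name)
            remaining.remove(name)
        return ordered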
3503 3506
3504 3507 @command(
3505 3508 b'perf::branchmapupdate|perfbranchmapupdate',
3506 3509 [
3507 3510 (b'', b'base', [], b'subset of revisions to start from'),
3508 3511 (b'', b'target', [], b'subset of revisions to end with'),
3509 3512 (b'', b'clear-caches', False, b'clear caches between each run'),
3510 3513 ]
3511 3514 + formatteropts,
3512 3515 )
3513 3516 def perfbranchmapupdate(ui, repo, base=(), target=(), **opts):
3514 3517 """benchmark branchmap update from for <base> revs to <target> revs
3515 3518
3516 3519 If `--clear-caches` is passed, the following items will be reset before
3517 3520 each update:
3518 3521 * the changelog instance and associated indexes
3519 3522 * the rev-branch-cache instance
3520 3523
3521 3524 Examples:
3522 3525
3523 3526 # update for just the last revision
3524 3527 $ hg perfbranchmapupdate --base 'not tip' --target 'tip'
3525 3528
3526 3529 # update for a change coming with a new branch
3527 3530 $ hg perfbranchmapupdate --base 'stable' --target 'default'
3528 3531 """
3529 3532 from mercurial import branchmap
3530 3533 from mercurial import repoview
3531 3534
3532 3535 opts = _byteskwargs(opts)
3533 3536 timer, fm = gettimer(ui, opts)
3534 3537 clearcaches = opts[b'clear_caches']
3535 3538 unfi = repo.unfiltered()
3536 3539 x = [None] # used to pass data between closures
3537 3540
3538 3541 # we use a `list` here to avoid possible side effects from smartset
3539 3542 baserevs = list(scmutil.revrange(repo, base))
3540 3543 targetrevs = list(scmutil.revrange(repo, target))
3541 3544 if not baserevs:
3542 3545 raise error.Abort(b'no revisions selected for --base')
3543 3546 if not targetrevs:
3544 3547 raise error.Abort(b'no revisions selected for --target')
3545 3548
3546 3549 # make sure the target branchmap also contains the one in the base
3547 3550 targetrevs = list(set(baserevs) | set(targetrevs))
3548 3551 targetrevs.sort()
3549 3552
3550 3553 cl = repo.changelog
3551 3554 allbaserevs = list(cl.ancestors(baserevs, inclusive=True))
3552 3555 allbaserevs.sort()
3553 3556 alltargetrevs = frozenset(cl.ancestors(targetrevs, inclusive=True))
3554 3557
3555 3558 newrevs = list(alltargetrevs.difference(allbaserevs))
3556 3559 newrevs.sort()
3557 3560
3558 3561 allrevs = frozenset(unfi.changelog.revs())
3559 3562 basefilterrevs = frozenset(allrevs.difference(allbaserevs))
3560 3563 targetfilterrevs = frozenset(allrevs.difference(alltargetrevs))
3561 3564
3562 3565 def basefilter(repo, visibilityexceptions=None):
3563 3566 return basefilterrevs
3564 3567
3565 3568 def targetfilter(repo, visibilityexceptions=None):
3566 3569 return targetfilterrevs
3567 3570
3568 3571 msg = b'benchmark of branchmap with %d revisions with %d new ones\n'
3569 3572 ui.status(msg % (len(allbaserevs), len(newrevs)))
3570 3573 if targetfilterrevs:
3571 3574 msg = b'(%d revisions still filtered)\n'
3572 3575 ui.status(msg % len(targetfilterrevs))
3573 3576
3574 3577 try:
3575 3578 repoview.filtertable[b'__perf_branchmap_update_base'] = basefilter
3576 3579 repoview.filtertable[b'__perf_branchmap_update_target'] = targetfilter
3577 3580
3578 3581 baserepo = repo.filtered(b'__perf_branchmap_update_base')
3579 3582 targetrepo = repo.filtered(b'__perf_branchmap_update_target')
3580 3583
3581 3584 # try to find an existing branchmap to reuse
3582 3585 subsettable = getbranchmapsubsettable()
3583 3586 candidatefilter = subsettable.get(None)
3584 3587 while candidatefilter is not None:
3585 3588 candidatebm = repo.filtered(candidatefilter).branchmap()
3586 3589 if candidatebm.validfor(baserepo):
3587 3590 filtered = repoview.filterrevs(repo, candidatefilter)
3588 3591 missing = [r for r in allbaserevs if r in filtered]
3589 3592 base = candidatebm.copy()
3590 3593 base.update(baserepo, missing)
3591 3594 break
3592 3595 candidatefilter = subsettable.get(candidatefilter)
3593 3596 else:
3594 3597 # no suitable subset was found
3595 3598 base = branchmap.branchcache()
3596 3599 base.update(baserepo, allbaserevs)
3597 3600
3598 3601 def setup():
3599 3602 x[0] = base.copy()
3600 3603 if clearcaches:
3601 3604 unfi._revbranchcache = None
3602 3605 clearchangelog(repo)
3603 3606
3604 3607 def bench():
3605 3608 x[0].update(targetrepo, newrevs)
3606 3609
3607 3610 timer(bench, setup=setup)
3608 3611 fm.end()
3609 3612 finally:
3610 3613 repoview.filtertable.pop(b'__perf_branchmap_update_base', None)
3611 3614 repoview.filtertable.pop(b'__perf_branchmap_update_target', None)
3612 3615
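The reuse logic above walks the `subsettable` chain from the widest filter down, looking for a cached branchmap that is `validfor()` the base repository, and only rebuilds from scratch when nothing matches. A condensed sketch of that search, using the same names as the code above (the `find_base_branchmap` wrapper itself is hypothetical):

    def find_base_branchmap(repo, baserepo, subsettable, allbaserevs):
        name = subsettable.get(None)
        while name is not None:
            candidate = repo.filtered(name).branchmap()
            if candidate.validfor(baserepo):
                base = candidate.copy()
                filtered = repoview.filterrevs(repo, name)
                # top up with base revs the candidate does not cover
                base.update(baserepo, [r for r in allbaserevs if r in filtered])
                return base
            name = subsettable.get(name)
        base = branchmap.branchcache()  # nothing reusable: full rebuild
        base.update(baserepo, allbaserevs)
        return base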
3613 3616
3614 3617 @command(
3615 3618 b'perf::branchmapload|perfbranchmapload',
3616 3619 [
3617 3620 (b'f', b'filter', b'', b'Specify repoview filter'),
3618 3621 (b'', b'list', False, b'List branchmap filter caches'),
3619 3622 (b'', b'clear-revlogs', False, b'refresh changelog and manifest'),
3620 3623 ]
3621 3624 + formatteropts,
3622 3625 )
3623 3626 def perfbranchmapload(ui, repo, filter=b'', list=False, **opts):
3624 3627 """benchmark reading the branchmap"""
3625 3628 opts = _byteskwargs(opts)
3626 3629 clearrevlogs = opts[b'clear_revlogs']
3627 3630
3628 3631 if list:
3629 3632 for name, kind, st in repo.cachevfs.readdir(stat=True):
3630 3633 if name.startswith(b'branch2'):
3631 3634 filtername = name.partition(b'-')[2] or b'unfiltered'
3632 3635 ui.status(
3633 3636 b'%s - %s\n' % (filtername, util.bytecount(st.st_size))
3634 3637 )
3635 3638 return
3636 3639 if not filter:
3637 3640 filter = None
3638 3641 subsettable = getbranchmapsubsettable()
3639 3642 if filter is None:
3640 3643 repo = repo.unfiltered()
3641 3644 else:
3642 3645 repo = repoview.repoview(repo, filter)
3643 3646
3644 3647 repo.branchmap() # make sure we have a relevant, up-to-date branchmap
3645 3648
3646 3649 try:
3647 3650 fromfile = branchmap.branchcache.fromfile
3648 3651 except AttributeError:
3649 3652 # older versions
3650 3653 fromfile = branchmap.read
3651 3654
3652 3655 currentfilter = filter
3653 3656 # try once without timer, the filter may not be cached
3654 3657 while fromfile(repo) is None:
3655 3658 currentfilter = subsettable.get(currentfilter)
3656 3659 if currentfilter is None:
3657 3660 raise error.Abort(
3658 3661 b'No branchmap cached for %s repo' % (filter or b'unfiltered')
3659 3662 )
3660 3663 repo = repo.filtered(currentfilter)
3661 3664 timer, fm = gettimer(ui, opts)
3662 3665
3663 3666 def setup():
3664 3667 if clearrevlogs:
3665 3668 clearchangelog(repo)
3666 3669
3667 3670 def bench():
3668 3671 fromfile(repo)
3669 3672
3670 3673 timer(bench, setup=setup)
3671 3674 fm.end()
3672 3675
3673 3676
3674 3677 @command(b'perf::loadmarkers|perfloadmarkers')
3675 3678 def perfloadmarkers(ui, repo):
3676 3679 """benchmark the time to parse the on-disk markers for a repo
3677 3680
3678 3681 Result is the number of markers in the repo."""
3679 3682 timer, fm = gettimer(ui)
3680 3683 svfs = getsvfs(repo)
3681 3684 timer(lambda: len(obsolete.obsstore(repo, svfs)))
3682 3685 fm.end()
3683 3686
3684 3687
3685 3688 @command(
3686 3689 b'perf::lrucachedict|perflrucachedict',
3687 3690 formatteropts
3688 3691 + [
3689 3692 (b'', b'costlimit', 0, b'maximum total cost of items in cache'),
3690 3693 (b'', b'mincost', 0, b'smallest cost of items in cache'),
3691 3694 (b'', b'maxcost', 100, b'maximum cost of items in cache'),
3692 3695 (b'', b'size', 4, b'size of cache'),
3693 3696 (b'', b'gets', 10000, b'number of key lookups'),
3694 3697 (b'', b'sets', 10000, b'number of key sets'),
3695 3698 (b'', b'mixed', 10000, b'number of mixed mode operations'),
3696 3699 (
3697 3700 b'',
3698 3701 b'mixedgetfreq',
3699 3702 50,
3700 3703 b'frequency of get vs set ops in mixed mode',
3701 3704 ),
3702 3705 ],
3703 3706 norepo=True,
3704 3707 )
3705 3708 def perflrucache(
3706 3709 ui,
3707 3710 mincost=0,
3708 3711 maxcost=100,
3709 3712 costlimit=0,
3710 3713 size=4,
3711 3714 gets=10000,
3712 3715 sets=10000,
3713 3716 mixed=10000,
3714 3717 mixedgetfreq=50,
3715 3718 **opts
3716 3719 ):
3717 3720 opts = _byteskwargs(opts)
3718 3721
3719 3722 def doinit():
3720 3723 for i in _xrange(10000):
3721 3724 util.lrucachedict(size)
3722 3725
3723 3726 costrange = list(range(mincost, maxcost + 1))
3724 3727
3725 3728 values = []
3726 3729 for i in _xrange(size):
3727 3730 values.append(random.randint(0, _maxint))
3728 3731
3729 3732 # Get mode fills the cache and tests raw lookup performance with no
3730 3733 # eviction.
3731 3734 getseq = []
3732 3735 for i in _xrange(gets):
3733 3736 getseq.append(random.choice(values))
3734 3737
3735 3738 def dogets():
3736 3739 d = util.lrucachedict(size)
3737 3740 for v in values:
3738 3741 d[v] = v
3739 3742 for key in getseq:
3740 3743 value = d[key]
3741 3744 value # silence pyflakes warning
3742 3745
3743 3746 def dogetscost():
3744 3747 d = util.lrucachedict(size, maxcost=costlimit)
3745 3748 for i, v in enumerate(values):
3746 3749 d.insert(v, v, cost=costs[i])
3747 3750 for key in getseq:
3748 3751 try:
3749 3752 value = d[key]
3750 3753 value # silence pyflakes warning
3751 3754 except KeyError:
3752 3755 pass
3753 3756
3754 3757 # Set mode tests insertion speed with cache eviction.
3755 3758 setseq = []
3756 3759 costs = []
3757 3760 for i in _xrange(sets):
3758 3761 setseq.append(random.randint(0, _maxint))
3759 3762 costs.append(random.choice(costrange))
3760 3763
3761 3764 def doinserts():
3762 3765 d = util.lrucachedict(size)
3763 3766 for v in setseq:
3764 3767 d.insert(v, v)
3765 3768
3766 3769 def doinsertscost():
3767 3770 d = util.lrucachedict(size, maxcost=costlimit)
3768 3771 for i, v in enumerate(setseq):
3769 3772 d.insert(v, v, cost=costs[i])
3770 3773
3771 3774 def dosets():
3772 3775 d = util.lrucachedict(size)
3773 3776 for v in setseq:
3774 3777 d[v] = v
3775 3778
3776 3779 # Mixed mode randomly performs gets and sets with eviction.
3777 3780 mixedops = []
3778 3781 for i in _xrange(mixed):
3779 3782 r = random.randint(0, 100)
3780 3783 if r < mixedgetfreq:
3781 3784 op = 0
3782 3785 else:
3783 3786 op = 1
3784 3787
3785 3788 mixedops.append(
3786 3789 (op, random.randint(0, size * 2), random.choice(costrange))
3787 3790 )
3788 3791
3789 3792 def domixed():
3790 3793 d = util.lrucachedict(size)
3791 3794
3792 3795 for op, v, cost in mixedops:
3793 3796 if op == 0:
3794 3797 try:
3795 3798 d[v]
3796 3799 except KeyError:
3797 3800 pass
3798 3801 else:
3799 3802 d[v] = v
3800 3803
3801 3804 def domixedcost():
3802 3805 d = util.lrucachedict(size, maxcost=costlimit)
3803 3806
3804 3807 for op, v, cost in mixedops:
3805 3808 if op == 0:
3806 3809 try:
3807 3810 d[v]
3808 3811 except KeyError:
3809 3812 pass
3810 3813 else:
3811 3814 d.insert(v, v, cost=cost)
3812 3815
3813 3816 benches = [
3814 3817 (doinit, b'init'),
3815 3818 ]
3816 3819
3817 3820 if costlimit:
3818 3821 benches.extend(
3819 3822 [
3820 3823 (dogetscost, b'gets w/ cost limit'),
3821 3824 (doinsertscost, b'inserts w/ cost limit'),
3822 3825 (domixedcost, b'mixed w/ cost limit'),
3823 3826 ]
3824 3827 )
3825 3828 else:
3826 3829 benches.extend(
3827 3830 [
3828 3831 (dogets, b'gets'),
3829 3832 (doinserts, b'inserts'),
3830 3833 (dosets, b'sets'),
3831 3834 (domixed, b'mixed'),
3832 3835 ]
3833 3836 )
3834 3837
3835 3838 for fn, title in benches:
3836 3839 timer, fm = gettimer(ui, opts)
3837 3840 timer(fn, title=title)
3838 3841 fm.end()
3839 3842
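For readers unfamiliar with `util.lrucachedict`, the benchmark above exercises two flavors of the API: plain mapping-style access with size-based eviction, and `insert()` with a per-item cost against a `maxcost` budget. A short usage sketch restricted to the calls shown above:

    from mercurial import util

    d = util.lrucachedict(4)             # room for four entries
    for k in (b'a', b'b', b'c', b'd', b'e'):
        d[k] = k                         # inserting b'e' evicts b'a'
    try:
        d[b'a']
    except KeyError:
        pass                             # least-recently-used entry is gone

    dc = util.lrucachedict(4, maxcost=100)
    dc.insert(b'big', 1, cost=60)
    dc.insert(b'bigger', 2, cost=60)     # total cost 120 > 100: evicts b'big'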
3840 3843
3841 3844 @command(
3842 3845 b'perf::write|perfwrite',
3843 3846 formatteropts
3844 3847 + [
3845 3848 (b'', b'write-method', b'write', b'ui write method'),
3846 3849 (b'', b'nlines', 100, b'number of lines'),
3847 3850 (b'', b'nitems', 100, b'number of items (per line)'),
3848 3851 (b'', b'item', b'x', b'item that is written'),
3849 3852 (b'', b'batch-line', None, b'pass whole line to write method at once'),
3850 3853 (b'', b'flush-line', None, b'flush after each line'),
3851 3854 ],
3852 3855 )
3853 3856 def perfwrite(ui, repo, **opts):
3854 3857 """microbenchmark ui.write (and others)"""
3855 3858 opts = _byteskwargs(opts)
3856 3859
3857 3860 write = getattr(ui, _sysstr(opts[b'write_method']))
3858 3861 nlines = int(opts[b'nlines'])
3859 3862 nitems = int(opts[b'nitems'])
3860 3863 item = opts[b'item']
3861 3864 batch_line = opts.get(b'batch_line')
3862 3865 flush_line = opts.get(b'flush_line')
3863 3866
3864 3867 if batch_line:
3865 3868 line = item * nitems + b'\n'
3866 3869
3867 3870 def benchmark():
3868 3871 for i in pycompat.xrange(nlines):
3869 3872 if batch_line:
3870 3873 write(line)
3871 3874 else:
3872 3875 for i in pycompat.xrange(nitems):
3873 3876 write(item)
3874 3877 write(b'\n')
3875 3878 if flush_line:
3876 3879 ui.flush()
3877 3880 ui.flush()
3878 3881
3879 3882 timer, fm = gettimer(ui, opts)
3880 3883 timer(benchmark)
3881 3884 fm.end()
3882 3885
3883 3886
3884 3887 def uisetup(ui):
3885 3888 if util.safehasattr(cmdutil, b'openrevlog') and not util.safehasattr(
3886 3889 commands, b'debugrevlogopts'
3887 3890 ):
3888 3891 # for "historical portability":
3889 3892 # In this case, Mercurial should be 1.9 (or a79fea6b3e77) -
3890 3893 # 3.7 (or 5606f7d0d063). Therefore, '--dir' option for
3891 3894 # openrevlog() should cause failure, because it has only
3892 3895 # been available since 3.5 (or 49c583ca48c4).
3893 3896 def openrevlog(orig, repo, cmd, file_, opts):
3894 3897 if opts.get(b'dir') and not util.safehasattr(repo, b'dirlog'):
3895 3898 raise error.Abort(
3896 3899 b"This version doesn't support --dir option",
3897 3900 hint=b"use 3.5 or later",
3898 3901 )
3899 3902 return orig(repo, cmd, file_, opts)
3900 3903
3901 3904 extensions.wrapfunction(cmdutil, b'openrevlog', openrevlog)
3902 3905
3903 3906
3904 3907 @command(
3905 3908 b'perf::progress|perfprogress',
3906 3909 formatteropts
3907 3910 + [
3908 3911 (b'', b'topic', b'topic', b'topic for progress messages'),
3909 3912 (b'c', b'total', 1000000, b'total value we are progressing to'),
3910 3913 ],
3911 3914 norepo=True,
3912 3915 )
3913 3916 def perfprogress(ui, topic=None, total=None, **opts):
3914 3917 """printing of progress bars"""
3915 3918 opts = _byteskwargs(opts)
3916 3919
3917 3920 timer, fm = gettimer(ui, opts)
3918 3921
3919 3922 def doprogress():
3920 3923 with ui.makeprogress(topic, total=total) as progress:
3921 3924 for i in _xrange(total):
3922 3925 progress.increment()
3923 3926
3924 3927 timer(doprogress)
3925 3928 fm.end()
@@ -1,2977 +1,2984 b''
1 1 /*
2 2 parsers.c - efficient content parsing
3 3
4 4 Copyright 2008 Olivia Mackall <olivia@selenic.com> and others
5 5
6 6 This software may be used and distributed according to the terms of
7 7 the GNU General Public License, incorporated herein by reference.
8 8 */
9 9
10 10 #define PY_SSIZE_T_CLEAN
11 11 #include <Python.h>
12 12 #include <assert.h>
13 13 #include <ctype.h>
14 14 #include <limits.h>
15 15 #include <stddef.h>
16 16 #include <stdlib.h>
17 17 #include <string.h>
18 #include <structmember.h>
18 19
19 20 #include "bitmanipulation.h"
20 21 #include "charencode.h"
21 22 #include "compat.h"
22 23 #include "revlog.h"
23 24 #include "util.h"
24 25
25 26 #ifdef IS_PY3K
26 27 /* The mapping of Python types is meant to be temporary to get Python
27 28 * 3 to compile. We should remove this once Python 3 is fully
28 29 * supported and proper types are used in the extensions themselves. */
29 30 #define PyInt_Check PyLong_Check
30 31 #define PyInt_FromLong PyLong_FromLong
31 32 #define PyInt_FromSsize_t PyLong_FromSsize_t
32 33 #define PyInt_AsLong PyLong_AsLong
33 34 #endif
34 35
35 36 typedef struct indexObjectStruct indexObject;
36 37
37 38 typedef struct {
38 39 int children[16];
39 40 } nodetreenode;
40 41
41 42 typedef struct {
42 43 int abi_version;
43 44 Py_ssize_t (*index_length)(const indexObject *);
44 45 const char *(*index_node)(indexObject *, Py_ssize_t);
45 46 int (*index_parents)(PyObject *, int, int *);
46 47 } Revlog_CAPI;
47 48
48 49 /*
49 50 * A base-16 trie for fast node->rev mapping.
50 51 *
51 52 * Positive value is index of the next node in the trie
52 53 * Negative value is a leaf: -(rev + 2)
53 54 * Zero is empty
54 55 */
55 56 typedef struct {
56 57 indexObject *index;
57 58 nodetreenode *nodes;
58 59 Py_ssize_t nodelen;
59 60 size_t length; /* # nodes in use */
60 61 size_t capacity; /* # nodes allocated */
61 62 int depth; /* maximum depth of tree */
62 63 int splits; /* # splits performed */
63 64 } nodetree;
64 65
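The slot encoding documented above is compact enough to deserve a worked example: a positive value points at another nodetreenode, zero means empty, and a revision r is stored as -(r + 2) so that rev 0 does not collide with the empty marker. A Python sketch of the encoding (helper names hypothetical):

    def encode_leaf(rev):
        return -(rev + 2)        # revs map to -2, -3, ..., leaving 0 free

    def decode_slot(v):
        if v == 0:
            return ('empty', None)
        if v > 0:
            return ('node', v)   # index of the next nodetreenode
        return ('leaf', -v - 2)  # the revision stored in this slot

    assert decode_slot(encode_leaf(0)) == ('leaf', 0)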
65 66 typedef struct {
66 67 PyObject_HEAD /* ; */
67 68 nodetree nt;
68 69 } nodetreeObject;
69 70
70 71 /*
71 72 * This class has two behaviors.
72 73 *
73 74 * When used in a list-like way (with integer keys), we decode an
74 75 * entry in a RevlogNG index file on demand. We have limited support for
75 76 * integer-keyed insert and delete, only at elements right before the
76 77 * end.
77 78 *
78 79 * With string keys, we lazily perform a reverse mapping from node to
79 80 * rev, using a base-16 trie.
80 81 */
81 82 struct indexObjectStruct {
82 83 PyObject_HEAD
83 84 /* Type-specific fields go here. */
84 85 PyObject *data; /* raw bytes of index */
85 86 Py_ssize_t nodelen; /* digest size of the hash, 20 for SHA-1 */
86 87 PyObject *nullentry; /* fast path for references to null */
87 88 Py_buffer buf; /* buffer of data */
88 89 const char **offsets; /* populated on demand */
89 90 Py_ssize_t length; /* current on-disk number of elements */
90 91 unsigned new_length; /* number of added elements */
91 92 unsigned added_length; /* space reserved for added elements */
92 93 char *added; /* populated on demand */
93 94 PyObject *headrevs; /* cache, invalidated on changes */
94 95 PyObject *filteredrevs; /* filtered revs set */
95 96 nodetree nt; /* base-16 trie */
96 97 int ntinitialized; /* 0 or 1 */
97 98 int ntrev; /* last rev scanned */
98 99 int ntlookups; /* # lookups */
99 100 int ntmisses; /* # lookups that miss the cache */
100 101 int inlined;
101 102 long hdrsize; /* size of index headers. Differs in v1 v.s. v2 format */
102 103 };
103 104
104 105 static Py_ssize_t index_length(const indexObject *self)
105 106 {
106 107 return self->length + self->new_length;
107 108 }
108 109
109 110 static const char nullid[32] = {0};
110 111 static const Py_ssize_t nullrev = -1;
111 112
112 113 static Py_ssize_t inline_scan(indexObject *self, const char **offsets);
113 114
114 115 static int index_find_node(indexObject *self, const char *node);
115 116
116 117 #if LONG_MAX == 0x7fffffffL
117 118 static const char *const v1_tuple_format = PY23("Kiiiiiis#", "Kiiiiiiy#");
118 119 static const char *const v2_tuple_format = PY23("Kiiiiiis#Ki", "Kiiiiiiy#Ki");
119 120 #else
120 121 static const char *const v1_tuple_format = PY23("kiiiiiis#", "kiiiiiiy#");
121 122 static const char *const v2_tuple_format = PY23("kiiiiiis#ki", "kiiiiiiy#ki");
122 123 #endif
123 124
124 125 /* A RevlogNG v1 index entry is 64 bytes long. */
125 126 static const long v1_hdrsize = 64;
126 127
127 128 /* A Revlogv2 index entry is 96 bytes long. */
128 129 static const long v2_hdrsize = 96;
129 130
130 131 static void raise_revlog_error(void)
131 132 {
132 133 PyObject *mod = NULL, *dict = NULL, *errclass = NULL;
133 134
134 135 mod = PyImport_ImportModule("mercurial.error");
135 136 if (mod == NULL) {
136 137 goto cleanup;
137 138 }
138 139
139 140 dict = PyModule_GetDict(mod);
140 141 if (dict == NULL) {
141 142 goto cleanup;
142 143 }
143 144 Py_INCREF(dict);
144 145
145 146 errclass = PyDict_GetItemString(dict, "RevlogError");
146 147 if (errclass == NULL) {
147 148 PyErr_SetString(PyExc_SystemError,
148 149 "could not find RevlogError");
149 150 goto cleanup;
150 151 }
151 152
152 153 /* value of exception is ignored by callers */
153 154 PyErr_SetString(errclass, "RevlogError");
154 155
155 156 cleanup:
156 157 Py_XDECREF(dict);
157 158 Py_XDECREF(mod);
158 159 }
159 160
160 161 /*
161 162 * Return a pointer to the beginning of a RevlogNG record.
162 163 */
163 164 static const char *index_deref(indexObject *self, Py_ssize_t pos)
164 165 {
165 166 if (pos >= self->length)
166 167 return self->added + (pos - self->length) * self->hdrsize;
167 168
168 169 if (self->inlined && pos > 0) {
169 170 if (self->offsets == NULL) {
170 171 Py_ssize_t ret;
171 172 self->offsets =
172 173 PyMem_Malloc(self->length * sizeof(*self->offsets));
173 174 if (self->offsets == NULL)
174 175 return (const char *)PyErr_NoMemory();
175 176 ret = inline_scan(self, self->offsets);
176 177 if (ret == -1) {
177 178 return NULL;
178 179 };
179 180 }
180 181 return self->offsets[pos];
181 182 }
182 183
183 184 return (const char *)(self->buf.buf) + pos * self->hdrsize;
184 185 }
185 186
186 187 /*
187 188 * Get parents of the given rev.
188 189 *
189 190 * The specified rev must be valid and must not be nullrev. A returned
190 191 * parent revision may be nullrev, but is guaranteed to be in valid range.
191 192 */
192 193 static inline int index_get_parents(indexObject *self, Py_ssize_t rev, int *ps,
193 194 int maxrev)
194 195 {
195 196 const char *data = index_deref(self, rev);
196 197
197 198 ps[0] = getbe32(data + 24);
198 199 ps[1] = getbe32(data + 28);
199 200
200 201 /* If the index file is corrupted, ps[] may point to invalid revisions,
201 202 * so trusting them unconditionally risks a buffer overflow. */
202 203 if (ps[0] < -1 || ps[0] > maxrev || ps[1] < -1 || ps[1] > maxrev) {
203 204 PyErr_SetString(PyExc_ValueError, "parent out of range");
204 205 return -1;
205 206 }
206 207 return 0;
207 208 }
208 209
209 210 /*
210 211 * Get parents of the given rev.
211 212 *
212 213 * If the specified rev is out of range, IndexError will be raised. If the
213 214 * revlog entry is corrupted, ValueError may be raised.
214 215 *
215 216 * Returns 0 on success or -1 on failure.
216 217 */
217 218 static int HgRevlogIndex_GetParents(PyObject *op, int rev, int *ps)
218 219 {
219 220 int tiprev;
220 221 if (!op || !HgRevlogIndex_Check(op) || !ps) {
221 222 PyErr_BadInternalCall();
222 223 return -1;
223 224 }
224 225 tiprev = (int)index_length((indexObject *)op) - 1;
225 226 if (rev < -1 || rev > tiprev) {
226 227 PyErr_Format(PyExc_IndexError, "rev out of range: %d", rev);
227 228 return -1;
228 229 } else if (rev == -1) {
229 230 ps[0] = ps[1] = -1;
230 231 return 0;
231 232 } else {
232 233 return index_get_parents((indexObject *)op, rev, ps, tiprev);
233 234 }
234 235 }
235 236
236 237 static inline int64_t index_get_start(indexObject *self, Py_ssize_t rev)
237 238 {
238 239 const char *data;
239 240 uint64_t offset;
240 241
241 242 if (rev == nullrev)
242 243 return 0;
243 244
244 245 data = index_deref(self, rev);
245 246 offset = getbe32(data + 4);
246 247 if (rev == 0) {
247 248 /* mask out version number for the first entry */
248 249 offset &= 0xFFFF;
249 250 } else {
250 251 uint32_t offset_high = getbe32(data);
251 252 offset |= ((uint64_t)offset_high) << 32;
252 253 }
253 254 return (int64_t)(offset >> 16);
254 255 }
255 256
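The 8-byte field decoded above packs a 48-bit offset into the data file on top of 16 bits of flags, and entry 0 reuses those low 16 bits for the revlog version number, which is why they are masked out and why revision 0 always starts at offset 0. A sketch of the decoding (the helper name is hypothetical):

    def get_start(offset_flags, rev):
        if rev == 0:
            # entry 0 stores the revlog version in the low 16 bits
            offset_flags &= 0xFFFF
        # low 16 bits are flags; the remaining 48 bits are the offset,
        # so revision 0 always decodes to a start offset of zero
        return offset_flags >> 16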
256 257 static inline int index_get_length(indexObject *self, Py_ssize_t rev)
257 258 {
258 259 const char *data;
259 260 int tmp;
260 261
261 262 if (rev == nullrev)
262 263 return 0;
263 264
264 265 data = index_deref(self, rev);
265 266
266 267 tmp = (int)getbe32(data + 8);
267 268 if (tmp < 0) {
268 269 PyErr_Format(PyExc_OverflowError,
269 270 "revlog entry size out of bound (%d)", tmp);
270 271 return -1;
271 272 }
272 273 return tmp;
273 274 }
274 275
275 276 /*
276 277 * RevlogNG format (all in big endian, data may be inlined):
277 278 * 6 bytes: offset
278 279 * 2 bytes: flags
279 280 * 4 bytes: compressed length
280 281 * 4 bytes: uncompressed length
281 282 * 4 bytes: base revision
282 283 * 4 bytes: link revision
283 284 * 4 bytes: parent 1 revision
284 285 * 4 bytes: parent 2 revision
285 286 * 32 bytes: nodeid (only 20 bytes used with SHA-1)
286 287 */
287 288 static PyObject *index_get(indexObject *self, Py_ssize_t pos)
288 289 {
289 290 uint64_t offset_flags, sidedata_offset;
290 291 int comp_len, uncomp_len, base_rev, link_rev, parent_1, parent_2,
291 292 sidedata_comp_len;
292 293 const char *c_node_id;
293 294 const char *data;
294 295 Py_ssize_t length = index_length(self);
295 296
296 297 if (pos == nullrev) {
297 298 Py_INCREF(self->nullentry);
298 299 return self->nullentry;
299 300 }
300 301
301 302 if (pos < 0 || pos >= length) {
302 303 PyErr_SetString(PyExc_IndexError, "revlog index out of range");
303 304 return NULL;
304 305 }
305 306
306 307 data = index_deref(self, pos);
307 308 if (data == NULL)
308 309 return NULL;
309 310
310 311 offset_flags = getbe32(data + 4);
311 312 /*
312 313 * The first entry on-disk needs the version number masked out,
313 314 * but this doesn't apply if entries are added to an empty index.
314 315 */
315 316 if (self->length && pos == 0)
316 317 offset_flags &= 0xFFFF;
317 318 else {
318 319 uint32_t offset_high = getbe32(data);
319 320 offset_flags |= ((uint64_t)offset_high) << 32;
320 321 }
321 322
322 323 comp_len = getbe32(data + 8);
323 324 uncomp_len = getbe32(data + 12);
324 325 base_rev = getbe32(data + 16);
325 326 link_rev = getbe32(data + 20);
326 327 parent_1 = getbe32(data + 24);
327 328 parent_2 = getbe32(data + 28);
328 329 c_node_id = data + 32;
329 330
330 331 if (self->hdrsize == v1_hdrsize) {
331 332 return Py_BuildValue(v1_tuple_format, offset_flags, comp_len,
332 333 uncomp_len, base_rev, link_rev, parent_1,
333 334 parent_2, c_node_id, self->nodelen);
334 335 } else {
335 336 sidedata_offset = getbe64(data + 64);
336 337 sidedata_comp_len = getbe32(data + 72);
337 338
338 339 return Py_BuildValue(v2_tuple_format, offset_flags, comp_len,
339 340 uncomp_len, base_rev, link_rev, parent_1,
340 341 parent_2, c_node_id, self->nodelen,
341 342 sidedata_offset, sidedata_comp_len);
342 343 }
343 344 }
344 345
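Given the layout in the comment above, a v1 record can be decoded with a single struct.unpack. This is only a sketch of the byte layout (the `parse_v1_entry` helper is hypothetical, not the codepath Mercurial uses):

    import struct

    def parse_v1_entry(entry):
        # entry: one raw 64-byte RevlogNG v1 index record
        (offset_flags, comp_len, uncomp_len, base_rev,
         link_rev, p1, p2) = struct.unpack(">Qiiiiii", entry[:32])
        node = entry[32:52]            # SHA-1 uses 20 of the 32 node bytes
        offset = offset_flags >> 16    # 48-bit offset into the data file
        flags = offset_flags & 0xFFFF  # 16 bits of per-revision flags
        # NB: the first on-disk entry keeps the revlog version in the low
        # 16 bits, which index_get() masks out; this sketch does not.
        return (offset, flags, comp_len, uncomp_len, base_rev, link_rev,
                p1, p2, node)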
345 346 /*
346 347 * Return the hash of node corresponding to the given rev.
347 348 */
348 349 static const char *index_node(indexObject *self, Py_ssize_t pos)
349 350 {
350 351 Py_ssize_t length = index_length(self);
351 352 const char *data;
352 353
353 354 if (pos == nullrev)
354 355 return nullid;
355 356
356 357 if (pos >= length)
357 358 return NULL;
358 359
359 360 data = index_deref(self, pos);
360 361 return data ? data + 32 : NULL;
361 362 }
362 363
363 364 /*
364 365 * Return the hash of the node corresponding to the given rev. The
365 366 * rev is assumed to be existing. If not, an exception is set.
366 367 */
367 368 static const char *index_node_existing(indexObject *self, Py_ssize_t pos)
368 369 {
369 370 const char *node = index_node(self, pos);
370 371 if (node == NULL) {
371 372 PyErr_Format(PyExc_IndexError, "could not access rev %d",
372 373 (int)pos);
373 374 }
374 375 return node;
375 376 }
376 377
377 378 static int nt_insert(nodetree *self, const char *node, int rev);
378 379
379 380 static int node_check(Py_ssize_t nodelen, PyObject *obj, char **node)
380 381 {
381 382 Py_ssize_t thisnodelen;
382 383 if (PyBytes_AsStringAndSize(obj, node, &thisnodelen) == -1)
383 384 return -1;
384 385 if (nodelen == thisnodelen)
385 386 return 0;
386 387 PyErr_Format(PyExc_ValueError, "node len %zd != expected node len %zd",
387 388 thisnodelen, nodelen);
388 389 return -1;
389 390 }
390 391
391 392 static PyObject *index_append(indexObject *self, PyObject *obj)
392 393 {
393 394 uint64_t offset_flags, sidedata_offset;
394 395 int rev, comp_len, uncomp_len, base_rev, link_rev, parent_1, parent_2;
395 396 Py_ssize_t c_node_id_len, sidedata_comp_len;
396 397 const char *c_node_id;
397 398 char *data;
398 399
399 400 if (self->hdrsize == v1_hdrsize) {
400 401 if (!PyArg_ParseTuple(obj, v1_tuple_format, &offset_flags,
401 402 &comp_len, &uncomp_len, &base_rev,
402 403 &link_rev, &parent_1, &parent_2,
403 404 &c_node_id, &c_node_id_len)) {
404 405 PyErr_SetString(PyExc_TypeError, "8-tuple required");
405 406 return NULL;
406 407 }
407 408 } else {
408 409 if (!PyArg_ParseTuple(obj, v2_tuple_format, &offset_flags,
409 410 &comp_len, &uncomp_len, &base_rev,
410 411 &link_rev, &parent_1, &parent_2,
411 412 &c_node_id, &c_node_id_len,
412 413 &sidedata_offset, &sidedata_comp_len)) {
413 414 PyErr_SetString(PyExc_TypeError, "10-tuple required");
414 415 return NULL;
415 416 }
416 417 }
417 418
418 419 if (c_node_id_len != self->nodelen) {
419 420 PyErr_SetString(PyExc_TypeError, "invalid node");
420 421 return NULL;
421 422 }
422 423
423 424 if (self->new_length == self->added_length) {
424 425 size_t new_added_length =
425 426 self->added_length ? self->added_length * 2 : 4096;
426 427 void *new_added = PyMem_Realloc(self->added, new_added_length *
427 428 self->hdrsize);
428 429 if (!new_added)
429 430 return PyErr_NoMemory();
430 431 self->added = new_added;
431 432 self->added_length = new_added_length;
432 433 }
433 434 rev = self->length + self->new_length;
434 435 data = self->added + self->hdrsize * self->new_length++;
435 436 putbe32(offset_flags >> 32, data);
436 437 putbe32(offset_flags & 0xffffffffU, data + 4);
437 438 putbe32(comp_len, data + 8);
438 439 putbe32(uncomp_len, data + 12);
439 440 putbe32(base_rev, data + 16);
440 441 putbe32(link_rev, data + 20);
441 442 putbe32(parent_1, data + 24);
442 443 putbe32(parent_2, data + 28);
443 444 memcpy(data + 32, c_node_id, c_node_id_len);
444 445 /* Padding since SHA-1 is only 20 bytes for now */
445 446 memset(data + 32 + c_node_id_len, 0, 32 - c_node_id_len);
446 447 if (self->hdrsize != v1_hdrsize) {
447 448 putbe64(sidedata_offset, data + 64);
448 449 putbe32(sidedata_comp_len, data + 72);
449 450 /* Padding for 96-byte alignment */
450 451 memset(data + 76, 0, self->hdrsize - 76);
451 452 }
452 453
453 454 if (self->ntinitialized)
454 455 nt_insert(&self->nt, c_node_id, rev);
455 456
456 457 Py_CLEAR(self->headrevs);
457 458 Py_RETURN_NONE;
458 459 }
459 460
460 461 /* Replace an existing index entry's sidedata offset and length with new ones.
461 462 This cannot be used outside of the context of sidedata rewriting,
462 463 inside the transaction that creates the given revision. */
463 464 static PyObject *index_replace_sidedata_info(indexObject *self, PyObject *args)
464 465 {
465 466 uint64_t sidedata_offset;
466 467 int rev;
467 468 Py_ssize_t sidedata_comp_len;
468 469 char *data;
469 470 #if LONG_MAX == 0x7fffffffL
470 471 const char *const sidedata_format = PY23("nKi", "nKi");
471 472 #else
472 473 const char *const sidedata_format = PY23("nki", "nki");
473 474 #endif
474 475
475 476 if (self->hdrsize == v1_hdrsize || self->inlined) {
476 477 /*
477 478 There is a bug in the transaction handling when going from an
478 479 inline revlog to a separate index and data file. Turn it off until
479 480 it's fixed, since v2 revlogs sometimes get rewritten on exchange.
480 481 See issue6485.
481 482 */
482 483 raise_revlog_error();
483 484 return NULL;
484 485 }
485 486
486 487 if (!PyArg_ParseTuple(args, sidedata_format, &rev, &sidedata_offset,
487 488 &sidedata_comp_len))
488 489 return NULL;
489 490
490 491 if (rev < 0 || rev >= index_length(self)) {
491 492 PyErr_SetString(PyExc_IndexError, "revision outside index");
492 493 return NULL;
493 494 }
494 495 if (rev < self->length) {
495 496 PyErr_SetString(
496 497 PyExc_IndexError,
497 498 "cannot rewrite entries outside of this transaction");
498 499 return NULL;
499 500 }
500 501
501 502 /* Find the newly added node, offset from the "already on-disk" length
502 503 */
503 504 data = self->added + self->hdrsize * (rev - self->length);
504 505 putbe64(sidedata_offset, data + 64);
505 506 putbe32(sidedata_comp_len, data + 72);
506 507
507 508 Py_RETURN_NONE;
508 509 }
509 510
510 511 static PyObject *index_stats(indexObject *self)
511 512 {
512 513 PyObject *obj = PyDict_New();
513 514 PyObject *s = NULL;
514 515 PyObject *t = NULL;
515 516
516 517 if (obj == NULL)
517 518 return NULL;
518 519
519 520 #define istat(__n, __d) \
520 521 do { \
521 522 s = PyBytes_FromString(__d); \
522 523 t = PyInt_FromSsize_t(self->__n); \
523 524 if (!s || !t) \
524 525 goto bail; \
525 526 if (PyDict_SetItem(obj, s, t) == -1) \
526 527 goto bail; \
527 528 Py_CLEAR(s); \
528 529 Py_CLEAR(t); \
529 530 } while (0)
530 531
531 532 if (self->added_length)
532 533 istat(new_length, "index entries added");
533 534 istat(length, "revs in memory");
534 535 istat(ntlookups, "node trie lookups");
535 536 istat(ntmisses, "node trie misses");
536 537 istat(ntrev, "node trie last rev scanned");
537 538 if (self->ntinitialized) {
538 539 istat(nt.capacity, "node trie capacity");
539 540 istat(nt.depth, "node trie depth");
540 541 istat(nt.length, "node trie count");
541 542 istat(nt.splits, "node trie splits");
542 543 }
543 544
544 545 #undef istat
545 546
546 547 return obj;
547 548
548 549 bail:
549 550 Py_XDECREF(obj);
550 551 Py_XDECREF(s);
551 552 Py_XDECREF(t);
552 553 return NULL;
553 554 }
554 555
555 556 /*
556 557 * When we cache a list, we want to be sure the caller can't mutate
557 558 * the cached copy.
558 559 */
559 560 static PyObject *list_copy(PyObject *list)
560 561 {
561 562 Py_ssize_t len = PyList_GET_SIZE(list);
562 563 PyObject *newlist = PyList_New(len);
563 564 Py_ssize_t i;
564 565
565 566 if (newlist == NULL)
566 567 return NULL;
567 568
568 569 for (i = 0; i < len; i++) {
569 570 PyObject *obj = PyList_GET_ITEM(list, i);
570 571 Py_INCREF(obj);
571 572 PyList_SET_ITEM(newlist, i, obj);
572 573 }
573 574
574 575 return newlist;
575 576 }
576 577
577 578 static int check_filter(PyObject *filter, Py_ssize_t arg)
578 579 {
579 580 if (filter) {
580 581 PyObject *arglist, *result;
581 582 int isfiltered;
582 583
583 584 arglist = Py_BuildValue("(n)", arg);
584 585 if (!arglist) {
585 586 return -1;
586 587 }
587 588
588 589 result = PyObject_Call(filter, arglist, NULL);
589 590 Py_DECREF(arglist);
590 591 if (!result) {
591 592 return -1;
592 593 }
593 594
594 595 /* PyObject_IsTrue returns 1 if true, 0 if false, -1 if error,
595 596 * same as this function, so we can just return it directly. */
596 597 isfiltered = PyObject_IsTrue(result);
597 598 Py_DECREF(result);
598 599 return isfiltered;
599 600 } else {
600 601 return 0;
601 602 }
602 603 }
603 604
604 605 static inline void set_phase_from_parents(char *phases, int parent_1,
605 606 int parent_2, Py_ssize_t i)
606 607 {
607 608 if (parent_1 >= 0 && phases[parent_1] > phases[i])
608 609 phases[i] = phases[parent_1];
609 610 if (parent_2 >= 0 && phases[parent_2] > phases[i])
610 611 phases[i] = phases[parent_2];
611 612 }
612 613
613 614 static PyObject *reachableroots2(indexObject *self, PyObject *args)
614 615 {
615 616
616 617 /* Input */
617 618 long minroot;
618 619 PyObject *includepatharg = NULL;
619 620 int includepath = 0;
620 621 /* heads and roots are lists */
621 622 PyObject *heads = NULL;
622 623 PyObject *roots = NULL;
623 624 PyObject *reachable = NULL;
624 625
625 626 PyObject *val;
626 627 Py_ssize_t len = index_length(self);
627 628 long revnum;
628 629 Py_ssize_t k;
629 630 Py_ssize_t i;
630 631 Py_ssize_t l;
631 632 int r;
632 633 int parents[2];
633 634
634 635 /* Internal data structure:
635 636 * tovisit: array of length len+1 (all revs + nullrev), filled up to
636 637 * lentovisit
637 638 *
638 639 * revstates: array of length len+1 (all revs + nullrev) */
639 640 int *tovisit = NULL;
640 641 long lentovisit = 0;
641 642 enum { RS_SEEN = 1, RS_ROOT = 2, RS_REACHABLE = 4 };
642 643 char *revstates = NULL;
643 644
644 645 /* Get arguments */
645 646 if (!PyArg_ParseTuple(args, "lO!O!O!", &minroot, &PyList_Type, &heads,
646 647 &PyList_Type, &roots, &PyBool_Type,
647 648 &includepatharg))
648 649 goto bail;
649 650
650 651 if (includepatharg == Py_True)
651 652 includepath = 1;
652 653
653 654 /* Initialize return set */
654 655 reachable = PyList_New(0);
655 656 if (reachable == NULL)
656 657 goto bail;
657 658
658 659 /* Initialize internal datastructures */
659 660 tovisit = (int *)malloc((len + 1) * sizeof(int));
660 661 if (tovisit == NULL) {
661 662 PyErr_NoMemory();
662 663 goto bail;
663 664 }
664 665
665 666 revstates = (char *)calloc(len + 1, 1);
666 667 if (revstates == NULL) {
667 668 PyErr_NoMemory();
668 669 goto bail;
669 670 }
670 671
671 672 l = PyList_GET_SIZE(roots);
672 673 for (i = 0; i < l; i++) {
673 674 revnum = PyInt_AsLong(PyList_GET_ITEM(roots, i));
674 675 if (revnum == -1 && PyErr_Occurred())
675 676 goto bail;
676 677 /* If root is out of range, e.g. wdir(), it must be unreachable
677 678 * from heads. So we can just ignore it. */
678 679 if (revnum + 1 < 0 || revnum + 1 >= len + 1)
679 680 continue;
680 681 revstates[revnum + 1] |= RS_ROOT;
681 682 }
682 683
683 684 /* Populate tovisit with all the heads */
684 685 l = PyList_GET_SIZE(heads);
685 686 for (i = 0; i < l; i++) {
686 687 revnum = PyInt_AsLong(PyList_GET_ITEM(heads, i));
687 688 if (revnum == -1 && PyErr_Occurred())
688 689 goto bail;
689 690 if (revnum + 1 < 0 || revnum + 1 >= len + 1) {
690 691 PyErr_SetString(PyExc_IndexError, "head out of range");
691 692 goto bail;
692 693 }
693 694 if (!(revstates[revnum + 1] & RS_SEEN)) {
694 695 tovisit[lentovisit++] = (int)revnum;
695 696 revstates[revnum + 1] |= RS_SEEN;
696 697 }
697 698 }
698 699
699 700 /* Visit the tovisit list and find the reachable roots */
700 701 k = 0;
701 702 while (k < lentovisit) {
703 704 /* Add the node to reachable if it is a root */
703 704 revnum = tovisit[k++];
704 705 if (revstates[revnum + 1] & RS_ROOT) {
705 706 revstates[revnum + 1] |= RS_REACHABLE;
706 707 val = PyInt_FromLong(revnum);
707 708 if (val == NULL)
708 709 goto bail;
709 710 r = PyList_Append(reachable, val);
710 711 Py_DECREF(val);
711 712 if (r < 0)
712 713 goto bail;
713 714 if (includepath == 0)
714 715 continue;
715 716 }
716 717
717 718 /* Add its parents to the list of nodes to visit */
718 719 if (revnum == nullrev)
719 720 continue;
720 721 r = index_get_parents(self, revnum, parents, (int)len - 1);
721 722 if (r < 0)
722 723 goto bail;
723 724 for (i = 0; i < 2; i++) {
724 725 if (!(revstates[parents[i] + 1] & RS_SEEN) &&
725 726 parents[i] >= minroot) {
726 727 tovisit[lentovisit++] = parents[i];
727 728 revstates[parents[i] + 1] |= RS_SEEN;
728 729 }
729 730 }
730 731 }
731 732
732 733 /* Find all the nodes in between the roots we found and the heads
733 734 * and add them to the reachable set */
734 735 if (includepath == 1) {
735 736 long minidx = minroot;
736 737 if (minidx < 0)
737 738 minidx = 0;
738 739 for (i = minidx; i < len; i++) {
739 740 if (!(revstates[i + 1] & RS_SEEN))
740 741 continue;
741 742 r = index_get_parents(self, i, parents, (int)len - 1);
742 743 /* Corrupted index file, error is set from
743 744 * index_get_parents */
744 745 if (r < 0)
745 746 goto bail;
746 747 if (((revstates[parents[0] + 1] |
747 748 revstates[parents[1] + 1]) &
748 749 RS_REACHABLE) &&
749 750 !(revstates[i + 1] & RS_REACHABLE)) {
750 751 revstates[i + 1] |= RS_REACHABLE;
751 752 val = PyInt_FromSsize_t(i);
752 753 if (val == NULL)
753 754 goto bail;
754 755 r = PyList_Append(reachable, val);
755 756 Py_DECREF(val);
756 757 if (r < 0)
757 758 goto bail;
758 759 }
759 760 }
760 761 }
761 762
762 763 free(revstates);
763 764 free(tovisit);
764 765 return reachable;
765 766 bail:
766 767 Py_XDECREF(reachable);
767 768 free(revstates);
768 769 free(tovisit);
769 770 return NULL;
770 771 }
771 772
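reachableroots2 is easier to follow in pure Python: walk from the heads toward the roots (never below minroot), collect the roots that are reached, and, when includepath is set, make a second forward pass to pull in every revision sitting between those roots and the heads. A sketch under those assumptions, with `parents(rev)` returning the two parent revisions and `length` the number of revisions:

    def reachable_roots(minroot, heads, roots, parents, length,
                        includepath=False):
        roots = set(roots)
        seen = set()
        tovisit = []
        for h in heads:                # seed the walk with unique heads
            if h not in seen:
                seen.add(h)
                tovisit.append(h)
        reachable = set()
        k = 0
        while k < len(tovisit):        # phase 1: heads -> roots
            rev = tovisit[k]
            k += 1
            if rev in roots:
                reachable.add(rev)
                if not includepath:
                    continue
            if rev == -1:              # nullrev has no parents
                continue
            for p in parents(rev):
                if p not in seen and p >= minroot:
                    seen.add(p)
                    tovisit.append(p)
        if includepath:                # phase 2: fill in revs in between
            for rev in range(max(minroot, 0), length):
                if rev in seen and rev not in reachable and \
                        any(p in reachable for p in parents(rev)):
                    reachable.add(rev)
        return reachable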
772 773 static int add_roots_get_min(indexObject *self, PyObject *roots, char *phases,
773 774 char phase)
774 775 {
775 776 Py_ssize_t len = index_length(self);
776 777 PyObject *item;
777 778 PyObject *iterator;
778 779 int rev, minrev = -1;
779 780 char *node;
780 781
781 782 if (!PySet_Check(roots)) {
782 783 PyErr_SetString(PyExc_TypeError,
783 784 "roots must be a set of nodes");
784 785 return -2;
785 786 }
786 787 iterator = PyObject_GetIter(roots);
787 788 if (iterator == NULL)
788 789 return -2;
789 790 while ((item = PyIter_Next(iterator))) {
790 791 if (node_check(self->nodelen, item, &node) == -1)
791 792 goto failed;
792 793 rev = index_find_node(self, node);
793 794 /* null is implicitly public, so negative is invalid */
794 795 if (rev < 0 || rev >= len)
795 796 goto failed;
796 797 phases[rev] = phase;
797 798 if (minrev == -1 || minrev > rev)
798 799 minrev = rev;
799 800 Py_DECREF(item);
800 801 }
801 802 Py_DECREF(iterator);
802 803 return minrev;
803 804 failed:
804 805 Py_DECREF(iterator);
805 806 Py_DECREF(item);
806 807 return -2;
807 808 }
808 809
809 810 static PyObject *compute_phases_map_sets(indexObject *self, PyObject *args)
810 811 {
811 812 /* 0: public (untracked), 1: draft, 2: secret, 32: archive,
812 813 96: internal */
813 814 static const char trackedphases[] = {1, 2, 32, 96};
814 815 PyObject *roots = Py_None;
815 816 PyObject *phasesetsdict = NULL;
816 817 PyObject *phasesets[4] = {NULL, NULL, NULL, NULL};
817 818 Py_ssize_t len = index_length(self);
818 819 char *phases = NULL;
819 820 int minphaserev = -1, rev, i;
820 821 const int numphases = (int)(sizeof(phasesets) / sizeof(phasesets[0]));
821 822
822 823 if (!PyArg_ParseTuple(args, "O", &roots))
823 824 return NULL;
824 825 if (roots == NULL || !PyDict_Check(roots)) {
825 826 PyErr_SetString(PyExc_TypeError, "roots must be a dictionary");
826 827 return NULL;
827 828 }
828 829
829 830 phases = calloc(len, 1);
830 831 if (phases == NULL) {
831 832 PyErr_NoMemory();
832 833 return NULL;
833 834 }
834 835
835 836 for (i = 0; i < numphases; ++i) {
836 837 PyObject *pyphase = PyInt_FromLong(trackedphases[i]);
837 838 PyObject *phaseroots = NULL;
838 839 if (pyphase == NULL)
839 840 goto release;
840 841 phaseroots = PyDict_GetItem(roots, pyphase);
841 842 Py_DECREF(pyphase);
842 843 if (phaseroots == NULL)
843 844 continue;
844 845 rev = add_roots_get_min(self, phaseroots, phases,
845 846 trackedphases[i]);
846 847 if (rev == -2)
847 848 goto release;
848 849 if (rev != -1 && (minphaserev == -1 || rev < minphaserev))
849 850 minphaserev = rev;
850 851 }
851 852
852 853 for (i = 0; i < numphases; ++i) {
853 854 phasesets[i] = PySet_New(NULL);
854 855 if (phasesets[i] == NULL)
855 856 goto release;
856 857 }
857 858
858 859 if (minphaserev == -1)
859 860 minphaserev = len;
860 861 for (rev = minphaserev; rev < len; ++rev) {
861 862 PyObject *pyphase = NULL;
862 863 PyObject *pyrev = NULL;
863 864 int parents[2];
864 865 /*
865 866 * The parent lookup could be skipped for phaseroots, but
866 867 * phase --force would historically not recompute them
867 868 * correctly, leaving descendants with a lower phase around.
868 869 * As such, unconditionally recompute the phase.
869 870 */
870 871 if (index_get_parents(self, rev, parents, (int)len - 1) < 0)
871 872 goto release;
872 873 set_phase_from_parents(phases, parents[0], parents[1], rev);
873 874 switch (phases[rev]) {
874 875 case 0:
875 876 continue;
876 877 case 1:
877 878 pyphase = phasesets[0];
878 879 break;
879 880 case 2:
880 881 pyphase = phasesets[1];
881 882 break;
882 883 case 32:
883 884 pyphase = phasesets[2];
884 885 break;
885 886 case 96:
886 887 pyphase = phasesets[3];
887 888 break;
888 889 default:
889 890 /* this should never happen since the phase number is
890 891 * specified by this function. */
891 892 PyErr_SetString(PyExc_SystemError,
892 893 "bad phase number in internal list");
893 894 goto release;
894 895 }
895 896 pyrev = PyInt_FromLong(rev);
896 897 if (pyrev == NULL)
897 898 goto release;
898 899 if (PySet_Add(pyphase, pyrev) == -1) {
899 900 Py_DECREF(pyrev);
900 901 goto release;
901 902 }
902 903 Py_DECREF(pyrev);
903 904 }
904 905
905 906 phasesetsdict = _dict_new_presized(numphases);
906 907 if (phasesetsdict == NULL)
907 908 goto release;
908 909 for (i = 0; i < numphases; ++i) {
909 910 PyObject *pyphase = PyInt_FromLong(trackedphases[i]);
910 911 if (pyphase == NULL)
911 912 goto release;
912 913 if (PyDict_SetItem(phasesetsdict, pyphase, phasesets[i]) ==
913 914 -1) {
914 915 Py_DECREF(pyphase);
915 916 goto release;
916 917 }
917 918 Py_DECREF(phasesets[i]);
918 919 phasesets[i] = NULL;
919 920 }
920 921
921 922 return Py_BuildValue("nN", len, phasesetsdict);
922 923
923 924 release:
924 925 for (i = 0; i < numphases; ++i)
925 926 Py_XDECREF(phasesets[i]);
926 927 Py_XDECREF(phasesetsdict);
927 928
928 929 free(phases);
929 930 return NULL;
930 931 }
931 932
932 933 static PyObject *index_headrevs(indexObject *self, PyObject *args)
933 934 {
934 935 Py_ssize_t i, j, len;
935 936 char *nothead = NULL;
936 937 PyObject *heads = NULL;
937 938 PyObject *filter = NULL;
938 939 PyObject *filteredrevs = Py_None;
939 940
940 941 if (!PyArg_ParseTuple(args, "|O", &filteredrevs)) {
941 942 return NULL;
942 943 }
943 944
944 945 if (self->headrevs && filteredrevs == self->filteredrevs)
945 946 return list_copy(self->headrevs);
946 947
947 948 Py_DECREF(self->filteredrevs);
948 949 self->filteredrevs = filteredrevs;
949 950 Py_INCREF(filteredrevs);
950 951
951 952 if (filteredrevs != Py_None) {
952 953 filter = PyObject_GetAttrString(filteredrevs, "__contains__");
953 954 if (!filter) {
954 955 PyErr_SetString(
955 956 PyExc_TypeError,
956 957 "filteredrevs has no attribute __contains__");
957 958 goto bail;
958 959 }
959 960 }
960 961
961 962 len = index_length(self);
962 963 heads = PyList_New(0);
963 964 if (heads == NULL)
964 965 goto bail;
965 966 if (len == 0) {
966 967 PyObject *nullid = PyInt_FromLong(-1);
967 968 if (nullid == NULL || PyList_Append(heads, nullid) == -1) {
968 969 Py_XDECREF(nullid);
969 970 goto bail;
970 971 }
971 972 goto done;
972 973 }
973 974
974 975 nothead = calloc(len, 1);
975 976 if (nothead == NULL) {
976 977 PyErr_NoMemory();
977 978 goto bail;
978 979 }
979 980
980 981 for (i = len - 1; i >= 0; i--) {
981 982 int isfiltered;
982 983 int parents[2];
983 984
984 985 /* If nothead[i] == 1, it means we've seen an unfiltered child
985 986 * of this node already, and therefore this node is not
986 987 * filtered. So we can skip the expensive check_filter step.
987 988 */
988 989 if (nothead[i] != 1) {
989 990 isfiltered = check_filter(filter, i);
990 991 if (isfiltered == -1) {
991 992 PyErr_SetString(PyExc_TypeError,
992 993 "unable to check filter");
993 994 goto bail;
994 995 }
995 996
996 997 if (isfiltered) {
997 998 nothead[i] = 1;
998 999 continue;
999 1000 }
1000 1001 }
1001 1002
1002 1003 if (index_get_parents(self, i, parents, (int)len - 1) < 0)
1003 1004 goto bail;
1004 1005 for (j = 0; j < 2; j++) {
1005 1006 if (parents[j] >= 0)
1006 1007 nothead[parents[j]] = 1;
1007 1008 }
1008 1009 }
1009 1010
1010 1011 for (i = 0; i < len; i++) {
1011 1012 PyObject *head;
1012 1013
1013 1014 if (nothead[i])
1014 1015 continue;
1015 1016 head = PyInt_FromSsize_t(i);
1016 1017 if (head == NULL || PyList_Append(heads, head) == -1) {
1017 1018 Py_XDECREF(head);
1018 1019 goto bail;
1019 1020 }
1020 1021 }
1021 1022
1022 1023 done:
1023 1024 self->headrevs = heads;
1024 1025 Py_XDECREF(filter);
1025 1026 free(nothead);
1026 1027 return list_copy(self->headrevs);
1027 1028 bail:
1028 1029 Py_XDECREF(filter);
1029 1030 Py_XDECREF(heads);
1030 1031 free(nothead);
1031 1032 return NULL;
1032 1033 }
1033 1034
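The head computation above boils down to: a revision is a head unless some unfiltered revision has it as a parent, with filtered revisions checked lazily (a revision that already has an unfiltered child cannot be filtered, so the potentially expensive filter call is skipped). A pure-Python sketch with an assumed `parents(rev)` helper:

    def headrevs(length, parents, isfiltered=lambda rev: False):
        if length == 0:
            return [-1]              # empty repo: nullrev is the only head
        nothead = [False] * length
        for rev in range(length - 1, -1, -1):
            if not nothead[rev] and isfiltered(rev):
                nothead[rev] = True  # filtered and childless: drop it
                continue
            for p in parents(rev):
                if p >= 0:
                    nothead[p] = True  # has a child: cannot be a head
        return [rev for rev in range(length) if not nothead[rev]]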
1034 1035 /**
1035 1036 * Obtain the base revision of an index entry.
1036 1037 *
1037 1038 * Callers must ensure that rev >= 0 or illegal memory access may occur.
1038 1039 */
1039 1040 static inline int index_baserev(indexObject *self, int rev)
1040 1041 {
1041 1042 const char *data;
1042 1043 int result;
1043 1044
1044 1045 data = index_deref(self, rev);
1045 1046 if (data == NULL)
1046 1047 return -2;
1047 1048 result = getbe32(data + 16);
1048 1049
1049 1050 if (result > rev) {
1050 1051 PyErr_Format(
1051 1052 PyExc_ValueError,
1052 1053 "corrupted revlog, revision base above revision: %d, %d",
1053 1054 rev, result);
1054 1055 return -2;
1055 1056 }
1056 1057 if (result < -1) {
1057 1058 PyErr_Format(
1058 1059 PyExc_ValueError,
1059 1060 "corrupted revlog, revision base out of range: %d, %d", rev,
1060 1061 result);
1061 1062 return -2;
1062 1063 }
1063 1064 return result;
1064 1065 }
1065 1066
1066 1067 /**
1067 1068 * Determine whether a revision is a snapshot
1068 1069 *
1069 1070 * Only relevant for sparse-revlog case.
1070 1071 * Callers must ensure that rev is in a valid range.
1071 1072 */
1072 1073 static int index_issnapshotrev(indexObject *self, Py_ssize_t rev)
1073 1074 {
1074 1075 int ps[2];
1075 1076 Py_ssize_t base;
1076 1077 while (rev >= 0) {
1077 1078 base = (Py_ssize_t)index_baserev(self, rev);
1078 1079 if (base == rev) {
1079 1080 base = -1;
1080 1081 }
1081 1082 if (base == -2) {
1082 1083 assert(PyErr_Occurred());
1083 1084 return -1;
1084 1085 }
1085 1086 if (base == -1) {
1086 1087 return 1;
1087 1088 }
1088 1089 if (index_get_parents(self, rev, ps, (int)rev) < 0) {
1089 1090 assert(PyErr_Occurred());
1090 1091 return -1;
1091 1092 };
1092 1093 if (base == ps[0] || base == ps[1]) {
1093 1094 return 0;
1094 1095 }
1095 1096 rev = base;
1096 1097 }
1097 1098 return rev == -1;
1098 1099 }
1099 1100
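In words, the loop above walks the delta chain through base revisions: hitting a full snapshot (a base of -1, or a self-referential base) means the revision is a snapshot, while finding a base that is also a parent means it is an ordinary delta. A Python rendering of the same walk (helper names hypothetical, error handling omitted):

    def is_snapshot(rev, baserev, parents):
        while rev >= 0:
            base = baserev(rev)
            if base == rev:          # self-referential base: full snapshot
                base = -1
            if base == -1:
                return True          # chain bottoms out on a full snapshot
            if base in parents(rev):
                return False         # delta against a parent, not a snapshot
            rev = base               # intermediate snapshot: keep walking
        return rev == -1             # nullrev counts as a snapshot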
1100 1101 static PyObject *index_issnapshot(indexObject *self, PyObject *value)
1101 1102 {
1102 1103 long rev;
1103 1104 int issnap;
1104 1105 Py_ssize_t length = index_length(self);
1105 1106
1106 1107 if (!pylong_to_long(value, &rev)) {
1107 1108 return NULL;
1108 1109 }
1109 1110 if (rev < -1 || rev >= length) {
1110 1111 PyErr_Format(PyExc_ValueError, "revlog index out of range: %ld",
1111 1112 rev);
1112 1113 return NULL;
1113 1114 };
1114 1115 issnap = index_issnapshotrev(self, (Py_ssize_t)rev);
1115 1116 if (issnap < 0) {
1116 1117 return NULL;
1117 1118 };
1118 1119 return PyBool_FromLong((long)issnap);
1119 1120 }
1120 1121
1121 1122 static PyObject *index_findsnapshots(indexObject *self, PyObject *args)
1122 1123 {
1123 1124 Py_ssize_t start_rev;
1124 1125 PyObject *cache;
1125 1126 Py_ssize_t base;
1126 1127 Py_ssize_t rev;
1127 1128 PyObject *key = NULL;
1128 1129 PyObject *value = NULL;
1129 1130 const Py_ssize_t length = index_length(self);
1130 1131 if (!PyArg_ParseTuple(args, "O!n", &PyDict_Type, &cache, &start_rev)) {
1131 1132 return NULL;
1132 1133 }
1133 1134 for (rev = start_rev; rev < length; rev++) {
1134 1135 int issnap;
1135 1136 PyObject *allvalues = NULL;
1136 1137 issnap = index_issnapshotrev(self, rev);
1137 1138 if (issnap < 0) {
1138 1139 goto bail;
1139 1140 }
1140 1141 if (issnap == 0) {
1141 1142 continue;
1142 1143 }
1143 1144 base = (Py_ssize_t)index_baserev(self, rev);
1144 1145 if (base == rev) {
1145 1146 base = -1;
1146 1147 }
1147 1148 if (base == -2) {
1148 1149 assert(PyErr_Occurred());
1149 1150 goto bail;
1150 1151 }
1151 1152 key = PyInt_FromSsize_t(base);
1152 1153 allvalues = PyDict_GetItem(cache, key);
1153 1154 if (allvalues == NULL && PyErr_Occurred()) {
1154 1155 goto bail;
1155 1156 }
1156 1157 if (allvalues == NULL) {
1157 1158 int r;
1158 1159 allvalues = PyList_New(0);
1159 1160 if (!allvalues) {
1160 1161 goto bail;
1161 1162 }
1162 1163 r = PyDict_SetItem(cache, key, allvalues);
1163 1164 Py_DECREF(allvalues);
1164 1165 if (r < 0) {
1165 1166 goto bail;
1166 1167 }
1167 1168 }
1168 1169 value = PyInt_FromSsize_t(rev);
1169 1170 if (PyList_Append(allvalues, value)) {
1170 1171 goto bail;
1171 1172 }
1172 1173 Py_CLEAR(key);
1173 1174 Py_CLEAR(value);
1174 1175 }
1175 1176 Py_RETURN_NONE;
1176 1177 bail:
1177 1178 Py_XDECREF(key);
1178 1179 Py_XDECREF(value);
1179 1180 return NULL;
1180 1181 }
1181 1182
1182 1183 static PyObject *index_deltachain(indexObject *self, PyObject *args)
1183 1184 {
1184 1185 int rev, generaldelta;
1185 1186 PyObject *stoparg;
1186 1187 int stoprev, iterrev, baserev = -1;
1187 1188 int stopped;
1188 1189 PyObject *chain = NULL, *result = NULL;
1189 1190 const Py_ssize_t length = index_length(self);
1190 1191
1191 1192 if (!PyArg_ParseTuple(args, "iOi", &rev, &stoparg, &generaldelta)) {
1192 1193 return NULL;
1193 1194 }
1194 1195
1195 1196 if (PyInt_Check(stoparg)) {
1196 1197 stoprev = (int)PyInt_AsLong(stoparg);
1197 1198 if (stoprev == -1 && PyErr_Occurred()) {
1198 1199 return NULL;
1199 1200 }
1200 1201 } else if (stoparg == Py_None) {
1201 1202 stoprev = -2;
1202 1203 } else {
1203 1204 PyErr_SetString(PyExc_ValueError,
1204 1205 "stoprev must be integer or None");
1205 1206 return NULL;
1206 1207 }
1207 1208
1208 1209 if (rev < 0 || rev >= length) {
1209 1210 PyErr_SetString(PyExc_ValueError, "revlog index out of range");
1210 1211 return NULL;
1211 1212 }
1212 1213
1213 1214 chain = PyList_New(0);
1214 1215 if (chain == NULL) {
1215 1216 return NULL;
1216 1217 }
1217 1218
1218 1219 baserev = index_baserev(self, rev);
1219 1220
1220 1221 /* This should never happen. */
1221 1222 if (baserev <= -2) {
1222 1223 /* Error should be set by index_deref() */
1223 1224 assert(PyErr_Occurred());
1224 1225 goto bail;
1225 1226 }
1226 1227
1227 1228 iterrev = rev;
1228 1229
1229 1230 while (iterrev != baserev && iterrev != stoprev) {
1230 1231 PyObject *value = PyInt_FromLong(iterrev);
1231 1232 if (value == NULL) {
1232 1233 goto bail;
1233 1234 }
1234 1235 if (PyList_Append(chain, value)) {
1235 1236 Py_DECREF(value);
1236 1237 goto bail;
1237 1238 }
1238 1239 Py_DECREF(value);
1239 1240
1240 1241 if (generaldelta) {
1241 1242 iterrev = baserev;
1242 1243 } else {
1243 1244 iterrev--;
1244 1245 }
1245 1246
1246 1247 if (iterrev < 0) {
1247 1248 break;
1248 1249 }
1249 1250
1250 1251 if (iterrev >= length) {
1251 1252 PyErr_SetString(PyExc_IndexError,
1252 1253 "revision outside index");
1253 1254 return NULL;
1254 1255 }
1255 1256
1256 1257 baserev = index_baserev(self, iterrev);
1257 1258
1258 1259 /* This should never happen. */
1259 1260 if (baserev <= -2) {
1260 1261 /* Error should be set by index_deref() */
1261 1262 assert(PyErr_Occurred());
1262 1263 goto bail;
1263 1264 }
1264 1265 }
1265 1266
1266 1267 if (iterrev == stoprev) {
1267 1268 stopped = 1;
1268 1269 } else {
1269 1270 PyObject *value = PyInt_FromLong(iterrev);
1270 1271 if (value == NULL) {
1271 1272 goto bail;
1272 1273 }
1273 1274 if (PyList_Append(chain, value)) {
1274 1275 Py_DECREF(value);
1275 1276 goto bail;
1276 1277 }
1277 1278 Py_DECREF(value);
1278 1279
1279 1280 stopped = 0;
1280 1281 }
1281 1282
1282 1283 if (PyList_Reverse(chain)) {
1283 1284 goto bail;
1284 1285 }
1285 1286
1286 1287 result = Py_BuildValue("OO", chain, stopped ? Py_True : Py_False);
1287 1288 Py_DECREF(chain);
1288 1289 return result;
1289 1290
1290 1291 bail:
1291 1292 Py_DECREF(chain);
1292 1293 return NULL;
1293 1294 }
1294 1295
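The chain walk above differs by delta mode: with generaldelta each revision deltas against an arbitrary base revision, otherwise against the immediately preceding revision, and the walk ends at a full revision (its own base) or at stoprev. A Python sketch with a hypothetical `baserev(rev)` helper (a stoprev of -2 stands in for the None case above):

    def deltachain(rev, stoprev, generaldelta, baserev):
        chain = []
        base = baserev(rev)
        iterrev = rev
        while iterrev != base and iterrev != stoprev:
            chain.append(iterrev)
            iterrev = base if generaldelta else iterrev - 1
            if iterrev < 0:
                break
            base = baserev(iterrev)
        if iterrev == stoprev:
            stopped = True           # cut short at the caller's stop point
        else:
            chain.append(iterrev)    # include the chain's base revision
            stopped = False
        chain.reverse()              # oldest first, as the C code returns
        return chain, stopped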
1295 1296 static inline int64_t
1296 1297 index_segment_span(indexObject *self, Py_ssize_t start_rev, Py_ssize_t end_rev)
1297 1298 {
1298 1299 int64_t start_offset;
1299 1300 int64_t end_offset;
1300 1301 int end_size;
1301 1302 start_offset = index_get_start(self, start_rev);
1302 1303 if (start_offset < 0) {
1303 1304 return -1;
1304 1305 }
1305 1306 end_offset = index_get_start(self, end_rev);
1306 1307 if (end_offset < 0) {
1307 1308 return -1;
1308 1309 }
1309 1310 end_size = index_get_length(self, end_rev);
1310 1311 if (end_size < 0) {
1311 1312 return -1;
1312 1313 }
1313 1314 if (end_offset < start_offset) {
1314 1315 PyErr_Format(PyExc_ValueError,
1315 1316 "corrupted revlog index: inconsistent offset "
1316 1317 "between revisions (%zd) and (%zd)",
1317 1318 start_rev, end_rev);
1318 1319 return -1;
1319 1320 }
1320 1321 return (end_offset - start_offset) + (int64_t)end_size;
1321 1322 }
1322 1323
1323 1324 /* returns endidx so that revs[startidx:endidx] has no empty trailing revs */
1324 1325 static Py_ssize_t trim_endidx(indexObject *self, const Py_ssize_t *revs,
1325 1326 Py_ssize_t startidx, Py_ssize_t endidx)
1326 1327 {
1327 1328 int length;
1328 1329 while (endidx > 1 && endidx > startidx) {
1329 1330 length = index_get_length(self, revs[endidx - 1]);
1330 1331 if (length < 0) {
1331 1332 return -1;
1332 1333 }
1333 1334 if (length != 0) {
1334 1335 break;
1335 1336 }
1336 1337 endidx -= 1;
1337 1338 }
1338 1339 return endidx;
1339 1340 }
1340 1341
1341 1342 struct Gap {
1342 1343 int64_t size;
1343 1344 Py_ssize_t idx;
1344 1345 };
1345 1346
1346 1347 static int gap_compare(const void *left, const void *right)
1347 1348 {
1348 1349 const struct Gap *l_left = ((const struct Gap *)left);
1349 1350 const struct Gap *l_right = ((const struct Gap *)right);
1350 1351 if (l_left->size < l_right->size) {
1351 1352 return -1;
1352 1353 } else if (l_left->size > l_right->size) {
1353 1354 return 1;
1354 1355 }
1355 1356 return 0;
1356 1357 }
1357 1358 static int Py_ssize_t_compare(const void *left, const void *right)
1358 1359 {
1359 1360 const Py_ssize_t l_left = *(const Py_ssize_t *)left;
1360 1361 const Py_ssize_t l_right = *(const Py_ssize_t *)right;
1361 1362 if (l_left < l_right) {
1362 1363 return -1;
1363 1364 } else if (l_left > l_right) {
1364 1365 return 1;
1365 1366 }
1366 1367 return 0;
1367 1368 }
1368 1369
1369 1370 static PyObject *index_slicechunktodensity(indexObject *self, PyObject *args)
1370 1371 {
1371 1372 /* method arguments */
1372 1373 PyObject *list_revs = NULL; /* revisions in the chain */
1373 1374 double targetdensity = 0; /* min density to achieve */
1374 1375 Py_ssize_t mingapsize = 0; /* threshold to ignore gaps */
1375 1376
1376 1377 /* other core variables */
1377 1378 Py_ssize_t idxlen = index_length(self);
1378 1379 Py_ssize_t i; /* used for various iteration */
1379 1380 PyObject *result = NULL; /* the final return of the function */
1380 1381
1381 1382 /* generic information about the delta chain being sliced */
1382 1383 Py_ssize_t num_revs = 0; /* size of the full delta chain */
1383 1384 	Py_ssize_t *revs = NULL; /* native array of revisions in the chain */
1384 1385 	int64_t chainpayload = 0; /* sum of the sizes of all deltas in the chain */
1385 1386 int64_t deltachainspan = 0; /* distance from first byte to last byte */
1386 1387
1387 1388 /* variable used for slicing the delta chain */
1388 1389 int64_t readdata = 0; /* amount of data currently planned to be read */
1389 1390 	double density = 0; /* ratio of payload data to data read */
1390 1391 int64_t previous_end;
1391 1392 	struct Gap *gaps = NULL; /* array of notable gaps in the chain */
1392 1393 	Py_ssize_t num_gaps =
1393 1394 	    0; /* total number of notable gaps recorded so far */
1394 1395 Py_ssize_t *selected_indices = NULL; /* indices of gap skipped over */
1395 1396 Py_ssize_t num_selected = 0; /* number of gaps skipped */
1396 1397 PyObject *chunk = NULL; /* individual slice */
1397 1398 PyObject *allchunks = NULL; /* all slices */
1398 1399 Py_ssize_t previdx;
1399 1400
1400 1401 /* parsing argument */
1401 1402 if (!PyArg_ParseTuple(args, "O!dn", &PyList_Type, &list_revs,
1402 1403 &targetdensity, &mingapsize)) {
1403 1404 goto bail;
1404 1405 }
1405 1406
1406 1407 /* If the delta chain contains a single element, we do not need slicing
1407 1408 */
1408 1409 num_revs = PyList_GET_SIZE(list_revs);
1409 1410 if (num_revs <= 1) {
1410 1411 result = PyTuple_Pack(1, list_revs);
1411 1412 goto done;
1412 1413 }
1413 1414
1414 1415 /* Turn the python list into a native integer array (for efficiency) */
1415 1416 revs = (Py_ssize_t *)calloc(num_revs, sizeof(Py_ssize_t));
1416 1417 if (revs == NULL) {
1417 1418 PyErr_NoMemory();
1418 1419 goto bail;
1419 1420 }
1420 1421 for (i = 0; i < num_revs; i++) {
1421 1422 Py_ssize_t revnum = PyInt_AsLong(PyList_GET_ITEM(list_revs, i));
1422 1423 if (revnum == -1 && PyErr_Occurred()) {
1423 1424 goto bail;
1424 1425 }
1425 1426 if (revnum < nullrev || revnum >= idxlen) {
1426 1427 PyErr_Format(PyExc_IndexError,
1427 1428 "index out of range: %zd", revnum);
1428 1429 goto bail;
1429 1430 }
1430 1431 revs[i] = revnum;
1431 1432 }
1432 1433
1433 1434 	/* Compute and check various properties of the unsliced delta chain */
1434 1435 deltachainspan = index_segment_span(self, revs[0], revs[num_revs - 1]);
1435 1436 if (deltachainspan < 0) {
1436 1437 goto bail;
1437 1438 }
1438 1439
1439 1440 if (deltachainspan <= mingapsize) {
1440 1441 result = PyTuple_Pack(1, list_revs);
1441 1442 goto done;
1442 1443 }
1443 1444 chainpayload = 0;
1444 1445 for (i = 0; i < num_revs; i++) {
1445 1446 int tmp = index_get_length(self, revs[i]);
1446 1447 if (tmp < 0) {
1447 1448 goto bail;
1448 1449 }
1449 1450 chainpayload += tmp;
1450 1451 }
1451 1452
1452 1453 readdata = deltachainspan;
1453 1454 density = 1.0;
1454 1455
1455 1456 if (0 < deltachainspan) {
1456 1457 density = (double)chainpayload / (double)deltachainspan;
1457 1458 }
1458 1459
1459 1460 if (density >= targetdensity) {
1460 1461 result = PyTuple_Pack(1, list_revs);
1461 1462 goto done;
1462 1463 }
1463 1464
1464 1465 /* if chain is too sparse, look for relevant gaps */
1465 1466 gaps = (struct Gap *)calloc(num_revs, sizeof(struct Gap));
1466 1467 if (gaps == NULL) {
1467 1468 PyErr_NoMemory();
1468 1469 goto bail;
1469 1470 }
1470 1471
1471 1472 previous_end = -1;
1472 1473 for (i = 0; i < num_revs; i++) {
1473 1474 int64_t revstart;
1474 1475 int revsize;
1475 1476 revstart = index_get_start(self, revs[i]);
1476 1477 if (revstart < 0) {
1477 1478 goto bail;
1478 1479 };
1479 1480 revsize = index_get_length(self, revs[i]);
1480 1481 if (revsize < 0) {
1481 1482 goto bail;
1482 1483 };
1483 1484 if (revsize == 0) {
1484 1485 continue;
1485 1486 }
1486 1487 if (previous_end >= 0) {
1487 1488 int64_t gapsize = revstart - previous_end;
1488 1489 if (gapsize > mingapsize) {
1489 1490 gaps[num_gaps].size = gapsize;
1490 1491 gaps[num_gaps].idx = i;
1491 1492 num_gaps += 1;
1492 1493 }
1493 1494 }
1494 1495 previous_end = revstart + revsize;
1495 1496 }
1496 1497 if (num_gaps == 0) {
1497 1498 result = PyTuple_Pack(1, list_revs);
1498 1499 goto done;
1499 1500 }
1500 1501 qsort(gaps, num_gaps, sizeof(struct Gap), &gap_compare);
1501 1502
1502 1503 	/* Slice the largest gaps first; they improve the density the most */
1503 1504 selected_indices =
1504 1505 (Py_ssize_t *)malloc((num_gaps + 1) * sizeof(Py_ssize_t));
1505 1506 if (selected_indices == NULL) {
1506 1507 PyErr_NoMemory();
1507 1508 goto bail;
1508 1509 }
1509 1510
1510 1511 for (i = num_gaps - 1; i >= 0; i--) {
1511 1512 selected_indices[num_selected] = gaps[i].idx;
1512 1513 readdata -= gaps[i].size;
1513 1514 num_selected += 1;
1514 1515 if (readdata <= 0) {
1515 1516 density = 1.0;
1516 1517 } else {
1517 1518 density = (double)chainpayload / (double)readdata;
1518 1519 }
1519 1520 if (density >= targetdensity) {
1520 1521 break;
1521 1522 }
1522 1523 }
1523 1524 qsort(selected_indices, num_selected, sizeof(Py_ssize_t),
1524 1525 &Py_ssize_t_compare);
1525 1526
1526 1527 	/* create the resulting slices */
1527 1528 allchunks = PyList_New(0);
1528 1529 if (allchunks == NULL) {
1529 1530 goto bail;
1530 1531 }
1531 1532 previdx = 0;
1532 1533 selected_indices[num_selected] = num_revs;
1533 1534 for (i = 0; i <= num_selected; i++) {
1534 1535 Py_ssize_t idx = selected_indices[i];
1535 1536 Py_ssize_t endidx = trim_endidx(self, revs, previdx, idx);
1536 1537 if (endidx < 0) {
1537 1538 goto bail;
1538 1539 }
1539 1540 if (previdx < endidx) {
1540 1541 chunk = PyList_GetSlice(list_revs, previdx, endidx);
1541 1542 if (chunk == NULL) {
1542 1543 goto bail;
1543 1544 }
1544 1545 if (PyList_Append(allchunks, chunk) == -1) {
1545 1546 goto bail;
1546 1547 }
1547 1548 Py_DECREF(chunk);
1548 1549 chunk = NULL;
1549 1550 }
1550 1551 previdx = idx;
1551 1552 }
1552 1553 result = allchunks;
1553 1554 goto done;
1554 1555
1555 1556 bail:
1556 1557 Py_XDECREF(allchunks);
1557 1558 Py_XDECREF(chunk);
1558 1559 done:
1559 1560 free(revs);
1560 1561 free(gaps);
1561 1562 free(selected_indices);
1562 1563 return result;
1563 1564 }
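
The whole routine condenses to a short Python sketch, assuming start(rev) and length(rev) accessors; the per-chunk trimming of empty trailing revisions (trim_endidx) is omitted for brevity:

    def slice_to_density(revs, start, length, targetdensity, mingapsize):
        # payload: sum of delta sizes; span: first byte to last byte
        payload = sum(length(r) for r in revs)
        span = (start(revs[-1]) + length(revs[-1])) - start(revs[0])
        if span <= mingapsize or payload >= span * targetdensity:
            return [revs]
        # collect the gaps between consecutive non-empty revisions
        gaps = []
        prev_end = None
        for i, r in enumerate(revs):
            if length(r) == 0:
                continue
            if prev_end is not None and start(r) - prev_end > mingapsize:
                gaps.append((start(r) - prev_end, i))
            prev_end = start(r) + length(r)
        if not gaps:
            return [revs]
        # skip the largest gaps first until the density target is met
        readdata = span
        selected = []
        for size, idx in sorted(gaps, reverse=True):
            selected.append(idx)
            readdata -= size
            if readdata <= 0 or payload >= readdata * targetdensity:
                break
        # cut the chain at the selected gap positions
        chunks, prev = [], 0
        for idx in sorted(selected) + [len(revs)]:
            if prev < idx:
                chunks.append(revs[prev:idx])
            prev = idx
        return chunks
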
1564 1565
1565 1566 static inline int nt_level(const char *node, Py_ssize_t level)
1566 1567 {
1567 1568 int v = node[level >> 1];
1568 1569 if (!(level & 1))
1569 1570 v >>= 4;
1570 1571 return v & 0xf;
1571 1572 }
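
Each byte of a binary node holds two nybbles: even levels read the high four bits, odd levels the low four. The same extraction as a Python sketch over a bytes object:

    def nt_level(node, level):
        v = node[level >> 1]   # one byte stores two tree levels
        if not (level & 1):
            v >>= 4            # even level: high nybble
        return v & 0xF
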
1572 1573
1573 1574 /*
1574 1575 * Return values:
1575 1576 *
1576 1577 * -4: match is ambiguous (multiple candidates)
1577 1578 * -2: not found
1578 1579 * rest: valid rev
1579 1580 */
1580 1581 static int nt_find(nodetree *self, const char *node, Py_ssize_t nodelen,
1581 1582 int hex)
1582 1583 {
1583 1584 int (*getnybble)(const char *, Py_ssize_t) = hex ? hexdigit : nt_level;
1584 1585 int level, maxlevel, off;
1585 1586
1586 1587 /* If the input is binary, do a fast check for the nullid first. */
1587 1588 if (!hex && nodelen == self->nodelen && node[0] == '\0' &&
1588 1589 node[1] == '\0' && memcmp(node, nullid, self->nodelen) == 0)
1589 1590 return -1;
1590 1591
1591 1592 if (hex)
1592 1593 maxlevel = nodelen;
1593 1594 else
1594 1595 maxlevel = 2 * nodelen;
1595 1596 if (maxlevel > 2 * self->nodelen)
1596 1597 maxlevel = 2 * self->nodelen;
1597 1598
1598 1599 for (level = off = 0; level < maxlevel; level++) {
1599 1600 int k = getnybble(node, level);
1600 1601 nodetreenode *n = &self->nodes[off];
1601 1602 int v = n->children[k];
1602 1603
1603 1604 if (v < 0) {
1604 1605 const char *n;
1605 1606 Py_ssize_t i;
1606 1607
1607 1608 v = -(v + 2);
1608 1609 n = index_node(self->index, v);
1609 1610 if (n == NULL)
1610 1611 return -2;
1611 1612 for (i = level; i < maxlevel; i++)
1612 1613 if (getnybble(node, i) != nt_level(n, i))
1613 1614 return -2;
1614 1615 return v;
1615 1616 }
1616 1617 if (v == 0)
1617 1618 return -2;
1618 1619 off = v;
1619 1620 }
1620 1621 /* multiple matches against an ambiguous prefix */
1621 1622 return -4;
1622 1623 }
1623 1624
1624 1625 static int nt_new(nodetree *self)
1625 1626 {
1626 1627 if (self->length == self->capacity) {
1627 1628 size_t newcapacity;
1628 1629 nodetreenode *newnodes;
1629 1630 newcapacity = self->capacity * 2;
1630 1631 if (newcapacity >= SIZE_MAX / sizeof(nodetreenode)) {
1631 1632 PyErr_SetString(PyExc_MemoryError,
1632 1633 "overflow in nt_new");
1633 1634 return -1;
1634 1635 }
1635 1636 newnodes =
1636 1637 realloc(self->nodes, newcapacity * sizeof(nodetreenode));
1637 1638 if (newnodes == NULL) {
1638 1639 PyErr_SetString(PyExc_MemoryError, "out of memory");
1639 1640 return -1;
1640 1641 }
1641 1642 self->capacity = newcapacity;
1642 1643 self->nodes = newnodes;
1643 1644 memset(&self->nodes[self->length], 0,
1644 1645 sizeof(nodetreenode) * (self->capacity - self->length));
1645 1646 }
1646 1647 return self->length++;
1647 1648 }
1648 1649
1649 1650 static int nt_insert(nodetree *self, const char *node, int rev)
1650 1651 {
1651 1652 int level = 0;
1652 1653 int off = 0;
1653 1654
1654 1655 while (level < 2 * self->nodelen) {
1655 1656 int k = nt_level(node, level);
1656 1657 nodetreenode *n;
1657 1658 int v;
1658 1659
1659 1660 n = &self->nodes[off];
1660 1661 v = n->children[k];
1661 1662
1662 1663 if (v == 0) {
1663 1664 n->children[k] = -rev - 2;
1664 1665 return 0;
1665 1666 }
1666 1667 if (v < 0) {
1667 1668 const char *oldnode =
1668 1669 index_node_existing(self->index, -(v + 2));
1669 1670 int noff;
1670 1671
1671 1672 if (oldnode == NULL)
1672 1673 return -1;
1673 1674 if (!memcmp(oldnode, node, self->nodelen)) {
1674 1675 n->children[k] = -rev - 2;
1675 1676 return 0;
1676 1677 }
1677 1678 noff = nt_new(self);
1678 1679 if (noff == -1)
1679 1680 return -1;
1680 1681 /* self->nodes may have been changed by realloc */
1681 1682 self->nodes[off].children[k] = noff;
1682 1683 off = noff;
1683 1684 n = &self->nodes[off];
1684 1685 n->children[nt_level(oldnode, ++level)] = v;
1685 1686 if (level > self->depth)
1686 1687 self->depth = level;
1687 1688 self->splits += 1;
1688 1689 } else {
1689 1690 level += 1;
1690 1691 off = v;
1691 1692 }
1692 1693 }
1693 1694
1694 1695 return -1;
1695 1696 }
1696 1697
1697 1698 static PyObject *ntobj_insert(nodetreeObject *self, PyObject *args)
1698 1699 {
1699 1700 Py_ssize_t rev;
1700 1701 const char *node;
1701 1702 Py_ssize_t length;
1702 1703 if (!PyArg_ParseTuple(args, "n", &rev))
1703 1704 return NULL;
1704 1705 length = index_length(self->nt.index);
1705 1706 if (rev < 0 || rev >= length) {
1706 1707 PyErr_SetString(PyExc_ValueError, "revlog index out of range");
1707 1708 return NULL;
1708 1709 }
1709 1710 node = index_node_existing(self->nt.index, rev);
1710 1711 if (nt_insert(&self->nt, node, (int)rev) == -1)
1711 1712 return NULL;
1712 1713 Py_RETURN_NONE;
1713 1714 }
1714 1715
1715 1716 static int nt_delete_node(nodetree *self, const char *node)
1716 1717 {
1717 1718 /* rev==-2 happens to get encoded as 0, which is interpreted as not set
1718 1719 */
1719 1720 return nt_insert(self, node, -2);
1720 1721 }
1721 1722
1722 1723 static int nt_init(nodetree *self, indexObject *index, unsigned capacity)
1723 1724 {
1724 1725 /* Initialize before overflow-checking to avoid nt_dealloc() crash. */
1725 1726 self->nodes = NULL;
1726 1727
1727 1728 self->index = index;
1728 1729 /* The input capacity is in terms of revisions, while the field is in
1729 1730 * terms of nodetree nodes. */
1730 1731 self->capacity = (capacity < 4 ? 4 : capacity / 2);
1731 1732 self->nodelen = index->nodelen;
1732 1733 self->depth = 0;
1733 1734 self->splits = 0;
1734 1735 if (self->capacity > SIZE_MAX / sizeof(nodetreenode)) {
1735 1736 PyErr_SetString(PyExc_ValueError, "overflow in init_nt");
1736 1737 return -1;
1737 1738 }
1738 1739 self->nodes = calloc(self->capacity, sizeof(nodetreenode));
1739 1740 if (self->nodes == NULL) {
1740 1741 PyErr_NoMemory();
1741 1742 return -1;
1742 1743 }
1743 1744 self->length = 1;
1744 1745 return 0;
1745 1746 }
1746 1747
1747 1748 static int ntobj_init(nodetreeObject *self, PyObject *args)
1748 1749 {
1749 1750 PyObject *index;
1750 1751 unsigned capacity;
1751 1752 if (!PyArg_ParseTuple(args, "O!I", &HgRevlogIndex_Type, &index,
1752 1753 &capacity))
1753 1754 return -1;
1754 1755 Py_INCREF(index);
1755 1756 return nt_init(&self->nt, (indexObject *)index, capacity);
1756 1757 }
1757 1758
1758 1759 static int nt_partialmatch(nodetree *self, const char *node, Py_ssize_t nodelen)
1759 1760 {
1760 1761 return nt_find(self, node, nodelen, 1);
1761 1762 }
1762 1763
1763 1764 /*
1764 1765 * Find the length of the shortest unique prefix of node.
1765 1766 *
1766 1767 * Return values:
1767 1768 *
1768 1769 * -3: error (exception set)
1769 1770 * -2: not found (no exception set)
1770 1771 * rest: length of shortest prefix
1771 1772 */
1772 1773 static int nt_shortest(nodetree *self, const char *node)
1773 1774 {
1774 1775 int level, off;
1775 1776
1776 1777 for (level = off = 0; level < 2 * self->nodelen; level++) {
1777 1778 int k, v;
1778 1779 nodetreenode *n = &self->nodes[off];
1779 1780 k = nt_level(node, level);
1780 1781 v = n->children[k];
1781 1782 if (v < 0) {
1782 1783 const char *n;
1783 1784 v = -(v + 2);
1784 1785 n = index_node_existing(self->index, v);
1785 1786 if (n == NULL)
1786 1787 return -3;
1787 1788 if (memcmp(node, n, self->nodelen) != 0)
1788 1789 /*
1789 1790 * Found a unique prefix, but it wasn't for the
1790 1791 			 * requested node (i.e. the requested node does
1791 1792 * not exist).
1792 1793 */
1793 1794 return -2;
1794 1795 return level + 1;
1795 1796 }
1796 1797 if (v == 0)
1797 1798 return -2;
1798 1799 off = v;
1799 1800 }
1800 1801 	/*
1801 1802 	 * The node was still not unique after 2 * nodelen hex digits, which
1802 1803 	 * cannot happen. If we get here, there is a programming error in
1803 1804 	 * this file that made us insert a node longer than the hash length.
1804 1805 	 */
1805 1806 PyErr_SetString(PyExc_Exception, "broken node tree");
1806 1807 return -3;
1807 1808 }
1808 1809
1809 1810 static PyObject *ntobj_shortest(nodetreeObject *self, PyObject *args)
1810 1811 {
1811 1812 PyObject *val;
1812 1813 char *node;
1813 1814 int length;
1814 1815
1815 1816 if (!PyArg_ParseTuple(args, "O", &val))
1816 1817 return NULL;
1817 1818 if (node_check(self->nt.nodelen, val, &node) == -1)
1818 1819 return NULL;
1819 1820
1820 1821 length = nt_shortest(&self->nt, node);
1821 1822 if (length == -3)
1822 1823 return NULL;
1823 1824 if (length == -2) {
1824 1825 raise_revlog_error();
1825 1826 return NULL;
1826 1827 }
1827 1828 return PyInt_FromLong(length);
1828 1829 }
1829 1830
1830 1831 static void nt_dealloc(nodetree *self)
1831 1832 {
1832 1833 free(self->nodes);
1833 1834 self->nodes = NULL;
1834 1835 }
1835 1836
1836 1837 static void ntobj_dealloc(nodetreeObject *self)
1837 1838 {
1838 1839 Py_XDECREF(self->nt.index);
1839 1840 nt_dealloc(&self->nt);
1840 1841 PyObject_Del(self);
1841 1842 }
1842 1843
1843 1844 static PyMethodDef ntobj_methods[] = {
1844 1845 {"insert", (PyCFunction)ntobj_insert, METH_VARARGS,
1845 1846 "insert an index entry"},
1846 1847 {"shortest", (PyCFunction)ntobj_shortest, METH_VARARGS,
1847 1848 "find length of shortest hex nodeid of a binary ID"},
1848 1849 {NULL} /* Sentinel */
1849 1850 };
1850 1851
1851 1852 static PyTypeObject nodetreeType = {
1852 1853 PyVarObject_HEAD_INIT(NULL, 0) /* header */
1853 1854 "parsers.nodetree", /* tp_name */
1854 1855 sizeof(nodetreeObject), /* tp_basicsize */
1855 1856 0, /* tp_itemsize */
1856 1857 (destructor)ntobj_dealloc, /* tp_dealloc */
1857 1858 0, /* tp_print */
1858 1859 0, /* tp_getattr */
1859 1860 0, /* tp_setattr */
1860 1861 0, /* tp_compare */
1861 1862 0, /* tp_repr */
1862 1863 0, /* tp_as_number */
1863 1864 0, /* tp_as_sequence */
1864 1865 0, /* tp_as_mapping */
1865 1866 0, /* tp_hash */
1866 1867 0, /* tp_call */
1867 1868 0, /* tp_str */
1868 1869 0, /* tp_getattro */
1869 1870 0, /* tp_setattro */
1870 1871 0, /* tp_as_buffer */
1871 1872 Py_TPFLAGS_DEFAULT, /* tp_flags */
1872 1873 "nodetree", /* tp_doc */
1873 1874 0, /* tp_traverse */
1874 1875 0, /* tp_clear */
1875 1876 0, /* tp_richcompare */
1876 1877 0, /* tp_weaklistoffset */
1877 1878 0, /* tp_iter */
1878 1879 0, /* tp_iternext */
1879 1880 ntobj_methods, /* tp_methods */
1880 1881 0, /* tp_members */
1881 1882 0, /* tp_getset */
1882 1883 0, /* tp_base */
1883 1884 0, /* tp_dict */
1884 1885 0, /* tp_descr_get */
1885 1886 0, /* tp_descr_set */
1886 1887 0, /* tp_dictoffset */
1887 1888 (initproc)ntobj_init, /* tp_init */
1888 1889 0, /* tp_alloc */
1889 1890 };
1890 1891
1891 1892 static int index_init_nt(indexObject *self)
1892 1893 {
1893 1894 if (!self->ntinitialized) {
1894 1895 if (nt_init(&self->nt, self, (int)self->length) == -1) {
1895 1896 nt_dealloc(&self->nt);
1896 1897 return -1;
1897 1898 }
1898 1899 if (nt_insert(&self->nt, nullid, -1) == -1) {
1899 1900 nt_dealloc(&self->nt);
1900 1901 return -1;
1901 1902 }
1902 1903 self->ntinitialized = 1;
1903 1904 self->ntrev = (int)index_length(self);
1904 1905 self->ntlookups = 1;
1905 1906 self->ntmisses = 0;
1906 1907 }
1907 1908 return 0;
1908 1909 }
1909 1910
1910 1911 /*
1911 1912 * Return values:
1912 1913 *
1913 1914 * -3: error (exception set)
1914 1915 * -2: not found (no exception set)
1915 1916 * rest: valid rev
1916 1917 */
1917 1918 static int index_find_node(indexObject *self, const char *node)
1918 1919 {
1919 1920 int rev;
1920 1921
1921 1922 if (index_init_nt(self) == -1)
1922 1923 return -3;
1923 1924
1924 1925 self->ntlookups++;
1925 1926 rev = nt_find(&self->nt, node, self->nodelen, 0);
1926 1927 if (rev >= -1)
1927 1928 return rev;
1928 1929
1929 1930 /*
1930 1931 * For the first handful of lookups, we scan the entire index,
1931 1932 * and cache only the matching nodes. This optimizes for cases
1932 1933 * like "hg tip", where only a few nodes are accessed.
1933 1934 *
1934 1935 * After that, we cache every node we visit, using a single
1935 1936 * scan amortized over multiple lookups. This gives the best
1936 1937 * bulk performance, e.g. for "hg log".
1937 1938 */
1938 1939 if (self->ntmisses++ < 4) {
1939 1940 for (rev = self->ntrev - 1; rev >= 0; rev--) {
1940 1941 const char *n = index_node_existing(self, rev);
1941 1942 if (n == NULL)
1942 1943 return -3;
1943 1944 if (memcmp(node, n, self->nodelen) == 0) {
1944 1945 if (nt_insert(&self->nt, n, rev) == -1)
1945 1946 return -3;
1946 1947 break;
1947 1948 }
1948 1949 }
1949 1950 } else {
1950 1951 for (rev = self->ntrev - 1; rev >= 0; rev--) {
1951 1952 const char *n = index_node_existing(self, rev);
1952 1953 if (n == NULL)
1953 1954 return -3;
1954 1955 if (nt_insert(&self->nt, n, rev) == -1) {
1955 1956 self->ntrev = rev + 1;
1956 1957 return -3;
1957 1958 }
1958 1959 if (memcmp(node, n, self->nodelen) == 0) {
1959 1960 break;
1960 1961 }
1961 1962 }
1962 1963 self->ntrev = rev;
1963 1964 }
1964 1965
1965 1966 if (rev >= 0)
1966 1967 return rev;
1967 1968 return -2;
1968 1969 }
1969 1970
1970 1971 static PyObject *index_getitem(indexObject *self, PyObject *value)
1971 1972 {
1972 1973 char *node;
1973 1974 int rev;
1974 1975
1975 1976 if (PyInt_Check(value)) {
1976 1977 long idx;
1977 1978 if (!pylong_to_long(value, &idx)) {
1978 1979 return NULL;
1979 1980 }
1980 1981 return index_get(self, idx);
1981 1982 }
1982 1983
1983 1984 if (node_check(self->nodelen, value, &node) == -1)
1984 1985 return NULL;
1985 1986 rev = index_find_node(self, node);
1986 1987 if (rev >= -1)
1987 1988 return PyInt_FromLong(rev);
1988 1989 if (rev == -2)
1989 1990 raise_revlog_error();
1990 1991 return NULL;
1991 1992 }
1992 1993
1993 1994 /*
1994 1995 * Fully populate the radix tree.
1995 1996 */
1996 1997 static int index_populate_nt(indexObject *self)
1997 1998 {
1998 1999 int rev;
1999 2000 if (self->ntrev > 0) {
2000 2001 for (rev = self->ntrev - 1; rev >= 0; rev--) {
2001 2002 const char *n = index_node_existing(self, rev);
2002 2003 if (n == NULL)
2003 2004 return -1;
2004 2005 if (nt_insert(&self->nt, n, rev) == -1)
2005 2006 return -1;
2006 2007 }
2007 2008 self->ntrev = -1;
2008 2009 }
2009 2010 return 0;
2010 2011 }
2011 2012
2012 2013 static PyObject *index_partialmatch(indexObject *self, PyObject *args)
2013 2014 {
2014 2015 const char *fullnode;
2015 2016 Py_ssize_t nodelen;
2016 2017 char *node;
2017 2018 int rev, i;
2018 2019
2019 2020 if (!PyArg_ParseTuple(args, PY23("s#", "y#"), &node, &nodelen))
2020 2021 return NULL;
2021 2022
2022 2023 if (nodelen < 1) {
2023 2024 PyErr_SetString(PyExc_ValueError, "key too short");
2024 2025 return NULL;
2025 2026 }
2026 2027
2027 2028 if (nodelen > 2 * self->nodelen) {
2028 2029 PyErr_SetString(PyExc_ValueError, "key too long");
2029 2030 return NULL;
2030 2031 }
2031 2032
2032 2033 for (i = 0; i < nodelen; i++)
2033 2034 hexdigit(node, i);
2034 2035 if (PyErr_Occurred()) {
2035 2036 /* input contains non-hex characters */
2036 2037 PyErr_Clear();
2037 2038 Py_RETURN_NONE;
2038 2039 }
2039 2040
2040 2041 if (index_init_nt(self) == -1)
2041 2042 return NULL;
2042 2043 if (index_populate_nt(self) == -1)
2043 2044 return NULL;
2044 2045 rev = nt_partialmatch(&self->nt, node, nodelen);
2045 2046
2046 2047 switch (rev) {
2047 2048 case -4:
2048 2049 raise_revlog_error();
2049 2050 return NULL;
2050 2051 case -2:
2051 2052 Py_RETURN_NONE;
2052 2053 case -1:
2053 2054 return PyBytes_FromStringAndSize(nullid, self->nodelen);
2054 2055 }
2055 2056
2056 2057 fullnode = index_node_existing(self, rev);
2057 2058 if (fullnode == NULL) {
2058 2059 return NULL;
2059 2060 }
2060 2061 return PyBytes_FromStringAndSize(fullnode, self->nodelen);
2061 2062 }
2062 2063
2063 2064 static PyObject *index_shortest(indexObject *self, PyObject *args)
2064 2065 {
2065 2066 PyObject *val;
2066 2067 char *node;
2067 2068 int length;
2068 2069
2069 2070 if (!PyArg_ParseTuple(args, "O", &val))
2070 2071 return NULL;
2071 2072 if (node_check(self->nodelen, val, &node) == -1)
2072 2073 return NULL;
2073 2074
2074 2075 self->ntlookups++;
2075 2076 if (index_init_nt(self) == -1)
2076 2077 return NULL;
2077 2078 if (index_populate_nt(self) == -1)
2078 2079 return NULL;
2079 2080 length = nt_shortest(&self->nt, node);
2080 2081 if (length == -3)
2081 2082 return NULL;
2082 2083 if (length == -2) {
2083 2084 raise_revlog_error();
2084 2085 return NULL;
2085 2086 }
2086 2087 return PyInt_FromLong(length);
2087 2088 }
2088 2089
2089 2090 static PyObject *index_m_get(indexObject *self, PyObject *args)
2090 2091 {
2091 2092 PyObject *val;
2092 2093 char *node;
2093 2094 int rev;
2094 2095
2095 2096 if (!PyArg_ParseTuple(args, "O", &val))
2096 2097 return NULL;
2097 2098 if (node_check(self->nodelen, val, &node) == -1)
2098 2099 return NULL;
2099 2100 rev = index_find_node(self, node);
2100 2101 if (rev == -3)
2101 2102 return NULL;
2102 2103 if (rev == -2)
2103 2104 Py_RETURN_NONE;
2104 2105 return PyInt_FromLong(rev);
2105 2106 }
2106 2107
2107 2108 static int index_contains(indexObject *self, PyObject *value)
2108 2109 {
2109 2110 char *node;
2110 2111
2111 2112 if (PyInt_Check(value)) {
2112 2113 long rev;
2113 2114 if (!pylong_to_long(value, &rev)) {
2114 2115 return -1;
2115 2116 }
2116 2117 return rev >= -1 && rev < index_length(self);
2117 2118 }
2118 2119
2119 2120 if (node_check(self->nodelen, value, &node) == -1)
2120 2121 return -1;
2121 2122
2122 2123 switch (index_find_node(self, node)) {
2123 2124 case -3:
2124 2125 return -1;
2125 2126 case -2:
2126 2127 return 0;
2127 2128 default:
2128 2129 return 1;
2129 2130 }
2130 2131 }
2131 2132
2132 2133 static PyObject *index_m_has_node(indexObject *self, PyObject *args)
2133 2134 {
2134 2135 int ret = index_contains(self, args);
2135 2136 if (ret < 0)
2136 2137 return NULL;
2137 2138 return PyBool_FromLong((long)ret);
2138 2139 }
2139 2140
2140 2141 static PyObject *index_m_rev(indexObject *self, PyObject *val)
2141 2142 {
2142 2143 char *node;
2143 2144 int rev;
2144 2145
2145 2146 if (node_check(self->nodelen, val, &node) == -1)
2146 2147 return NULL;
2147 2148 rev = index_find_node(self, node);
2148 2149 if (rev >= -1)
2149 2150 return PyInt_FromLong(rev);
2150 2151 if (rev == -2)
2151 2152 raise_revlog_error();
2152 2153 return NULL;
2153 2154 }
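
From Python these lookups surface as index.get_rev(node) (returning None for unknown nodes), index.rev(node) (raising RevlogError), and the in operator. A quick sketch, assuming idx is an already-parsed index with at least one revision:

    # idx: an index from parsers.parse_index2(); entry[7] is the node
    node = idx[0][7]
    assert idx.rev(node) == 0
    assert idx.get_rev(b'\x01' * 20) is None   # unknown 20-byte node
    assert node in idx and idx.has_node(node)
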
2154 2155
2155 2156 typedef uint64_t bitmask;
2156 2157
2157 2158 /*
2158 2159 * Given a disjoint set of revs, return all candidates for the
2159 2160 * greatest common ancestor. In revset notation, this is the set
2160 2161 * "heads(::a and ::b and ...)"
2161 2162 */
2162 2163 static PyObject *find_gca_candidates(indexObject *self, const int *revs,
2163 2164 int revcount)
2164 2165 {
2165 2166 const bitmask allseen = (1ull << revcount) - 1;
2166 2167 const bitmask poison = 1ull << revcount;
2167 2168 PyObject *gca = PyList_New(0);
2168 2169 int i, v, interesting;
2169 2170 int maxrev = -1;
2170 2171 bitmask sp;
2171 2172 bitmask *seen;
2172 2173
2173 2174 if (gca == NULL)
2174 2175 return PyErr_NoMemory();
2175 2176
2176 2177 for (i = 0; i < revcount; i++) {
2177 2178 if (revs[i] > maxrev)
2178 2179 maxrev = revs[i];
2179 2180 }
2180 2181
2181 2182 seen = calloc(sizeof(*seen), maxrev + 1);
2182 2183 if (seen == NULL) {
2183 2184 Py_DECREF(gca);
2184 2185 return PyErr_NoMemory();
2185 2186 }
2186 2187
2187 2188 for (i = 0; i < revcount; i++)
2188 2189 seen[revs[i]] = 1ull << i;
2189 2190
2190 2191 interesting = revcount;
2191 2192
2192 2193 for (v = maxrev; v >= 0 && interesting; v--) {
2193 2194 bitmask sv = seen[v];
2194 2195 int parents[2];
2195 2196
2196 2197 if (!sv)
2197 2198 continue;
2198 2199
2199 2200 if (sv < poison) {
2200 2201 interesting -= 1;
2201 2202 if (sv == allseen) {
2202 2203 PyObject *obj = PyInt_FromLong(v);
2203 2204 if (obj == NULL)
2204 2205 goto bail;
2205 2206 if (PyList_Append(gca, obj) == -1) {
2206 2207 Py_DECREF(obj);
2207 2208 goto bail;
2208 2209 }
2209 2210 sv |= poison;
2210 2211 for (i = 0; i < revcount; i++) {
2211 2212 if (revs[i] == v)
2212 2213 goto done;
2213 2214 }
2214 2215 }
2215 2216 }
2216 2217 if (index_get_parents(self, v, parents, maxrev) < 0)
2217 2218 goto bail;
2218 2219
2219 2220 for (i = 0; i < 2; i++) {
2220 2221 int p = parents[i];
2221 2222 if (p == -1)
2222 2223 continue;
2223 2224 sp = seen[p];
2224 2225 if (sv < poison) {
2225 2226 if (sp == 0) {
2226 2227 seen[p] = sv;
2227 2228 interesting++;
2228 2229 } else if (sp != sv)
2229 2230 seen[p] |= sv;
2230 2231 } else {
2231 2232 if (sp && sp < poison)
2232 2233 interesting--;
2233 2234 seen[p] = sv;
2234 2235 }
2235 2236 }
2236 2237 }
2237 2238
2238 2239 done:
2239 2240 free(seen);
2240 2241 return gca;
2241 2242 bail:
2242 2243 free(seen);
2243 2244 Py_XDECREF(gca);
2244 2245 return NULL;
2245 2246 }
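
The bitmask bookkeeping reads more easily in Python. A sketch, assuming a parents(rev) callable that returns a pair of parent revisions (-1 for a missing parent):

    def gca_candidates(parents, revs):
        """Sketch of find_gca_candidates; parents(v) -> (p1, p2)."""
        allseen = (1 << len(revs)) - 1
        poison = 1 << len(revs)
        maxrev = max(revs)
        seen = [0] * (maxrev + 1)
        for i, r in enumerate(revs):
            seen[r] = 1 << i
        gca = []
        interesting = len(revs)
        v = maxrev
        while v >= 0 and interesting:
            sv = seen[v]
            if sv:
                if sv < poison:
                    interesting -= 1
                    if sv == allseen:       # v is an ancestor of every input
                        gca.append(v)
                        sv |= poison        # poison: exclude its ancestors
                        if v in revs:
                            return gca
                for p in parents(v):
                    if p == -1:
                        continue
                    sp = seen[p]
                    if sv < poison:
                        if sp == 0:
                            interesting += 1
                        seen[p] = sp | sv   # merge the witness sets
                    else:
                        if sp and sp < poison:
                            interesting -= 1
                        seen[p] = sv        # poison overrides everything
            v -= 1
        return gca
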
2246 2247
2247 2248 /*
2248 2249 * Given a disjoint set of revs, return the subset with the longest
2249 2250 * path to the root.
2250 2251 */
2251 2252 static PyObject *find_deepest(indexObject *self, PyObject *revs)
2252 2253 {
2253 2254 const Py_ssize_t revcount = PyList_GET_SIZE(revs);
2254 2255 static const Py_ssize_t capacity = 24;
2255 2256 int *depth, *interesting = NULL;
2256 2257 int i, j, v, ninteresting;
2257 2258 PyObject *dict = NULL, *keys = NULL;
2258 2259 long *seen = NULL;
2259 2260 int maxrev = -1;
2260 2261 long final;
2261 2262
2262 2263 if (revcount > capacity) {
2263 2264 PyErr_Format(PyExc_OverflowError,
2264 2265 "bitset size (%ld) > capacity (%ld)",
2265 2266 (long)revcount, (long)capacity);
2266 2267 return NULL;
2267 2268 }
2268 2269
2269 2270 for (i = 0; i < revcount; i++) {
2270 2271 int n = (int)PyInt_AsLong(PyList_GET_ITEM(revs, i));
2271 2272 if (n > maxrev)
2272 2273 maxrev = n;
2273 2274 }
2274 2275
2275 2276 depth = calloc(sizeof(*depth), maxrev + 1);
2276 2277 if (depth == NULL)
2277 2278 return PyErr_NoMemory();
2278 2279
2279 2280 seen = calloc(sizeof(*seen), maxrev + 1);
2280 2281 if (seen == NULL) {
2281 2282 PyErr_NoMemory();
2282 2283 goto bail;
2283 2284 }
2284 2285
2285 2286 interesting = calloc(sizeof(*interesting), ((size_t)1) << revcount);
2286 2287 if (interesting == NULL) {
2287 2288 PyErr_NoMemory();
2288 2289 goto bail;
2289 2290 }
2290 2291
2291 2292 if (PyList_Sort(revs) == -1)
2292 2293 goto bail;
2293 2294
2294 2295 for (i = 0; i < revcount; i++) {
2295 2296 int n = (int)PyInt_AsLong(PyList_GET_ITEM(revs, i));
2296 2297 long b = 1l << i;
2297 2298 depth[n] = 1;
2298 2299 seen[n] = b;
2299 2300 interesting[b] = 1;
2300 2301 }
2301 2302
2302 2303 /* invariant: ninteresting is the number of non-zero entries in
2303 2304 * interesting. */
2304 2305 ninteresting = (int)revcount;
2305 2306
2306 2307 for (v = maxrev; v >= 0 && ninteresting > 1; v--) {
2307 2308 int dv = depth[v];
2308 2309 int parents[2];
2309 2310 long sv;
2310 2311
2311 2312 if (dv == 0)
2312 2313 continue;
2313 2314
2314 2315 sv = seen[v];
2315 2316 if (index_get_parents(self, v, parents, maxrev) < 0)
2316 2317 goto bail;
2317 2318
2318 2319 for (i = 0; i < 2; i++) {
2319 2320 int p = parents[i];
2320 2321 long sp;
2321 2322 int dp;
2322 2323
2323 2324 if (p == -1)
2324 2325 continue;
2325 2326
2326 2327 dp = depth[p];
2327 2328 sp = seen[p];
2328 2329 if (dp <= dv) {
2329 2330 depth[p] = dv + 1;
2330 2331 if (sp != sv) {
2331 2332 interesting[sv] += 1;
2332 2333 seen[p] = sv;
2333 2334 if (sp) {
2334 2335 interesting[sp] -= 1;
2335 2336 if (interesting[sp] == 0)
2336 2337 ninteresting -= 1;
2337 2338 }
2338 2339 }
2339 2340 } else if (dv == dp - 1) {
2340 2341 long nsp = sp | sv;
2341 2342 if (nsp == sp)
2342 2343 continue;
2343 2344 seen[p] = nsp;
2344 2345 interesting[sp] -= 1;
2345 2346 if (interesting[sp] == 0)
2346 2347 ninteresting -= 1;
2347 2348 if (interesting[nsp] == 0)
2348 2349 ninteresting += 1;
2349 2350 interesting[nsp] += 1;
2350 2351 }
2351 2352 }
2352 2353 interesting[sv] -= 1;
2353 2354 if (interesting[sv] == 0)
2354 2355 ninteresting -= 1;
2355 2356 }
2356 2357
2357 2358 final = 0;
2358 2359 j = ninteresting;
2359 2360 for (i = 0; i < (int)(2 << revcount) && j > 0; i++) {
2360 2361 if (interesting[i] == 0)
2361 2362 continue;
2362 2363 final |= i;
2363 2364 j -= 1;
2364 2365 }
2365 2366 if (final == 0) {
2366 2367 keys = PyList_New(0);
2367 2368 goto bail;
2368 2369 }
2369 2370
2370 2371 dict = PyDict_New();
2371 2372 if (dict == NULL)
2372 2373 goto bail;
2373 2374
2374 2375 for (i = 0; i < revcount; i++) {
2375 2376 PyObject *key;
2376 2377
2377 2378 if ((final & (1 << i)) == 0)
2378 2379 continue;
2379 2380
2380 2381 key = PyList_GET_ITEM(revs, i);
2381 2382 Py_INCREF(key);
2382 2383 Py_INCREF(Py_None);
2383 2384 if (PyDict_SetItem(dict, key, Py_None) == -1) {
2384 2385 Py_DECREF(key);
2385 2386 Py_DECREF(Py_None);
2386 2387 goto bail;
2387 2388 }
2388 2389 }
2389 2390
2390 2391 keys = PyDict_Keys(dict);
2391 2392
2392 2393 bail:
2393 2394 free(depth);
2394 2395 free(seen);
2395 2396 free(interesting);
2396 2397 Py_XDECREF(dict);
2397 2398
2398 2399 return keys;
2399 2400 }
2400 2401
2401 2402 /*
2402 2403 * Given a (possibly overlapping) set of revs, return all the
2403 2404  * common ancestor heads: heads(::args[0] and ::args[1] and ...)
2404 2405 */
2405 2406 static PyObject *index_commonancestorsheads(indexObject *self, PyObject *args)
2406 2407 {
2407 2408 PyObject *ret = NULL;
2408 2409 Py_ssize_t argcount, i, len;
2409 2410 bitmask repeat = 0;
2410 2411 int revcount = 0;
2411 2412 int *revs;
2412 2413
2413 2414 argcount = PySequence_Length(args);
2414 2415 revs = PyMem_Malloc(argcount * sizeof(*revs));
2415 2416 if (argcount > 0 && revs == NULL)
2416 2417 return PyErr_NoMemory();
2417 2418 len = index_length(self);
2418 2419
2419 2420 for (i = 0; i < argcount; i++) {
2420 2421 static const int capacity = 24;
2421 2422 PyObject *obj = PySequence_GetItem(args, i);
2422 2423 bitmask x;
2423 2424 long val;
2424 2425
2425 2426 if (!PyInt_Check(obj)) {
2426 2427 PyErr_SetString(PyExc_TypeError,
2427 2428 "arguments must all be ints");
2428 2429 Py_DECREF(obj);
2429 2430 goto bail;
2430 2431 }
2431 2432 val = PyInt_AsLong(obj);
2432 2433 Py_DECREF(obj);
2433 2434 if (val == -1) {
2434 2435 ret = PyList_New(0);
2435 2436 goto done;
2436 2437 }
2437 2438 if (val < 0 || val >= len) {
2438 2439 PyErr_SetString(PyExc_IndexError, "index out of range");
2439 2440 goto bail;
2440 2441 }
2441 2442 /* this cheesy bloom filter lets us avoid some more
2442 2443 * expensive duplicate checks in the common set-is-disjoint
2443 2444 * case */
2444 2445 x = 1ull << (val & 0x3f);
2445 2446 if (repeat & x) {
2446 2447 int k;
2447 2448 for (k = 0; k < revcount; k++) {
2448 2449 if (val == revs[k])
2449 2450 goto duplicate;
2450 2451 }
2451 2452 } else
2452 2453 repeat |= x;
2453 2454 if (revcount >= capacity) {
2454 2455 PyErr_Format(PyExc_OverflowError,
2455 2456 "bitset size (%d) > capacity (%d)",
2456 2457 revcount, capacity);
2457 2458 goto bail;
2458 2459 }
2459 2460 revs[revcount++] = (int)val;
2460 2461 duplicate:;
2461 2462 }
2462 2463
2463 2464 if (revcount == 0) {
2464 2465 ret = PyList_New(0);
2465 2466 goto done;
2466 2467 }
2467 2468 if (revcount == 1) {
2468 2469 PyObject *obj;
2469 2470 ret = PyList_New(1);
2470 2471 if (ret == NULL)
2471 2472 goto bail;
2472 2473 obj = PyInt_FromLong(revs[0]);
2473 2474 if (obj == NULL)
2474 2475 goto bail;
2475 2476 PyList_SET_ITEM(ret, 0, obj);
2476 2477 goto done;
2477 2478 }
2478 2479
2479 2480 ret = find_gca_candidates(self, revs, revcount);
2480 2481 if (ret == NULL)
2481 2482 goto bail;
2482 2483
2483 2484 done:
2484 2485 PyMem_Free(revs);
2485 2486 return ret;
2486 2487
2487 2488 bail:
2488 2489 PyMem_Free(revs);
2489 2490 Py_XDECREF(ret);
2490 2491 return NULL;
2491 2492 }
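
The "cheesy bloom filter" above is just a 64-bit signature: each value sets bit (val & 0x3f), and the quadratic duplicate scan only runs when that bit has been seen before. As a standalone Python sketch:

    def dedupe_small(vals):
        repeat = 0
        out = []
        for val in vals:
            bit = 1 << (val & 0x3F)
            if repeat & bit and val in out:
                continue            # a genuine duplicate
            repeat |= bit
            out.append(val)
        return out
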
2492 2493
2493 2494 /*
2494 2495 * Given a (possibly overlapping) set of revs, return the greatest
2495 2496 * common ancestors: those with the longest path to the root.
2496 2497 */
2497 2498 static PyObject *index_ancestors(indexObject *self, PyObject *args)
2498 2499 {
2499 2500 PyObject *ret;
2500 2501 PyObject *gca = index_commonancestorsheads(self, args);
2501 2502 if (gca == NULL)
2502 2503 return NULL;
2503 2504
2504 2505 if (PyList_GET_SIZE(gca) <= 1) {
2505 2506 return gca;
2506 2507 }
2507 2508
2508 2509 ret = find_deepest(self, gca);
2509 2510 Py_DECREF(gca);
2510 2511 return ret;
2511 2512 }
2512 2513
2513 2514 /*
2514 2515 * Invalidate any trie entries introduced by added revs.
2515 2516 */
2516 2517 static void index_invalidate_added(indexObject *self, Py_ssize_t start)
2517 2518 {
2518 2519 Py_ssize_t i, len;
2519 2520
2520 2521 len = self->length + self->new_length;
2521 2522 i = start - self->length;
2522 2523 if (i < 0)
2523 2524 return;
2524 2525
2525 2526 for (i = start; i < len; i++)
2526 2527 nt_delete_node(&self->nt, index_deref(self, i) + 32);
2527 2528
2528 2529 self->new_length = start - self->length;
2529 2530 }
2530 2531
2531 2532 /*
2532 2533 * Delete a numeric range of revs, which must be at the end of the
2533 2534 * range.
2534 2535 */
2535 2536 static int index_slice_del(indexObject *self, PyObject *item)
2536 2537 {
2537 2538 Py_ssize_t start, stop, step, slicelength;
2538 2539 Py_ssize_t length = index_length(self) + 1;
2539 2540 int ret = 0;
2540 2541
2541 2542 /* Argument changed from PySliceObject* to PyObject* in Python 3. */
2542 2543 #ifdef IS_PY3K
2543 2544 if (PySlice_GetIndicesEx(item, length, &start, &stop, &step,
2544 2545 &slicelength) < 0)
2545 2546 #else
2546 2547 if (PySlice_GetIndicesEx((PySliceObject *)item, length, &start, &stop,
2547 2548 &step, &slicelength) < 0)
2548 2549 #endif
2549 2550 return -1;
2550 2551
2551 2552 if (slicelength <= 0)
2552 2553 return 0;
2553 2554
2554 2555 if ((step < 0 && start < stop) || (step > 0 && start > stop))
2555 2556 stop = start;
2556 2557
2557 2558 if (step < 0) {
2558 2559 stop = start + 1;
2559 2560 start = stop + step * (slicelength - 1) - 1;
2560 2561 step = -step;
2561 2562 }
2562 2563
2563 2564 if (step != 1) {
2564 2565 PyErr_SetString(PyExc_ValueError,
2565 2566 "revlog index delete requires step size of 1");
2566 2567 return -1;
2567 2568 }
2568 2569
2569 2570 if (stop != length - 1) {
2570 2571 PyErr_SetString(PyExc_IndexError,
2571 2572 "revlog index deletion indices are invalid");
2572 2573 return -1;
2573 2574 }
2574 2575
2575 2576 if (start < self->length) {
2576 2577 if (self->ntinitialized) {
2577 2578 Py_ssize_t i;
2578 2579
2579 2580 for (i = start; i < self->length; i++) {
2580 2581 const char *node = index_node_existing(self, i);
2581 2582 if (node == NULL)
2582 2583 return -1;
2583 2584
2584 2585 nt_delete_node(&self->nt, node);
2585 2586 }
2586 2587 if (self->new_length)
2587 2588 index_invalidate_added(self, self->length);
2588 2589 if (self->ntrev > start)
2589 2590 self->ntrev = (int)start;
2590 2591 } else if (self->new_length) {
2591 2592 self->new_length = 0;
2592 2593 }
2593 2594
2594 2595 self->length = start;
2595 2596 goto done;
2596 2597 }
2597 2598
2598 2599 if (self->ntinitialized) {
2599 2600 index_invalidate_added(self, start);
2600 2601 if (self->ntrev > start)
2601 2602 self->ntrev = (int)start;
2602 2603 } else {
2603 2604 self->new_length = start - self->length;
2604 2605 }
2605 2606 done:
2606 2607 Py_CLEAR(self->headrevs);
2607 2608 return ret;
2608 2609 }
2609 2610
2610 2611 /*
2611 2612 * Supported ops:
2612 2613 *
2613 2614 * slice deletion
2614 2615 * string assignment (extend node->rev mapping)
2615 2616 * string deletion (shrink node->rev mapping)
2616 2617 */
2617 2618 static int index_assign_subscript(indexObject *self, PyObject *item,
2618 2619 PyObject *value)
2619 2620 {
2620 2621 char *node;
2621 2622 long rev;
2622 2623
2623 2624 if (PySlice_Check(item) && value == NULL)
2624 2625 return index_slice_del(self, item);
2625 2626
2626 2627 if (node_check(self->nodelen, item, &node) == -1)
2627 2628 return -1;
2628 2629
2629 2630 if (value == NULL)
2630 2631 return self->ntinitialized ? nt_delete_node(&self->nt, node)
2631 2632 : 0;
2632 2633 rev = PyInt_AsLong(value);
2633 2634 if (rev > INT_MAX || rev < 0) {
2634 2635 if (!PyErr_Occurred())
2635 2636 PyErr_SetString(PyExc_ValueError, "rev out of range");
2636 2637 return -1;
2637 2638 }
2638 2639
2639 2640 if (index_init_nt(self) == -1)
2640 2641 return -1;
2641 2642 return nt_insert(&self->nt, node, (int)rev);
2642 2643 }
2643 2644
2644 2645 /*
2645 2646 * Find all RevlogNG entries in an index that has inline data. Update
2646 2647 * the optional "offsets" table with those entries.
2647 2648 */
2648 2649 static Py_ssize_t inline_scan(indexObject *self, const char **offsets)
2649 2650 {
2650 2651 const char *data = (const char *)self->buf.buf;
2651 2652 Py_ssize_t pos = 0;
2652 2653 Py_ssize_t end = self->buf.len;
2653 2654 long incr = self->hdrsize;
2654 2655 Py_ssize_t len = 0;
2655 2656
2656 2657 while (pos + self->hdrsize <= end && pos >= 0) {
2657 2658 uint32_t comp_len, sidedata_comp_len = 0;
2658 2659 /* 3rd element of header is length of compressed inline data */
2659 2660 comp_len = getbe32(data + pos + 8);
2660 2661 if (self->hdrsize == v2_hdrsize) {
2661 2662 sidedata_comp_len = getbe32(data + pos + 72);
2662 2663 }
2663 2664 incr = self->hdrsize + comp_len + sidedata_comp_len;
2664 2665 if (offsets)
2665 2666 offsets[len] = data + pos;
2666 2667 len++;
2667 2668 pos += incr;
2668 2669 }
2669 2670
2670 2671 if (pos != end) {
2671 2672 if (!PyErr_Occurred())
2672 2673 PyErr_SetString(PyExc_ValueError, "corrupt index file");
2673 2674 return -1;
2674 2675 }
2675 2676
2676 2677 return len;
2677 2678 }
2678 2679
2679 2680 static int index_init(indexObject *self, PyObject *args, PyObject *kwargs)
2680 2681 {
2681 2682 PyObject *data_obj, *inlined_obj, *revlogv2;
2682 2683 Py_ssize_t size;
2683 2684
2684 2685 static char *kwlist[] = {"data", "inlined", "revlogv2", NULL};
2685 2686
2686 2687 /* Initialize before argument-checking to avoid index_dealloc() crash.
2687 2688 */
2688 2689 self->added = NULL;
2689 2690 self->new_length = 0;
2690 2691 self->added_length = 0;
2691 2692 self->data = NULL;
2692 2693 memset(&self->buf, 0, sizeof(self->buf));
2693 2694 self->headrevs = NULL;
2694 2695 self->filteredrevs = Py_None;
2695 2696 Py_INCREF(Py_None);
2696 2697 self->ntinitialized = 0;
2697 2698 self->offsets = NULL;
2698 2699 self->nodelen = 20;
2699 2700 self->nullentry = NULL;
2700 2701
2701 2702 revlogv2 = NULL;
2702 2703 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "OO|O", kwlist,
2703 2704 &data_obj, &inlined_obj, &revlogv2))
2704 2705 return -1;
2705 2706 if (!PyObject_CheckBuffer(data_obj)) {
2706 2707 PyErr_SetString(PyExc_TypeError,
2707 2708 "data does not support buffer interface");
2708 2709 return -1;
2709 2710 }
2710 2711 if (self->nodelen < 20 || self->nodelen > (Py_ssize_t)sizeof(nullid)) {
2711 2712 PyErr_SetString(PyExc_RuntimeError, "unsupported node size");
2712 2713 return -1;
2713 2714 }
2714 2715
2715 2716 if (revlogv2 && PyObject_IsTrue(revlogv2)) {
2716 2717 self->hdrsize = v2_hdrsize;
2717 2718 } else {
2718 2719 self->hdrsize = v1_hdrsize;
2719 2720 }
2720 2721
2721 2722 if (self->hdrsize == v1_hdrsize) {
2722 2723 self->nullentry =
2723 2724 Py_BuildValue(PY23("iiiiiiis#", "iiiiiiiy#"), 0, 0, 0, -1,
2724 2725 -1, -1, -1, nullid, self->nodelen);
2725 2726 } else {
2726 2727 self->nullentry =
2727 2728 Py_BuildValue(PY23("iiiiiiis#ii", "iiiiiiiy#ii"), 0, 0, 0,
2728 2729 -1, -1, -1, -1, nullid, self->nodelen, 0, 0);
2729 2730 }
2730 2731
2731 2732 if (!self->nullentry)
2732 2733 return -1;
2733 2734 PyObject_GC_UnTrack(self->nullentry);
2734 2735
2735 2736 if (PyObject_GetBuffer(data_obj, &self->buf, PyBUF_SIMPLE) == -1)
2736 2737 return -1;
2737 2738 size = self->buf.len;
2738 2739
2739 2740 self->inlined = inlined_obj && PyObject_IsTrue(inlined_obj);
2740 2741 self->data = data_obj;
2741 2742
2742 2743 self->ntlookups = self->ntmisses = 0;
2743 2744 self->ntrev = -1;
2744 2745 Py_INCREF(self->data);
2745 2746
2746 2747 if (self->inlined) {
2747 2748 Py_ssize_t len = inline_scan(self, NULL);
2748 2749 if (len == -1)
2749 2750 goto bail;
2750 2751 self->length = len;
2751 2752 } else {
2752 2753 if (size % self->hdrsize) {
2753 2754 PyErr_SetString(PyExc_ValueError, "corrupt index file");
2754 2755 goto bail;
2755 2756 }
2756 2757 self->length = size / self->hdrsize;
2757 2758 }
2758 2759
2759 2760 return 0;
2760 2761 bail:
2761 2762 return -1;
2762 2763 }
2763 2764
2764 2765 static PyObject *index_nodemap(indexObject *self)
2765 2766 {
2766 2767 Py_INCREF(self);
2767 2768 return (PyObject *)self;
2768 2769 }
2769 2770
2770 2771 static void _index_clearcaches(indexObject *self)
2771 2772 {
2772 2773 if (self->offsets) {
2773 2774 PyMem_Free((void *)self->offsets);
2774 2775 self->offsets = NULL;
2775 2776 }
2776 2777 if (self->ntinitialized) {
2777 2778 nt_dealloc(&self->nt);
2778 2779 }
2779 2780 self->ntinitialized = 0;
2780 2781 Py_CLEAR(self->headrevs);
2781 2782 }
2782 2783
2783 2784 static PyObject *index_clearcaches(indexObject *self)
2784 2785 {
2785 2786 _index_clearcaches(self);
2786 2787 self->ntrev = -1;
2787 2788 self->ntlookups = self->ntmisses = 0;
2788 2789 Py_RETURN_NONE;
2789 2790 }
2790 2791
2791 2792 static void index_dealloc(indexObject *self)
2792 2793 {
2793 2794 _index_clearcaches(self);
2794 2795 Py_XDECREF(self->filteredrevs);
2795 2796 if (self->buf.buf) {
2796 2797 PyBuffer_Release(&self->buf);
2797 2798 memset(&self->buf, 0, sizeof(self->buf));
2798 2799 }
2799 2800 Py_XDECREF(self->data);
2800 2801 PyMem_Free(self->added);
2801 2802 Py_XDECREF(self->nullentry);
2802 2803 PyObject_Del(self);
2803 2804 }
2804 2805
2805 2806 static PySequenceMethods index_sequence_methods = {
2806 2807 (lenfunc)index_length, /* sq_length */
2807 2808 0, /* sq_concat */
2808 2809 0, /* sq_repeat */
2809 2810 (ssizeargfunc)index_get, /* sq_item */
2810 2811 0, /* sq_slice */
2811 2812 0, /* sq_ass_item */
2812 2813 0, /* sq_ass_slice */
2813 2814 (objobjproc)index_contains, /* sq_contains */
2814 2815 };
2815 2816
2816 2817 static PyMappingMethods index_mapping_methods = {
2817 2818 (lenfunc)index_length, /* mp_length */
2818 2819 (binaryfunc)index_getitem, /* mp_subscript */
2819 2820 (objobjargproc)index_assign_subscript, /* mp_ass_subscript */
2820 2821 };
2821 2822
2822 2823 static PyMethodDef index_methods[] = {
2823 2824 {"ancestors", (PyCFunction)index_ancestors, METH_VARARGS,
2824 2825 "return the gca set of the given revs"},
2825 2826 {"commonancestorsheads", (PyCFunction)index_commonancestorsheads,
2826 2827 METH_VARARGS,
2827 2828 "return the heads of the common ancestors of the given revs"},
2828 2829 {"clearcaches", (PyCFunction)index_clearcaches, METH_NOARGS,
2829 2830 "clear the index caches"},
2830 2831 {"get", (PyCFunction)index_m_get, METH_VARARGS, "get an index entry"},
2831 2832 {"get_rev", (PyCFunction)index_m_get, METH_VARARGS,
2832 2833 "return `rev` associated with a node or None"},
2833 2834 {"has_node", (PyCFunction)index_m_has_node, METH_O,
2834 2835 "return True if the node exist in the index"},
2835 2836 {"rev", (PyCFunction)index_m_rev, METH_O,
2836 2837 "return `rev` associated with a node or raise RevlogError"},
2837 2838 {"computephasesmapsets", (PyCFunction)compute_phases_map_sets, METH_VARARGS,
2838 2839 "compute phases"},
2839 2840 {"reachableroots2", (PyCFunction)reachableroots2, METH_VARARGS,
2840 2841 "reachableroots"},
2841 2842 {"replace_sidedata_info", (PyCFunction)index_replace_sidedata_info,
2842 2843 METH_VARARGS, "replace an existing index entry with a new value"},
2843 2844 {"headrevs", (PyCFunction)index_headrevs, METH_VARARGS,
2844 2845 "get head revisions"}, /* Can do filtering since 3.2 */
2845 2846 {"headrevsfiltered", (PyCFunction)index_headrevs, METH_VARARGS,
2846 2847 "get filtered head revisions"}, /* Can always do filtering */
2847 2848 {"issnapshot", (PyCFunction)index_issnapshot, METH_O,
2848 2849 "True if the object is a snapshot"},
2849 2850 {"findsnapshots", (PyCFunction)index_findsnapshots, METH_VARARGS,
2850 2851 "Gather snapshot data in a cache dict"},
2851 2852 {"deltachain", (PyCFunction)index_deltachain, METH_VARARGS,
2852 2853 "determine revisions with deltas to reconstruct fulltext"},
2853 2854 {"slicechunktodensity", (PyCFunction)index_slicechunktodensity,
2854 2855 METH_VARARGS, "determine revisions with deltas to reconstruct fulltext"},
2855 2856 {"append", (PyCFunction)index_append, METH_O, "append an index entry"},
2856 2857 {"partialmatch", (PyCFunction)index_partialmatch, METH_VARARGS,
2857 2858 "match a potentially ambiguous node ID"},
2858 2859 {"shortest", (PyCFunction)index_shortest, METH_VARARGS,
2859 2860 "find length of shortest hex nodeid of a binary ID"},
2860 2861 {"stats", (PyCFunction)index_stats, METH_NOARGS, "stats for the index"},
2861 2862 {NULL} /* Sentinel */
2862 2863 };
2863 2864
2864 2865 static PyGetSetDef index_getset[] = {
2865 2866 {"nodemap", (getter)index_nodemap, NULL, "nodemap", NULL},
2866 2867 {NULL} /* Sentinel */
2867 2868 };
2868 2869
2870 static PyMemberDef index_members[] = {
2871 {"entry_size", T_LONG, offsetof(indexObject, hdrsize), 0,
2872 "size of an index entry"},
2873 {NULL} /* Sentinel */
2874 };
2875
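
With this member, Python code can read the entry size straight off the C index object. A quick check, assuming revlog v1 whose entries are 64 bytes (v2 entries are larger):

    from mercurial.cext import parsers

    index, cache = parsers.parse_index2(b'', False)   # empty v1 index
    assert index.entry_size == 64                     # bytes per v1 entry
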
2869 2876 PyTypeObject HgRevlogIndex_Type = {
2870 2877 PyVarObject_HEAD_INIT(NULL, 0) /* header */
2871 2878 "parsers.index", /* tp_name */
2872 2879 sizeof(indexObject), /* tp_basicsize */
2873 2880 0, /* tp_itemsize */
2874 2881 (destructor)index_dealloc, /* tp_dealloc */
2875 2882 0, /* tp_print */
2876 2883 0, /* tp_getattr */
2877 2884 0, /* tp_setattr */
2878 2885 0, /* tp_compare */
2879 2886 0, /* tp_repr */
2880 2887 0, /* tp_as_number */
2881 2888 &index_sequence_methods, /* tp_as_sequence */
2882 2889 &index_mapping_methods, /* tp_as_mapping */
2883 2890 0, /* tp_hash */
2884 2891 0, /* tp_call */
2885 2892 0, /* tp_str */
2886 2893 0, /* tp_getattro */
2887 2894 0, /* tp_setattro */
2888 2895 0, /* tp_as_buffer */
2889 2896 Py_TPFLAGS_DEFAULT, /* tp_flags */
2890 2897 "revlog index", /* tp_doc */
2891 2898 0, /* tp_traverse */
2892 2899 0, /* tp_clear */
2893 2900 0, /* tp_richcompare */
2894 2901 0, /* tp_weaklistoffset */
2895 2902 0, /* tp_iter */
2896 2903 0, /* tp_iternext */
2897 2904 index_methods, /* tp_methods */
2898 0, /* tp_members */
2905 index_members, /* tp_members */
2899 2906 index_getset, /* tp_getset */
2900 2907 0, /* tp_base */
2901 2908 0, /* tp_dict */
2902 2909 0, /* tp_descr_get */
2903 2910 0, /* tp_descr_set */
2904 2911 0, /* tp_dictoffset */
2905 2912 (initproc)index_init, /* tp_init */
2906 2913 0, /* tp_alloc */
2907 2914 };
2908 2915
2909 2916 /*
2910 2917 * returns a tuple of the form (index, cache) with elements as
2911 2918 * follows:
2912 2919 *
2913 2920 * index: an index object that lazily parses Revlog (v1 or v2) records
2914 2921 * cache: if data is inlined, a tuple (0, index_file_content), else None
2915 2922 * index_file_content could be a string, or a buffer
2916 2923 *
2917 2924 * added complications are for backwards compatibility
2918 2925 */
2919 2926 PyObject *parse_index2(PyObject *self, PyObject *args, PyObject *kwargs)
2920 2927 {
2921 2928 PyObject *cache = NULL;
2922 2929 indexObject *idx;
2923 2930 int ret;
2924 2931
2925 2932 idx = PyObject_New(indexObject, &HgRevlogIndex_Type);
2926 2933 if (idx == NULL)
2927 2934 goto bail;
2928 2935
2929 2936 ret = index_init(idx, args, kwargs);
2930 2937 if (ret == -1)
2931 2938 goto bail;
2932 2939
2933 2940 if (idx->inlined) {
2934 2941 cache = Py_BuildValue("iO", 0, idx->data);
2935 2942 if (cache == NULL)
2936 2943 goto bail;
2937 2944 } else {
2938 2945 cache = Py_None;
2939 2946 Py_INCREF(cache);
2940 2947 }
2941 2948
2942 2949 return Py_BuildValue("NN", idx, cache);
2943 2950
2944 2951 bail:
2945 2952 Py_XDECREF(idx);
2946 2953 Py_XDECREF(cache);
2947 2954 return NULL;
2948 2955 }
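
Exercising both return shapes described in the docstring above (a sketch; empty index data keeps it self-contained):

    from mercurial.cext import parsers

    idx1, cache1 = parsers.parse_index2(b'', False)   # separate index/data
    idx2, cache2 = parsers.parse_index2(b'', True)    # inline revlog
    assert cache1 is None and len(idx1) == 0
    assert cache2 == (0, b'')
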
2949 2956
2950 2957 static Revlog_CAPI CAPI = {
2951 2958 /* increment the abi_version field upon each change in the Revlog_CAPI
2952 2959 struct or in the ABI of the listed functions */
2953 2960 2,
2954 2961 index_length,
2955 2962 index_node,
2956 2963 HgRevlogIndex_GetParents,
2957 2964 };
2958 2965
2959 2966 void revlog_module_init(PyObject *mod)
2960 2967 {
2961 2968 PyObject *caps = NULL;
2962 2969 HgRevlogIndex_Type.tp_new = PyType_GenericNew;
2963 2970 if (PyType_Ready(&HgRevlogIndex_Type) < 0)
2964 2971 return;
2965 2972 Py_INCREF(&HgRevlogIndex_Type);
2966 2973 PyModule_AddObject(mod, "index", (PyObject *)&HgRevlogIndex_Type);
2967 2974
2968 2975 nodetreeType.tp_new = PyType_GenericNew;
2969 2976 if (PyType_Ready(&nodetreeType) < 0)
2970 2977 return;
2971 2978 Py_INCREF(&nodetreeType);
2972 2979 PyModule_AddObject(mod, "nodetree", (PyObject *)&nodetreeType);
2973 2980
2974 2981 caps = PyCapsule_New(&CAPI, "mercurial.cext.parsers.revlog_CAPI", NULL);
2975 2982 if (caps != NULL)
2976 2983 PyModule_AddObject(mod, "revlog_CAPI", caps);
2977 2984 }
@@ -1,352 +1,353 b''
1 1 # parsers.py - Python implementation of parsers.c
2 2 #
3 3 # Copyright 2009 Olivia Mackall <olivia@selenic.com> and others
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import struct
11 11 import zlib
12 12
13 13 from ..node import nullid, nullrev
14 14 from .. import (
15 15 pycompat,
16 16 util,
17 17 )
18 18
19 19 from ..revlogutils import nodemap as nodemaputil
20 20 from ..revlogutils import constants as revlog_constants
21 21
22 22 stringio = pycompat.bytesio
23 23
24 24
25 25 _pack = struct.pack
26 26 _unpack = struct.unpack
27 27 _compress = zlib.compress
28 28 _decompress = zlib.decompress
29 29
30 30 # Some code below makes tuples directly because it's more convenient. However,
31 31 # code outside this module should always use dirstatetuple.
32 32 def dirstatetuple(*x):
33 33 # x is a tuple
34 34 return x
35 35
36 36
37 37 def gettype(q):
38 38 return int(q & 0xFFFF)
39 39
40 40
41 41 def offset_type(offset, type):
42 42 return int(int(offset) << 16 | type)
43 43
44 44
45 45 class BaseIndexObject(object):
46 46 # Format of an index entry according to Python's `struct` language
47 47 index_format = revlog_constants.INDEX_ENTRY_V1
48 48 # Size of a C unsigned long long int, platform independent
49 49 big_int_size = struct.calcsize(b'>Q')
50 50 # Size of a C long int, platform independent
51 51 int_size = struct.calcsize(b'>i')
52 # Size of the entire index format
53 index_size = revlog_constants.INDEX_ENTRY_V1.size
54 52 # An empty index entry, used as a default value to be overridden, or nullrev
55 53 null_item = (0, 0, 0, -1, -1, -1, -1, nullid)
56 54
55 @util.propertycache
56 def entry_size(self):
57 return self.index_format.size
58
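
This makes entry_size a cached view of index_format.size, so a subclass that swaps in another format (as Index2Mixin does below) gets the matching size for free. For illustration, the v1 layout works out to 64 bytes per entry (the format string here is an assumption matching the v1 record: offset/flags, three lengths, link rev, two parents, a 20-byte node, and padding):

    import struct

    INDEX_ENTRY_V1 = struct.Struct(b'>Qiiiiii20s12x')  # assumed v1 layout
    assert INDEX_ENTRY_V1.size == 64                   # what entry_size reports
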
57 59 @property
58 60 def nodemap(self):
59 61 msg = b"index.nodemap is deprecated, use index.[has_node|rev|get_rev]"
60 62 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
61 63 return self._nodemap
62 64
63 65 @util.propertycache
64 66 def _nodemap(self):
65 67 nodemap = nodemaputil.NodeMap({nullid: nullrev})
66 68 for r in range(0, len(self)):
67 69 n = self[r][7]
68 70 nodemap[n] = r
69 71 return nodemap
70 72
71 73 def has_node(self, node):
72 74 """return True if the node exist in the index"""
73 75 return node in self._nodemap
74 76
75 77 def rev(self, node):
76 78 """return a revision for a node
77 79
78 80 If the node is unknown, raise a RevlogError"""
79 81 return self._nodemap[node]
80 82
81 83 def get_rev(self, node):
82 84 """return a revision for a node
83 85
84 86 If the node is unknown, return None"""
85 87 return self._nodemap.get(node)
86 88
87 89 def _stripnodes(self, start):
88 90 if '_nodemap' in vars(self):
89 91 for r in range(start, len(self)):
90 92 n = self[r][7]
91 93 del self._nodemap[n]
92 94
93 95 def clearcaches(self):
94 96 self.__dict__.pop('_nodemap', None)
95 97
96 98 def __len__(self):
97 99 return self._lgt + len(self._extra)
98 100
99 101 def append(self, tup):
100 102 if '_nodemap' in vars(self):
101 103 self._nodemap[tup[7]] = len(self)
102 104 data = self.index_format.pack(*tup)
103 105 self._extra.append(data)
104 106
105 107 def _check_index(self, i):
106 108 if not isinstance(i, int):
107 109 raise TypeError(b"expecting int indexes")
108 110 if i < 0 or i >= len(self):
109 111 raise IndexError
110 112
111 113 def __getitem__(self, i):
112 114 if i == -1:
113 115 return self.null_item
114 116 self._check_index(i)
115 117 if i >= self._lgt:
116 118 data = self._extra[i - self._lgt]
117 119 else:
118 120 index = self._calculate_index(i)
119 data = self._data[index : index + self.index_size]
121 data = self._data[index : index + self.entry_size]
120 122 r = self.index_format.unpack(data)
121 123 if self._lgt and i == 0:
122 124 r = (offset_type(0, gettype(r[0])),) + r[1:]
123 125 return r
124 126
125 127
126 128 class IndexObject(BaseIndexObject):
127 129 def __init__(self, data):
128 assert len(data) % self.index_size == 0
130 assert len(data) % self.entry_size == 0
129 131 self._data = data
130 self._lgt = len(data) // self.index_size
132 self._lgt = len(data) // self.entry_size
131 133 self._extra = []
132 134
133 135 def _calculate_index(self, i):
134 return i * self.index_size
136 return i * self.entry_size
135 137
136 138 def __delitem__(self, i):
137 139 if not isinstance(i, slice) or not i.stop == -1 or i.step is not None:
138 140 raise ValueError(b"deleting slices only supports a:-1 with step 1")
139 141 i = i.start
140 142 self._check_index(i)
141 143 self._stripnodes(i)
142 144 if i < self._lgt:
143 self._data = self._data[: i * self.index_size]
145 self._data = self._data[: i * self.entry_size]
144 146 self._lgt = i
145 147 self._extra = []
146 148 else:
147 149 self._extra = self._extra[: i - self._lgt]
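
Deletion is deliberately restricted to truncating the tail of the index; a sketch of the only supported form:

    # drop revision 5 and everything after it (idx: an IndexObject)
    del idx[5:-1]
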
148 150
149 151
150 152 class PersistentNodeMapIndexObject(IndexObject):
151 153 """a Debug oriented class to test persistent nodemap
152 154
153 155     We need a simple Python object to test the API and higher-level behavior.
154 156     See the Rust implementation for more serious usage. This should be used only
155 157 through the dedicated `devel.persistent-nodemap` config.
156 158 """
157 159
158 160 def nodemap_data_all(self):
159 161 """Return bytes containing a full serialization of a nodemap
160 162
161 163 The nodemap should be valid for the full set of revisions in the
162 164 index."""
163 165 return nodemaputil.persistent_data(self)
164 166
165 167 def nodemap_data_incremental(self):
166 168 """Return bytes containing a incremental update to persistent nodemap
167 169
168 170         This contains the data for an append-only update of the data provided
169 171 in the last call to `update_nodemap_data`.
170 172 """
171 173 if self._nm_root is None:
172 174 return None
173 175 docket = self._nm_docket
174 176 changed, data = nodemaputil.update_persistent_data(
175 177 self, self._nm_root, self._nm_max_idx, self._nm_docket.tip_rev
176 178 )
177 179
178 180 self._nm_root = self._nm_max_idx = self._nm_docket = None
179 181 return docket, changed, data
180 182
181 183 def update_nodemap_data(self, docket, nm_data):
182 184 """provide full block of persisted binary data for a nodemap
183 185
184 186 The data are expected to come from disk. See `nodemap_data_all` for a
185 187 producer of such data."""
186 188 if nm_data is not None:
187 189 self._nm_root, self._nm_max_idx = nodemaputil.parse_data(nm_data)
188 190 if self._nm_root:
189 191 self._nm_docket = docket
190 192 else:
191 193 self._nm_root = self._nm_max_idx = self._nm_docket = None
192 194
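Taken together, `nodemap_data_all`, `nodemap_data_incremental` and `update_nodemap_data` form a persist/reload/append protocol. A toy, self-contained model of that protocol (it uses none of the real nodemap serialization and is purely illustrative):

    class ToyNodemap(object):
        def __init__(self):
            self._entries = []   # full data, in insertion order
            self._persisted = 0  # how many entries are already on disk

        def nodemap_data_all(self):
            # full serialization, valid for every entry
            self._persisted = len(self._entries)
            return b''.join(self._entries)

        def nodemap_data_incremental(self):
            # append-only tail written since the last persist
            data = b''.join(self._entries[self._persisted :])
            self._persisted = len(self._entries)
            return data

    nm = ToyNodemap()
    nm._entries.append(b'a')
    assert nm.nodemap_data_all() == b'a'
    nm._entries.append(b'b')
    assert nm.nodemap_data_incremental() == b'b'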
193 195
194 196 class InlinedIndexObject(BaseIndexObject):
195 197 def __init__(self, data, inline=0):
196 198 self._data = data
197 199 self._lgt = self._inline_scan(None)
198 200 self._inline_scan(self._lgt)
199 201 self._extra = []
200 202
201 203 def _inline_scan(self, lgt):
202 204 off = 0
203 205 if lgt is not None:
204 206 self._offsets = [0] * lgt
205 207 count = 0
206 while off <= len(self._data) - self.index_size:
208 while off <= len(self._data) - self.entry_size:
207 209 start = off + self.big_int_size
208 210 (s,) = struct.unpack(
209 211 b'>i',
210 212 self._data[start : start + self.int_size],
211 213 )
212 214 if lgt is not None:
213 215 self._offsets[count] = off
214 216 count += 1
215 off += self.index_size + s
217 off += self.entry_size + s
216 218 if off != len(self._data):
217 219 raise ValueError(b"corrupted data")
218 220 return count
219 221
220 222 def __delitem__(self, i):
221 223 if not isinstance(i, slice) or not i.stop == -1 or i.step is not None:
222 224 raise ValueError(b"deleting slices only supports a:-1 with step 1")
223 225 i = i.start
224 226 self._check_index(i)
225 227 self._stripnodes(i)
226 228 if i < self._lgt:
227 229 self._offsets = self._offsets[:i]
228 230 self._lgt = i
229 231 self._extra = []
230 232 else:
231 233 self._extra = self._extra[: i - self._lgt]
232 234
233 235 def _calculate_index(self, i):
234 236 return self._offsets[i]
235 237
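`_inline_scan` has to walk the whole file because inline entries are variable-sized: each record is `entry_size` bytes of index data followed by a revision chunk whose length is stored inside the record. A self-contained sketch of that walk, with a toy 4-byte length header standing in for the real entry:

    import struct

    # toy layout: a 4-byte big-endian length, then that many bytes of data
    HEADER = struct.Struct(b'>i')

    def scan_offsets(data):
        # record where every variable-size entry starts, as _inline_scan does
        offsets, off = [], 0
        while off <= len(data) - HEADER.size:
            (length,) = HEADER.unpack(data[off : off + HEADER.size])
            offsets.append(off)
            off += HEADER.size + length
        if off != len(data):
            raise ValueError("corrupted data")
        return offsets

    blob = HEADER.pack(3) + b'abc' + HEADER.pack(1) + b'z'
    assert scan_offsets(blob) == [0, 7]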
236 238
237 239 def parse_index2(data, inline, revlogv2=False):
238 240 if not inline:
239 241 cls = IndexObject2 if revlogv2 else IndexObject
240 242 return cls(data), None
241 243 cls = InlinedIndexObject2 if revlogv2 else InlinedIndexObject
242 244 return cls(data, inline), (0, data)
243 245
244 246
245 247 class Index2Mixin(object):
246 248 index_format = revlog_constants.INDEX_ENTRY_V2
247 index_size = revlog_constants.INDEX_ENTRY_V2.size
248 249 null_item = (0, 0, 0, -1, -1, -1, -1, nullid, 0, 0)
249 250
250 251 def replace_sidedata_info(self, i, sidedata_offset, sidedata_length):
251 252 """
252 253 Replace an existing index entry's sidedata offset and length with new
253 254 ones.
254 255 This cannot be used outside of the context of sidedata rewriting,
255 256 inside the transaction that creates the revision `i`.
256 257 """
257 258 if i < 0:
258 259 raise KeyError
259 260 self._check_index(i)
260 261 sidedata_format = b">Qi"
261 262 packed_size = struct.calcsize(sidedata_format)
262 263 if i >= self._lgt:
263 264 packed = _pack(sidedata_format, sidedata_offset, sidedata_length)
264 265 old = self._extra[i - self._lgt]
265 266 new = old[:64] + packed + old[64 + packed_size :]
266 267 self._extra[i - self._lgt] = new
267 268 else:
268 269 msg = b"cannot rewrite entries outside of this transaction"
269 270 raise KeyError(msg)
270 271
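`replace_sidedata_info` splices a freshly packed `>Qi` pair over twelve bytes at position 64 of an already-packed entry. The same splice in isolation; the 80-byte entry below is an arbitrary stand-in for a real packed v2 entry:

    import struct

    SIDEDATA = struct.Struct(b'>Qi')  # (sidedata offset, sidedata length), 12 bytes
    FIELD_POS = 64                    # position used by replace_sidedata_info above

    def patch_sidedata(entry, offset, length):
        packed = SIDEDATA.pack(offset, length)
        return entry[:FIELD_POS] + packed + entry[FIELD_POS + SIDEDATA.size :]

    entry = b'\x00' * 80
    patched = patch_sidedata(entry, 42, 7)
    assert SIDEDATA.unpack(patched[FIELD_POS : FIELD_POS + SIDEDATA.size]) == (42, 7)
    assert len(patched) == len(entry)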
271 272
272 273 class IndexObject2(Index2Mixin, IndexObject):
273 274 pass
274 275
275 276
276 277 class InlinedIndexObject2(Index2Mixin, InlinedIndexObject):
277 278 def _inline_scan(self, lgt):
278 279 sidedata_length_pos = 72
279 280 off = 0
280 281 if lgt is not None:
281 282 self._offsets = [0] * lgt
282 283 count = 0
283 while off <= len(self._data) - self.index_size:
284 while off <= len(self._data) - self.entry_size:
284 285 start = off + self.big_int_size
285 286 (data_size,) = struct.unpack(
286 287 b'>i',
287 288 self._data[start : start + self.int_size],
288 289 )
289 290 start = off + sidedata_length_pos
290 291 (side_data_size,) = struct.unpack(
291 292 b'>i', self._data[start : start + self.int_size]
292 293 )
293 294 if lgt is not None:
294 295 self._offsets[count] = off
295 296 count += 1
296 off += self.index_size + data_size + side_data_size
297 off += self.entry_size + data_size + side_data_size
297 298 if off != len(self._data):
298 299 raise ValueError(b"corrupted data")
299 300 return count
300 301
301 302
302 303 def parse_index_devel_nodemap(data, inline):
303 304 """like parse_index2, but alway return a PersistentNodeMapIndexObject"""
304 305 return PersistentNodeMapIndexObject(data), None
305 306
306 307
307 308 def parse_dirstate(dmap, copymap, st):
308 309 parents = [st[:20], st[20:40]]
309 310 # dereference fields so they will be local in loop
310 311 format = b">cllll"
311 312 e_size = struct.calcsize(format)
312 313 pos1 = 40
313 314 l = len(st)
314 315
315 316 # the inner loop
316 317 while pos1 < l:
317 318 pos2 = pos1 + e_size
318 319 e = _unpack(b">cllll", st[pos1:pos2]) # a literal here is faster
319 320 pos1 = pos2 + e[4]
320 321 f = st[pos2:pos1]
321 322 if b'\0' in f:
322 323 f, c = f.split(b'\0')
323 324 copymap[f] = c
324 325 dmap[f] = e[:4]
325 326 return parents
326 327
327 328
328 329 def pack_dirstate(dmap, copymap, pl, now):
329 330 now = int(now)
330 331 cs = stringio()
331 332 write = cs.write
332 333 write(b"".join(pl))
333 334 for f, e in pycompat.iteritems(dmap):
334 335 if e[0] == b'n' and e[3] == now:
335 336 # The file was last modified "simultaneously" with the current
336 337 # write to dirstate (i.e. within the same second for file-
337 338 # systems with a granularity of 1 sec). This commonly happens
338 339 # for at least a couple of files on 'update'.
339 340 # The user could change the file without changing its size
340 341 # within the same second. Invalidate the file's mtime in
341 342 # dirstate, forcing future 'status' calls to compare the
342 343 # contents of the file if the size is the same. This prevents
343 344 # mistakenly treating such files as clean.
344 345 e = dirstatetuple(e[0], e[1], e[2], -1)
345 346 dmap[f] = e
346 347
347 348 if f in copymap:
348 349 f = b"%s\0%s" % (f, copymap[f])
349 350 e = _pack(b">cllll", e[0], e[1], e[2], e[3], len(f))
350 351 write(e)
351 352 write(f)
352 353 return cs.getvalue()
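Both `parse_dirstate` and `pack_dirstate` revolve around the `>cllll` record (state, mode, size, mtime, filename length) followed by the filename, with an optional NUL-separated copy source appended to it. A minimal roundtrip of one entry, with illustrative values:

    import struct

    DIRSTATE = struct.Struct(b'>cllll')  # state, mode, size, mtime, name length

    name = b'src/file.py\x00src/orig.py'  # filename plus copy source
    record = DIRSTATE.pack(b'n', 0o644, 12, 0, len(name)) + name

    e = DIRSTATE.unpack(record[: DIRSTATE.size])
    f = record[DIRSTATE.size : DIRSTATE.size + e[4]]
    assert f.split(b'\x00') == [b'src/file.py', b'src/orig.py']
    assert e[:4] == (b'n', 0o644, 12, 0)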
@@ -1,3249 +1,3242 b''
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 from __future__ import absolute_import
15 15
16 16 import binascii
17 17 import collections
18 18 import contextlib
19 19 import errno
20 20 import io
21 21 import os
22 22 import struct
23 23 import zlib
24 24
25 25 # import stuff from node for others to import from revlog
26 26 from .node import (
27 27 bin,
28 28 hex,
29 29 nullhex,
30 30 nullid,
31 31 nullrev,
32 32 sha1nodeconstants,
33 33 short,
34 34 wdirfilenodeids,
35 35 wdirhex,
36 36 wdirid,
37 37 wdirrev,
38 38 )
39 39 from .i18n import _
40 40 from .pycompat import getattr
41 41 from .revlogutils.constants import (
42 42 FLAG_GENERALDELTA,
43 43 FLAG_INLINE_DATA,
44 44 INDEX_ENTRY_V0,
45 45 INDEX_ENTRY_V1,
46 46 INDEX_ENTRY_V2,
47 47 INDEX_HEADER,
48 48 REVLOGV0,
49 49 REVLOGV1,
50 50 REVLOGV1_FLAGS,
51 51 REVLOGV2,
52 52 REVLOGV2_FLAGS,
53 53 REVLOG_DEFAULT_FLAGS,
54 54 REVLOG_DEFAULT_FORMAT,
55 55 REVLOG_DEFAULT_VERSION,
56 56 )
57 57 from .revlogutils.flagutil import (
58 58 REVIDX_DEFAULT_FLAGS,
59 59 REVIDX_ELLIPSIS,
60 60 REVIDX_EXTSTORED,
61 61 REVIDX_FLAGS_ORDER,
62 62 REVIDX_HASCOPIESINFO,
63 63 REVIDX_ISCENSORED,
64 64 REVIDX_RAWTEXT_CHANGING_FLAGS,
65 65 REVIDX_SIDEDATA,
66 66 )
67 67 from .thirdparty import attr
68 68 from . import (
69 69 ancestor,
70 70 dagop,
71 71 error,
72 72 mdiff,
73 73 policy,
74 74 pycompat,
75 75 templatefilters,
76 76 util,
77 77 )
78 78 from .interfaces import (
79 79 repository,
80 80 util as interfaceutil,
81 81 )
82 82 from .revlogutils import (
83 83 deltas as deltautil,
84 84 flagutil,
85 85 nodemap as nodemaputil,
86 86 sidedata as sidedatautil,
87 87 )
88 88 from .utils import (
89 89 storageutil,
90 90 stringutil,
91 91 )
92 92
93 93 # blanket usage of all the names to prevent pyflakes complaints
94 94 # We need these names available in the module for extensions.
95 95 REVLOGV0
96 96 REVLOGV1
97 97 REVLOGV2
98 98 FLAG_INLINE_DATA
99 99 FLAG_GENERALDELTA
100 100 REVLOG_DEFAULT_FLAGS
101 101 REVLOG_DEFAULT_FORMAT
102 102 REVLOG_DEFAULT_VERSION
103 103 REVLOGV1_FLAGS
104 104 REVLOGV2_FLAGS
105 105 REVIDX_ISCENSORED
106 106 REVIDX_ELLIPSIS
107 107 REVIDX_SIDEDATA
108 108 REVIDX_HASCOPIESINFO
109 109 REVIDX_EXTSTORED
110 110 REVIDX_DEFAULT_FLAGS
111 111 REVIDX_FLAGS_ORDER
112 112 REVIDX_RAWTEXT_CHANGING_FLAGS
113 113
114 114 parsers = policy.importmod('parsers')
115 115 rustancestor = policy.importrust('ancestor')
116 116 rustdagop = policy.importrust('dagop')
117 117 rustrevlog = policy.importrust('revlog')
118 118
119 119 # Aliased for performance.
120 120 _zlibdecompress = zlib.decompress
121 121
122 122 # max size of revlog with inline data
123 123 _maxinline = 131072
124 124 _chunksize = 1048576
125 125
126 126 # Flag processors for REVIDX_ELLIPSIS.
127 127 def ellipsisreadprocessor(rl, text):
128 128 return text, False
129 129
130 130
131 131 def ellipsiswriteprocessor(rl, text):
132 132 return text, False
133 133
134 134
135 135 def ellipsisrawprocessor(rl, text):
136 136 return False
137 137
138 138
139 139 ellipsisprocessor = (
140 140 ellipsisreadprocessor,
141 141 ellipsiswriteprocessor,
142 142 ellipsisrawprocessor,
143 143 )
144 144
145 145
146 146 def getoffset(q):
147 147 return int(q >> 16)
148 148
149 149
150 150 def gettype(q):
151 151 return int(q & 0xFFFF)
152 152
153 153
154 154 def offset_type(offset, type):
155 155 if (type & ~flagutil.REVIDX_KNOWN_FLAGS) != 0:
156 156 raise ValueError(b'unknown revlog index flags')
157 157 return int(int(offset) << 16 | type)
158 158
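`offset_type`, `getoffset` and `gettype` pack a file offset and sixteen flag bits into one integer: offset in the high bits, flags in the low 16. A quick standalone roundtrip (the flag value is arbitrary and assumed to be within REVIDX_KNOWN_FLAGS):

    # standalone mirror of the packing above
    def pack_offset_flags(offset, flags):
        return (offset << 16) | flags

    v = pack_offset_flags(4096, 0x0001)
    assert v >> 16 == 4096       # what getoffset() extracts
    assert v & 0xFFFF == 0x0001  # what gettype() extracts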
159 159
160 160 def _verify_revision(rl, skipflags, state, node):
161 161 """Verify the integrity of the given revlog ``node`` while providing a hook
162 162 point for extensions to influence the operation."""
163 163 if skipflags:
164 164 state[b'skipread'].add(node)
165 165 else:
166 166 # Side-effect: read content and verify hash.
167 167 rl.revision(node)
168 168
169 169
170 170 # True if a fast implementation for persistent-nodemap is available
171 171 #
172 172 # We also consider we have a "fast" implementation in "pure" python because
173 173 # people using pure don't really have performance considerations (and a
174 174 # wheelbarrow of other slowness sources)
175 175 HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or util.safehasattr(
176 176 parsers, 'BaseIndexObject'
177 177 )
178 178
179 179
180 180 @attr.s(slots=True, frozen=True)
181 181 class _revisioninfo(object):
182 182 """Information about a revision that allows building its fulltext
183 183 node: expected hash of the revision
184 184 p1, p2: parent revs of the revision
185 185 btext: built text cache consisting of a one-element list
186 186 cachedelta: (baserev, uncompressed_delta) or None
187 187 flags: flags associated with the revision storage
188 188
189 189 One of btext[0] or cachedelta must be set.
190 190 """
191 191
192 192 node = attr.ib()
193 193 p1 = attr.ib()
194 194 p2 = attr.ib()
195 195 btext = attr.ib()
196 196 textlen = attr.ib()
197 197 cachedelta = attr.ib()
198 198 flags = attr.ib()
199 199
200 200
201 201 @interfaceutil.implementer(repository.irevisiondelta)
202 202 @attr.s(slots=True)
203 203 class revlogrevisiondelta(object):
204 204 node = attr.ib()
205 205 p1node = attr.ib()
206 206 p2node = attr.ib()
207 207 basenode = attr.ib()
208 208 flags = attr.ib()
209 209 baserevisionsize = attr.ib()
210 210 revision = attr.ib()
211 211 delta = attr.ib()
212 212 sidedata = attr.ib()
213 213 linknode = attr.ib(default=None)
214 214
215 215
216 216 @interfaceutil.implementer(repository.iverifyproblem)
217 217 @attr.s(frozen=True)
218 218 class revlogproblem(object):
219 219 warning = attr.ib(default=None)
220 220 error = attr.ib(default=None)
221 221 node = attr.ib(default=None)
222 222
223 223
224 224 class revlogoldindex(list):
225 entry_size = INDEX_ENTRY_V0.size
226
225 227 @property
226 228 def nodemap(self):
227 229 msg = b"index.nodemap is deprecated, use index.[has_node|rev|get_rev]"
228 230 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
229 231 return self._nodemap
230 232
231 233 @util.propertycache
232 234 def _nodemap(self):
233 235 nodemap = nodemaputil.NodeMap({nullid: nullrev})
234 236 for r in range(0, len(self)):
235 237 n = self[r][7]
236 238 nodemap[n] = r
237 239 return nodemap
238 240
239 241 def has_node(self, node):
240 242 """return True if the node exist in the index"""
241 243 return node in self._nodemap
242 244
243 245 def rev(self, node):
244 246 """return a revision for a node
245 247
246 248 If the node is unknown, raise a RevlogError"""
247 249 return self._nodemap[node]
248 250
249 251 def get_rev(self, node):
250 252 """return a revision for a node
251 253
252 254 If the node is unknown, return None"""
253 255 return self._nodemap.get(node)
254 256
255 257 def append(self, tup):
256 258 self._nodemap[tup[7]] = len(self)
257 259 super(revlogoldindex, self).append(tup)
258 260
259 261 def __delitem__(self, i):
260 262 if not isinstance(i, slice) or not i.stop == -1 or i.step is not None:
261 263 raise ValueError(b"deleting slices only supports a:-1 with step 1")
262 264 for r in pycompat.xrange(i.start, len(self)):
263 265 del self._nodemap[self[r][7]]
264 266 super(revlogoldindex, self).__delitem__(i)
265 267
266 268 def clearcaches(self):
267 269 self.__dict__.pop('_nodemap', None)
268 270
269 271 def __getitem__(self, i):
270 272 if i == -1:
271 273 return (0, 0, 0, -1, -1, -1, -1, nullid)
272 274 return list.__getitem__(self, i)
273 275
274 276
275 277 class revlogoldio(object):
276 def __init__(self):
277 self.size = INDEX_ENTRY_V0.size
278
279 278 def parseindex(self, data, inline):
280 s = self.size
279 s = INDEX_ENTRY_V0.size
281 280 index = []
282 281 nodemap = nodemaputil.NodeMap({nullid: nullrev})
283 282 n = off = 0
284 283 l = len(data)
285 284 while off + s <= l:
286 285 cur = data[off : off + s]
287 286 off += s
288 287 e = INDEX_ENTRY_V0.unpack(cur)
289 288 # transform to revlogv1 format
290 289 e2 = (
291 290 offset_type(e[0], 0),
292 291 e[1],
293 292 -1,
294 293 e[2],
295 294 e[3],
296 295 nodemap.get(e[4], nullrev),
297 296 nodemap.get(e[5], nullrev),
298 297 e[6],
299 298 )
300 299 index.append(e2)
301 300 nodemap[e[6]] = n
302 301 n += 1
303 302
304 303 index = revlogoldindex(index)
305 304 return index, None
306 305
307 306 def packentry(self, entry, node, version, rev):
308 307 """return the binary representation of an entry
309 308
310 309 entry: a tuple containing all the values (see index.__getitem__)
311 310 node: a callback to convert a revision to nodeid
312 311 version: the changelog version
313 312 rev: the revision number
314 313 """
315 314 if gettype(entry[0]):
316 315 raise error.RevlogError(
317 316 _(b'index entry flags need revlog version 1')
318 317 )
319 318 e2 = (
320 319 getoffset(entry[0]),
321 320 entry[1],
322 321 entry[3],
323 322 entry[4],
324 323 node(entry[5]),
325 324 node(entry[6]),
326 325 entry[7],
327 326 )
328 327 return INDEX_ENTRY_V0.pack(*e2)
329 328
330 329
331 330 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
332 331 # signed integer)
333 332 _maxentrysize = 0x7FFFFFFF
334 333
335 334
336 335 class revlogio(object):
337 def __init__(self):
338 self.size = INDEX_ENTRY_V1.size
339
340 336 def parseindex(self, data, inline):
341 337 # call the C implementation to parse the index data
342 338 index, cache = parsers.parse_index2(data, inline)
343 339 return index, cache
344 340
345 341 def packentry(self, entry, node, version, rev):
346 342 p = INDEX_ENTRY_V1.pack(*entry)
347 343 if rev == 0:
348 344 p = INDEX_HEADER.pack(version) + p[4:]
349 345 return p
350 346
351 347
352 348 class revlogv2io(object):
353 def __init__(self):
354 self.size = INDEX_ENTRY_V2.size
355
356 349 def parseindex(self, data, inline):
357 350 index, cache = parsers.parse_index2(data, inline, revlogv2=True)
358 351 return index, cache
359 352
360 353 def packentry(self, entry, node, version, rev):
361 354 p = INDEX_ENTRY_V2.pack(*entry)
362 355 if rev == 0:
363 356 p = INDEX_HEADER.pack(version) + p[4:]
364 357 return p
365 358
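Both `packentry` implementations above overwrite the first four bytes of entry 0 with the revlog version header. That works because revision 0 always starts at offset 0, so the leading bytes of its offset field would otherwise be zero anyway. A sketch of the trick with a toy 8-byte entry and a made-up version number:

    import struct

    HEADER = struct.Struct(b'>I')

    def pack_with_header(entry_bytes, version, rev):
        # splice the version header over the leading (zero) offset bytes
        if rev == 0:
            return HEADER.pack(version) + entry_bytes[4:]
        return entry_bytes

    entry0 = struct.pack(b'>Q', 0)  # offset 0, as revision 0 always has
    stored = pack_with_header(entry0, 0xDEAD, 0)
    assert HEADER.unpack(stored[:4])[0] == 0xDEAD
    assert stored[4:] == entry0[4:]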
366 359
367 360 NodemapRevlogIO = None
368 361
369 362 if util.safehasattr(parsers, 'parse_index_devel_nodemap'):
370 363
371 364 class NodemapRevlogIO(revlogio):
372 365 """A debug oriented IO class that return a PersistentNodeMapIndexObject
373 366
374 367 The PersistentNodeMapIndexObject is meant to test the persistent nodemap feature.
375 368 """
376 369
377 370 def parseindex(self, data, inline):
378 371 index, cache = parsers.parse_index_devel_nodemap(data, inline)
379 372 return index, cache
380 373
381 374
382 375 class rustrevlogio(revlogio):
383 376 def parseindex(self, data, inline):
384 377 index, cache = super(rustrevlogio, self).parseindex(data, inline)
385 378 return rustrevlog.MixedIndex(index), cache
386 379
387 380
388 381 class revlog(object):
389 382 """
390 383 the underlying revision storage object
391 384
392 385 A revlog consists of two parts, an index and the revision data.
393 386
394 387 The index is a file with a fixed record size containing
395 388 information on each revision, including its nodeid (hash), the
396 389 nodeids of its parents, the position and offset of its data within
397 390 the data file, and the revision it's based on. Finally, each entry
398 391 contains a linkrev entry that can serve as a pointer to external
399 392 data.
400 393
401 394 The revision data itself is a linear collection of data chunks.
402 395 Each chunk represents a revision and is usually represented as a
403 396 delta against the previous chunk. To bound lookup time, runs of
404 397 deltas are limited to about 2 times the length of the original
405 398 version data. This makes retrieval of a version proportional to
406 399 its size, or O(1) relative to the number of revisions.
407 400
408 401 Both pieces of the revlog are written to in an append-only
409 402 fashion, which means we never need to rewrite a file to insert or
410 403 remove data, and can use some simple techniques to avoid the need
411 404 for locking while reading.
412 405
413 406 If checkambig, indexfile is opened with checkambig=True at
414 407 writing, to avoid file stat ambiguity.
415 408
416 409 If mmaplargeindex is True, and an mmapindexthreshold is set, the
417 410 index will be mmapped rather than read if it is larger than the
418 411 configured threshold.
419 412
420 413 If censorable is True, the revlog can have censored revisions.
421 414
422 415 If `upperboundcomp` is not None, this is the expected maximal gain from
423 416 compression for the data content.
424 417
425 418 `concurrencychecker` is an optional function that receives 3 arguments: a
426 419 file handle, a filename, and an expected position. It should check whether
427 420 the current position in the file handle is valid, and log/warn/fail (by
428 421 raising).
429 422 """
430 423
431 424 _flagserrorclass = error.RevlogError
432 425
433 426 def __init__(
434 427 self,
435 428 opener,
436 429 indexfile,
437 430 datafile=None,
438 431 checkambig=False,
439 432 mmaplargeindex=False,
440 433 censorable=False,
441 434 upperboundcomp=None,
442 435 persistentnodemap=False,
443 436 concurrencychecker=None,
444 437 ):
445 438 """
446 439 create a revlog object
447 440
448 441 opener is a function that abstracts the file opening operation
449 442 and can be used to implement COW semantics or the like.
450 443
451 444 """
452 445 self.upperboundcomp = upperboundcomp
453 446 self.indexfile = indexfile
454 447 self.datafile = datafile or (indexfile[:-2] + b".d")
455 448 self.nodemap_file = None
456 449 if persistentnodemap:
457 450 self.nodemap_file = nodemaputil.get_nodemap_file(
458 451 opener, self.indexfile
459 452 )
460 453
461 454 self.opener = opener
462 455 # When True, indexfile is opened with checkambig=True at writing, to
463 456 # avoid file stat ambiguity.
464 457 self._checkambig = checkambig
465 458 self._mmaplargeindex = mmaplargeindex
466 459 self._censorable = censorable
467 460 # 3-tuple of (node, rev, text) for a raw revision.
468 461 self._revisioncache = None
469 462 # Maps rev to chain base rev.
470 463 self._chainbasecache = util.lrucachedict(100)
471 464 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
472 465 self._chunkcache = (0, b'')
473 466 # How much data to read and cache into the raw revlog data cache.
474 467 self._chunkcachesize = 65536
475 468 self._maxchainlen = None
476 469 self._deltabothparents = True
477 470 self.index = None
478 471 self._nodemap_docket = None
479 472 # Mapping of partial identifiers to full nodes.
480 473 self._pcache = {}
481 474 # Mapping of revision integer to full node.
482 475 self._compengine = b'zlib'
483 476 self._compengineopts = {}
484 477 self._maxdeltachainspan = -1
485 478 self._withsparseread = False
486 479 self._sparserevlog = False
487 480 self._srdensitythreshold = 0.50
488 481 self._srmingapsize = 262144
489 482
490 483 # Make copy of flag processors so each revlog instance can support
491 484 # custom flags.
492 485 self._flagprocessors = dict(flagutil.flagprocessors)
493 486
494 487 # 2-tuple of file handles being used for active writing.
495 488 self._writinghandles = None
496 489
497 490 self._loadindex()
498 491
499 492 self._concurrencychecker = concurrencychecker
500 493
501 494 def _loadindex(self):
502 495 mmapindexthreshold = None
503 496 opts = self.opener.options
504 497
505 498 if b'revlogv2' in opts:
506 499 newversionflags = REVLOGV2 | FLAG_INLINE_DATA
507 500 elif b'revlogv1' in opts:
508 501 newversionflags = REVLOGV1 | FLAG_INLINE_DATA
509 502 if b'generaldelta' in opts:
510 503 newversionflags |= FLAG_GENERALDELTA
511 504 elif b'revlogv0' in self.opener.options:
512 505 newversionflags = REVLOGV0
513 506 else:
514 507 newversionflags = REVLOG_DEFAULT_VERSION
515 508
516 509 if b'chunkcachesize' in opts:
517 510 self._chunkcachesize = opts[b'chunkcachesize']
518 511 if b'maxchainlen' in opts:
519 512 self._maxchainlen = opts[b'maxchainlen']
520 513 if b'deltabothparents' in opts:
521 514 self._deltabothparents = opts[b'deltabothparents']
522 515 self._lazydelta = bool(opts.get(b'lazydelta', True))
523 516 self._lazydeltabase = False
524 517 if self._lazydelta:
525 518 self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
526 519 if b'compengine' in opts:
527 520 self._compengine = opts[b'compengine']
528 521 if b'zlib.level' in opts:
529 522 self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
530 523 if b'zstd.level' in opts:
531 524 self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
532 525 if b'maxdeltachainspan' in opts:
533 526 self._maxdeltachainspan = opts[b'maxdeltachainspan']
534 527 if self._mmaplargeindex and b'mmapindexthreshold' in opts:
535 528 mmapindexthreshold = opts[b'mmapindexthreshold']
536 529 self.hassidedata = bool(opts.get(b'side-data', False))
537 530 self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
538 531 withsparseread = bool(opts.get(b'with-sparse-read', False))
539 532 # sparse-revlog forces sparse-read
540 533 self._withsparseread = self._sparserevlog or withsparseread
541 534 if b'sparse-read-density-threshold' in opts:
542 535 self._srdensitythreshold = opts[b'sparse-read-density-threshold']
543 536 if b'sparse-read-min-gap-size' in opts:
544 537 self._srmingapsize = opts[b'sparse-read-min-gap-size']
545 538 if opts.get(b'enableellipsis'):
546 539 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
547 540
548 541 # revlog v0 doesn't have flag processors
549 542 for flag, processor in pycompat.iteritems(
550 543 opts.get(b'flagprocessors', {})
551 544 ):
552 545 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
553 546
554 547 if self._chunkcachesize <= 0:
555 548 raise error.RevlogError(
556 549 _(b'revlog chunk cache size %r is not greater than 0')
557 550 % self._chunkcachesize
558 551 )
559 552 elif self._chunkcachesize & (self._chunkcachesize - 1):
560 553 raise error.RevlogError(
561 554 _(b'revlog chunk cache size %r is not a power of 2')
562 555 % self._chunkcachesize
563 556 )
564 557
565 558 indexdata = b''
566 559 self._initempty = True
567 560 try:
568 561 with self._indexfp() as f:
569 562 if (
570 563 mmapindexthreshold is not None
571 564 and self.opener.fstat(f).st_size >= mmapindexthreshold
572 565 ):
573 566 # TODO: should call .close() to release resources without
574 567 # relying on Python GC
575 568 indexdata = util.buffer(util.mmapread(f))
576 569 else:
577 570 indexdata = f.read()
578 571 if len(indexdata) > 0:
579 572 versionflags = INDEX_HEADER.unpack(indexdata[:4])[0]
580 573 self._initempty = False
581 574 else:
582 575 versionflags = newversionflags
583 576 except IOError as inst:
584 577 if inst.errno != errno.ENOENT:
585 578 raise
586 579
587 580 versionflags = newversionflags
588 581
589 582 self.version = versionflags
590 583
591 584 flags = versionflags & ~0xFFFF
592 585 fmt = versionflags & 0xFFFF
593 586
594 587 if fmt == REVLOGV0:
595 588 if flags:
596 589 raise error.RevlogError(
597 590 _(b'unknown flags (%#04x) in version %d revlog %s')
598 591 % (flags >> 16, fmt, self.indexfile)
599 592 )
600 593
601 594 self._inline = False
602 595 self._generaldelta = False
603 596
604 597 elif fmt == REVLOGV1:
605 598 if flags & ~REVLOGV1_FLAGS:
606 599 raise error.RevlogError(
607 600 _(b'unknown flags (%#04x) in version %d revlog %s')
608 601 % (flags >> 16, fmt, self.indexfile)
609 602 )
610 603
611 604 self._inline = versionflags & FLAG_INLINE_DATA
612 605 self._generaldelta = versionflags & FLAG_GENERALDELTA
613 606
614 607 elif fmt == REVLOGV2:
615 608 if flags & ~REVLOGV2_FLAGS:
616 609 raise error.RevlogError(
617 610 _(b'unknown flags (%#04x) in version %d revlog %s')
618 611 % (flags >> 16, fmt, self.indexfile)
619 612 )
620 613
621 614 # There is a bug in the transaction handling when going from an
622 615 # inline revlog to a separate index and data file. Turn it off until
623 616 # it's fixed, since v2 revlogs sometimes get rewritten on exchange.
624 617 # See issue6485
625 618 self._inline = False
626 619 # generaldelta implied by version 2 revlogs.
627 620 self._generaldelta = True
628 621
629 622 else:
630 623 raise error.RevlogError(
631 624 _(b'unknown version (%d) in revlog %s') % (fmt, self.indexfile)
632 625 )
633 626
634 627 self.nodeconstants = sha1nodeconstants
635 628 self.nullid = self.nodeconstants.nullid
636 629
637 630 # sparse-revlog can't be on without general-delta (issue6056)
638 631 if not self._generaldelta:
639 632 self._sparserevlog = False
640 633
641 634 self._storedeltachains = True
642 635
643 636 devel_nodemap = (
644 637 self.nodemap_file
645 638 and opts.get(b'devel-force-nodemap', False)
646 639 and NodemapRevlogIO is not None
647 640 )
648 641
649 642 use_rust_index = False
650 643 if rustrevlog is not None:
651 644 if self.nodemap_file is not None:
652 645 use_rust_index = True
653 646 else:
654 647 use_rust_index = self.opener.options.get(b'rust.index')
655 648
656 649 self._io = revlogio()
657 650 if self.version == REVLOGV0:
658 651 self._io = revlogoldio()
659 652 elif fmt == REVLOGV2:
660 653 self._io = revlogv2io()
661 654 elif devel_nodemap:
662 655 self._io = NodemapRevlogIO()
663 656 elif use_rust_index:
664 657 self._io = rustrevlogio()
665 658 try:
666 659 d = self._io.parseindex(indexdata, self._inline)
667 660 index, _chunkcache = d
668 661 use_nodemap = (
669 662 not self._inline
670 663 and self.nodemap_file is not None
671 664 and util.safehasattr(index, 'update_nodemap_data')
672 665 )
673 666 if use_nodemap:
674 667 nodemap_data = nodemaputil.persisted_data(self)
675 668 if nodemap_data is not None:
676 669 docket = nodemap_data[0]
677 670 if (
678 671 len(d[0]) > docket.tip_rev
679 672 and d[0][docket.tip_rev][7] == docket.tip_node
680 673 ):
681 674 # no changelog tampering
682 675 self._nodemap_docket = docket
683 676 index.update_nodemap_data(*nodemap_data)
684 677 except (ValueError, IndexError):
685 678 raise error.RevlogError(
686 679 _(b"index %s is corrupted") % self.indexfile
687 680 )
688 681 self.index, self._chunkcache = d
689 682 if not self._chunkcache:
690 683 self._chunkclear()
691 684 # revnum -> (chain-length, sum-delta-length)
692 685 self._chaininfocache = util.lrucachedict(500)
693 686 # revlog header -> revlog compressor
694 687 self._decompressors = {}
695 688
696 689 @util.propertycache
697 690 def _compressor(self):
698 691 engine = util.compengines[self._compengine]
699 692 return engine.revlogcompressor(self._compengineopts)
700 693
701 694 def _indexfp(self, mode=b'r'):
702 695 """file object for the revlog's index file"""
703 696 args = {'mode': mode}
704 697 if mode != b'r':
705 698 args['checkambig'] = self._checkambig
706 699 if mode == b'w':
707 700 args['atomictemp'] = True
708 701 return self.opener(self.indexfile, **args)
709 702
710 703 def _datafp(self, mode=b'r'):
711 704 """file object for the revlog's data file"""
712 705 return self.opener(self.datafile, mode=mode)
713 706
714 707 @contextlib.contextmanager
715 708 def _datareadfp(self, existingfp=None):
716 709 """file object suitable to read data"""
717 710 # Use explicit file handle, if given.
718 711 if existingfp is not None:
719 712 yield existingfp
720 713
721 714 # Use a file handle being actively used for writes, if available.
722 715 # There is some danger in doing this because reads will seek the
723 716 # file. However, _writeentry() performs a SEEK_END before all writes,
724 717 # so we should be safe.
725 718 elif self._writinghandles:
726 719 if self._inline:
727 720 yield self._writinghandles[0]
728 721 else:
729 722 yield self._writinghandles[1]
730 723
731 724 # Otherwise open a new file handle.
732 725 else:
733 726 if self._inline:
734 727 func = self._indexfp
735 728 else:
736 729 func = self._datafp
737 730 with func() as fp:
738 731 yield fp
739 732
740 733 def tiprev(self):
741 734 return len(self.index) - 1
742 735
743 736 def tip(self):
744 737 return self.node(self.tiprev())
745 738
746 739 def __contains__(self, rev):
747 740 return 0 <= rev < len(self)
748 741
749 742 def __len__(self):
750 743 return len(self.index)
751 744
752 745 def __iter__(self):
753 746 return iter(pycompat.xrange(len(self)))
754 747
755 748 def revs(self, start=0, stop=None):
756 749 """iterate over all rev in this revlog (from start to stop)"""
757 750 return storageutil.iterrevs(len(self), start=start, stop=stop)
758 751
759 752 @property
760 753 def nodemap(self):
761 754 msg = (
762 755 b"revlog.nodemap is deprecated, "
763 756 b"use revlog.index.[has_node|rev|get_rev]"
764 757 )
765 758 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
766 759 return self.index.nodemap
767 760
768 761 @property
769 762 def _nodecache(self):
770 763 msg = b"revlog._nodecache is deprecated, use revlog.index.nodemap"
771 764 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
772 765 return self.index.nodemap
773 766
774 767 def hasnode(self, node):
775 768 try:
776 769 self.rev(node)
777 770 return True
778 771 except KeyError:
779 772 return False
780 773
781 774 def candelta(self, baserev, rev):
782 775 """whether two revisions (baserev, rev) can be delta-ed or not"""
783 776 # Disable delta if either rev requires a content-changing flag
784 777 # processor (ex. LFS). This is because such flag processor can alter
785 778 # the rawtext content that the delta will be based on, and two clients
786 779 # could have a same revlog node with different flags (i.e. different
787 780 # rawtext contents) and the delta could be incompatible.
788 781 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
789 782 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
790 783 ):
791 784 return False
792 785 return True
793 786
794 787 def update_caches(self, transaction):
795 788 if self.nodemap_file is not None:
796 789 if transaction is None:
797 790 nodemaputil.update_persistent_nodemap(self)
798 791 else:
799 792 nodemaputil.setup_persistent_nodemap(transaction, self)
800 793
801 794 def clearcaches(self):
802 795 self._revisioncache = None
803 796 self._chainbasecache.clear()
804 797 self._chunkcache = (0, b'')
805 798 self._pcache = {}
806 799 self._nodemap_docket = None
807 800 self.index.clearcaches()
809 802 # The python code is the one responsible for validating the docket, so we
809 802 # end up having to refresh it here.
810 803 use_nodemap = (
811 804 not self._inline
812 805 and self.nodemap_file is not None
813 806 and util.safehasattr(self.index, 'update_nodemap_data')
814 807 )
815 808 if use_nodemap:
816 809 nodemap_data = nodemaputil.persisted_data(self)
817 810 if nodemap_data is not None:
818 811 self._nodemap_docket = nodemap_data[0]
819 812 self.index.update_nodemap_data(*nodemap_data)
820 813
821 814 def rev(self, node):
822 815 try:
823 816 return self.index.rev(node)
824 817 except TypeError:
825 818 raise
826 819 except error.RevlogError:
827 820 # parsers.c radix tree lookup failed
828 821 if node == wdirid or node in wdirfilenodeids:
829 822 raise error.WdirUnsupported
830 823 raise error.LookupError(node, self.indexfile, _(b'no node'))
831 824
832 825 # Accessors for index entries.
833 826
834 827 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
835 828 # are flags.
836 829 def start(self, rev):
837 830 return int(self.index[rev][0] >> 16)
838 831
839 832 def flags(self, rev):
840 833 return self.index[rev][0] & 0xFFFF
841 834
842 835 def length(self, rev):
843 836 return self.index[rev][1]
844 837
845 838 def sidedata_length(self, rev):
846 839 if self.version & 0xFFFF != REVLOGV2:
847 840 return 0
848 841 return self.index[rev][9]
849 842
850 843 def rawsize(self, rev):
851 844 """return the length of the uncompressed text for a given revision"""
852 845 l = self.index[rev][2]
853 846 if l >= 0:
854 847 return l
855 848
856 849 t = self.rawdata(rev)
857 850 return len(t)
858 851
859 852 def size(self, rev):
860 853 """length of non-raw text (processed by a "read" flag processor)"""
861 854 # fast path: if no "read" flag processor could change the content,
862 855 # size is rawsize. note: ELLIPSIS is known to not change the content.
863 856 flags = self.flags(rev)
864 857 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
865 858 return self.rawsize(rev)
866 859
867 860 return len(self.revision(rev, raw=False))
868 861
869 862 def chainbase(self, rev):
870 863 base = self._chainbasecache.get(rev)
871 864 if base is not None:
872 865 return base
873 866
874 867 index = self.index
875 868 iterrev = rev
876 869 base = index[iterrev][3]
877 870 while base != iterrev:
878 871 iterrev = base
879 872 base = index[iterrev][3]
880 873
881 874 self._chainbasecache[rev] = base
882 875 return base
883 876
884 877 def linkrev(self, rev):
885 878 return self.index[rev][4]
886 879
887 880 def parentrevs(self, rev):
888 881 try:
889 882 entry = self.index[rev]
890 883 except IndexError:
891 884 if rev == wdirrev:
892 885 raise error.WdirUnsupported
893 886 raise
894 887 if entry[5] == nullrev:
895 888 return entry[6], entry[5]
896 889 else:
897 890 return entry[5], entry[6]
898 891
899 892 # fast parentrevs(rev) where rev isn't filtered
900 893 _uncheckedparentrevs = parentrevs
901 894
902 895 def node(self, rev):
903 896 try:
904 897 return self.index[rev][7]
905 898 except IndexError:
906 899 if rev == wdirrev:
907 900 raise error.WdirUnsupported
908 901 raise
909 902
910 903 # Derived from index values.
911 904
912 905 def end(self, rev):
913 906 return self.start(rev) + self.length(rev)
914 907
915 908 def parents(self, node):
916 909 i = self.index
917 910 d = i[self.rev(node)]
918 911 # inline node() to avoid function call overhead
919 912 if d[5] == nullid:
920 913 return i[d[6]][7], i[d[5]][7]
921 914 else:
922 915 return i[d[5]][7], i[d[6]][7]
923 916
924 917 def chainlen(self, rev):
925 918 return self._chaininfo(rev)[0]
926 919
927 920 def _chaininfo(self, rev):
928 921 chaininfocache = self._chaininfocache
929 922 if rev in chaininfocache:
930 923 return chaininfocache[rev]
931 924 index = self.index
932 925 generaldelta = self._generaldelta
933 926 iterrev = rev
934 927 e = index[iterrev]
935 928 clen = 0
936 929 compresseddeltalen = 0
937 930 while iterrev != e[3]:
938 931 clen += 1
939 932 compresseddeltalen += e[1]
940 933 if generaldelta:
941 934 iterrev = e[3]
942 935 else:
943 936 iterrev -= 1
944 937 if iterrev in chaininfocache:
945 938 t = chaininfocache[iterrev]
946 939 clen += t[0]
947 940 compresseddeltalen += t[1]
948 941 break
949 942 e = index[iterrev]
950 943 else:
951 944 # Add text length of base since decompressing that also takes
952 945 # work. For cache hits the length is already included.
953 946 compresseddeltalen += e[1]
954 947 r = (clen, compresseddeltalen)
955 948 chaininfocache[rev] = r
956 949 return r
957 950
958 951 def _deltachain(self, rev, stoprev=None):
959 952 """Obtain the delta chain for a revision.
960 953
961 954 ``stoprev`` specifies a revision to stop at. If not specified, we
962 955 stop at the base of the chain.
963 956
964 957 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
965 958 revs in ascending order and ``stopped`` is a bool indicating whether
966 959 ``stoprev`` was hit.
967 960 """
968 961 # Try C implementation.
969 962 try:
970 963 return self.index.deltachain(rev, stoprev, self._generaldelta)
971 964 except AttributeError:
972 965 pass
973 966
974 967 chain = []
975 968
976 969 # Alias to prevent attribute lookup in tight loop.
977 970 index = self.index
978 971 generaldelta = self._generaldelta
979 972
980 973 iterrev = rev
981 974 e = index[iterrev]
982 975 while iterrev != e[3] and iterrev != stoprev:
983 976 chain.append(iterrev)
984 977 if generaldelta:
985 978 iterrev = e[3]
986 979 else:
987 980 iterrev -= 1
988 981 e = index[iterrev]
989 982
990 983 if iterrev == stoprev:
991 984 stopped = True
992 985 else:
993 986 chain.append(iterrev)
994 987 stopped = False
995 988
996 989 chain.reverse()
997 990 return chain, stopped
998 991
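The pure-python fallback above walks delta-base pointers backwards, then reverses. A standalone model of the same contract over a toy base table (generaldelta-style: each revision records the revision it deltas against, full snapshots being their own base):

    def deltachain(bases, rev, stoprev=None):
        # mirror of the fallback loop in _deltachain above
        chain = []
        iterrev = rev
        while bases[iterrev] != iterrev and iterrev != stoprev:
            chain.append(iterrev)
            iterrev = bases[iterrev]
        if iterrev == stoprev:
            stopped = True
        else:
            chain.append(iterrev)
            stopped = False
        chain.reverse()
        return chain, stopped

    # rev 0 is a snapshot, 1 deltas against 0, 2 against 1
    bases = {0: 0, 1: 0, 2: 1}
    assert deltachain(bases, 2) == ([0, 1, 2], False)
    assert deltachain(bases, 2, stoprev=0) == ([1, 2], True)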
999 992 def ancestors(self, revs, stoprev=0, inclusive=False):
1000 993 """Generate the ancestors of 'revs' in reverse revision order.
1001 994 Does not generate revs lower than stoprev.
1002 995
1003 996 See the documentation for ancestor.lazyancestors for more details."""
1004 997
1005 998 # first, make sure start revisions aren't filtered
1006 999 revs = list(revs)
1007 1000 checkrev = self.node
1008 1001 for r in revs:
1009 1002 checkrev(r)
1010 1003 # and we're sure ancestors aren't filtered as well
1011 1004
1012 1005 if rustancestor is not None:
1013 1006 lazyancestors = rustancestor.LazyAncestors
1014 1007 arg = self.index
1015 1008 else:
1016 1009 lazyancestors = ancestor.lazyancestors
1017 1010 arg = self._uncheckedparentrevs
1018 1011 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
1019 1012
1020 1013 def descendants(self, revs):
1021 1014 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
1022 1015
1023 1016 def findcommonmissing(self, common=None, heads=None):
1024 1017 """Return a tuple of the ancestors of common and the ancestors of heads
1025 1018 that are not ancestors of common. In revset terminology, we return the
1026 1019 tuple:
1027 1020
1028 1021 ::common, (::heads) - (::common)
1029 1022
1030 1023 The list is sorted by revision number, meaning it is
1031 1024 topologically sorted.
1032 1025
1033 1026 'heads' and 'common' are both lists of node IDs. If heads is
1034 1027 not supplied, uses all of the revlog's heads. If common is not
1035 1028 supplied, uses nullid."""
1036 1029 if common is None:
1037 1030 common = [nullid]
1038 1031 if heads is None:
1039 1032 heads = self.heads()
1040 1033
1041 1034 common = [self.rev(n) for n in common]
1042 1035 heads = [self.rev(n) for n in heads]
1043 1036
1044 1037 # we want the ancestors, but inclusive
1045 1038 class lazyset(object):
1046 1039 def __init__(self, lazyvalues):
1047 1040 self.addedvalues = set()
1048 1041 self.lazyvalues = lazyvalues
1049 1042
1050 1043 def __contains__(self, value):
1051 1044 return value in self.addedvalues or value in self.lazyvalues
1052 1045
1053 1046 def __iter__(self):
1054 1047 added = self.addedvalues
1055 1048 for r in added:
1056 1049 yield r
1057 1050 for r in self.lazyvalues:
1058 1051 if not r in added:
1059 1052 yield r
1060 1053
1061 1054 def add(self, value):
1062 1055 self.addedvalues.add(value)
1063 1056
1064 1057 def update(self, values):
1065 1058 self.addedvalues.update(values)
1066 1059
1067 1060 has = lazyset(self.ancestors(common))
1068 1061 has.add(nullrev)
1069 1062 has.update(common)
1070 1063
1071 1064 # take all ancestors from heads that aren't in has
1072 1065 missing = set()
1073 1066 visit = collections.deque(r for r in heads if r not in has)
1074 1067 while visit:
1075 1068 r = visit.popleft()
1076 1069 if r in missing:
1077 1070 continue
1078 1071 else:
1079 1072 missing.add(r)
1080 1073 for p in self.parentrevs(r):
1081 1074 if p not in has:
1082 1075 visit.append(p)
1083 1076 missing = list(missing)
1084 1077 missing.sort()
1085 1078 return has, [self.node(miss) for miss in missing]
1086 1079
1087 1080 def incrementalmissingrevs(self, common=None):
1088 1081 """Return an object that can be used to incrementally compute the
1089 1082 revision numbers of the ancestors of arbitrary sets that are not
1090 1083 ancestors of common. This is an ancestor.incrementalmissingancestors
1091 1084 object.
1092 1085
1093 1086 'common' is a list of revision numbers. If common is not supplied, uses
1094 1087 nullrev.
1095 1088 """
1096 1089 if common is None:
1097 1090 common = [nullrev]
1098 1091
1099 1092 if rustancestor is not None:
1100 1093 return rustancestor.MissingAncestors(self.index, common)
1101 1094 return ancestor.incrementalmissingancestors(self.parentrevs, common)
1102 1095
1103 1096 def findmissingrevs(self, common=None, heads=None):
1104 1097 """Return the revision numbers of the ancestors of heads that
1105 1098 are not ancestors of common.
1106 1099
1107 1100 More specifically, return a list of revision numbers corresponding to
1108 1101 nodes N such that every N satisfies the following constraints:
1109 1102
1110 1103 1. N is an ancestor of some node in 'heads'
1111 1104 2. N is not an ancestor of any node in 'common'
1112 1105
1113 1106 The list is sorted by revision number, meaning it is
1114 1107 topologically sorted.
1115 1108
1116 1109 'heads' and 'common' are both lists of revision numbers. If heads is
1117 1110 not supplied, uses all of the revlog's heads. If common is not
1118 1111 supplied, uses nullid."""
1119 1112 if common is None:
1120 1113 common = [nullrev]
1121 1114 if heads is None:
1122 1115 heads = self.headrevs()
1123 1116
1124 1117 inc = self.incrementalmissingrevs(common=common)
1125 1118 return inc.missingancestors(heads)
1126 1119
1127 1120 def findmissing(self, common=None, heads=None):
1128 1121 """Return the ancestors of heads that are not ancestors of common.
1129 1122
1130 1123 More specifically, return a list of nodes N such that every N
1131 1124 satisfies the following constraints:
1132 1125
1133 1126 1. N is an ancestor of some node in 'heads'
1134 1127 2. N is not an ancestor of any node in 'common'
1135 1128
1136 1129 The list is sorted by revision number, meaning it is
1137 1130 topologically sorted.
1138 1131
1139 1132 'heads' and 'common' are both lists of node IDs. If heads is
1140 1133 not supplied, uses all of the revlog's heads. If common is not
1141 1134 supplied, uses nullid."""
1142 1135 if common is None:
1143 1136 common = [nullid]
1144 1137 if heads is None:
1145 1138 heads = self.heads()
1146 1139
1147 1140 common = [self.rev(n) for n in common]
1148 1141 heads = [self.rev(n) for n in heads]
1149 1142
1150 1143 inc = self.incrementalmissingrevs(common=common)
1151 1144 return [self.node(r) for r in inc.missingancestors(heads)]
1152 1145
1153 1146 def nodesbetween(self, roots=None, heads=None):
1154 1147 """Return a topological path from 'roots' to 'heads'.
1155 1148
1156 1149 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1157 1150 topologically sorted list of all nodes N that satisfy both of
1158 1151 these constraints:
1159 1152
1160 1153 1. N is a descendant of some node in 'roots'
1161 1154 2. N is an ancestor of some node in 'heads'
1162 1155
1163 1156 Every node is considered to be both a descendant and an ancestor
1164 1157 of itself, so every reachable node in 'roots' and 'heads' will be
1165 1158 included in 'nodes'.
1166 1159
1167 1160 'outroots' is the list of reachable nodes in 'roots', i.e., the
1168 1161 subset of 'roots' that is returned in 'nodes'. Likewise,
1169 1162 'outheads' is the subset of 'heads' that is also in 'nodes'.
1170 1163
1171 1164 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1172 1165 unspecified, uses nullid as the only root. If 'heads' is
1173 1166 unspecified, uses list of all of the revlog's heads."""
1174 1167 nonodes = ([], [], [])
1175 1168 if roots is not None:
1176 1169 roots = list(roots)
1177 1170 if not roots:
1178 1171 return nonodes
1179 1172 lowestrev = min([self.rev(n) for n in roots])
1180 1173 else:
1181 1174 roots = [nullid] # Everybody's a descendant of nullid
1182 1175 lowestrev = nullrev
1183 1176 if (lowestrev == nullrev) and (heads is None):
1184 1177 # We want _all_ the nodes!
1185 1178 return ([self.node(r) for r in self], [nullid], list(self.heads()))
1186 1179 if heads is None:
1187 1180 # All nodes are ancestors, so the latest ancestor is the last
1188 1181 # node.
1189 1182 highestrev = len(self) - 1
1190 1183 # Set ancestors to None to signal that every node is an ancestor.
1191 1184 ancestors = None
1192 1185 # Set heads to an empty dictionary for later discovery of heads
1193 1186 heads = {}
1194 1187 else:
1195 1188 heads = list(heads)
1196 1189 if not heads:
1197 1190 return nonodes
1198 1191 ancestors = set()
1199 1192 # Turn heads into a dictionary so we can remove 'fake' heads.
1200 1193 # Also, later we will be using it to filter out the heads we can't
1201 1194 # find from roots.
1202 1195 heads = dict.fromkeys(heads, False)
1203 1196 # Start at the top and keep marking parents until we're done.
1204 1197 nodestotag = set(heads)
1205 1198 # Remember where the top was so we can use it as a limit later.
1206 1199 highestrev = max([self.rev(n) for n in nodestotag])
1207 1200 while nodestotag:
1208 1201 # grab a node to tag
1209 1202 n = nodestotag.pop()
1210 1203 # Never tag nullid
1211 1204 if n == nullid:
1212 1205 continue
1213 1206 # A node's revision number represents its place in a
1214 1207 # topologically sorted list of nodes.
1215 1208 r = self.rev(n)
1216 1209 if r >= lowestrev:
1217 1210 if n not in ancestors:
1218 1211 # If we are possibly a descendant of one of the roots
1219 1212 # and we haven't already been marked as an ancestor
1220 1213 ancestors.add(n) # Mark as ancestor
1221 1214 # Add non-nullid parents to list of nodes to tag.
1222 1215 nodestotag.update(
1223 1216 [p for p in self.parents(n) if p != nullid]
1224 1217 )
1225 1218 elif n in heads: # We've seen it before, is it a fake head?
1226 1219 # So it is, real heads should not be the ancestors of
1227 1220 # any other heads.
1228 1221 heads.pop(n)
1229 1222 if not ancestors:
1230 1223 return nonodes
1231 1224 # Now that we have our set of ancestors, we want to remove any
1232 1225 # roots that are not ancestors.
1233 1226
1234 1227 # If one of the roots was nullid, everything is included anyway.
1235 1228 if lowestrev > nullrev:
1236 1229 # But, since we weren't, let's recompute the lowest rev to not
1237 1230 # include roots that aren't ancestors.
1238 1231
1239 1232 # Filter out roots that aren't ancestors of heads
1240 1233 roots = [root for root in roots if root in ancestors]
1241 1234 # Recompute the lowest revision
1242 1235 if roots:
1243 1236 lowestrev = min([self.rev(root) for root in roots])
1244 1237 else:
1245 1238 # No more roots? Return empty list
1246 1239 return nonodes
1247 1240 else:
1248 1241 # We are descending from nullid, and don't need to care about
1249 1242 # any other roots.
1250 1243 lowestrev = nullrev
1251 1244 roots = [nullid]
1252 1245 # Transform our roots list into a set.
1253 1246 descendants = set(roots)
1254 1247 # Also, keep the original roots so we can filter out roots that aren't
1255 1248 # 'real' roots (i.e. are descended from other roots).
1256 1249 roots = descendants.copy()
1257 1250 # Our topologically sorted list of output nodes.
1258 1251 orderedout = []
1259 1252 # Don't start at nullid since we don't want nullid in our output list,
1260 1253 # and if nullid shows up in descendants, empty parents will look like
1261 1254 # they're descendants.
1262 1255 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1263 1256 n = self.node(r)
1264 1257 isdescendant = False
1265 1258 if lowestrev == nullrev: # Everybody is a descendant of nullid
1266 1259 isdescendant = True
1267 1260 elif n in descendants:
1268 1261 # n is already a descendant
1269 1262 isdescendant = True
1270 1263 # This check only needs to be done here because all the roots
1271 1264 # will start being marked as descendants before the loop.
1272 1265 if n in roots:
1273 1266 # If n was a root, check if it's a 'real' root.
1274 1267 p = tuple(self.parents(n))
1275 1268 # If any of its parents are descendants, it's not a root.
1276 1269 if (p[0] in descendants) or (p[1] in descendants):
1277 1270 roots.remove(n)
1278 1271 else:
1279 1272 p = tuple(self.parents(n))
1280 1273 # A node is a descendant if either of its parents are
1281 1274 # descendants. (We seeded the descendants set with the roots
1282 1275 # up there, remember?)
1283 1276 if (p[0] in descendants) or (p[1] in descendants):
1284 1277 descendants.add(n)
1285 1278 isdescendant = True
1286 1279 if isdescendant and ((ancestors is None) or (n in ancestors)):
1287 1280 # Only include nodes that are both descendants and ancestors.
1288 1281 orderedout.append(n)
1289 1282 if (ancestors is not None) and (n in heads):
1290 1283 # We're trying to figure out which heads are reachable
1291 1284 # from roots.
1292 1285 # Mark this head as having been reached
1293 1286 heads[n] = True
1294 1287 elif ancestors is None:
1295 1288 # Otherwise, we're trying to discover the heads.
1296 1289 # Assume this is a head because if it isn't, the next step
1297 1290 # will eventually remove it.
1298 1291 heads[n] = True
1299 1292 # But, obviously its parents aren't.
1300 1293 for p in self.parents(n):
1301 1294 heads.pop(p, None)
1302 1295 heads = [head for head, flag in pycompat.iteritems(heads) if flag]
1303 1296 roots = list(roots)
1304 1297 assert orderedout
1305 1298 assert roots
1306 1299 assert heads
1307 1300 return (orderedout, roots, heads)
1308 1301
1309 1302 def headrevs(self, revs=None):
1310 1303 if revs is None:
1311 1304 try:
1312 1305 return self.index.headrevs()
1313 1306 except AttributeError:
1314 1307 return self._headrevs()
1315 1308 if rustdagop is not None:
1316 1309 return rustdagop.headrevs(self.index, revs)
1317 1310 return dagop.headrevs(revs, self._uncheckedparentrevs)
1318 1311
1319 1312 def computephases(self, roots):
1320 1313 return self.index.computephasesmapsets(roots)
1321 1314
1322 1315 def _headrevs(self):
1323 1316 count = len(self)
1324 1317 if not count:
1325 1318 return [nullrev]
1326 1319 # we won't iter over filtered rev so nobody is a head at start
1327 1320 ishead = [0] * (count + 1)
1328 1321 index = self.index
1329 1322 for r in self:
1330 1323 ishead[r] = 1 # I may be a head
1331 1324 e = index[r]
1332 1325 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1333 1326 return [r for r, val in enumerate(ishead) if val]
1334 1327
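`_headrevs` marks every revision as a head, then clears the mark from anything named as a parent. The same idea on a toy parent table, ignoring the filtered-revision handling of the real method:

    def headrevs(parents):
        # a rev is a head unless some revision names it as a parent
        ishead = [1] * len(parents)
        for p1, p2 in parents:
            for p in (p1, p2):
                if p >= 0:
                    ishead[p] = 0
        return [r for r, v in enumerate(ishead) if v]

    # 0 <- 1 <- 2 and 0 <- 3: the heads are 2 and 3
    assert headrevs([(-1, -1), (0, -1), (1, -1), (0, -1)]) == [2, 3]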
1335 1328 def heads(self, start=None, stop=None):
1336 1329 """return the list of all nodes that have no children
1337 1330
1338 1331 if start is specified, only heads that are descendants of
1339 1332 start will be returned
1340 1333 if stop is specified, it will consider all the revs from stop
1341 1334 as if they had no children
1342 1335 """
1343 1336 if start is None and stop is None:
1344 1337 if not len(self):
1345 1338 return [nullid]
1346 1339 return [self.node(r) for r in self.headrevs()]
1347 1340
1348 1341 if start is None:
1349 1342 start = nullrev
1350 1343 else:
1351 1344 start = self.rev(start)
1352 1345
1353 1346 stoprevs = {self.rev(n) for n in stop or []}
1354 1347
1355 1348 revs = dagop.headrevssubset(
1356 1349 self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
1357 1350 )
1358 1351
1359 1352 return [self.node(rev) for rev in revs]
1360 1353
1361 1354 def children(self, node):
1362 1355 """find the children of a given node"""
1363 1356 c = []
1364 1357 p = self.rev(node)
1365 1358 for r in self.revs(start=p + 1):
1366 1359 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1367 1360 if prevs:
1368 1361 for pr in prevs:
1369 1362 if pr == p:
1370 1363 c.append(self.node(r))
1371 1364 elif p == nullrev:
1372 1365 c.append(self.node(r))
1373 1366 return c
1374 1367
1375 1368 def commonancestorsheads(self, a, b):
1376 1369 """calculate all the heads of the common ancestors of nodes a and b"""
1377 1370 a, b = self.rev(a), self.rev(b)
1378 1371 ancs = self._commonancestorsheads(a, b)
1379 1372 return pycompat.maplist(self.node, ancs)
1380 1373
1381 1374 def _commonancestorsheads(self, *revs):
1382 1375 """calculate all the heads of the common ancestors of revs"""
1383 1376 try:
1384 1377 ancs = self.index.commonancestorsheads(*revs)
1385 1378 except (AttributeError, OverflowError): # C implementation failed
1386 1379 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1387 1380 return ancs
1388 1381
1389 1382 def isancestor(self, a, b):
1390 1383 """return True if node a is an ancestor of node b
1391 1384
1392 1385 A revision is considered an ancestor of itself."""
1393 1386 a, b = self.rev(a), self.rev(b)
1394 1387 return self.isancestorrev(a, b)
1395 1388
1396 1389 def isancestorrev(self, a, b):
1397 1390 """return True if revision a is an ancestor of revision b
1398 1391
1399 1392 A revision is considered an ancestor of itself.
1400 1393
1401 1394 The implementation of this is trivial but the use of
1402 1395 reachableroots is not."""
1403 1396 if a == nullrev:
1404 1397 return True
1405 1398 elif a == b:
1406 1399 return True
1407 1400 elif a > b:
1408 1401 return False
1409 1402 return bool(self.reachableroots(a, [b], [a], includepath=False))
1410 1403
1411 1404 def reachableroots(self, minroot, heads, roots, includepath=False):
1412 1405 """return (heads(::(<roots> and <roots>::<heads>)))
1413 1406
1414 1407 If includepath is True, return (<roots>::<heads>)."""
1415 1408 try:
1416 1409 return self.index.reachableroots2(
1417 1410 minroot, heads, roots, includepath
1418 1411 )
1419 1412 except AttributeError:
1420 1413 return dagop._reachablerootspure(
1421 1414 self.parentrevs, minroot, roots, heads, includepath
1422 1415 )
1423 1416
1424 1417 def ancestor(self, a, b):
1425 1418 """calculate the "best" common ancestor of nodes a and b"""
1426 1419
1427 1420 a, b = self.rev(a), self.rev(b)
1428 1421 try:
1429 1422 ancs = self.index.ancestors(a, b)
1430 1423 except (AttributeError, OverflowError):
1431 1424 ancs = ancestor.ancestors(self.parentrevs, a, b)
1432 1425 if ancs:
1433 1426 # choose a consistent winner when there's a tie
1434 1427 return min(map(self.node, ancs))
1435 1428 return nullid
1436 1429
1437 1430 def _match(self, id):
1438 1431 if isinstance(id, int):
1439 1432 # rev
1440 1433 return self.node(id)
1441 1434 if len(id) == 20:
1442 1435 # possibly a binary node
1443 1436 # odds of a binary node being all hex in ASCII are 1 in 10**25
1444 1437 try:
1445 1438 node = id
1446 1439 self.rev(node) # quick search the index
1447 1440 return node
1448 1441 except error.LookupError:
1449 1442 pass # may be partial hex id
1450 1443 try:
1451 1444 # str(rev)
1452 1445 rev = int(id)
1453 1446 if b"%d" % rev != id:
1454 1447 raise ValueError
1455 1448 if rev < 0:
1456 1449 rev = len(self) + rev
1457 1450 if rev < 0 or rev >= len(self):
1458 1451 raise ValueError
1459 1452 return self.node(rev)
1460 1453 except (ValueError, OverflowError):
1461 1454 pass
1462 1455 if len(id) == 40:
1463 1456 try:
1464 1457 # a full hex nodeid?
1465 1458 node = bin(id)
1466 1459 self.rev(node)
1467 1460 return node
1468 1461 except (TypeError, error.LookupError):
1469 1462 pass
1470 1463
1471 1464 def _partialmatch(self, id):
1472 1465 # we don't care about wdirfilenodeids as they should always be full hashes
1473 1466 maybewdir = wdirhex.startswith(id)
1474 1467 try:
1475 1468 partial = self.index.partialmatch(id)
1476 1469 if partial and self.hasnode(partial):
1477 1470 if maybewdir:
1478 1471 # single 'ff...' match in radix tree, ambiguous with wdir
1479 1472 raise error.RevlogError
1480 1473 return partial
1481 1474 if maybewdir:
1482 1475 # no 'ff...' match in radix tree, wdir identified
1483 1476 raise error.WdirUnsupported
1484 1477 return None
1485 1478 except error.RevlogError:
1486 1479 # parsers.c radix tree lookup gave multiple matches
1487 1480 # fast path: for unfiltered changelog, radix tree is accurate
1488 1481 if not getattr(self, 'filteredrevs', None):
1489 1482 raise error.AmbiguousPrefixLookupError(
1490 1483 id, self.indexfile, _(b'ambiguous identifier')
1491 1484 )
1492 1485 # fall through to slow path that filters hidden revisions
1493 1486 except (AttributeError, ValueError):
1494 1487 # we are pure python, or key was too short to search radix tree
1495 1488 pass
1496 1489
1497 1490 if id in self._pcache:
1498 1491 return self._pcache[id]
1499 1492
1500 1493 if len(id) <= 40:
1501 1494 try:
1502 1495 # hex(node)[:...]
1503 1496 l = len(id) // 2 # grab an even number of digits
1504 1497 prefix = bin(id[: l * 2])
1505 1498 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1506 1499 nl = [
1507 1500 n for n in nl if hex(n).startswith(id) and self.hasnode(n)
1508 1501 ]
1509 1502 if nullhex.startswith(id):
1510 1503 nl.append(nullid)
1511 1504 if len(nl) > 0:
1512 1505 if len(nl) == 1 and not maybewdir:
1513 1506 self._pcache[id] = nl[0]
1514 1507 return nl[0]
1515 1508 raise error.AmbiguousPrefixLookupError(
1516 1509 id, self.indexfile, _(b'ambiguous identifier')
1517 1510 )
1518 1511 if maybewdir:
1519 1512 raise error.WdirUnsupported
1520 1513 return None
1521 1514 except TypeError:
1522 1515 pass
1523 1516
1524 1517 def lookup(self, id):
1525 1518 """locate a node based on:
1526 1519 - revision number or str(revision number)
1527 1520 - nodeid or subset of hex nodeid
1528 1521 """
1529 1522 n = self._match(id)
1530 1523 if n is not None:
1531 1524 return n
1532 1525 n = self._partialmatch(id)
1533 1526 if n:
1534 1527 return n
1535 1528
1536 1529 raise error.LookupError(id, self.indexfile, _(b'no match found'))
1537 1530
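Taken together, ``_match`` and ``_partialmatch`` let ``lookup`` accept several spellings of one revision. A hypothetical session, assuming ``rl`` is an open revlog and the ``b'deadbeef'`` prefix is illustrative:

    # each call returns the 20-byte binary node, or raises LookupError
    rl.lookup(0)            # integer revision number
    rl.lookup(b'0')         # str(revision number)
    rl.lookup(b'de' * 20)   # full 40-digit hex nodeid
    rl.lookup(b'deadbeef')  # hex prefix; may raise
                            # AmbiguousPrefixLookupError if not unique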
1538 1531 def shortest(self, node, minlength=1):
1539 1532 """Find the shortest unambiguous prefix that matches node."""
1540 1533
1541 1534 def isvalid(prefix):
1542 1535 try:
1543 1536 matchednode = self._partialmatch(prefix)
1544 1537 except error.AmbiguousPrefixLookupError:
1545 1538 return False
1546 1539 except error.WdirUnsupported:
1547 1540 # single 'ff...' match
1548 1541 return True
1549 1542 if matchednode is None:
1550 1543 raise error.LookupError(node, self.indexfile, _(b'no node'))
1551 1544 return True
1552 1545
1553 1546 def maybewdir(prefix):
1554 1547 return all(c == b'f' for c in pycompat.iterbytestr(prefix))
1555 1548
1556 1549 hexnode = hex(node)
1557 1550
1558 1551 def disambiguate(hexnode, minlength):
1559 1552 """Disambiguate against wdirid."""
1560 1553 for length in range(minlength, len(hexnode) + 1):
1561 1554 prefix = hexnode[:length]
1562 1555 if not maybewdir(prefix):
1563 1556 return prefix
1564 1557
1565 1558 if not getattr(self, 'filteredrevs', None):
1566 1559 try:
1567 1560 length = max(self.index.shortest(node), minlength)
1568 1561 return disambiguate(hexnode, length)
1569 1562 except error.RevlogError:
1570 1563 if node != wdirid:
1571 1564 raise error.LookupError(node, self.indexfile, _(b'no node'))
1572 1565 except AttributeError:
1573 1566 # Fall through to pure code
1574 1567 pass
1575 1568
1576 1569 if node == wdirid:
1577 1570 for length in range(minlength, len(hexnode) + 1):
1578 1571 prefix = hexnode[:length]
1579 1572 if isvalid(prefix):
1580 1573 return prefix
1581 1574
1582 1575 for length in range(minlength, len(hexnode) + 1):
1583 1576 prefix = hexnode[:length]
1584 1577 if isvalid(prefix):
1585 1578 return disambiguate(hexnode, length)
1586 1579
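The ``disambiguate`` helper exists because an all-'f' prefix could equally denote the working-directory pseudo-node (whose hex form is forty 'f's). A runnable sketch of that corner case, using a made-up node whose hex starts with 'ff3a':

    def maybewdir(prefix):
        # same test as above: every hex digit of the prefix is 'f'
        return all(prefix[i:i + 1] == b'f' for i in range(len(prefix)))

    hexnode = b'ff3a' + b'0' * 36  # made-up 40-digit node hex
    for length in range(1, len(hexnode) + 1):
        prefix = hexnode[:length]
        if not maybewdir(prefix):
            break
    assert prefix == b'ff3'  # shortest prefix that cannot be wdir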
1587 1580 def cmp(self, node, text):
1588 1581 """compare text with a given file revision
1589 1582
1590 1583 returns True if text is different from what is stored.
1591 1584 """
1592 1585 p1, p2 = self.parents(node)
1593 1586 return storageutil.hashrevisionsha1(text, p1, p2) != node
1594 1587
1595 1588 def _cachesegment(self, offset, data):
1596 1589 """Add a segment to the revlog cache.
1597 1590
1598 1591 Accepts an absolute offset and the data that is at that location.
1599 1592 """
1600 1593 o, d = self._chunkcache
1601 1594 # try to add to existing cache
1602 1595 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1603 1596 self._chunkcache = o, d + data
1604 1597 else:
1605 1598 self._chunkcache = offset, data
1606 1599
1607 1600 def _readsegment(self, offset, length, df=None):
1608 1601 """Load a segment of raw data from the revlog.
1609 1602
1610 1603 Accepts an absolute offset, length to read, and an optional existing
1611 1604 file handle to read from.
1612 1605
1613 1606 If an existing file handle is passed, it will be seeked and the
1614 1607 original seek position will NOT be restored.
1615 1608
1616 1609 Returns a str or buffer of raw byte data.
1617 1610
1618 1611 Raises if the requested number of bytes could not be read.
1619 1612 """
1620 1613 # Cache data both forward and backward around the requested
1621 1614 # data, in a fixed size window. This helps speed up operations
1622 1615 # involving reading the revlog backwards.
1623 1616 cachesize = self._chunkcachesize
1624 1617 realoffset = offset & ~(cachesize - 1)
1625 1618 reallength = (
1626 1619 (offset + length + cachesize) & ~(cachesize - 1)
1627 1620 ) - realoffset
1628 1621 with self._datareadfp(df) as df:
1629 1622 df.seek(realoffset)
1630 1623 d = df.read(reallength)
1631 1624
1632 1625 self._cachesegment(realoffset, d)
1633 1626 if offset != realoffset or reallength != length:
1634 1627 startoffset = offset - realoffset
1635 1628 if len(d) - startoffset < length:
1636 1629 raise error.RevlogError(
1637 1630 _(
1638 1631 b'partial read of revlog %s; expected %d bytes from '
1639 1632 b'offset %d, got %d'
1640 1633 )
1641 1634 % (
1642 1635 self.indexfile if self._inline else self.datafile,
1643 1636 length,
1644 1637 realoffset,
1645 1638 len(d) - startoffset,
1646 1639 )
1647 1640 )
1648 1641
1649 1642 return util.buffer(d, startoffset, length)
1650 1643
1651 1644 if len(d) < length:
1652 1645 raise error.RevlogError(
1653 1646 _(
1654 1647 b'partial read of revlog %s; expected %d bytes from offset '
1655 1648 b'%d, got %d'
1656 1649 )
1657 1650 % (
1658 1651 self.indexfile if self._inline else self.datafile,
1659 1652 length,
1660 1653 offset,
1661 1654 len(d),
1662 1655 )
1663 1656 )
1664 1657
1665 1658 return d
1666 1659
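The masking above relies on ``_chunkcachesize`` being a power of two: the requested span is rounded outward to cache-size boundaries, with up to one extra window of slack past the end. A worked instance, assuming a hypothetical 64 KiB cache size:

    cachesize = 65536  # any power of two works for the masking below
    offset, length = 70000, 1000

    realoffset = offset & ~(cachesize - 1)  # round start down: 65536
    reallength = (
        (offset + length + cachesize) & ~(cachesize - 1)
    ) - realoffset                          # round end up, plus slack
    assert (realoffset, reallength) == (65536, 65536)
    # the cached window always covers the requested span
    assert realoffset <= offset
    assert offset + length <= realoffset + reallength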
1667 1660 def _getsegment(self, offset, length, df=None):
1668 1661 """Obtain a segment of raw data from the revlog.
1669 1662
1670 1663 Accepts an absolute offset, length of bytes to obtain, and an
1671 1664 optional file handle to the already-opened revlog. If the file
1672 1665 handle is used, its original seek position will not be preserved.
1673 1666
1674 1667 Requests for data may be returned from a cache.
1675 1668
1676 1669 Returns a str or a buffer instance of raw byte data.
1677 1670 """
1678 1671 o, d = self._chunkcache
1679 1672 l = len(d)
1680 1673
1681 1674 # is it in the cache?
1682 1675 cachestart = offset - o
1683 1676 cacheend = cachestart + length
1684 1677 if cachestart >= 0 and cacheend <= l:
1685 1678 if cachestart == 0 and cacheend == l:
1686 1679 return d # avoid a copy
1687 1680 return util.buffer(d, cachestart, cacheend - cachestart)
1688 1681
1689 1682 return self._readsegment(offset, length, df=df)
1690 1683
1691 1684 def _getsegmentforrevs(self, startrev, endrev, df=None):
1692 1685 """Obtain a segment of raw data corresponding to a range of revisions.
1693 1686
1694 1687 Accepts the start and end revisions and an optional already-open
1695 1688 file handle to be used for reading. If the file handle is used, its
1696 1689 seek position will not be preserved.
1697 1690
1698 1691 Requests for data may be satisfied by a cache.
1699 1692
1700 1693 Returns a 2-tuple of (offset, data) for the requested range of
1701 1694 revisions. Offset is the integer offset from the beginning of the
1702 1695 revlog and data is a str or buffer of the raw byte data.
1703 1696
1704 1697 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1705 1698 to determine where each revision's data begins and ends.
1706 1699 """
1707 1700 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1708 1701 # (functions are expensive).
1709 1702 index = self.index
1710 1703 istart = index[startrev]
1711 1704 start = int(istart[0] >> 16)
1712 1705 if startrev == endrev:
1713 1706 end = start + istart[1]
1714 1707 else:
1715 1708 iend = index[endrev]
1716 1709 end = int(iend[0] >> 16) + iend[1]
1717 1710
1718 1711 if self._inline:
1719 start += (startrev + 1) * self._io.size
1720 end += (endrev + 1) * self._io.size
1712 start += (startrev + 1) * self.index.entry_size
1713 end += (endrev + 1) * self.index.entry_size
1721 1714 length = end - start
1722 1715
1723 1716 return start, self._getsegment(start, length, df=df)
1724 1717
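This hunk is the heart of the change: ``self.index.entry_size`` takes over from ``revlog._io.size``. In an inline revlog one fixed-size index entry precedes each revision's data, so offsets derived from the index must be shifted by ``(rev + 1)`` entries. A worked sketch, assuming the 64-byte entry size of revlog format v1:

    entry_size = 64   # index entry size for revlog v1 (assumed here)
    rev = 3
    start_in_d = 300  # offset rev 3's data would have in a separate .d file

    # entries 0..rev (that is, rev + 1 of them) sit in front of rev's
    # data when index and data are interleaved in a single inline .i file
    start_inline = start_in_d + (rev + 1) * entry_size
    assert start_inline == 300 + 4 * 64 == 556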
1725 1718 def _chunk(self, rev, df=None):
1726 1719 """Obtain a single decompressed chunk for a revision.
1727 1720
1728 1721 Accepts an integer revision and an optional already-open file handle
1729 1722 to be used for reading. If used, the seek position of the file will not
1730 1723 be preserved.
1731 1724
1732 1725 Returns a str holding uncompressed data for the requested revision.
1733 1726 """
1734 1727 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
1735 1728
1736 1729 def _chunks(self, revs, df=None, targetsize=None):
1737 1730 """Obtain decompressed chunks for the specified revisions.
1738 1731
1739 1732 Accepts an iterable of numeric revisions that are assumed to be in
1740 1733 ascending order. Also accepts an optional already-open file handle
1741 1734 to be used for reading. If used, the seek position of the file will
1742 1735 not be preserved.
1743 1736
1744 1737 This function is similar to calling ``self._chunk()`` multiple times,
1745 1738 but is faster.
1746 1739
1747 1740 Returns a list with decompressed data for each requested revision.
1748 1741 """
1749 1742 if not revs:
1750 1743 return []
1751 1744 start = self.start
1752 1745 length = self.length
1753 1746 inline = self._inline
1754 iosize = self._io.size
1747 iosize = self.index.entry_size
1755 1748 buffer = util.buffer
1756 1749
1757 1750 l = []
1758 1751 ladd = l.append
1759 1752
1760 1753 if not self._withsparseread:
1761 1754 slicedchunks = (revs,)
1762 1755 else:
1763 1756 slicedchunks = deltautil.slicechunk(
1764 1757 self, revs, targetsize=targetsize
1765 1758 )
1766 1759
1767 1760 for revschunk in slicedchunks:
1768 1761 firstrev = revschunk[0]
1769 1762 # Skip trailing revisions with empty diff
1770 1763 for lastrev in revschunk[::-1]:
1771 1764 if length(lastrev) != 0:
1772 1765 break
1773 1766
1774 1767 try:
1775 1768 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1776 1769 except OverflowError:
1777 1770 # issue4215 - we can't cache a run of chunks greater than
1778 1771 # 2G on Windows
1779 1772 return [self._chunk(rev, df=df) for rev in revschunk]
1780 1773
1781 1774 decomp = self.decompress
1782 1775 for rev in revschunk:
1783 1776 chunkstart = start(rev)
1784 1777 if inline:
1785 1778 chunkstart += (rev + 1) * iosize
1786 1779 chunklength = length(rev)
1787 1780 ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
1788 1781
1789 1782 return l
1790 1783
1791 1784 def _chunkclear(self):
1792 1785 """Clear the raw chunk cache."""
1793 1786 self._chunkcache = (0, b'')
1794 1787
1795 1788 def deltaparent(self, rev):
1796 1789 """return deltaparent of the given revision"""
1797 1790 base = self.index[rev][3]
1798 1791 if base == rev:
1799 1792 return nullrev
1800 1793 elif self._generaldelta:
1801 1794 return base
1802 1795 else:
1803 1796 return rev - 1
1804 1797
1805 1798 def issnapshot(self, rev):
1806 1799 """tells whether rev is a snapshot"""
1807 1800 if not self._sparserevlog:
1808 1801 return self.deltaparent(rev) == nullrev
1809 1802 elif util.safehasattr(self.index, b'issnapshot'):
1810 1803 # directly assign the method to cache the testing and access
1811 1804 self.issnapshot = self.index.issnapshot
1812 1805 return self.issnapshot(rev)
1813 1806 if rev == nullrev:
1814 1807 return True
1815 1808 entry = self.index[rev]
1816 1809 base = entry[3]
1817 1810 if base == rev:
1818 1811 return True
1819 1812 if base == nullrev:
1820 1813 return True
1821 1814 p1 = entry[5]
1822 1815 p2 = entry[6]
1823 1816 if base == p1 or base == p2:
1824 1817 return False
1825 1818 return self.issnapshot(base)
1826 1819
1827 1820 def snapshotdepth(self, rev):
1828 1821 """number of snapshots in the chain before this one"""
1829 1822 if not self.issnapshot(rev):
1830 1823 raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
1831 1824 return len(self._deltachain(rev)[0]) - 1
1832 1825
1833 1826 def revdiff(self, rev1, rev2):
1834 1827 """return or calculate a delta between two revisions
1835 1828
1836 1829 The delta calculated is in binary form and is intended to be written to
1837 1830 revlog data directly. So this function needs raw revision data.
1838 1831 """
1839 1832 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1840 1833 return bytes(self._chunk(rev2))
1841 1834
1842 1835 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
1843 1836
1844 1837 def _processflags(self, text, flags, operation, raw=False):
1845 1838 """deprecated entry point to access flag processors"""
1846 1839 msg = b'_processflag(...) use the specialized variant'
1847 1840 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1848 1841 if raw:
1849 1842 return text, flagutil.processflagsraw(self, text, flags)
1850 1843 elif operation == b'read':
1851 1844 return flagutil.processflagsread(self, text, flags)
1852 1845 else: # write operation
1853 1846 return flagutil.processflagswrite(self, text, flags)
1854 1847
1855 1848 def revision(self, nodeorrev, _df=None, raw=False):
1856 1849 """return an uncompressed revision of a given node or revision
1857 1850 number.
1858 1851
1859 1852 _df - an existing file handle to read from. (internal-only)
1860 1853 raw - an optional argument specifying if the revision data is to be
1861 1854 treated as raw data when applying flag transforms. 'raw' should be set
1862 1855 to True when generating changegroups or in debug commands.
1863 1856 """
1864 1857 if raw:
1865 1858 msg = (
1866 1859 b'revlog.revision(..., raw=True) is deprecated, '
1867 1860 b'use revlog.rawdata(...)'
1868 1861 )
1869 1862 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1870 1863 return self._revisiondata(nodeorrev, _df, raw=raw)[0]
1871 1864
1872 1865 def sidedata(self, nodeorrev, _df=None):
1873 1866 """a map of extra data related to the changeset but not part of the hash
1874 1867
1875 1868 This function currently returns a dictionary. However, a more advanced
1876 1869 mapping object will likely be used in the future for more
1877 1870 efficient/lazy code.
1878 1871 """
1879 1872 return self._revisiondata(nodeorrev, _df)[1]
1880 1873
1881 1874 def _revisiondata(self, nodeorrev, _df=None, raw=False):
1882 1875 # deal with <nodeorrev> argument type
1883 1876 if isinstance(nodeorrev, int):
1884 1877 rev = nodeorrev
1885 1878 node = self.node(rev)
1886 1879 else:
1887 1880 node = nodeorrev
1888 1881 rev = None
1889 1882
1890 1883 # fast path the special `nullid` rev
1891 1884 if node == nullid:
1892 1885 return b"", {}
1893 1886
1894 1887 # ``rawtext`` is the text as stored inside the revlog. Might be the
1895 1888 # revision or might need to be processed to retrieve the revision.
1896 1889 rev, rawtext, validated = self._rawtext(node, rev, _df=_df)
1897 1890
1898 1891 if self.version & 0xFFFF == REVLOGV2:
1899 1892 if rev is None:
1900 1893 rev = self.rev(node)
1901 1894 sidedata = self._sidedata(rev)
1902 1895 else:
1903 1896 sidedata = {}
1904 1897
1905 1898 if raw and validated:
1906 1899 # if we don't want to process the raw text and that raw
1907 1900 # text is cached, we can exit early.
1908 1901 return rawtext, sidedata
1909 1902 if rev is None:
1910 1903 rev = self.rev(node)
1911 1904 # the revlog's flag for this revision
1912 1905 # (usually alter its state or content)
1913 1906 flags = self.flags(rev)
1914 1907
1915 1908 if validated and flags == REVIDX_DEFAULT_FLAGS:
1916 1909 # no extra flags set, no flag processor runs, text = rawtext
1917 1910 return rawtext, sidedata
1918 1911
1919 1912 if raw:
1920 1913 validatehash = flagutil.processflagsraw(self, rawtext, flags)
1921 1914 text = rawtext
1922 1915 else:
1923 1916 r = flagutil.processflagsread(self, rawtext, flags)
1924 1917 text, validatehash = r
1925 1918 if validatehash:
1926 1919 self.checkhash(text, node, rev=rev)
1927 1920 if not validated:
1928 1921 self._revisioncache = (node, rev, rawtext)
1929 1922
1930 1923 return text, sidedata
1931 1924
1932 1925 def _rawtext(self, node, rev, _df=None):
1933 1926 """return the possibly unvalidated rawtext for a revision
1934 1927
1935 1928 returns (rev, rawtext, validated)
1936 1929 """
1937 1930
1938 1931 # revision in the cache (could be useful to apply delta)
1939 1932 cachedrev = None
1940 1933 # An intermediate text to apply deltas to
1941 1934 basetext = None
1942 1935
1943 1936 # Check if we have the entry in cache
1944 1937 # The cache entry looks like (node, rev, rawtext)
1945 1938 if self._revisioncache:
1946 1939 if self._revisioncache[0] == node:
1947 1940 return (rev, self._revisioncache[2], True)
1948 1941 cachedrev = self._revisioncache[1]
1949 1942
1950 1943 if rev is None:
1951 1944 rev = self.rev(node)
1952 1945
1953 1946 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1954 1947 if stopped:
1955 1948 basetext = self._revisioncache[2]
1956 1949
1957 1950 # drop cache to save memory, the caller is expected to
1958 1951 # update self._revisioncache after validating the text
1959 1952 self._revisioncache = None
1960 1953
1961 1954 targetsize = None
1962 1955 rawsize = self.index[rev][2]
1963 1956 if 0 <= rawsize:
1964 1957 targetsize = 4 * rawsize
1965 1958
1966 1959 bins = self._chunks(chain, df=_df, targetsize=targetsize)
1967 1960 if basetext is None:
1968 1961 basetext = bytes(bins[0])
1969 1962 bins = bins[1:]
1970 1963
1971 1964 rawtext = mdiff.patches(basetext, bins)
1972 1965 del basetext # let us have a chance to free memory early
1973 1966 return (rev, rawtext, False)
1974 1967
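The body above boils down to "fetch the delta chain, then fold each delta into the base text". A conceptual model of that resolution, with ``apply`` standing in for ``mdiff.patches()`` (illustrative only, not the real patching code):

    def resolve_chain(basetext, deltas, apply):
        """Rebuild a revision from its delta chain.

        ``deltas`` are ordered from the revision nearest the base to
        the target revision; ``apply(text, delta)`` plays the role of
        mdiff.patches(text, [delta]).
        """
        text = basetext
        for delta in deltas:
            text = apply(text, delta)
        return text

    # toy deltas: each "delta" is just a replacement payload here
    assert resolve_chain(b'v0', [b'v1', b'v2'], lambda t, d: d) == b'v2'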
1975 1968 def _sidedata(self, rev):
1976 1969 """Return the sidedata for a given revision number."""
1977 1970 index_entry = self.index[rev]
1978 1971 sidedata_offset = index_entry[8]
1979 1972 sidedata_size = index_entry[9]
1980 1973
1981 1974 if self._inline:
1982 sidedata_offset += self._io.size * (1 + rev)
1975 sidedata_offset += self.index.entry_size * (1 + rev)
1983 1976 if sidedata_size == 0:
1984 1977 return {}
1985 1978
1986 1979 segment = self._getsegment(sidedata_offset, sidedata_size)
1987 1980 sidedata = sidedatautil.deserialize_sidedata(segment)
1988 1981 return sidedata
1989 1982
1990 1983 def rawdata(self, nodeorrev, _df=None):
1991 1984 """return an uncompressed raw data of a given node or revision number.
1992 1985
1993 1986 _df - an existing file handle to read from. (internal-only)
1994 1987 """
1995 1988 return self._revisiondata(nodeorrev, _df, raw=True)[0]
1996 1989
1997 1990 def hash(self, text, p1, p2):
1998 1991 """Compute a node hash.
1999 1992
2000 1993 Available as a function so that subclasses can replace the hash
2001 1994 as needed.
2002 1995 """
2003 1996 return storageutil.hashrevisionsha1(text, p1, p2)
2004 1997
2005 1998 def checkhash(self, text, node, p1=None, p2=None, rev=None):
2006 1999 """Check node hash integrity.
2007 2000
2008 2001 Available as a function so that subclasses can extend hash mismatch
2009 2002 behaviors as needed.
2010 2003 """
2011 2004 try:
2012 2005 if p1 is None and p2 is None:
2013 2006 p1, p2 = self.parents(node)
2014 2007 if node != self.hash(text, p1, p2):
2015 2008 # Clear the revision cache on hash failure. The revision cache
2016 2009 # only stores the raw revision and clearing the cache does have
2017 2010 # the side-effect that we won't have a cache hit when the raw
2018 2011 # revision data is accessed. But this case should be rare and
2019 2012 # it is extra work to teach the cache about the hash
2020 2013 # verification state.
2021 2014 if self._revisioncache and self._revisioncache[0] == node:
2022 2015 self._revisioncache = None
2023 2016
2024 2017 revornode = rev
2025 2018 if revornode is None:
2026 2019 revornode = templatefilters.short(hex(node))
2027 2020 raise error.RevlogError(
2028 2021 _(b"integrity check failed on %s:%s")
2029 2022 % (self.indexfile, pycompat.bytestr(revornode))
2030 2023 )
2031 2024 except error.RevlogError:
2032 2025 if self._censorable and storageutil.iscensoredtext(text):
2033 2026 raise error.CensoredNodeError(self.indexfile, node, text)
2034 2027 raise
2035 2028
2036 2029 def _enforceinlinesize(self, tr, fp=None):
2037 2030 """Check if the revlog is too big for inline and convert if so.
2038 2031
2039 2032 This should be called after revisions are added to the revlog. If the
2040 2033 revlog has grown too large to be an inline revlog, it will convert it
2041 2034 to use multiple index and data files.
2042 2035 """
2043 2036 tiprev = len(self) - 1
2044 2037 if (
2045 2038 not self._inline
2046 2039 or (self.start(tiprev) + self.length(tiprev)) < _maxinline
2047 2040 ):
2048 2041 return
2049 2042
2050 2043 troffset = tr.findoffset(self.indexfile)
2051 2044 if troffset is None:
2052 2045 raise error.RevlogError(
2053 2046 _(b"%s not found in the transaction") % self.indexfile
2054 2047 )
2055 2048 trindex = 0
2056 2049 tr.add(self.datafile, 0)
2057 2050
2058 2051 if fp:
2059 2052 fp.flush()
2060 2053 fp.close()
2061 2054 # We can't use the cached file handle after close(). So prevent
2062 2055 # its usage.
2063 2056 self._writinghandles = None
2064 2057
2065 2058 with self._indexfp(b'r') as ifh, self._datafp(b'w') as dfh:
2066 2059 for r in self:
2067 2060 dfh.write(self._getsegmentforrevs(r, r, df=ifh)[1])
2068 2061 if troffset <= self.start(r):
2069 2062 trindex = r
2070 2063
2071 2064 with self._indexfp(b'w') as fp:
2072 2065 self.version &= ~FLAG_INLINE_DATA
2073 2066 self._inline = False
2074 2067 io = self._io
2075 2068 for i in self:
2076 2069 e = io.packentry(self.index[i], self.node, self.version, i)
2077 2070 fp.write(e)
2078 2071
2079 2072 # the temp file replaces the real index when we exit the context
2080 2073 # manager
2081 2074
2082 tr.replace(self.indexfile, trindex * self._io.size)
2075 tr.replace(self.indexfile, trindex * self.index.entry_size)
2083 2076 nodemaputil.setup_persistent_nodemap(tr, self)
2084 2077 self._chunkclear()
2085 2078
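A condensed model of the early-return guard above; the 131072-byte threshold mirrors revlog's ``_maxinline`` constant, though the exact value should be treated as illustrative here:

    _maxinline = 131072  # bytes of data allowed before leaving inline mode

    def needs_split(inline, tip_start, tip_length):
        # conversion runs only for inline revlogs whose data portion
        # has grown to the threshold or beyond
        return inline and (tip_start + tip_length) >= _maxinline

    assert not needs_split(True, 100000, 1000)
    assert needs_split(True, 131000, 5000)
    assert not needs_split(False, 10 ** 9, 10 ** 9)  # already split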
2086 2079 def _nodeduplicatecallback(self, transaction, node):
2087 2080 """called when trying to add a node already stored."""
2088 2081
2089 2082 def addrevision(
2090 2083 self,
2091 2084 text,
2092 2085 transaction,
2093 2086 link,
2094 2087 p1,
2095 2088 p2,
2096 2089 cachedelta=None,
2097 2090 node=None,
2098 2091 flags=REVIDX_DEFAULT_FLAGS,
2099 2092 deltacomputer=None,
2100 2093 sidedata=None,
2101 2094 ):
2102 2095 """add a revision to the log
2103 2096
2104 2097 text - the revision data to add
2105 2098 transaction - the transaction object used for rollback
2106 2099 link - the linkrev data to add
2107 2100 p1, p2 - the parent nodeids of the revision
2108 2101 cachedelta - an optional precomputed delta
2109 2102 node - nodeid of revision; typically node is not specified, and it is
2110 2103 computed by default as hash(text, p1, p2), however subclasses might
2111 2104 use a different hashing method (and override checkhash() in that case)
2112 2105 flags - the known flags to set on the revision
2113 2106 deltacomputer - an optional deltacomputer instance shared between
2114 2107 multiple calls
2115 2108 """
2116 2109 if link == nullrev:
2117 2110 raise error.RevlogError(
2118 2111 _(b"attempted to add linkrev -1 to %s") % self.indexfile
2119 2112 )
2120 2113
2121 2114 if sidedata is None:
2122 2115 sidedata = {}
2123 2116 elif not self.hassidedata:
2124 2117 raise error.ProgrammingError(
2125 2118 _(b"trying to add sidedata to a revlog that does not support them")
2126 2119 )
2127 2120
2128 2121 if flags:
2129 2122 node = node or self.hash(text, p1, p2)
2130 2123
2131 2124 rawtext, validatehash = flagutil.processflagswrite(self, text, flags)
2132 2125
2133 2126 # If the flag processor modifies the revision data, ignore any provided
2134 2127 # cachedelta.
2135 2128 if rawtext != text:
2136 2129 cachedelta = None
2137 2130
2138 2131 if len(rawtext) > _maxentrysize:
2139 2132 raise error.RevlogError(
2140 2133 _(
2141 2134 b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
2142 2135 )
2143 2136 % (self.indexfile, len(rawtext))
2144 2137 )
2145 2138
2146 2139 node = node or self.hash(rawtext, p1, p2)
2147 2140 rev = self.index.get_rev(node)
2148 2141 if rev is not None:
2149 2142 return rev
2150 2143
2151 2144 if validatehash:
2152 2145 self.checkhash(rawtext, node, p1=p1, p2=p2)
2153 2146
2154 2147 return self.addrawrevision(
2155 2148 rawtext,
2156 2149 transaction,
2157 2150 link,
2158 2151 p1,
2159 2152 p2,
2160 2153 node,
2161 2154 flags,
2162 2155 cachedelta=cachedelta,
2163 2156 deltacomputer=deltacomputer,
2164 2157 sidedata=sidedata,
2165 2158 )
2166 2159
2167 2160 def addrawrevision(
2168 2161 self,
2169 2162 rawtext,
2170 2163 transaction,
2171 2164 link,
2172 2165 p1,
2173 2166 p2,
2174 2167 node,
2175 2168 flags,
2176 2169 cachedelta=None,
2177 2170 deltacomputer=None,
2178 2171 sidedata=None,
2179 2172 ):
2180 2173 """add a raw revision with known flags, node and parents
2181 2174 useful when reusing a revision not stored in this revlog (e.g. received
2182 2175 over wire, or read from an external bundle).
2183 2176 """
2184 2177 dfh = None
2185 2178 if not self._inline:
2186 2179 dfh = self._datafp(b"a+")
2187 2180 ifh = self._indexfp(b"a+")
2188 2181 try:
2189 2182 return self._addrevision(
2190 2183 node,
2191 2184 rawtext,
2192 2185 transaction,
2193 2186 link,
2194 2187 p1,
2195 2188 p2,
2196 2189 flags,
2197 2190 cachedelta,
2198 2191 ifh,
2199 2192 dfh,
2200 2193 deltacomputer=deltacomputer,
2201 2194 sidedata=sidedata,
2202 2195 )
2203 2196 finally:
2204 2197 if dfh:
2205 2198 dfh.close()
2206 2199 ifh.close()
2207 2200
2208 2201 def compress(self, data):
2209 2202 """Generate a possibly-compressed representation of data."""
2210 2203 if not data:
2211 2204 return b'', data
2212 2205
2213 2206 compressed = self._compressor.compress(data)
2214 2207
2215 2208 if compressed:
2216 2209 # The revlog compressor added the header in the returned data.
2217 2210 return b'', compressed
2218 2211
2219 2212 if data[0:1] == b'\0':
2220 2213 return b'', data
2221 2214 return b'u', data
2222 2215
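Note the contract: ``compress()`` returns a ``(header, data)`` pair, with an empty header when the compressor embeds its own magic byte and ``b'u'`` when literal data must be distinguished from compressed data. A hedged round-trip sketch, assuming ``rl`` is an open revlog:

    header, packed = rl.compress(b'some revision text')
    chunk = header + packed            # what is actually written to disk
    assert bytes(rl.decompress(chunk)) == b'some revision text'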
2223 2216 def decompress(self, data):
2224 2217 """Decompress a revlog chunk.
2225 2218
2226 2219 The chunk is expected to begin with a header identifying the
2227 2220 format type so it can be routed to an appropriate decompressor.
2228 2221 """
2229 2222 if not data:
2230 2223 return data
2231 2224
2232 2225 # Revlogs are read much more frequently than they are written and many
2233 2226 # chunks only take microseconds to decompress, so performance is
2234 2227 # important here.
2235 2228 #
2236 2229 # We can make a few assumptions about revlogs:
2237 2230 #
2238 2231 # 1) the majority of chunks will be compressed (as opposed to inline
2239 2232 # raw data).
2240 2233 # 2) decompressing *any* data will likely be at least 10x slower than
2241 2234 # returning raw inline data.
2242 2235 # 3) we want to prioritize common and officially supported compression
2243 2236 # engines
2244 2237 #
2245 2238 # It follows that we want to optimize for "decompress compressed data
2246 2239 # when encoded with common and officially supported compression engines"
2247 2240 # case over "raw data" and "data encoded by less common or non-official
2248 2241 # compression engines." That is why we have the inline lookup first
2249 2242 # followed by the compengines lookup.
2250 2243 #
2251 2244 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
2252 2245 # compressed chunks. And this matters for changelog and manifest reads.
2253 2246 t = data[0:1]
2254 2247
2255 2248 if t == b'x':
2256 2249 try:
2257 2250 return _zlibdecompress(data)
2258 2251 except zlib.error as e:
2259 2252 raise error.RevlogError(
2260 2253 _(b'revlog decompress error: %s')
2261 2254 % stringutil.forcebytestr(e)
2262 2255 )
2263 2256 # '\0' is more common than 'u' so it goes first.
2264 2257 elif t == b'\0':
2265 2258 return data
2266 2259 elif t == b'u':
2267 2260 return util.buffer(data, 1)
2268 2261
2269 2262 try:
2270 2263 compressor = self._decompressors[t]
2271 2264 except KeyError:
2272 2265 try:
2273 2266 engine = util.compengines.forrevlogheader(t)
2274 2267 compressor = engine.revlogcompressor(self._compengineopts)
2275 2268 self._decompressors[t] = compressor
2276 2269 except KeyError:
2277 2270 raise error.RevlogError(
2278 2271 _(b'unknown compression type %s') % binascii.hexlify(t)
2279 2272 )
2280 2273
2281 2274 return compressor.decompress(data)
2282 2275
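A self-contained model of the header dispatch above, covering only the three fast-path markers ('x' opens a zlib stream, NUL means the stored bytes are already raw, 'u' prefixes literal data); the real method additionally routes unknown headers through ``util.compengines``:

    import zlib

    def decompress_sketch(data):
        if not data:
            return data
        t = data[0:1]
        if t == b'x':    # zlib stream (deflate streams start with 0x78)
            return zlib.decompress(data)
        if t == b'\0':   # chunk stored verbatim, first byte happens to be NUL
            return data
        if t == b'u':    # explicit "uncompressed" marker: strip it
            return data[1:]
        raise ValueError('unknown compression header %r' % t)

    assert decompress_sketch(zlib.compress(b'hello')) == b'hello'
    assert decompress_sketch(b'uhello') == b'hello'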
2283 2276 def _addrevision(
2284 2277 self,
2285 2278 node,
2286 2279 rawtext,
2287 2280 transaction,
2288 2281 link,
2289 2282 p1,
2290 2283 p2,
2291 2284 flags,
2292 2285 cachedelta,
2293 2286 ifh,
2294 2287 dfh,
2295 2288 alwayscache=False,
2296 2289 deltacomputer=None,
2297 2290 sidedata=None,
2298 2291 ):
2299 2292 """internal function to add revisions to the log
2300 2293
2301 2294 see addrevision for argument descriptions.
2302 2295
2303 2296 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2304 2297
2305 2298 if "deltacomputer" is not provided or None, a default deltacomputer will
2306 2299 be used.
2307 2300
2308 2301 invariants:
2309 2302 - rawtext is optional (can be None); if not set, cachedelta must be set.
2310 2303 if both are set, they must correspond to each other.
2311 2304 """
2312 2305 if node == nullid:
2313 2306 raise error.RevlogError(
2314 2307 _(b"%s: attempt to add null revision") % self.indexfile
2315 2308 )
2316 2309 if node == wdirid or node in wdirfilenodeids:
2317 2310 raise error.RevlogError(
2318 2311 _(b"%s: attempt to add wdir revision") % self.indexfile
2319 2312 )
2320 2313
2321 2314 if self._inline:
2322 2315 fh = ifh
2323 2316 else:
2324 2317 fh = dfh
2325 2318
2326 2319 btext = [rawtext]
2327 2320
2328 2321 curr = len(self)
2329 2322 prev = curr - 1
2330 2323
2331 2324 offset = self._get_data_offset(prev)
2332 2325
2333 2326 if self._concurrencychecker:
2334 2327 if self._inline:
2335 2328 # offset is "as if" it were in the .d file, so we need to add on
2336 2329 # the size of the entry metadata.
2337 2330 self._concurrencychecker(
2338 ifh, self.indexfile, offset + curr * self._io.size
2331 ifh, self.indexfile, offset + curr * self.index.entry_size
2339 2332 )
2340 2333 else:
2341 2334 # Entries in the .i are a consistent size.
2342 2335 self._concurrencychecker(
2343 ifh, self.indexfile, curr * self._io.size
2336 ifh, self.indexfile, curr * self.index.entry_size
2344 2337 )
2345 2338 self._concurrencychecker(dfh, self.datafile, offset)
2346 2339
2347 2340 p1r, p2r = self.rev(p1), self.rev(p2)
2348 2341
2349 2342 # full versions are inserted when the needed deltas
2350 2343 # become comparable to the uncompressed text
2351 2344 if rawtext is None:
2352 2345 # need rawtext size, before changed by flag processors, which is
2353 2346 # the non-raw size. use revlog explicitly to avoid filelog's extra
2354 2347 # logic that might remove metadata size.
2355 2348 textlen = mdiff.patchedsize(
2356 2349 revlog.size(self, cachedelta[0]), cachedelta[1]
2357 2350 )
2358 2351 else:
2359 2352 textlen = len(rawtext)
2360 2353
2361 2354 if deltacomputer is None:
2362 2355 deltacomputer = deltautil.deltacomputer(self)
2363 2356
2364 2357 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
2365 2358
2366 2359 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2367 2360
2368 2361 if sidedata:
2369 2362 serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
2370 2363 sidedata_offset = offset + deltainfo.deltalen
2371 2364 else:
2372 2365 serialized_sidedata = b""
2373 2366 # Don't store the offset if the sidedata is empty; that way
2374 2367 # we can easily detect empty sidedata, and it will be no different
2375 2368 # from sidedata we add manually.
2376 2369 sidedata_offset = 0
2377 2370
2378 2371 e = (
2379 2372 offset_type(offset, flags),
2380 2373 deltainfo.deltalen,
2381 2374 textlen,
2382 2375 deltainfo.base,
2383 2376 link,
2384 2377 p1r,
2385 2378 p2r,
2386 2379 node,
2387 2380 sidedata_offset,
2388 2381 len(serialized_sidedata),
2389 2382 )
2390 2383
2391 2384 if self.version & 0xFFFF != REVLOGV2:
2392 2385 e = e[:8]
2393 2386
2394 2387 self.index.append(e)
2395 2388 entry = self._io.packentry(e, self.node, self.version, curr)
2396 2389 self._writeentry(
2397 2390 transaction,
2398 2391 ifh,
2399 2392 dfh,
2400 2393 entry,
2401 2394 deltainfo.data,
2402 2395 link,
2403 2396 offset,
2404 2397 serialized_sidedata,
2405 2398 )
2406 2399
2407 2400 rawtext = btext[0]
2408 2401
2409 2402 if alwayscache and rawtext is None:
2410 2403 rawtext = deltacomputer.buildtext(revinfo, fh)
2411 2404
2412 2405 if type(rawtext) == bytes: # only accept immutable objects
2413 2406 self._revisioncache = (node, curr, rawtext)
2414 2407 self._chainbasecache[curr] = deltainfo.chainbase
2415 2408 return curr
2416 2409
2417 2410 def _get_data_offset(self, prev):
2418 2411 """Returns the current offset in the (in-transaction) data file.
2419 2412 Versions < 2 of the revlog can get this in O(1); revlog v2 needs a docket
2420 2413 file to store that information: since sidedata can be rewritten to the
2421 2414 end of the data file within a transaction, you can have cases where, for
2422 2415 example, rev `n` does not have sidedata while rev `n - 1` does, leading
2423 2416 to `n - 1`'s sidedata being written after `n`'s data.
2424 2417
2425 2418 TODO cache this in a docket file before getting out of experimental."""
2426 2419 if self.version & 0xFFFF != REVLOGV2:
2427 2420 return self.end(prev)
2428 2421
2429 2422 offset = 0
2430 2423 for rev, entry in enumerate(self.index):
2431 2424 sidedata_end = entry[8] + entry[9]
2432 2425 # Sidedata for a previous rev has potentially been written after
2433 2426 # this rev's end, so take the max.
2434 2427 offset = max(self.end(rev), offset, sidedata_end)
2435 2428 return offset
2436 2429
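A toy instance of the scan above, with made-up ``(data end, sidedata offset, sidedata size)`` triples showing how a rewritten sidedata block can end past the last revision's data:

    # (end of data, sidedata_offset, sidedata_size) for two fake revisions
    entries = [
        (100, 0, 0),     # rev 0: no sidedata stored
        (180, 200, 40),  # rev 1: its sidedata was rewritten after its data
    ]
    offset = 0
    for end, sd_off, sd_size in entries:
        offset = max(offset, end, sd_off + sd_size)
    assert offset == 240  # a sidedata end, not a data end, wins here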
2437 2430 def _writeentry(
2438 2431 self, transaction, ifh, dfh, entry, data, link, offset, sidedata
2439 2432 ):
2440 2433 # Files opened in a+ mode have inconsistent behavior on various
2441 2434 # platforms. Windows requires that a file positioning call be made
2442 2435 # when the file handle transitions between reads and writes. See
2443 2436 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2444 2437 # platforms, Python or the platform itself can be buggy. Some versions
2445 2438 # of Solaris have been observed to not append at the end of the file
2446 2439 # if the file was seeked to before the end. See issue4943 for more.
2447 2440 #
2448 2441 # We work around this issue by inserting a seek() before writing.
2449 2442 # Note: This is likely not necessary on Python 3. However, because
2450 2443 # the file handle is reused for reads and may be seeked there, we need
2451 2444 # to be careful before changing this.
2452 2445 ifh.seek(0, os.SEEK_END)
2453 2446 if dfh:
2454 2447 dfh.seek(0, os.SEEK_END)
2455 2448
2456 2449 curr = len(self) - 1
2457 2450 if not self._inline:
2458 2451 transaction.add(self.datafile, offset)
2459 2452 transaction.add(self.indexfile, curr * len(entry))
2460 2453 if data[0]:
2461 2454 dfh.write(data[0])
2462 2455 dfh.write(data[1])
2463 2456 if sidedata:
2464 2457 dfh.write(sidedata)
2465 2458 ifh.write(entry)
2466 2459 else:
2467 offset += curr * self._io.size
2460 offset += curr * self.index.entry_size
2468 2461 transaction.add(self.indexfile, offset)
2469 2462 ifh.write(entry)
2470 2463 ifh.write(data[0])
2471 2464 ifh.write(data[1])
2472 2465 if sidedata:
2473 2466 ifh.write(sidedata)
2474 2467 self._enforceinlinesize(transaction, ifh)
2475 2468 nodemaputil.setup_persistent_nodemap(transaction, self)
2476 2469
2477 2470 def addgroup(
2478 2471 self,
2479 2472 deltas,
2480 2473 linkmapper,
2481 2474 transaction,
2482 2475 alwayscache=False,
2483 2476 addrevisioncb=None,
2484 2477 duplicaterevisioncb=None,
2485 2478 ):
2486 2479 """
2487 2480 add a delta group
2488 2481
2489 2482 given a set of deltas, add them to the revision log. the
2490 2483 first delta is against its parent, which should be in our
2491 2484 log, the rest are against the previous delta.
2492 2485
2493 2486 If ``addrevisioncb`` is defined, it will be called with arguments of
2494 2487 this revlog and the node that was added.
2495 2488 """
2496 2489
2497 2490 if self._writinghandles:
2498 2491 raise error.ProgrammingError(b'cannot nest addgroup() calls')
2499 2492
2500 2493 r = len(self)
2501 2494 end = 0
2502 2495 if r:
2503 2496 end = self.end(r - 1)
2504 2497 ifh = self._indexfp(b"a+")
2505 isize = r * self._io.size
2498 isize = r * self.index.entry_size
2506 2499 if self._inline:
2507 2500 transaction.add(self.indexfile, end + isize)
2508 2501 dfh = None
2509 2502 else:
2510 2503 transaction.add(self.indexfile, isize)
2511 2504 transaction.add(self.datafile, end)
2512 2505 dfh = self._datafp(b"a+")
2513 2506
2514 2507 def flush():
2515 2508 if dfh:
2516 2509 dfh.flush()
2517 2510 ifh.flush()
2518 2511
2519 2512 self._writinghandles = (ifh, dfh)
2520 2513 empty = True
2521 2514
2522 2515 try:
2523 2516 deltacomputer = deltautil.deltacomputer(self)
2524 2517 # loop through our set of deltas
2525 2518 for data in deltas:
2526 2519 node, p1, p2, linknode, deltabase, delta, flags, sidedata = data
2527 2520 link = linkmapper(linknode)
2528 2521 flags = flags or REVIDX_DEFAULT_FLAGS
2529 2522
2530 2523 rev = self.index.get_rev(node)
2531 2524 if rev is not None:
2532 2525 # this can happen if two branches make the same change
2533 2526 self._nodeduplicatecallback(transaction, rev)
2534 2527 if duplicaterevisioncb:
2535 2528 duplicaterevisioncb(self, rev)
2536 2529 empty = False
2537 2530 continue
2538 2531
2539 2532 for p in (p1, p2):
2540 2533 if not self.index.has_node(p):
2541 2534 raise error.LookupError(
2542 2535 p, self.indexfile, _(b'unknown parent')
2543 2536 )
2544 2537
2545 2538 if not self.index.has_node(deltabase):
2546 2539 raise error.LookupError(
2547 2540 deltabase, self.indexfile, _(b'unknown delta base')
2548 2541 )
2549 2542
2550 2543 baserev = self.rev(deltabase)
2551 2544
2552 2545 if baserev != nullrev and self.iscensored(baserev):
2553 2546 # if base is censored, delta must be full replacement in a
2554 2547 # single patch operation
2555 2548 hlen = struct.calcsize(b">lll")
2556 2549 oldlen = self.rawsize(baserev)
2557 2550 newlen = len(delta) - hlen
2558 2551 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2559 2552 raise error.CensoredBaseError(
2560 2553 self.indexfile, self.node(baserev)
2561 2554 )
2562 2555
2563 2556 if not flags and self._peek_iscensored(baserev, delta, flush):
2564 2557 flags |= REVIDX_ISCENSORED
2565 2558
2566 2559 # We assume consumers of addrevisioncb will want to retrieve
2567 2560 # the added revision, which will require a call to
2568 2561 # revision(). revision() will fast path if there is a cache
2569 2562 # hit. So, we tell _addrevision() to always cache in this case.
2570 2563 # We're only using addgroup() in the context of changegroup
2571 2564 # generation so the revision data can always be handled as raw
2572 2565 # by the flagprocessor.
2573 2566 rev = self._addrevision(
2574 2567 node,
2575 2568 None,
2576 2569 transaction,
2577 2570 link,
2578 2571 p1,
2579 2572 p2,
2580 2573 flags,
2581 2574 (baserev, delta),
2582 2575 ifh,
2583 2576 dfh,
2584 2577 alwayscache=alwayscache,
2585 2578 deltacomputer=deltacomputer,
2586 2579 sidedata=sidedata,
2587 2580 )
2588 2581
2589 2582 if addrevisioncb:
2590 2583 addrevisioncb(self, rev)
2591 2584 empty = False
2592 2585
2593 2586 if not dfh and not self._inline:
2594 2587 # addrevision switched from inline to conventional
2595 2588 # reopen the index
2596 2589 ifh.close()
2597 2590 dfh = self._datafp(b"a+")
2598 2591 ifh = self._indexfp(b"a+")
2599 2592 self._writinghandles = (ifh, dfh)
2600 2593 finally:
2601 2594 self._writinghandles = None
2602 2595
2603 2596 if dfh:
2604 2597 dfh.close()
2605 2598 ifh.close()
2606 2599 return not empty
2607 2600
2608 2601 def iscensored(self, rev):
2609 2602 """Check if a file revision is censored."""
2610 2603 if not self._censorable:
2611 2604 return False
2612 2605
2613 2606 return self.flags(rev) & REVIDX_ISCENSORED
2614 2607
2615 2608 def _peek_iscensored(self, baserev, delta, flush):
2616 2609 """Quickly check if a delta produces a censored revision."""
2617 2610 if not self._censorable:
2618 2611 return False
2619 2612
2620 2613 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
2621 2614
2622 2615 def getstrippoint(self, minlink):
2623 2616 """find the minimum rev that must be stripped to strip the linkrev
2624 2617
2625 2618 Returns a tuple containing the minimum rev and a set of all revs that
2626 2619 have linkrevs that will be broken by this strip.
2627 2620 """
2628 2621 return storageutil.resolvestripinfo(
2629 2622 minlink,
2630 2623 len(self) - 1,
2631 2624 self.headrevs(),
2632 2625 self.linkrev,
2633 2626 self.parentrevs,
2634 2627 )
2635 2628
2636 2629 def strip(self, minlink, transaction):
2637 2630 """truncate the revlog on the first revision with a linkrev >= minlink
2638 2631
2639 2632 This function is called when we're stripping revision minlink and
2640 2633 its descendants from the repository.
2641 2634
2642 2635 We have to remove all revisions with linkrev >= minlink, because
2643 2636 the equivalent changelog revisions will be renumbered after the
2644 2637 strip.
2645 2638
2646 2639 So we truncate the revlog on the first of these revisions, and
2647 2640 trust that the caller has saved the revisions that shouldn't be
2648 2641 removed and that it'll re-add them after this truncation.
2649 2642 """
2650 2643 if len(self) == 0:
2651 2644 return
2652 2645
2653 2646 rev, _ = self.getstrippoint(minlink)
2654 2647 if rev == len(self):
2655 2648 return
2656 2649
2657 2650 # first truncate the files on disk
2658 2651 end = self.start(rev)
2659 2652 if not self._inline:
2660 2653 transaction.add(self.datafile, end)
2661 end = rev * self._io.size
2654 end = rev * self.index.entry_size
2662 2655 else:
2663 end += rev * self._io.size
2656 end += rev * self.index.entry_size
2664 2657
2665 2658 transaction.add(self.indexfile, end)
2666 2659
2667 2660 # then reset internal state in memory to forget those revisions
2668 2661 self._revisioncache = None
2669 2662 self._chaininfocache = util.lrucachedict(500)
2670 2663 self._chunkclear()
2671 2664
2672 2665 del self.index[rev:-1]
2673 2666
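The two truncation points above reuse the interleaving arithmetic from the read side: a split revlog keeps exactly ``rev`` index entries, while an inline one keeps those entries woven in front of the surviving data. A worked example, again assuming the 64-byte v1 entry size:

    entry_size = 64
    rev = 10         # first revision being stripped
    data_end = 5000  # self.start(rev): bytes of revision data that survive

    # split revlog: the index file is truncated after ``rev`` entries
    index_end = rev * entry_size                 # 640
    # inline revlog: surviving data plus the ``rev`` interleaved entries
    inline_end = data_end + rev * entry_size     # 5640
    assert (index_end, inline_end) == (640, 5640)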
2674 2667 def checksize(self):
2675 2668 """Check size of index and data files
2676 2669
2677 2670 return a (dd, di) tuple.
2678 2671 - dd: extra bytes for the "data" file
2679 2672 - di: extra bytes for the "index" file
2680 2673
2681 2674 A healthy revlog will return (0, 0).
2682 2675 """
2683 2676 expected = 0
2684 2677 if len(self):
2685 2678 expected = max(0, self.end(len(self) - 1))
2686 2679
2687 2680 try:
2688 2681 with self._datafp() as f:
2689 2682 f.seek(0, io.SEEK_END)
2690 2683 actual = f.tell()
2691 2684 dd = actual - expected
2692 2685 except IOError as inst:
2693 2686 if inst.errno != errno.ENOENT:
2694 2687 raise
2695 2688 dd = 0
2696 2689
2697 2690 try:
2698 2691 f = self.opener(self.indexfile)
2699 2692 f.seek(0, io.SEEK_END)
2700 2693 actual = f.tell()
2701 2694 f.close()
2702 s = self._io.size
2695 s = self.index.entry_size
2703 2696 i = max(0, actual // s)
2704 2697 di = actual - (i * s)
2705 2698 if self._inline:
2706 2699 databytes = 0
2707 2700 for r in self:
2708 2701 databytes += max(0, self.length(r))
2709 2702 dd = 0
2710 2703 di = actual - len(self) * s - databytes
2711 2704 except IOError as inst:
2712 2705 if inst.errno != errno.ENOENT:
2713 2706 raise
2714 2707 di = 0
2715 2708
2716 2709 return (dd, di)
2717 2710
2718 2711 def files(self):
2719 2712 res = [self.indexfile]
2720 2713 if not self._inline:
2721 2714 res.append(self.datafile)
2722 2715 return res
2723 2716
2724 2717 def emitrevisions(
2725 2718 self,
2726 2719 nodes,
2727 2720 nodesorder=None,
2728 2721 revisiondata=False,
2729 2722 assumehaveparentrevisions=False,
2730 2723 deltamode=repository.CG_DELTAMODE_STD,
2731 2724 sidedata_helpers=None,
2732 2725 ):
2733 2726 if nodesorder not in (b'nodes', b'storage', b'linear', None):
2734 2727 raise error.ProgrammingError(
2735 2728 b'unhandled value for nodesorder: %s' % nodesorder
2736 2729 )
2737 2730
2738 2731 if nodesorder is None and not self._generaldelta:
2739 2732 nodesorder = b'storage'
2740 2733
2741 2734 if (
2742 2735 not self._storedeltachains
2743 2736 and deltamode != repository.CG_DELTAMODE_PREV
2744 2737 ):
2745 2738 deltamode = repository.CG_DELTAMODE_FULL
2746 2739
2747 2740 return storageutil.emitrevisions(
2748 2741 self,
2749 2742 nodes,
2750 2743 nodesorder,
2751 2744 revlogrevisiondelta,
2752 2745 deltaparentfn=self.deltaparent,
2753 2746 candeltafn=self.candelta,
2754 2747 rawsizefn=self.rawsize,
2755 2748 revdifffn=self.revdiff,
2756 2749 flagsfn=self.flags,
2757 2750 deltamode=deltamode,
2758 2751 revisiondata=revisiondata,
2759 2752 assumehaveparentrevisions=assumehaveparentrevisions,
2760 2753 sidedata_helpers=sidedata_helpers,
2761 2754 )
2762 2755
2763 2756 DELTAREUSEALWAYS = b'always'
2764 2757 DELTAREUSESAMEREVS = b'samerevs'
2765 2758 DELTAREUSENEVER = b'never'
2766 2759
2767 2760 DELTAREUSEFULLADD = b'fulladd'
2768 2761
2769 2762 DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}
2770 2763
2771 2764 def clone(
2772 2765 self,
2773 2766 tr,
2774 2767 destrevlog,
2775 2768 addrevisioncb=None,
2776 2769 deltareuse=DELTAREUSESAMEREVS,
2777 2770 forcedeltabothparents=None,
2778 2771 sidedatacompanion=None,
2779 2772 ):
2780 2773 """Copy this revlog to another, possibly with format changes.
2781 2774
2782 2775 The destination revlog will contain the same revisions and nodes.
2783 2776 However, it may not be bit-for-bit identical due to e.g. delta encoding
2784 2777 differences.
2785 2778
2786 2779 The ``deltareuse`` argument controls how deltas from the existing revlog
2787 2780 are preserved in the destination revlog. The argument can have the
2788 2781 following values:
2789 2782
2790 2783 DELTAREUSEALWAYS
2791 2784 Deltas will always be reused (if possible), even if the destination
2792 2785 revlog would not select the same revisions for the delta. This is the
2793 2786 fastest mode of operation.
2794 2787 DELTAREUSESAMEREVS
2795 2788 Deltas will be reused if the destination revlog would pick the same
2796 2789 revisions for the delta. This mode strikes a balance between speed
2797 2790 and optimization.
2798 2791 DELTAREUSENEVER
2799 2792 Deltas will never be reused. This is the slowest mode of execution.
2800 2793 This mode can be used to recompute deltas (e.g. if the diff/delta
2801 2794 algorithm changes).
2802 2795 DELTAREUSEFULLADD
2803 2796 Revisions will be re-added as if they were new content. This is
2804 2797 slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
2805 2798 e.g. large file detection and handling.
2806 2799
2807 2800 Delta computation can be slow, so the choice of delta reuse policy can
2808 2801 significantly affect run time.
2809 2802
2810 2803 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2811 2804 two extremes. Deltas will be reused if they are appropriate. But if the
2812 2805 delta could choose a better revision, it will do so. This means if you
2813 2806 are converting a non-generaldelta revlog to a generaldelta revlog,
2814 2807 deltas will be recomputed if the delta's parent isn't a parent of the
2815 2808 revision.
2816 2809
2817 2810 In addition to the delta policy, the ``forcedeltabothparents``
2818 2811 argument controls whether to force compute deltas against both parents
2819 2812 for merges. If None, the destination revlog's current setting is kept.
2820 2813
2821 2814 If not None, the `sidedatacompanion` is a callable that accepts two
2822 2815 arguments:
2823 2816
2824 2817 (srcrevlog, rev)
2825 2818
2826 2819 and returns a quintet that controls changes to sidedata content from the
2827 2820 old revision to the new clone result:
2828 2821
2829 2822 (dropall, filterout, update, new_flags, dropped_flags)
2830 2823
2831 2824 * if `dropall` is True, all sidedata should be dropped
2832 2825 * `filterout` is a set of sidedata keys that should be dropped
2833 2826 * `update` is a mapping of additional/new key -> value
2834 2827 * new_flags is a bitfield of new flags that the revision should get
2835 2828 * dropped_flags is a bitfield of flags that the revision should no longer have
2836 2829 """
2837 2830 if deltareuse not in self.DELTAREUSEALL:
2838 2831 raise ValueError(
2839 2832 _(b'value for deltareuse invalid: %s') % deltareuse
2840 2833 )
2841 2834
2842 2835 if len(destrevlog):
2843 2836 raise ValueError(_(b'destination revlog is not empty'))
2844 2837
2845 2838 if getattr(self, 'filteredrevs', None):
2846 2839 raise ValueError(_(b'source revlog has filtered revisions'))
2847 2840 if getattr(destrevlog, 'filteredrevs', None):
2848 2841 raise ValueError(_(b'destination revlog has filtered revisions'))
2849 2842
2850 2843 # lazydelta and lazydeltabase controls whether to reuse a cached delta,
2851 2844 # if possible.
2852 2845 oldlazydelta = destrevlog._lazydelta
2853 2846 oldlazydeltabase = destrevlog._lazydeltabase
2854 2847 oldamd = destrevlog._deltabothparents
2855 2848
2856 2849 try:
2857 2850 if deltareuse == self.DELTAREUSEALWAYS:
2858 2851 destrevlog._lazydeltabase = True
2859 2852 destrevlog._lazydelta = True
2860 2853 elif deltareuse == self.DELTAREUSESAMEREVS:
2861 2854 destrevlog._lazydeltabase = False
2862 2855 destrevlog._lazydelta = True
2863 2856 elif deltareuse == self.DELTAREUSENEVER:
2864 2857 destrevlog._lazydeltabase = False
2865 2858 destrevlog._lazydelta = False
2866 2859
2867 2860 destrevlog._deltabothparents = forcedeltabothparents or oldamd
2868 2861
2869 2862 self._clone(
2870 2863 tr,
2871 2864 destrevlog,
2872 2865 addrevisioncb,
2873 2866 deltareuse,
2874 2867 forcedeltabothparents,
2875 2868 sidedatacompanion,
2876 2869 )
2877 2870
2878 2871 finally:
2879 2872 destrevlog._lazydelta = oldlazydelta
2880 2873 destrevlog._lazydeltabase = oldlazydeltabase
2881 2874 destrevlog._deltabothparents = oldamd
2882 2875
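A sketch of how the knobs documented above are typically driven, assuming ``src`` and ``dst`` are open revlogs and ``tr`` a live transaction (all three names are hypothetical):

    # recompute every delta, e.g. when converting to a generaldelta revlog
    src.clone(
        tr,
        dst,
        deltareuse=src.DELTAREUSENEVER,
        forcedeltabothparents=None,  # None keeps dst's existing setting
    )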
2883 2876 def _clone(
2884 2877 self,
2885 2878 tr,
2886 2879 destrevlog,
2887 2880 addrevisioncb,
2888 2881 deltareuse,
2889 2882 forcedeltabothparents,
2890 2883 sidedatacompanion,
2891 2884 ):
2892 2885 """perform the core duty of `revlog.clone` after parameter processing"""
2893 2886 deltacomputer = deltautil.deltacomputer(destrevlog)
2894 2887 index = self.index
2895 2888 for rev in self:
2896 2889 entry = index[rev]
2897 2890
2898 2891 # Some classes override linkrev to take filtered revs into
2899 2892 # account. Use raw entry from index.
2900 2893 flags = entry[0] & 0xFFFF
2901 2894 linkrev = entry[4]
2902 2895 p1 = index[entry[5]][7]
2903 2896 p2 = index[entry[6]][7]
2904 2897 node = entry[7]
2905 2898
2906 2899 sidedataactions = (False, [], {}, 0, 0)
2907 2900 if sidedatacompanion is not None:
2908 2901 sidedataactions = sidedatacompanion(self, rev)
2909 2902
2910 2903 # (Possibly) reuse the delta from the revlog if allowed and
2911 2904 # the revlog chunk is a delta.
2912 2905 cachedelta = None
2913 2906 rawtext = None
2914 2907 if any(sidedataactions) or deltareuse == self.DELTAREUSEFULLADD:
2915 2908 dropall = sidedataactions[0]
2916 2909 filterout = sidedataactions[1]
2917 2910 update = sidedataactions[2]
2918 2911 new_flags = sidedataactions[3]
2919 2912 dropped_flags = sidedataactions[4]
2920 2913 text, sidedata = self._revisiondata(rev)
2921 2914 if dropall:
2922 2915 sidedata = {}
2923 2916 for key in filterout:
2924 2917 sidedata.pop(key, None)
2925 2918 sidedata.update(update)
2926 2919 if not sidedata:
2927 2920 sidedata = None
2928 2921
2929 2922 flags |= new_flags
2930 2923 flags &= ~dropped_flags
2931 2924
2932 2925 destrevlog.addrevision(
2933 2926 text,
2934 2927 tr,
2935 2928 linkrev,
2936 2929 p1,
2937 2930 p2,
2938 2931 cachedelta=cachedelta,
2939 2932 node=node,
2940 2933 flags=flags,
2941 2934 deltacomputer=deltacomputer,
2942 2935 sidedata=sidedata,
2943 2936 )
2944 2937 else:
2945 2938 if destrevlog._lazydelta:
2946 2939 dp = self.deltaparent(rev)
2947 2940 if dp != nullrev:
2948 2941 cachedelta = (dp, bytes(self._chunk(rev)))
2949 2942
2950 2943 if not cachedelta:
2951 2944 rawtext = self.rawdata(rev)
2952 2945
2953 2946 ifh = destrevlog.opener(
2954 2947 destrevlog.indexfile, b'a+', checkambig=False
2955 2948 )
2956 2949 dfh = None
2957 2950 if not destrevlog._inline:
2958 2951 dfh = destrevlog.opener(destrevlog.datafile, b'a+')
2959 2952 try:
2960 2953 destrevlog._addrevision(
2961 2954 node,
2962 2955 rawtext,
2963 2956 tr,
2964 2957 linkrev,
2965 2958 p1,
2966 2959 p2,
2967 2960 flags,
2968 2961 cachedelta,
2969 2962 ifh,
2970 2963 dfh,
2971 2964 deltacomputer=deltacomputer,
2972 2965 )
2973 2966 finally:
2974 2967 if dfh:
2975 2968 dfh.close()
2976 2969 ifh.close()
2977 2970
2978 2971 if addrevisioncb:
2979 2972 addrevisioncb(self, rev, node)
2980 2973
2981 2974 def censorrevision(self, tr, censornode, tombstone=b''):
2982 2975 if (self.version & 0xFFFF) == REVLOGV0:
2983 2976 raise error.RevlogError(
2984 2977 _(b'cannot censor with version %d revlogs') % self.version
2985 2978 )
2986 2979
2987 2980 censorrev = self.rev(censornode)
2988 2981 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
2989 2982
2990 2983 if len(tombstone) > self.rawsize(censorrev):
2991 2984 raise error.Abort(
2992 2985 _(b'censor tombstone must be no longer than censored data')
2993 2986 )
2994 2987
2995 2988 # Rewriting the revlog in place is hard. Our strategy for censoring is
2996 2989 # to create a new revlog, copy all revisions to it, then replace the
2997 2990 # revlogs on transaction close.
2998 2991
2999 2992 newindexfile = self.indexfile + b'.tmpcensored'
3000 2993 newdatafile = self.datafile + b'.tmpcensored'
3001 2994
3002 2995 # This is a bit dangerous. We could easily have a mismatch of state.
3003 2996 newrl = revlog(self.opener, newindexfile, newdatafile, censorable=True)
3004 2997 newrl.version = self.version
3005 2998 newrl._generaldelta = self._generaldelta
3006 2999 newrl._io = self._io
3007 3000
3008 3001 for rev in self.revs():
3009 3002 node = self.node(rev)
3010 3003 p1, p2 = self.parents(node)
3011 3004
3012 3005 if rev == censorrev:
3013 3006 newrl.addrawrevision(
3014 3007 tombstone,
3015 3008 tr,
3016 3009 self.linkrev(censorrev),
3017 3010 p1,
3018 3011 p2,
3019 3012 censornode,
3020 3013 REVIDX_ISCENSORED,
3021 3014 )
3022 3015
3023 3016 if newrl.deltaparent(rev) != nullrev:
3024 3017 raise error.Abort(
3025 3018 _(
3026 3019 b'censored revision stored as delta; '
3027 3020 b'cannot censor'
3028 3021 ),
3029 3022 hint=_(
3030 3023 b'censoring of revlogs is not '
3031 3024 b'fully implemented; please report '
3032 3025 b'this bug'
3033 3026 ),
3034 3027 )
3035 3028 continue
3036 3029
3037 3030 if self.iscensored(rev):
3038 3031 if self.deltaparent(rev) != nullrev:
3039 3032 raise error.Abort(
3040 3033 _(
3041 3034 b'cannot censor due to censored '
3042 3035 b'revision having delta stored'
3043 3036 )
3044 3037 )
3045 3038 rawtext = self._chunk(rev)
3046 3039 else:
3047 3040 rawtext = self.rawdata(rev)
3048 3041
3049 3042 newrl.addrawrevision(
3050 3043 rawtext, tr, self.linkrev(rev), p1, p2, node, self.flags(rev)
3051 3044 )
3052 3045
3053 3046 tr.addbackup(self.indexfile, location=b'store')
3054 3047 if not self._inline:
3055 3048 tr.addbackup(self.datafile, location=b'store')
3056 3049
3057 3050 self.opener.rename(newrl.indexfile, self.indexfile)
3058 3051 if not self._inline:
3059 3052 self.opener.rename(newrl.datafile, self.datafile)
3060 3053
3061 3054 self.clearcaches()
3062 3055 self._loadindex()
3063 3056
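Since rewriting a revlog in place is hard, censorrevision builds a parallel revlog and swaps it in at transaction close, as the code above shows. A condensed, illustrative sketch of that strategy (error handling and the delta-storage checks omitted):

    def censor_by_copy(rl, tr, censorrev, tombstone):
        # Sketch only: copy every revision into a temporary revlog, replacing
        # the censored one with the tombstone, then rename over the originals.
        new = revlog(rl.opener, rl.indexfile + b'.tmpcensored',
                     rl.datafile + b'.tmpcensored', censorable=True)
        for rev in rl.revs():
            node = rl.node(rev)
            p1, p2 = rl.parents(node)
            if rev == censorrev:
                new.addrawrevision(tombstone, tr, rl.linkrev(rev), p1, p2,
                                   node, REVIDX_ISCENSORED)
            else:
                new.addrawrevision(rl.rawdata(rev), tr, rl.linkrev(rev), p1, p2,
                                   node, rl.flags(rev))
        tr.addbackup(rl.indexfile, location=b'store')
        rl.opener.rename(new.indexfile, rl.indexfile)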
3064 3057 def verifyintegrity(self, state):
3065 3058 """Verifies the integrity of the revlog.
3066 3059
3067 3060 Yields ``revlogproblem`` instances describing problems that are
3068 3061 found.
3069 3062 """
3070 3063 dd, di = self.checksize()
3071 3064 if dd:
3072 3065 yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
3073 3066 if di:
3074 3067 yield revlogproblem(error=_(b'index contains %d extra bytes') % di)
3075 3068
3076 3069 version = self.version & 0xFFFF
3077 3070
3078 3071 # The verifier tells us what version revlog we should be.
3079 3072 if version != state[b'expectedversion']:
3080 3073 yield revlogproblem(
3081 3074 warning=_(b"warning: '%s' uses revlog format %d; expected %d")
3082 3075 % (self.indexfile, version, state[b'expectedversion'])
3083 3076 )
3084 3077
3085 3078 state[b'skipread'] = set()
3086 3079 state[b'safe_renamed'] = set()
3087 3080
3088 3081 for rev in self:
3089 3082 node = self.node(rev)
3090 3083
3091 3084 # Verify contents. 4 cases to care about:
3092 3085 #
3093 3086 # common: the most common case
3094 3087 # rename: with a rename
3095 3088 # meta: file content starts with b'\1\n', the metadata
3096 3089 # header defined in filelog.py, but without a rename
3097 3090 # ext: content stored externally
3098 3091 #
3099 3092 # More formally, their differences are shown below:
3100 3093 #
3101 3094 # | common | rename | meta | ext
3102 3095 # -------------------------------------------------------
3103 3096 # flags() | 0 | 0 | 0 | not 0
3104 3097 # renamed() | False | True | False | ?
3105 3098 # rawtext[0:2]=='\1\n'| False | True | True | ?
3106 3099 #
3107 3100 # "rawtext" means the raw text stored in revlog data, which
3108 3101 # could be retrieved by "rawdata(rev)". "text"
3109 3102 # mentioned below is "revision(rev)".
3110 3103 #
3111 3104 # There are 3 different lengths stored physically:
3112 3105 # 1. L1: rawsize, stored in revlog index
3113 3106 # 2. L2: len(rawtext), stored in revlog data
3114 3107 # 3. L3: len(text), stored in revlog data if flags==0, or
3115 3108 # possibly somewhere else if flags!=0
3116 3109 #
3117 3110 # L1 should be equal to L2. L3 could be different from them.
3118 3111 # "text" may or may not affect commit hash depending on flag
3119 3112 # processors (see flagutil.addflagprocessor).
3120 3113 #
3121 3114 # | common | rename | meta | ext
3122 3115 # -------------------------------------------------
3123 3116 # rawsize() | L1 | L1 | L1 | L1
3124 3117 # size() | L1 | L2-LM | L1(*) | L1 (?)
3125 3118 # len(rawtext) | L2 | L2 | L2 | L2
3126 3119 # len(text) | L2 | L2 | L2 | L3
3127 3120 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
3128 3121 #
3129 3122 # LM: length of metadata, depending on rawtext
3130 3123 # (*): not ideal, see comment in filelog.size
3131 3124 # (?): could be "- len(meta)" if the resolved content has
3132 3125 # rename metadata
3133 3126 #
3134 3127 # Checks needed to be done:
3135 3128 # 1. length check: L1 == L2, in all cases.
3136 3129 # 2. hash check: depending on flag processor, we may need to
3137 3130 # use either "text" (external), or "rawtext" (in revlog).
3138 3131
3139 3132 try:
3140 3133 skipflags = state.get(b'skipflags', 0)
3141 3134 if skipflags:
3142 3135 skipflags &= self.flags(rev)
3143 3136
3144 3137 _verify_revision(self, skipflags, state, node)
3145 3138
3146 3139 l1 = self.rawsize(rev)
3147 3140 l2 = len(self.rawdata(node))
3148 3141
3149 3142 if l1 != l2:
3150 3143 yield revlogproblem(
3151 3144 error=_(b'unpacked size is %d, %d expected') % (l2, l1),
3152 3145 node=node,
3153 3146 )
3154 3147
3155 3148 except error.CensoredNodeError:
3156 3149 if state[b'erroroncensored']:
3157 3150 yield revlogproblem(
3158 3151 error=_(b'censored file data'), node=node
3159 3152 )
3160 3153 state[b'skipread'].add(node)
3161 3154 except Exception as e:
3162 3155 yield revlogproblem(
3163 3156 error=_(b'unpacking %s: %s')
3164 3157 % (short(node), stringutil.forcebytestr(e)),
3165 3158 node=node,
3166 3159 )
3167 3160 state[b'skipread'].add(node)
3168 3161
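Check 1 from the table above reduces to a single comparison: the rawsize recorded in the index (L1) must equal the length of the raw text stored in the data file (L2); L3, the resolved text length, is allowed to differ. An illustrative standalone version of that check:

    def check_lengths(rl, rev):
        # Sketch of the L1 == L2 invariant; `rl` is assumed to be a revlog.
        l1 = rl.rawsize(rev)          # length recorded in the revlog index
        l2 = len(rl.rawdata(rev))     # actual length of the stored raw text
        if l1 != l2:
            return b'unpacked size is %d, %d expected' % (l2, l1)
        return None                   # lengths agree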
3169 3162 def storageinfo(
3170 3163 self,
3171 3164 exclusivefiles=False,
3172 3165 sharedfiles=False,
3173 3166 revisionscount=False,
3174 3167 trackedsize=False,
3175 3168 storedsize=False,
3176 3169 ):
3177 3170 d = {}
3178 3171
3179 3172 if exclusivefiles:
3180 3173 d[b'exclusivefiles'] = [(self.opener, self.indexfile)]
3181 3174 if not self._inline:
3182 3175 d[b'exclusivefiles'].append((self.opener, self.datafile))
3183 3176
3184 3177 if sharedfiles:
3185 3178 d[b'sharedfiles'] = []
3186 3179
3187 3180 if revisionscount:
3188 3181 d[b'revisionscount'] = len(self)
3189 3182
3190 3183 if trackedsize:
3191 3184 d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))
3192 3185
3193 3186 if storedsize:
3194 3187 d[b'storedsize'] = sum(
3195 3188 self.opener.stat(path).st_size for path in self.files()
3196 3189 )
3197 3190
3198 3191 return d
3199 3192
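Callers request only the fields they need, and unrequested fields leave the corresponding keys out of the returned dict. An illustrative call, with `rl` a hypothetical revlog:

    # Illustrative only: query two of the five optional fields.
    info = rl.storageinfo(revisionscount=True, storedsize=True)
    print(info[b'revisionscount'], info[b'storedsize'])
    # Fields that were not requested are simply missing.
    assert b'trackedsize' not in info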
3200 3193 def rewrite_sidedata(self, helpers, startrev, endrev):
3201 3194 if self.version & 0xFFFF != REVLOGV2:
3202 3195 return
3203 3196 # inline are not yet supported because they suffer from an issue when
3204 3197 # rewriting them (since it's not an append-only operation).
3205 3198 # See issue6485.
3206 3199 assert not self._inline
3207 3200 if not helpers[1] and not helpers[2]:
3208 3201 # Nothing to generate or remove
3209 3202 return
3210 3203
3211 3204 new_entries = []
3212 3205 # append the new sidedata
3213 3206 with self._datafp(b'a+') as fp:
3214 3207 # The bug described in revlog._writeentry may still exist here
3215 3208 fp.seek(0, os.SEEK_END)
3216 3209 current_offset = fp.tell()
3217 3210 for rev in range(startrev, endrev + 1):
3218 3211 entry = self.index[rev]
3219 3212 new_sidedata = storageutil.run_sidedata_helpers(
3220 3213 store=self,
3221 3214 sidedata_helpers=helpers,
3222 3215 sidedata={},
3223 3216 rev=rev,
3224 3217 )
3225 3218
3226 3219 serialized_sidedata = sidedatautil.serialize_sidedata(
3227 3220 new_sidedata
3228 3221 )
3229 3222 if entry[8] != 0 or entry[9] != 0:
3230 3223 # rewriting entries that already have sidedata is not
3231 3224 # supported yet, because it introduces garbage data in the
3232 3225 # revlog.
3233 3226 msg = b"Rewriting existing sidedata is not supported yet"
3234 3227 raise error.Abort(msg)
3235 3228 entry = entry[:8]
3236 3229 entry += (current_offset, len(serialized_sidedata))
3237 3230
3238 3231 fp.write(serialized_sidedata)
3239 3232 new_entries.append(entry)
3240 3233 current_offset += len(serialized_sidedata)
3241 3234
3242 3235 # rewrite the new index entries
3243 3236 with self._indexfp(b'w+') as fp:
3244 fp.seek(startrev * self._io.size)
3237 fp.seek(startrev * self.index.entry_size)
3245 3238 for i, entry in enumerate(new_entries):
3246 3239 rev = startrev + i
3247 3240 self.index.replace_sidedata_info(rev, entry[8], entry[9])
3248 3241 packed = self._io.packentry(entry, self.node, self.version, rev)
3249 3242 fp.write(packed)
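This final hunk is the point of the changeset: the index-entry width now comes from the index object itself (`self.index.entry_size`) instead of the revlogio instance (`self._io.size`), so the seek arithmetic no longer depends on which I/O class is in use. The offset computation itself is plain fixed-width indexing, sketched below:

    # Sketch: with fixed-width index entries, revision `startrev` starts at
    # byte startrev * entry_size in the (non-inline) index file.
    entry_size = rl.index.entry_size   # 64 bytes for v1 index entries
    fp.seek(startrev * entry_size)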
@@ -1,490 +1,494 b''
1 1 // revlog.rs
2 2 //
3 3 // Copyright 2019-2020 Georges Racinet <georges.racinet@octobus.net>
4 4 //
5 5 // This software may be used and distributed according to the terms of the
6 6 // GNU General Public License version 2 or any later version.
7 7
8 8 use crate::{
9 9 cindex,
10 10 utils::{node_from_py_bytes, node_from_py_object},
11 11 };
12 12 use cpython::{
13 13 buffer::{Element, PyBuffer},
14 14 exc::{IndexError, ValueError},
15 ObjectProtocol, PyBytes, PyClone, PyDict, PyErr, PyModule, PyObject,
16 PyResult, PyString, PyTuple, Python, PythonObject, ToPyObject,
15 ObjectProtocol, PyBytes, PyClone, PyDict, PyErr, PyInt, PyModule,
16 PyObject, PyResult, PyString, PyTuple, Python, PythonObject, ToPyObject,
17 17 };
18 18 use hg::{
19 19 nodemap::{Block, NodeMapError, NodeTree},
20 20 revlog::{nodemap::NodeMap, NodePrefix, RevlogIndex},
21 21 Revision,
22 22 };
23 23 use std::cell::RefCell;
24 24
25 25 /// Return a struct implementing the Graph trait
26 26 pub(crate) fn pyindex_to_graph(
27 27 py: Python,
28 28 index: PyObject,
29 29 ) -> PyResult<cindex::Index> {
30 30 match index.extract::<MixedIndex>(py) {
31 31 Ok(midx) => Ok(midx.clone_cindex(py)),
32 32 Err(_) => cindex::Index::new(py, index),
33 33 }
34 34 }
35 35
36 36 py_class!(pub class MixedIndex |py| {
37 37 data cindex: RefCell<cindex::Index>;
38 38 data nt: RefCell<Option<NodeTree>>;
39 39 data docket: RefCell<Option<PyObject>>;
40 40 // Holds a reference to the mmap'ed persistent nodemap data
41 41 data mmap: RefCell<Option<PyBuffer>>;
42 42
43 43 def __new__(_cls, cindex: PyObject) -> PyResult<MixedIndex> {
44 44 Self::new(py, cindex)
45 45 }
46 46
47 47 /// Compatibility layer used for Python consumers needing access to the C index
48 48 ///
49 49 /// Only use case so far is `scmutil.shortesthexnodeidprefix`,
50 50 /// that may need to build a custom `nodetree`, based on a specified revset.
51 51 /// With a Rust implementation of the nodemap, we will be able to get rid of
52 52 /// this, by exposing our own standalone nodemap class,
53 53 /// ready to accept `MixedIndex`.
54 54 def get_cindex(&self) -> PyResult<PyObject> {
55 55 Ok(self.cindex(py).borrow().inner().clone_ref(py))
56 56 }
57 57
58 58 // Index API involving nodemap, as defined in mercurial/pure/parsers.py
59 59
60 60 /// Return the Revision if found; raises a bare `error.RevlogError`
61 61 /// in case of ambiguity, just as the C version does
62 62 def get_rev(&self, node: PyBytes) -> PyResult<Option<Revision>> {
63 63 let opt = self.get_nodetree(py)?.borrow();
64 64 let nt = opt.as_ref().unwrap();
65 65 let idx = &*self.cindex(py).borrow();
66 66 let node = node_from_py_bytes(py, &node)?;
67 67 nt.find_bin(idx, node.into()).map_err(|e| nodemap_error(py, e))
68 68 }
69 69
70 70 /// same as `get_rev()` but raises a bare `error.RevlogError` if node
71 71 /// is not found.
72 72 ///
73 73 /// No need to repeat `node` in the exception, `mercurial/revlog.py`
74 74 /// will catch and rewrap with it
75 75 def rev(&self, node: PyBytes) -> PyResult<Revision> {
76 76 self.get_rev(py, node)?.ok_or_else(|| revlog_error(py))
77 77 }
78 78
79 79 /// Return True if the node exists in the index
80 80 def has_node(&self, node: PyBytes) -> PyResult<bool> {
81 81 self.get_rev(py, node).map(|opt| opt.is_some())
82 82 }
83 83
84 84 /// Find the length of the shortest unique hex nodeid prefix of a binary ID
85 85 def shortest(&self, node: PyBytes) -> PyResult<usize> {
86 86 let opt = self.get_nodetree(py)?.borrow();
87 87 let nt = opt.as_ref().unwrap();
88 88 let idx = &*self.cindex(py).borrow();
89 89 match nt.unique_prefix_len_node(idx, &node_from_py_bytes(py, &node)?)
90 90 {
91 91 Ok(Some(l)) => Ok(l),
92 92 Ok(None) => Err(revlog_error(py)),
93 93 Err(e) => Err(nodemap_error(py, e)),
94 94 }
95 95 }
96 96
97 97 def partialmatch(&self, node: PyObject) -> PyResult<Option<PyBytes>> {
98 98 let opt = self.get_nodetree(py)?.borrow();
99 99 let nt = opt.as_ref().unwrap();
100 100 let idx = &*self.cindex(py).borrow();
101 101
102 102 let node_as_string = if cfg!(feature = "python3-sys") {
103 103 node.cast_as::<PyString>(py)?.to_string(py)?.to_string()
104 104 }
105 105 else {
106 106 let node = node.extract::<PyBytes>(py)?;
107 107 String::from_utf8_lossy(node.data(py)).to_string()
108 108 };
109 109
110 110 let prefix = NodePrefix::from_hex(&node_as_string).map_err(|_| PyErr::new::<ValueError, _>(py, "Invalid node or prefix"))?;
111 111
112 112 nt.find_bin(idx, prefix)
113 113 // TODO make an inner API returning the node directly
114 114 .map(|opt| opt.map(
115 115 |rev| PyBytes::new(py, idx.node(rev).unwrap().as_bytes())))
116 116 .map_err(|e| nodemap_error(py, e))
117 117
118 118 }
119 119
120 120 /// append an index entry
121 121 def append(&self, tup: PyTuple) -> PyResult<PyObject> {
122 122 if tup.len(py) < 8 {
123 123 // this is better than the panic promised by tup.get_item()
124 124 return Err(
125 125 PyErr::new::<IndexError, _>(py, "tuple index out of range"))
126 126 }
127 127 let node_bytes = tup.get_item(py, 7).extract(py)?;
128 128 let node = node_from_py_object(py, &node_bytes)?;
129 129
130 130 let mut idx = self.cindex(py).borrow_mut();
131 131 let rev = idx.len() as Revision;
132 132
133 133 idx.append(py, tup)?;
134 134 self.get_nodetree(py)?.borrow_mut().as_mut().unwrap()
135 135 .insert(&*idx, &node, rev)
136 136 .map_err(|e| nodemap_error(py, e))?;
137 137 Ok(py.None())
138 138 }
139 139
140 140 def __delitem__(&self, key: PyObject) -> PyResult<()> {
141 141 // __delitem__ is both for `del idx[r]` and `del idx[r1:r2]`
142 142 self.cindex(py).borrow().inner().del_item(py, key)?;
143 143 let mut opt = self.get_nodetree(py)?.borrow_mut();
144 144 let mut nt = opt.as_mut().unwrap();
145 145 nt.invalidate_all();
146 146 self.fill_nodemap(py, &mut nt)?;
147 147 Ok(())
148 148 }
149 149
150 150 //
151 151 // Reforwarded C index API
152 152 //
153 153
154 154 // index_methods (tp_methods). Same ordering as in revlog.c
155 155
156 156 /// Return the GCA (greatest common ancestors) set of the given revs
157 157 def ancestors(&self, *args, **kw) -> PyResult<PyObject> {
158 158 self.call_cindex(py, "ancestors", args, kw)
159 159 }
160 160
161 161 /// return the heads of the common ancestors of the given revs
162 162 def commonancestorsheads(&self, *args, **kw) -> PyResult<PyObject> {
163 163 self.call_cindex(py, "commonancestorsheads", args, kw)
164 164 }
165 165
166 166 /// Clear the index caches and inner py_class data.
167 167 /// It is Python's responsibility to call `update_nodemap_data` again.
168 168 def clearcaches(&self, *args, **kw) -> PyResult<PyObject> {
169 169 self.nt(py).borrow_mut().take();
170 170 self.docket(py).borrow_mut().take();
171 171 self.mmap(py).borrow_mut().take();
172 172 self.call_cindex(py, "clearcaches", args, kw)
173 173 }
174 174
175 175 /// get an index entry
176 176 def get(&self, *args, **kw) -> PyResult<PyObject> {
177 177 self.call_cindex(py, "get", args, kw)
178 178 }
179 179
180 180 /// compute phases
181 181 def computephasesmapsets(&self, *args, **kw) -> PyResult<PyObject> {
182 182 self.call_cindex(py, "computephasesmapsets", args, kw)
183 183 }
184 184
185 185 /// reachableroots
186 186 def reachableroots2(&self, *args, **kw) -> PyResult<PyObject> {
187 187 self.call_cindex(py, "reachableroots2", args, kw)
188 188 }
189 189
190 190 /// get head revisions
191 191 def headrevs(&self, *args, **kw) -> PyResult<PyObject> {
192 192 self.call_cindex(py, "headrevs", args, kw)
193 193 }
194 194
195 195 /// get filtered head revisions
196 196 def headrevsfiltered(&self, *args, **kw) -> PyResult<PyObject> {
197 197 self.call_cindex(py, "headrevsfiltered", args, kw)
198 198 }
199 199
200 200 /// True if the object is a snapshot
201 201 def issnapshot(&self, *args, **kw) -> PyResult<PyObject> {
202 202 self.call_cindex(py, "issnapshot", args, kw)
203 203 }
204 204
205 205 /// Gather snapshot data in a cache dict
206 206 def findsnapshots(&self, *args, **kw) -> PyResult<PyObject> {
207 207 self.call_cindex(py, "findsnapshots", args, kw)
208 208 }
209 209
210 210 /// determine revisions with deltas to reconstruct fulltext
211 211 def deltachain(&self, *args, **kw) -> PyResult<PyObject> {
212 212 self.call_cindex(py, "deltachain", args, kw)
213 213 }
214 214
215 215 /// slice planned chunk read to reach a density threshold
216 216 def slicechunktodensity(&self, *args, **kw) -> PyResult<PyObject> {
217 217 self.call_cindex(py, "slicechunktodensity", args, kw)
218 218 }
219 219
220 220 /// stats for the index
221 221 def stats(&self, *args, **kw) -> PyResult<PyObject> {
222 222 self.call_cindex(py, "stats", args, kw)
223 223 }
224 224
225 225 // index_sequence_methods and index_mapping_methods.
226 226 //
227 227 // Since we call back through the high level Python API,
228 228 // there's no point making a distinction between index_get
229 229 // and index_getitem.
230 230
231 231 def __len__(&self) -> PyResult<usize> {
232 232 self.cindex(py).borrow().inner().len(py)
233 233 }
234 234
235 235 def __getitem__(&self, key: PyObject) -> PyResult<PyObject> {
236 236 // this conversion seems needless, but that's actually because
237 237 // `index_getitem` does not handle conversion from PyLong,
238 238 // which expressions such as [e for e in index] internally use.
239 239 // Note that we don't seem to have a direct way to call
240 240 // PySequence_GetItem (does the job), which would possibly be better
241 241 // for performance
242 242 let key = match key.extract::<Revision>(py) {
243 243 Ok(rev) => rev.to_py_object(py).into_object(),
244 244 Err(_) => key,
245 245 };
246 246 self.cindex(py).borrow().inner().get_item(py, key)
247 247 }
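The key round-trip above exists because the C `index_getitem` does not accept the PyLong objects that plain iteration produces; converting through `Revision` normalizes them first. Illustrative trigger from the Python side, with `idx` a hypothetical MixedIndex:

    # Iterating builds integer keys internally, exercising the conversion above.
    entries = [e for e in idx]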
248 248
249 249 def __setitem__(&self, key: PyObject, value: PyObject) -> PyResult<()> {
250 250 self.cindex(py).borrow().inner().set_item(py, key, value)
251 251 }
252 252
253 253 def __contains__(&self, item: PyObject) -> PyResult<bool> {
254 254 // ObjectProtocol does not seem to provide contains(), so
255 255 // this is an equivalent implementation of the index_contains()
256 256 // defined in revlog.c
257 257 let cindex = self.cindex(py).borrow();
258 258 match item.extract::<Revision>(py) {
259 259 Ok(rev) => {
260 260 Ok(rev >= -1 && rev < cindex.inner().len(py)? as Revision)
261 261 }
262 262 Err(_) => {
263 263 cindex.inner().call_method(
264 264 py,
265 265 "has_node",
266 266 PyTuple::new(py, &[item]),
267 267 None)?
268 268 .extract(py)
269 269 }
270 270 }
271 271 }
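As the comment notes, this mirrors `index_contains()` from revlog.c: integer keys are range-checked (nullrev included), and anything else falls back to a node lookup. Illustrative semantics, with `idx` and `some_node` as assumed names:

    assert -1 in idx                    # nullrev always counts as present
    assert len(idx) not in idx          # one past the tip is out of range
    assert (some_node in idx) == idx.has_node(some_node)  # non-int: node lookup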
272 272
273 273 def nodemap_data_all(&self) -> PyResult<PyBytes> {
274 274 self.inner_nodemap_data_all(py)
275 275 }
276 276
277 277 def nodemap_data_incremental(&self) -> PyResult<PyObject> {
278 278 self.inner_nodemap_data_incremental(py)
279 279 }
280 280 def update_nodemap_data(
281 281 &self,
282 282 docket: PyObject,
283 283 nm_data: PyObject
284 284 ) -> PyResult<PyObject> {
285 285 self.inner_update_nodemap_data(py, docket, nm_data)
286 286 }
287 287
288 @property
289 def entry_size(&self) -> PyResult<PyInt> {
290 self.cindex(py).borrow().inner().getattr(py, "entry_size")?.extract::<PyInt>(py)
291 }
288 292
289 293 });
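The new `entry_size` property keeps `MixedIndex` interchangeable with the C and pure-Python index classes by simply forwarding the attribute from the wrapped C index. Illustrative Python-side use, assuming `idx` is any index implementation backing a non-inline revlog:

    # With every index exposing entry_size, callers can size the index file
    # without going through the revlogio object.
    index_bytes = len(idx) * idx.entry_size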
290 294
291 295 impl MixedIndex {
292 296 fn new(py: Python, cindex: PyObject) -> PyResult<MixedIndex> {
293 297 Self::create_instance(
294 298 py,
295 299 RefCell::new(cindex::Index::new(py, cindex)?),
296 300 RefCell::new(None),
297 301 RefCell::new(None),
298 302 RefCell::new(None),
299 303 )
300 304 }
301 305
302 306 /// This is scaffolding at this point, but it could also become
303 307 /// a way to start a persistent nodemap or perform a
304 308 /// vacuum / repack operation
305 309 fn fill_nodemap(
306 310 &self,
307 311 py: Python,
308 312 nt: &mut NodeTree,
309 313 ) -> PyResult<PyObject> {
310 314 let index = self.cindex(py).borrow();
311 315 for r in 0..index.len() {
312 316 let rev = r as Revision;
313 317 // in this case node() won't ever return None
314 318 nt.insert(&*index, index.node(rev).unwrap(), rev)
315 319 .map_err(|e| nodemap_error(py, e))?
316 320 }
317 321 Ok(py.None())
318 322 }
319 323
320 324 fn get_nodetree<'a>(
321 325 &'a self,
322 326 py: Python<'a>,
323 327 ) -> PyResult<&'a RefCell<Option<NodeTree>>> {
324 328 if self.nt(py).borrow().is_none() {
325 329 let readonly = Box::new(Vec::new());
326 330 let mut nt = NodeTree::load_bytes(readonly, 0);
327 331 self.fill_nodemap(py, &mut nt)?;
328 332 self.nt(py).borrow_mut().replace(nt);
329 333 }
330 334 Ok(self.nt(py))
331 335 }
332 336
333 337 /// forward a method call to the underlying C index
334 338 fn call_cindex(
335 339 &self,
336 340 py: Python,
337 341 name: &str,
338 342 args: &PyTuple,
339 343 kwargs: Option<&PyDict>,
340 344 ) -> PyResult<PyObject> {
341 345 self.cindex(py)
342 346 .borrow()
343 347 .inner()
344 348 .call_method(py, name, args, kwargs)
345 349 }
346 350
347 351 pub fn clone_cindex(&self, py: Python) -> cindex::Index {
348 352 self.cindex(py).borrow().clone_ref(py)
349 353 }
350 354
351 355 /// Returns the full nodemap bytes to be written as-is to disk
352 356 fn inner_nodemap_data_all(&self, py: Python) -> PyResult<PyBytes> {
353 357 let nodemap = self.get_nodetree(py)?.borrow_mut().take().unwrap();
354 358 let (readonly, bytes) = nodemap.into_readonly_and_added_bytes();
355 359
356 360 // If there's anything readonly, we need to build the data again from
357 361 // scratch
358 362 let bytes = if readonly.len() > 0 {
359 363 let mut nt = NodeTree::load_bytes(Box::new(vec![]), 0);
360 364 self.fill_nodemap(py, &mut nt)?;
361 365
362 366 let (readonly, bytes) = nt.into_readonly_and_added_bytes();
363 367 assert_eq!(readonly.len(), 0);
364 368
365 369 bytes
366 370 } else {
367 371 bytes
368 372 };
369 373
370 374 let bytes = PyBytes::new(py, &bytes);
371 375 Ok(bytes)
372 376 }
373 377
374 378 /// Returns the last saved docket along with the size of any
375 379 /// changed data in bytes, and said data as bytes.
376 380 fn inner_nodemap_data_incremental(
377 381 &self,
378 382 py: Python,
379 383 ) -> PyResult<PyObject> {
380 384 let docket = self.docket(py).borrow();
381 385 let docket = match docket.as_ref() {
382 386 Some(d) => d,
383 387 None => return Ok(py.None()),
384 388 };
385 389
386 390 let node_tree = self.get_nodetree(py)?.borrow_mut().take().unwrap();
387 391 let masked_blocks = node_tree.masked_readonly_blocks();
388 392 let (_, data) = node_tree.into_readonly_and_added_bytes();
389 393 let changed = masked_blocks * std::mem::size_of::<Block>();
390 394
391 395 Ok((docket, changed, PyBytes::new(py, &data))
392 396 .to_py_object(py)
393 397 .into_object())
394 398 }
395 399
396 400 /// Update the nodemap from the new (mmaped) data.
397 401 /// The docket is kept as a reference for later incremental calls.
398 402 fn inner_update_nodemap_data(
399 403 &self,
400 404 py: Python,
401 405 docket: PyObject,
402 406 nm_data: PyObject,
403 407 ) -> PyResult<PyObject> {
404 408 let buf = PyBuffer::get(py, &nm_data)?;
405 409 let len = buf.item_count();
406 410
407 411 // Build a slice from the mmap'ed buffer data
408 412 let cbuf = buf.buf_ptr();
409 413 let bytes = if std::mem::size_of::<u8>() == buf.item_size()
410 414 && buf.is_c_contiguous()
411 415 && u8::is_compatible_format(buf.format())
412 416 {
413 417 unsafe { std::slice::from_raw_parts(cbuf as *const u8, len) }
414 418 } else {
415 419 return Err(PyErr::new::<ValueError, _>(
416 420 py,
417 421 "Nodemap data buffer has an invalid memory representation"
418 422 .to_string(),
419 423 ));
420 424 };
421 425
422 426 // Keep a reference to the mmap'ed buffer, otherwise we get a dangling
423 427 // pointer.
424 428 self.mmap(py).borrow_mut().replace(buf);
425 429
426 430 let mut nt = NodeTree::load_bytes(Box::new(bytes), len);
427 431
428 432 let data_tip =
429 433 docket.getattr(py, "tip_rev")?.extract::<Revision>(py)?;
430 434 self.docket(py).borrow_mut().replace(docket.clone_ref(py));
431 435 let idx = self.cindex(py).borrow();
432 436 let current_tip = idx.len();
433 437
434 438 for r in (data_tip + 1)..current_tip as Revision {
435 439 let rev = r as Revision;
436 440 // in this case node() won't ever return None
437 441 nt.insert(&*idx, idx.node(rev).unwrap(), rev)
438 442 .map_err(|e| nodemap_error(py, e))?
439 443 }
440 444
441 445 *self.nt(py).borrow_mut() = Some(nt);
442 446
443 447 Ok(py.None())
444 448 }
445 449 }
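Together, `inner_update_nodemap_data` and `inner_nodemap_data_incremental` form the read and write halves of the persistent nodemap: load the mmap'ed data once, then persist only what changed. A hedged sketch of the round trip from Python, where `docket` and `mm` are hypothetical stand-ins for the nodemap docket and an mmap of its data file:

    idx.update_nodemap_data(docket, mm)     # adopt the on-disk nodemap
    # ... new revisions get appended to the index ...
    res = idx.nodemap_data_incremental()    # None if no docket was loaded
    if res is not None:
        docket, changed, data = res  # `changed`: byte size of rewritten blocks
        # append `data` to the nodemap file and refresh the docket on disk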
446 450
447 451 fn revlog_error(py: Python) -> PyErr {
448 452 match py
449 453 .import("mercurial.error")
450 454 .and_then(|m| m.get(py, "RevlogError"))
451 455 {
452 456 Err(e) => e,
453 457 Ok(cls) => PyErr::from_instance(py, cls),
454 458 }
455 459 }
456 460
457 461 fn rev_not_in_index(py: Python, rev: Revision) -> PyErr {
458 462 PyErr::new::<ValueError, _>(
459 463 py,
460 464 format!(
461 465 "Inconsistency: Revision {} found in nodemap \
462 466 is not in revlog index",
463 467 rev
464 468 ),
465 469 )
466 470 }
467 471
468 472 /// Standard treatment of NodeMapError
469 473 fn nodemap_error(py: Python, err: NodeMapError) -> PyErr {
470 474 match err {
471 475 NodeMapError::MultipleResults => revlog_error(py),
472 476 NodeMapError::RevisionNotInIndex(r) => rev_not_in_index(py, r),
473 477 }
474 478 }
475 479
476 480 /// Create the module, with __package__ given from parent
477 481 pub fn init_module(py: Python, package: &str) -> PyResult<PyModule> {
478 482 let dotted_name = &format!("{}.revlog", package);
479 483 let m = PyModule::new(py, dotted_name)?;
480 484 m.add(py, "__package__", package)?;
481 485 m.add(py, "__doc__", "RevLog - Rust implementations")?;
482 486
483 487 m.add_class::<MixedIndex>(py)?;
484 488
485 489 let sys = PyModule::import(py, "sys")?;
486 490 let sys_modules: PyDict = sys.get(py, "modules")?.extract(py)?;
487 491 sys_modules.set_item(py, dotted_name, &m)?;
488 492
489 493 Ok(m)
490 494 }