revlog: add the option to track the expected compression upper bound (marmoute, r42662:bc4373ba, default branch)

# perf.py - performance test routines
'''helper extension to measure performance

Configurations
==============

``perf``
--------

``all-timing``
  When set, additional statistics will be reported for each benchmark: best,
  worst, median, and average. If not set, only the best timing is reported
  (default: off).

``presleep``
  number of seconds to wait before any group of runs (default: 1)

``pre-run``
  number of runs to perform before starting measurement.

``profile-benchmark``
  Enable profiling for the benchmarked section.
  (The first iteration is benchmarked.)

``run-limits``
  Control the number of runs each benchmark will perform. The option value
  should be a list of `<time>-<numberofrun>` pairs. After each run the
  conditions are considered in order with the following logic:

      If the benchmark has been running for <time> seconds and we have
      performed <numberofrun> iterations, stop the benchmark.

  The default value is: `3.0-100, 10.0-3`

``stub``
  When set, benchmarks will only be run once; useful for testing
  (default: off)
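
Example of a ``[perf]`` section enabling several of these options (the
values below are illustrative, not recommendations)::

  [perf]
  all-timing = yes
  presleep = 2
  run-limits = 5.0-50, 30.0-5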
'''

# "historical portability" policy of perf.py:
#
# We have to do:
# - make perf.py "loadable" with as wide a range of Mercurial versions as
#   possible
#   This doesn't mean that perf commands work correctly with that Mercurial.
#   BTW, perf.py itself has been available since 1.1 (or eb240755386d).
# - make historical perf commands work correctly with as wide a range of
#   Mercurial versions as possible
#
# We have to do, if possible with reasonable cost:
# - make recent perf commands for historical features work correctly
#   with early Mercurial
#
# We don't have to do:
# - make perf commands for recent features work correctly with early
#   Mercurial
from __future__ import absolute_import
import contextlib
import functools
import gc
import os
import random
import shutil
import struct
import sys
import tempfile
import threading
import time
from mercurial import (
    changegroup,
    cmdutil,
    commands,
    copies,
    error,
    extensions,
    hg,
    mdiff,
    merge,
    revlog,
    util,
)

# for "historical portability":
# try to import modules separately (in dict order), and ignore
# failure, because these aren't available with early Mercurial
try:
    from mercurial import branchmap # since 2.5 (or bcee63733aad)
except ImportError:
    pass
try:
    from mercurial import obsolete # since 2.3 (or ad0d6c2b3279)
except ImportError:
    pass
try:
    from mercurial import registrar # since 3.7 (or 37d50250b696)
    dir(registrar) # forcibly load it
except ImportError:
    registrar = None
try:
    from mercurial import repoview # since 2.5 (or 3a6ddacb7198)
except ImportError:
    pass
try:
    from mercurial.utils import repoviewutil # since 5.0
except ImportError:
    repoviewutil = None
try:
    from mercurial import scmutil # since 1.9 (or 8b252e826c68)
except ImportError:
    pass
try:
    from mercurial import setdiscovery # since 1.9 (or cb98fed52495)
except ImportError:
    pass

try:
    from mercurial import profiling
except ImportError:
    profiling = None

def identity(a):
    return a

try:
    from mercurial import pycompat
    getargspec = pycompat.getargspec  # added to module after 4.5
    _byteskwargs = pycompat.byteskwargs  # since 4.1 (or fbc3f73dc802)
    _sysstr = pycompat.sysstr  # since 4.0 (or 2219f4f82ede)
    _xrange = pycompat.xrange  # since 4.8 (or 7eba8f83129b)
    fsencode = pycompat.fsencode  # since 3.9 (or f4a5e0e86a7e)
    if pycompat.ispy3:
        _maxint = sys.maxsize  # per py3 docs for replacing maxint
    else:
        _maxint = sys.maxint
except (ImportError, AttributeError):
    import inspect
    getargspec = inspect.getargspec
    _byteskwargs = identity
    fsencode = identity  # no py3 support
    _maxint = sys.maxint  # no py3 support
    _sysstr = lambda x: x  # no py3 support
    _xrange = xrange

try:
    # 4.7+
    queue = pycompat.queue.Queue
except (AttributeError, ImportError):
    # <4.7.
    try:
        queue = pycompat.queue
    except (AttributeError, ImportError):
        queue = util.queue

try:
    from mercurial import logcmdutil
    makelogtemplater = logcmdutil.maketemplater
except (AttributeError, ImportError):
    try:
        makelogtemplater = cmdutil.makelogtemplater
    except (AttributeError, ImportError):
        makelogtemplater = None

# for "historical portability":
# define util.safehasattr forcibly, because util.safehasattr has been
# available since 1.9.3 (or 94b200a11cf7)
_undefined = object()
def safehasattr(thing, attr):
    return getattr(thing, _sysstr(attr), _undefined) is not _undefined
setattr(util, 'safehasattr', safehasattr)

# for "historical portability":
# define util.timer forcibly, because util.timer has been available
# since ae5d60bb70c9
if safehasattr(time, 'perf_counter'):
    util.timer = time.perf_counter
elif os.name == b'nt':
    util.timer = time.clock
else:
    util.timer = time.time

# for "historical portability":
# use locally defined empty option list, if formatteropts isn't
# available, because commands.formatteropts has been available since
# 3.2 (or 7a7eed5176a4), even though formatting itself has been
# available since 2.2 (or ae5f92e154d3)
formatteropts = getattr(cmdutil, "formatteropts",
                        getattr(commands, "formatteropts", []))

# for "historical portability":
# use locally defined option list, if debugrevlogopts isn't available,
# because commands.debugrevlogopts has been available since 3.7 (or
# 5606f7d0d063), even though cmdutil.openrevlog() has been available
# since 1.9 (or a79fea6b3e77).
revlogopts = getattr(cmdutil, "debugrevlogopts",
                     getattr(commands, "debugrevlogopts", [
                         (b'c', b'changelog', False, (b'open changelog')),
                         (b'm', b'manifest', False, (b'open manifest')),
                         (b'', b'dir', False, (b'open directory manifest')),
                     ]))

cmdtable = {}

# for "historical portability":
# define parsealiases locally, because cmdutil.parsealiases has been
# available since 1.5 (or 6252852b4332)
def parsealiases(cmd):
    return cmd.split(b"|")

if safehasattr(registrar, 'command'):
    command = registrar.command(cmdtable)
elif safehasattr(cmdutil, 'command'):
    command = cmdutil.command(cmdtable)
    if b'norepo' not in getargspec(command).args:
        # for "historical portability":
        # wrap original cmdutil.command, because "norepo" option has
        # been available since 3.1 (or 75a96326cecb)
        _command = command
        def command(name, options=(), synopsis=None, norepo=False):
            if norepo:
                commands.norepo += b' %s' % b' '.join(parsealiases(name))
            return _command(name, list(options), synopsis)
else:
    # for "historical portability":
    # define "@command" annotation locally, because cmdutil.command
    # has been available since 1.9 (or 2daa5179e73f)
    def command(name, options=(), synopsis=None, norepo=False):
        def decorator(func):
            if synopsis:
                cmdtable[name] = func, list(options), synopsis
            else:
                cmdtable[name] = func, list(options)
            if norepo:
                commands.norepo += b' %s' % b' '.join(parsealiases(name))
            return func
        return decorator

try:
    import mercurial.registrar
    import mercurial.configitems
    configtable = {}
    configitem = mercurial.registrar.configitem(configtable)
    configitem(b'perf', b'presleep',
               default=mercurial.configitems.dynamicdefault,
    )
    configitem(b'perf', b'stub',
               default=mercurial.configitems.dynamicdefault,
    )
    configitem(b'perf', b'parentscount',
               default=mercurial.configitems.dynamicdefault,
    )
    configitem(b'perf', b'all-timing',
               default=mercurial.configitems.dynamicdefault,
    )
    configitem(b'perf', b'pre-run',
               default=mercurial.configitems.dynamicdefault,
    )
    configitem(b'perf', b'profile-benchmark',
               default=mercurial.configitems.dynamicdefault,
    )
    configitem(b'perf', b'run-limits',
               default=mercurial.configitems.dynamicdefault,
    )
except (ImportError, AttributeError):
    pass

def getlen(ui):
    if ui.configbool(b"perf", b"stub", False):
        return lambda x: 1
    return len

class noop(object):
    """dummy context manager"""
    def __enter__(self):
        pass
    def __exit__(self, *args):
        pass

NOOPCTX = noop()

def gettimer(ui, opts=None):
    """return a timer function and formatter: (timer, formatter)

    This function exists to gather the creation of the formatter in a single
    place instead of duplicating it in all performance commands."""

    # enforce an idle period before execution to counteract power management
    # experimental config: perf.presleep
    time.sleep(getint(ui, b"perf", b"presleep", 1))

    if opts is None:
        opts = {}
    # redirect all to stderr unless buffer api is in use
    if not ui._buffers:
        ui = ui.copy()
        uifout = safeattrsetter(ui, b'fout', ignoremissing=True)
        if uifout:
            # for "historical portability":
            # ui.fout/ferr have been available since 1.9 (or 4e1ccd4c2b6d)
            uifout.set(ui.ferr)

    # get a formatter
    uiformatter = getattr(ui, 'formatter', None)
    if uiformatter:
        fm = uiformatter(b'perf', opts)
    else:
        # for "historical portability":
        # define formatter locally, because ui.formatter has been
        # available since 2.2 (or ae5f92e154d3)
        from mercurial import node
        class defaultformatter(object):
            """Minimized composition of baseformatter and plainformatter
            """
            def __init__(self, ui, topic, opts):
                self._ui = ui
                if ui.debugflag:
                    self.hexfunc = node.hex
                else:
                    self.hexfunc = node.short
            def __nonzero__(self):
                return False
            __bool__ = __nonzero__
            def startitem(self):
                pass
            def data(self, **data):
                pass
            def write(self, fields, deftext, *fielddata, **opts):
                self._ui.write(deftext % fielddata, **opts)
            def condwrite(self, cond, fields, deftext, *fielddata, **opts):
                if cond:
                    self._ui.write(deftext % fielddata, **opts)
            def plain(self, text, **opts):
                self._ui.write(text, **opts)
            def end(self):
                pass
        fm = defaultformatter(ui, b'perf', opts)

    # stub function, runs code only once instead of in a loop
    # experimental config: perf.stub
    if ui.configbool(b"perf", b"stub", False):
        return functools.partial(stub_timer, fm), fm

    # experimental config: perf.all-timing
    displayall = ui.configbool(b"perf", b"all-timing", False)

    # experimental config: perf.run-limits
    limitspec = ui.configlist(b"perf", b"run-limits", [])
    limits = []
    for item in limitspec:
        parts = item.split(b'-', 1)
        if len(parts) < 2:
            ui.warn((b'malformed run limit entry, missing "-": %s\n'
                     % item))
            continue
        try:
            time_limit = float(pycompat.sysstr(parts[0]))
        except ValueError as e:
            ui.warn((b'malformed run limit entry, %s: %s\n'
                     % (pycompat.bytestr(e), item)))
            continue
        try:
            run_limit = int(pycompat.sysstr(parts[1]))
        except ValueError as e:
            ui.warn((b'malformed run limit entry, %s: %s\n'
                     % (pycompat.bytestr(e), item)))
            continue
        limits.append((time_limit, run_limit))
    if not limits:
        limits = DEFAULTLIMITS

    profiler = None
    if profiling is not None:
        if ui.configbool(b"perf", b"profile-benchmark", False):
            profiler = profiling.profile(ui)

    prerun = getint(ui, b"perf", b"pre-run", 0)
    t = functools.partial(_timer, fm, displayall=displayall, limits=limits,
                          prerun=prerun, profiler=profiler)
    return t, fm
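
# Typical use of the pair returned by gettimer(), as exercised by the perf
# commands below (an illustrative sketch, not an additional command):
#
#     timer, fm = gettimer(ui, opts)
#     timer(d, setup=s)   # d: benchmarked callable, s: per-run setup callable
#     fm.end()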

def stub_timer(fm, func, setup=None, title=None):
    if setup is not None:
        setup()
    func()

@contextlib.contextmanager
def timeone():
    r = []
    ostart = os.times()
    cstart = util.timer()
    yield r
    cstop = util.timer()
    ostop = os.times()
    a, b = ostart, ostop
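    # each sample is (wall clock, user CPU, system CPU); os.times() returns
    # (user, system, children-user, children-system, elapsed)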
    r.append((cstop - cstart, b[0] - a[0], b[1] - a[1]))


# list of stop conditions (elapsed time, minimal run count)
DEFAULTLIMITS = (
    (3.0, 100),
    (10.0, 3),
)
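# With the defaults above, a benchmark keeps running until it has lasted 3
# seconds and completed 100 iterations, or lasted 10 seconds and completed 3
# iterations, whichever condition is satisfied first.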

def _timer(fm, func, setup=None, title=None, displayall=False,
           limits=DEFAULTLIMITS, prerun=0, profiler=None):
    gc.collect()
    results = []
    begin = util.timer()
    count = 0
    if profiler is None:
        profiler = NOOPCTX
    for i in range(prerun):
        if setup is not None:
            setup()
        func()
    keepgoing = True
    while keepgoing:
        if setup is not None:
            setup()
        with profiler:
            with timeone() as item:
                r = func()
        profiler = NOOPCTX
        count += 1
        results.append(item[0])
        cstop = util.timer()
        # Look for a stop condition.
        elapsed = cstop - begin
        for t, mincount in limits:
            if elapsed >= t and count >= mincount:
                keepgoing = False
                break

    formatone(fm, results, title=title, result=r,
              displayall=displayall)

def formatone(fm, timings, title=None, result=None, displayall=False):

    count = len(timings)

    fm.startitem()

    if title:
        fm.write(b'title', b'! %s\n', title)
    if result:
        fm.write(b'result', b'! result: %s\n', result)
    def display(role, entry):
        prefix = b''
        if role != b'best':
            prefix = b'%s.' % role
        fm.plain(b'!')
        fm.write(prefix + b'wall', b' wall %f', entry[0])
        fm.write(prefix + b'comb', b' comb %f', entry[1] + entry[2])
        fm.write(prefix + b'user', b' user %f', entry[1])
        fm.write(prefix + b'sys', b' sys %f', entry[2])
        fm.write(prefix + b'count', b' (%s of %%d)' % role, count)
        fm.plain(b'\n')
    timings.sort()
    min_val = timings[0]
    display(b'best', min_val)
    if displayall:
        max_val = timings[-1]
        display(b'max', max_val)
        avg = tuple([sum(x) / count for x in zip(*timings)])
        display(b'avg', avg)
        median = timings[len(timings) // 2]
        display(b'median', median)

# utilities for historical portability

def getint(ui, section, name, default):
    # for "historical portability":
    # ui.configint has been available since 1.9 (or fa2b596db182)
    v = ui.config(section, name, None)
    if v is None:
        return default
    try:
        return int(v)
    except ValueError:
        raise error.ConfigError((b"%s.%s is not an integer ('%s')")
                                % (section, name, v))

def safeattrsetter(obj, name, ignoremissing=False):
    """Ensure that 'obj' has the 'name' attribute before subsequent setattr

    This function aborts if 'obj' doesn't have the 'name' attribute at
    runtime. This avoids overlooking a future removal of the attribute,
    which would silently break the assumptions of the performance
    measurement.

    This function returns an object to (1) assign a new value to the
    attribute and (2) restore its original value.

    If 'ignoremissing' is true, a missing 'name' attribute doesn't cause
    an abort, and this function returns None. This is useful to examine
    an attribute which isn't ensured in all Mercurial versions.
    """
    if not util.safehasattr(obj, name):
        if ignoremissing:
            return None
        raise error.Abort((b"missing attribute %s of %s might break assumption"
                           b" of performance measurement") % (name, obj))

    origvalue = getattr(obj, _sysstr(name))
    class attrutil(object):
        def set(self, newvalue):
            setattr(obj, _sysstr(name), newvalue)
        def restore(self):
            setattr(obj, _sysstr(name), origvalue)

    return attrutil()
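
# Illustrative use of safeattrsetter (names hypothetical):
#
#     fout = safeattrsetter(ui, b'fout')
#     fout.set(replacement)
#     ...
#     fout.restore()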

# utilities to examine internal API changes

def getbranchmapsubsettable():
    # for "historical portability":
    # subsettable is defined in:
    # - branchmap since 2.9 (or 175c6fd8cacc)
    # - repoview since 2.5 (or 59a9f18d4587)
    # - repoviewutil since 5.0
    for mod in (branchmap, repoview, repoviewutil):
        subsettable = getattr(mod, 'subsettable', None)
        if subsettable:
            return subsettable

    # bisecting in bcee63733aad::59a9f18d4587 can reach here (both
    # branchmap and repoview modules exist, but subsettable attribute
    # doesn't)
    raise error.Abort((b"perfbranchmap not available with this Mercurial"),
                      hint=b"use 2.5 or later")

def getsvfs(repo):
    """Return appropriate object to access files under .hg/store
    """
    # for "historical portability":
    # repo.svfs has been available since 2.3 (or 7034365089bf)
    svfs = getattr(repo, 'svfs', None)
    if svfs:
        return svfs
    else:
        return getattr(repo, 'sopener')

def getvfs(repo):
    """Return appropriate object to access files under .hg
    """
    # for "historical portability":
    # repo.vfs has been available since 2.3 (or 7034365089bf)
    vfs = getattr(repo, 'vfs', None)
    if vfs:
        return vfs
    else:
        return getattr(repo, 'opener')

def repocleartagscachefunc(repo):
    """Return the function to clear tags cache according to repo internal API
    """
    if util.safehasattr(repo, b'_tagscache'): # since 2.0 (or 9dca7653b525)
        # in this case, setattr(repo, '_tagscache', None) or so isn't
        # the correct way to clear tags cache, because existing code paths
        # expect _tagscache to be a structured object.
        def clearcache():
            # _tagscache has been filteredpropertycache since 2.5 (or
            # 98c867ac1330), and delattr() can't work in such case
            if b'_tagscache' in vars(repo):
                del repo.__dict__[b'_tagscache']
        return clearcache

    repotags = safeattrsetter(repo, b'_tags', ignoremissing=True)
    if repotags: # since 1.4 (or 5614a628d173)
        return lambda: repotags.set(None)

    repotagscache = safeattrsetter(repo, b'tagscache', ignoremissing=True)
    if repotagscache: # since 0.6 (or d7df759d0e97)
        return lambda: repotagscache.set(None)

    # Mercurial earlier than 0.6 (or d7df759d0e97) logically reaches
    # this point, but it isn't so problematic, because:
    # - repo.tags of such Mercurial isn't "callable", and repo.tags()
    #   in perftags() causes failure soon
    # - perf.py itself has been available since 1.1 (or eb240755386d)
    raise error.Abort((b"tags API of this hg command is unknown"))

# utilities to clear cache

def clearfilecache(obj, attrname):
    unfiltered = getattr(obj, 'unfiltered', None)
    if unfiltered is not None:
        obj = obj.unfiltered()
    if attrname in vars(obj):
        delattr(obj, attrname)
    obj._filecache.pop(attrname, None)

def clearchangelog(repo):
    if repo is not repo.unfiltered():
        object.__setattr__(repo, r'_clcachekey', None)
        object.__setattr__(repo, r'_clcache', None)
    clearfilecache(repo.unfiltered(), 'changelog')

# perf commands

@command(b'perfwalk', formatteropts)
def perfwalk(ui, repo, *pats, **opts):
    opts = _byteskwargs(opts)
    timer, fm = gettimer(ui, opts)
    m = scmutil.match(repo[None], pats, {})
    timer(lambda: len(list(repo.dirstate.walk(m, subrepos=[], unknown=True,
                                              ignored=False))))
    fm.end()

@command(b'perfannotate', formatteropts)
def perfannotate(ui, repo, f, **opts):
    opts = _byteskwargs(opts)
    timer, fm = gettimer(ui, opts)
    fc = repo[b'.'][f]
    timer(lambda: len(fc.annotate(True)))
    fm.end()

@command(b'perfstatus',
         [(b'u', b'unknown', False,
           b'ask status to look for unknown files')] + formatteropts)
def perfstatus(ui, repo, **opts):
    opts = _byteskwargs(opts)
    #m = match.always(repo.root, repo.getcwd())
    #timer(lambda: sum(map(len, repo.dirstate.status(m, [], False, False,
    #                                                False))))
    timer, fm = gettimer(ui, opts)
    timer(lambda: sum(map(len, repo.status(unknown=opts[b'unknown']))))
    fm.end()

@command(b'perfaddremove', formatteropts)
def perfaddremove(ui, repo, **opts):
    opts = _byteskwargs(opts)
    timer, fm = gettimer(ui, opts)
    try:
        oldquiet = repo.ui.quiet
        repo.ui.quiet = True
        matcher = scmutil.match(repo[None])
        opts[b'dry_run'] = True
        if b'uipathfn' in getargspec(scmutil.addremove).args:
            uipathfn = scmutil.getuipathfn(repo)
            timer(lambda: scmutil.addremove(repo, matcher, b"", uipathfn, opts))
        else:
            timer(lambda: scmutil.addremove(repo, matcher, b"", opts))
    finally:
        repo.ui.quiet = oldquiet
    fm.end()

def clearcaches(cl):
    # behave somewhat consistently across internal API changes
    if util.safehasattr(cl, b'clearcaches'):
        cl.clearcaches()
    elif util.safehasattr(cl, b'_nodecache'):
        from mercurial.node import nullid, nullrev
        cl._nodecache = {nullid: nullrev}
        cl._nodepos = None

@command(b'perfheads', formatteropts)
def perfheads(ui, repo, **opts):
    """benchmark the computation of changelog heads"""
    opts = _byteskwargs(opts)
    timer, fm = gettimer(ui, opts)
    cl = repo.changelog
    def s():
        clearcaches(cl)
    def d():
        len(cl.headrevs())
    timer(d, setup=s)
    fm.end()

@command(b'perftags', formatteropts +
         [
             (b'', b'clear-revlogs', False, b'refresh changelog and manifest'),
         ])
def perftags(ui, repo, **opts):
    opts = _byteskwargs(opts)
    timer, fm = gettimer(ui, opts)
    repocleartagscache = repocleartagscachefunc(repo)
    clearrevlogs = opts[b'clear_revlogs']
    def s():
        if clearrevlogs:
            clearchangelog(repo)
            clearfilecache(repo.unfiltered(), 'manifest')
        repocleartagscache()
    def t():
        return len(repo.tags())
    timer(t, setup=s)
    fm.end()

@command(b'perfancestors', formatteropts)
def perfancestors(ui, repo, **opts):
    opts = _byteskwargs(opts)
    timer, fm = gettimer(ui, opts)
    heads = repo.changelog.headrevs()
    def d():
        for a in repo.changelog.ancestors(heads):
            pass
    timer(d)
    fm.end()

@command(b'perfancestorset', formatteropts)
def perfancestorset(ui, repo, revset, **opts):
    opts = _byteskwargs(opts)
    timer, fm = gettimer(ui, opts)
    revs = repo.revs(revset)
    heads = repo.changelog.headrevs()
    def d():
        s = repo.changelog.ancestors(heads)
        for rev in revs:
            rev in s
    timer(d)
    fm.end()

@command(b'perfdiscovery', formatteropts, b'PATH')
def perfdiscovery(ui, repo, path, **opts):
    """benchmark discovery between local repo and the peer at given path
    """
    repos = [repo, None]
    timer, fm = gettimer(ui, opts)
    path = ui.expandpath(path)

    def s():
        repos[1] = hg.peer(ui, opts, path)
    def d():
        setdiscovery.findcommonheads(ui, *repos)
    timer(d, setup=s)
    fm.end()

@command(b'perfbookmarks', formatteropts +
         [
             (b'', b'clear-revlogs', False, b'refresh changelog and manifest'),
         ])
def perfbookmarks(ui, repo, **opts):
    """benchmark parsing bookmarks from disk to memory"""
    opts = _byteskwargs(opts)
    timer, fm = gettimer(ui, opts)

    clearrevlogs = opts[b'clear_revlogs']
    def s():
        if clearrevlogs:
            clearchangelog(repo)
        clearfilecache(repo, b'_bookmarks')
    def d():
        repo._bookmarks
    timer(d, setup=s)
    fm.end()

@command(b'perfbundleread', formatteropts, b'BUNDLE')
def perfbundleread(ui, repo, bundlepath, **opts):
    """Benchmark reading of bundle files.

    This command is meant to isolate the I/O part of bundle reading as
    much as possible.
    """
    from mercurial import (
        bundle2,
        exchange,
        streamclone,
    )

    opts = _byteskwargs(opts)

    def makebench(fn):
        def run():
            with open(bundlepath, b'rb') as fh:
                bundle = exchange.readbundle(ui, fh, bundlepath)
                fn(bundle)

        return run

    def makereadnbytes(size):
        def run():
            with open(bundlepath, b'rb') as fh:
                bundle = exchange.readbundle(ui, fh, bundlepath)
                while bundle.read(size):
                    pass

        return run

    def makestdioread(size):
        def run():
            with open(bundlepath, b'rb') as fh:
                while fh.read(size):
                    pass

        return run

    # bundle1

    def deltaiter(bundle):
        for delta in bundle.deltaiter():
            pass

    def iterchunks(bundle):
        for chunk in bundle.getchunks():
            pass

    # bundle2

    def forwardchunks(bundle):
        for chunk in bundle._forwardchunks():
            pass

    def iterparts(bundle):
        for part in bundle.iterparts():
            pass

    def iterpartsseekable(bundle):
        for part in bundle.iterparts(seekable=True):
            pass

    def seek(bundle):
        for part in bundle.iterparts(seekable=True):
            part.seek(0, os.SEEK_END)

    def makepartreadnbytes(size):
        def run():
            with open(bundlepath, b'rb') as fh:
                bundle = exchange.readbundle(ui, fh, bundlepath)
                for part in bundle.iterparts():
                    while part.read(size):
                        pass

        return run

    benches = [
        (makestdioread(8192), b'read(8k)'),
        (makestdioread(16384), b'read(16k)'),
        (makestdioread(32768), b'read(32k)'),
        (makestdioread(131072), b'read(128k)'),
    ]

    with open(bundlepath, b'rb') as fh:
        bundle = exchange.readbundle(ui, fh, bundlepath)

        if isinstance(bundle, changegroup.cg1unpacker):
            benches.extend([
                (makebench(deltaiter), b'cg1 deltaiter()'),
                (makebench(iterchunks), b'cg1 getchunks()'),
                (makereadnbytes(8192), b'cg1 read(8k)'),
                (makereadnbytes(16384), b'cg1 read(16k)'),
                (makereadnbytes(32768), b'cg1 read(32k)'),
                (makereadnbytes(131072), b'cg1 read(128k)'),
            ])
        elif isinstance(bundle, bundle2.unbundle20):
            benches.extend([
                (makebench(forwardchunks), b'bundle2 forwardchunks()'),
                (makebench(iterparts), b'bundle2 iterparts()'),
                (makebench(iterpartsseekable), b'bundle2 iterparts() seekable'),
                (makebench(seek), b'bundle2 part seek()'),
                (makepartreadnbytes(8192), b'bundle2 part read(8k)'),
                (makepartreadnbytes(16384), b'bundle2 part read(16k)'),
                (makepartreadnbytes(32768), b'bundle2 part read(32k)'),
                (makepartreadnbytes(131072), b'bundle2 part read(128k)'),
            ])
        elif isinstance(bundle, streamclone.streamcloneapplier):
            raise error.Abort(b'stream clone bundles not supported')
        else:
            raise error.Abort(b'unhandled bundle type: %s' % type(bundle))

    for fn, title in benches:
        timer, fm = gettimer(ui, opts)
        timer(fn, title=title)
        fm.end()

@command(b'perfchangegroupchangelog', formatteropts +
         [(b'', b'cgversion', b'02', b'changegroup version'),
          (b'r', b'rev', b'', b'revisions to add to changegroup')])
def perfchangegroupchangelog(ui, repo, cgversion=b'02', rev=None, **opts):
    """Benchmark producing a changelog group for a changegroup.

    This measures the time spent processing the changelog during a
    bundle operation. This occurs during `hg bundle` and on a server
    processing a `getbundle` wire protocol request (handles clones
    and pull requests).

    By default, all revisions are added to the changegroup.
    """
    opts = _byteskwargs(opts)
    cl = repo.changelog
    nodes = [cl.lookup(r) for r in repo.revs(rev or b'all()')]
    bundler = changegroup.getbundler(cgversion, repo)

    def d():
        state, chunks = bundler._generatechangelog(cl, nodes)
        for chunk in chunks:
            pass

    timer, fm = gettimer(ui, opts)

    # Terminal printing can interfere with timing. So disable it.
    with ui.configoverride({(b'progress', b'disable'): True}):
        timer(d)

    fm.end()

@command(b'perfdirs', formatteropts)
def perfdirs(ui, repo, **opts):
    opts = _byteskwargs(opts)
    timer, fm = gettimer(ui, opts)
    dirstate = repo.dirstate
    b'a' in dirstate
    def d():
        dirstate.hasdir(b'a')
        del dirstate._map._dirs
    timer(d)
    fm.end()

@command(b'perfdirstate', formatteropts)
def perfdirstate(ui, repo, **opts):
    opts = _byteskwargs(opts)
    timer, fm = gettimer(ui, opts)
    b"a" in repo.dirstate
    def d():
        repo.dirstate.invalidate()
        b"a" in repo.dirstate
    timer(d)
    fm.end()

@command(b'perfdirstatedirs', formatteropts)
def perfdirstatedirs(ui, repo, **opts):
    opts = _byteskwargs(opts)
    timer, fm = gettimer(ui, opts)
    b"a" in repo.dirstate
    def d():
        repo.dirstate.hasdir(b"a")
        del repo.dirstate._map._dirs
    timer(d)
    fm.end()

@command(b'perfdirstatefoldmap', formatteropts)
def perfdirstatefoldmap(ui, repo, **opts):
    opts = _byteskwargs(opts)
    timer, fm = gettimer(ui, opts)
    dirstate = repo.dirstate
    b'a' in dirstate
    def d():
        dirstate._map.filefoldmap.get(b'a')
        del dirstate._map.filefoldmap
    timer(d)
    fm.end()

@command(b'perfdirfoldmap', formatteropts)
def perfdirfoldmap(ui, repo, **opts):
    opts = _byteskwargs(opts)
    timer, fm = gettimer(ui, opts)
    dirstate = repo.dirstate
    b'a' in dirstate
    def d():
        dirstate._map.dirfoldmap.get(b'a')
        del dirstate._map.dirfoldmap
        del dirstate._map._dirs
    timer(d)
    fm.end()

@command(b'perfdirstatewrite', formatteropts)
def perfdirstatewrite(ui, repo, **opts):
    opts = _byteskwargs(opts)
    timer, fm = gettimer(ui, opts)
    ds = repo.dirstate
    b"a" in ds
    def d():
        ds._dirty = True
        ds.write(repo.currenttransaction())
    timer(d)
    fm.end()

def _getmergerevs(repo, opts):
    """parse command arguments to return revs involved in a merge

    input: options dictionary with `rev`, `from` and `base`
    output: (localctx, otherctx, basectx)
    """
    if opts[b'from']:
        fromrev = scmutil.revsingle(repo, opts[b'from'])
        wctx = repo[fromrev]
    else:
        wctx = repo[None]
        # we don't want working dir files to be stat'd in the benchmark, so
        # prime that cache
        wctx.dirty()
    rctx = scmutil.revsingle(repo, opts[b'rev'], opts[b'rev'])
    if opts[b'base']:
        fromrev = scmutil.revsingle(repo, opts[b'base'])
        ancestor = repo[fromrev]
    else:
        ancestor = wctx.ancestor(rctx)
    return (wctx, rctx, ancestor)

@command(b'perfmergecalculate',
         [
             (b'r', b'rev', b'.', b'rev to merge against'),
             (b'', b'from', b'', b'rev to merge from'),
             (b'', b'base', b'', b'the revision to use as base'),
         ] + formatteropts)
def perfmergecalculate(ui, repo, **opts):
    opts = _byteskwargs(opts)
    timer, fm = gettimer(ui, opts)

    wctx, rctx, ancestor = _getmergerevs(repo, opts)
    def d():
        # acceptremote is True because we don't want prompts in the middle of
        # our benchmark
        merge.calculateupdates(repo, wctx, rctx, [ancestor], False, False,
                               acceptremote=True, followcopies=True)
    timer(d)
    fm.end()

@command(b'perfmergecopies',
         [
             (b'r', b'rev', b'.', b'rev to merge against'),
             (b'', b'from', b'', b'rev to merge from'),
             (b'', b'base', b'', b'the revision to use as base'),
         ] + formatteropts)
def perfmergecopies(ui, repo, **opts):
    """measure runtime of `copies.mergecopies`"""
    opts = _byteskwargs(opts)
    timer, fm = gettimer(ui, opts)
    wctx, rctx, ancestor = _getmergerevs(repo, opts)
    def d():
        # acceptremote is True because we don't want prompts in the middle of
        # our benchmark
        copies.mergecopies(repo, wctx, rctx, ancestor)
    timer(d)
    fm.end()

@command(b'perfpathcopies', [], b"REV REV")
def perfpathcopies(ui, repo, rev1, rev2, **opts):
    """benchmark the copy tracing logic"""
    opts = _byteskwargs(opts)
    timer, fm = gettimer(ui, opts)
    ctx1 = scmutil.revsingle(repo, rev1, rev1)
    ctx2 = scmutil.revsingle(repo, rev2, rev2)
    def d():
        copies.pathcopies(ctx1, ctx2)
    timer(d)
    fm.end()

@command(b'perfphases',
         [(b'', b'full', False, b'include file reading time too'),
          ], b"")
def perfphases(ui, repo, **opts):
    """benchmark phasesets computation"""
    opts = _byteskwargs(opts)
    timer, fm = gettimer(ui, opts)
    _phases = repo._phasecache
    full = opts.get(b'full')
    def d():
        phases = _phases
        if full:
            clearfilecache(repo, b'_phasecache')
            phases = repo._phasecache
        phases.invalidate()
        phases.loadphaserevs(repo)
    timer(d)
    fm.end()

@command(b'perfphasesremote',
         [], b"[DEST]")
def perfphasesremote(ui, repo, dest=None, **opts):
    """benchmark time needed to analyse phases of the remote server"""
    from mercurial.node import (
        bin,
    )
    from mercurial import (
        exchange,
        hg,
        phases,
    )
    opts = _byteskwargs(opts)
    timer, fm = gettimer(ui, opts)

    path = ui.paths.getpath(dest, default=(b'default-push', b'default'))
    if not path:
        raise error.Abort((b'default repository not configured!'),
                          hint=(b"see 'hg help config.paths'"))
    dest = path.pushloc or path.loc
    ui.status((b'analysing phase of %s\n') % util.hidepassword(dest))
    other = hg.peer(repo, opts, dest)

    # easier to perform discovery through the operation
    op = exchange.pushoperation(repo, other)
    exchange._pushdiscoverychangeset(op)

    remotesubset = op.fallbackheads

    with other.commandexecutor() as e:
        remotephases = e.callcommand(b'listkeys',
                                     {b'namespace': b'phases'}).result()
    del other
    publishing = remotephases.get(b'publishing', False)
    if publishing:
        ui.status((b'publishing: yes\n'))
    else:
        ui.status((b'publishing: no\n'))

    nodemap = repo.changelog.nodemap
    nonpublishroots = 0
    for nhex, phase in remotephases.iteritems():
        if nhex == b'publishing': # ignore data related to publish option
            continue
        node = bin(nhex)
        if node in nodemap and int(phase):
            nonpublishroots += 1
    ui.status((b'number of roots: %d\n') % len(remotephases))
    ui.status((b'number of known non public roots: %d\n') % nonpublishroots)
    def d():
        phases.remotephasessummary(repo,
                                   remotesubset,
                                   remotephases)
    timer(d)
    fm.end()

@command(b'perfmanifest', [
            (b'm', b'manifest-rev', False, b'Look up a manifest node revision'),
            (b'', b'clear-disk', False, b'clear on-disk caches too'),
         ] + formatteropts, b'REV|NODE')
def perfmanifest(ui, repo, rev, manifest_rev=False, clear_disk=False, **opts):
    """benchmark the time to read a manifest from disk and return a usable
    dict-like object

    Manifest caches are cleared before retrieval."""
    opts = _byteskwargs(opts)
    timer, fm = gettimer(ui, opts)
    if not manifest_rev:
        ctx = scmutil.revsingle(repo, rev, rev)
        t = ctx.manifestnode()
    else:
        from mercurial.node import bin

        if len(rev) == 40:
            t = bin(rev)
        else:
            try:
                rev = int(rev)

                if util.safehasattr(repo.manifestlog, b'getstorage'):
                    t = repo.manifestlog.getstorage(b'').node(rev)
                else:
                    t = repo.manifestlog._revlog.lookup(rev)
            except ValueError:
                raise error.Abort(b'manifest revision must be integer or full '
                                  b'node')
    def d():
        repo.manifestlog.clearcaches(clear_persisted_data=clear_disk)
        repo.manifestlog[t].read()
    timer(d)
    fm.end()

@command(b'perfchangeset', formatteropts)
def perfchangeset(ui, repo, rev, **opts):
    opts = _byteskwargs(opts)
    timer, fm = gettimer(ui, opts)
    n = scmutil.revsingle(repo, rev).node()
    def d():
        repo.changelog.read(n)
        #repo.changelog._cache = None
    timer(d)
    fm.end()

@command(b'perfignore', formatteropts)
def perfignore(ui, repo, **opts):
    """benchmark operation related to computing ignore"""
    opts = _byteskwargs(opts)
    timer, fm = gettimer(ui, opts)
    dirstate = repo.dirstate

    def setupone():
        dirstate.invalidate()
        clearfilecache(dirstate, b'_ignore')

    def runone():
        dirstate._ignore

    timer(runone, setup=setupone, title=b"load")
    fm.end()

@command(b'perfindex', [
            (b'', b'rev', [], b'revision to be looked up (default tip)'),
            (b'', b'no-lookup', None, b'do not perform a lookup after creation'),
         ] + formatteropts)
def perfindex(ui, repo, **opts):
    """benchmark index creation time followed by a lookup

    The default is to look `tip` up. Depending on the index implementation,
    the revision looked up can matter. For example, an implementation
    scanning the index will have a faster lookup time for `--rev tip` than
    for `--rev 0`. The number of looked up revisions and their order can
    also matter.

    Examples of useful sets to test:
    * tip
    * 0
    * -10:
    * :10
    * -10: + :10
    * :10: + -10:
    * -10000:
    * -10000: + 0

    It is not currently possible to check for lookup of a missing node. For
    deeper lookup benchmarking, check out the `perfnodemap` command."""
    import mercurial.revlog
    opts = _byteskwargs(opts)
    timer, fm = gettimer(ui, opts)
    mercurial.revlog._prereadsize = 2**24 # disable lazy parser in old hg
    if opts[b'no_lookup']:
        if opts['rev']:
            raise error.Abort('--no-lookup and --rev are mutually exclusive')
        nodes = []
    elif not opts[b'rev']:
        nodes = [repo[b"tip"].node()]
    else:
        revs = scmutil.revrange(repo, opts[b'rev'])
        cl = repo.changelog
        nodes = [cl.node(r) for r in revs]

    unfi = repo.unfiltered()
    # find the filecache func directly
    # This avoids polluting the benchmark with the filecache logic
    makecl = unfi.__class__.changelog.func
    def setup():
        # probably not necessary, but for good measure
        clearchangelog(unfi)
    def d():
        cl = makecl(unfi)
        for n in nodes:
            cl.rev(n)
    timer(d, setup=setup)
    fm.end()

@command(b'perfnodemap', [
            (b'', b'rev', [], b'revision to be looked up (default tip)'),
            (b'', b'clear-caches', True, b'clear revlog cache between calls'),
         ] + formatteropts)
def perfnodemap(ui, repo, **opts):
    """benchmark the time necessary to look up revision from a cold nodemap

    Depending on the implementation, the number and order of revisions we
    look up can vary. Examples of useful sets to test:
    * tip
    * 0
    * -10:
    * :10
    * -10: + :10
    * :10: + -10:
    * -10000:
    * -10000: + 0

    The command currently focuses on valid binary lookup. Benchmarking for
    hexlookup, prefix lookup and missing lookup would also be valuable.
    """
    import mercurial.revlog
    opts = _byteskwargs(opts)
    timer, fm = gettimer(ui, opts)
    mercurial.revlog._prereadsize = 2**24 # disable lazy parser in old hg

    unfi = repo.unfiltered()
    clearcaches = opts['clear_caches']
    # find the filecache func directly
    # This avoids polluting the benchmark with the filecache logic
    makecl = unfi.__class__.changelog.func
    if not opts[b'rev']:
        raise error.Abort('use --rev to specify revisions to look up')
    revs = scmutil.revrange(repo, opts[b'rev'])
    cl = repo.changelog
    nodes = [cl.node(r) for r in revs]

    # use a list to pass reference to a nodemap from one closure to the next
    nodeget = [None]
    def setnodeget():
        # probably not necessary, but for good measure
        clearchangelog(unfi)
        nodeget[0] = makecl(unfi).nodemap.get

    def d():
        get = nodeget[0]
        for n in nodes:
            get(n)

    setup = None
    if clearcaches:
        def setup():
            setnodeget()
    else:
        setnodeget()
        d() # prewarm the data structure
    timer(d, setup=setup)
    fm.end()

@command(b'perfstartup', formatteropts)
def perfstartup(ui, repo, **opts):
    opts = _byteskwargs(opts)
    timer, fm = gettimer(ui, opts)
    def d():
        if os.name != r'nt':
            os.system(b"HGRCPATH= %s version -q > /dev/null" %
                      fsencode(sys.argv[0]))
        else:
            os.environ[r'HGRCPATH'] = r' '
            os.system(r"%s version -q > NUL" % sys.argv[0])
    timer(d)
    fm.end()

@command(b'perfparents', formatteropts)
def perfparents(ui, repo, **opts):
    """benchmark the time necessary to fetch one changeset's parents.

    The fetch is done using the `node identifier`, traversing all object
    layers from the repository object. The first N revisions will be used
    for this benchmark. N is controlled by the ``perf.parentscount`` config
    option (default: 1000).
    """
    opts = _byteskwargs(opts)
    timer, fm = gettimer(ui, opts)
    # control the number of commits perfparents iterates over
    # experimental config: perf.parentscount
    count = getint(ui, b"perf", b"parentscount", 1000)
    if len(repo.changelog) < count:
        raise error.Abort(b"repo needs %d commits for this test" % count)
    repo = repo.unfiltered()
    nl = [repo.changelog.node(i) for i in _xrange(count)]
    def d():
        for n in nl:
            repo.changelog.parents(n)
    timer(d)
    fm.end()

@command(b'perfctxfiles', formatteropts)
def perfctxfiles(ui, repo, x, **opts):
    opts = _byteskwargs(opts)
    x = int(x)
    timer, fm = gettimer(ui, opts)
    def d():
        len(repo[x].files())
    timer(d)
    fm.end()

@command(b'perfrawfiles', formatteropts)
def perfrawfiles(ui, repo, x, **opts):
    opts = _byteskwargs(opts)
    x = int(x)
    timer, fm = gettimer(ui, opts)
    cl = repo.changelog
    def d():
        len(cl.read(x)[3])
    timer(d)
    fm.end()

@command(b'perflookup', formatteropts)
def perflookup(ui, repo, rev, **opts):
    opts = _byteskwargs(opts)
    timer, fm = gettimer(ui, opts)
    timer(lambda: len(repo.lookup(rev)))
    fm.end()

@command(b'perflinelogedits',
         [(b'n', b'edits', 10000, b'number of edits'),
          (b'', b'max-hunk-lines', 10, b'max lines in a hunk'),
          ], norepo=True)
def perflinelogedits(ui, **opts):
    from mercurial import linelog

    opts = _byteskwargs(opts)

    edits = opts[b'edits']
    maxhunklines = opts[b'max_hunk_lines']

    maxb1 = 100000
    random.seed(0)
    randint = random.randint
    currentlines = 0
    arglist = []
    for rev in _xrange(edits):
        a1 = randint(0, currentlines)
        a2 = randint(a1, min(currentlines, a1 + maxhunklines))
        b1 = randint(0, maxb1)
        b2 = randint(b1, b1 + maxhunklines)
        currentlines += (b2 - b1) - (a2 - a1)
        arglist.append((rev, a1, a2, b1, b2))

    def d():
        ll = linelog.linelog()
        for args in arglist:
            ll.replacelines(*args)

    timer, fm = gettimer(ui, opts)
    timer(d)
    fm.end()

@command(b'perfrevrange', formatteropts)
def perfrevrange(ui, repo, *specs, **opts):
    opts = _byteskwargs(opts)
    timer, fm = gettimer(ui, opts)
    revrange = scmutil.revrange
    timer(lambda: len(revrange(repo, specs)))
    fm.end()

@command(b'perfnodelookup', formatteropts)
def perfnodelookup(ui, repo, rev, **opts):
    opts = _byteskwargs(opts)
    timer, fm = gettimer(ui, opts)
    import mercurial.revlog
    mercurial.revlog._prereadsize = 2**24 # disable lazy parser in old hg
    n = scmutil.revsingle(repo, rev).node()
    cl = mercurial.revlog.revlog(getsvfs(repo), b"00changelog.i")
    def d():
        cl.rev(n)
        clearcaches(cl)
    timer(d)
    fm.end()

@command(b'perflog',
         [(b'', b'rename', False, b'ask log to follow renames')
          ] + formatteropts)
def perflog(ui, repo, rev=None, **opts):
    opts = _byteskwargs(opts)
    if rev is None:
        rev = []
    timer, fm = gettimer(ui, opts)
    ui.pushbuffer()
    timer(lambda: commands.log(ui, repo, rev=rev, date=b'', user=b'',
                               copies=opts.get(b'rename')))
    ui.popbuffer()
    fm.end()

@command(b'perfmoonwalk', formatteropts)
def perfmoonwalk(ui, repo, **opts):
    """benchmark walking the changelog backwards

    This also loads the changelog data for each revision in the changelog.
    """
    opts = _byteskwargs(opts)
    timer, fm = gettimer(ui, opts)
    def moonwalk():
        for i in repo.changelog.revs(start=(len(repo) - 1), stop=-1):
            ctx = repo[i]
            ctx.branch() # read changelog data (in addition to the index)
    timer(moonwalk)
    fm.end()

@command(b'perftemplating',
         [(b'r', b'rev', [], b'revisions to run the template on'),
          ] + formatteropts)
def perftemplating(ui, repo, testedtemplate=None, **opts):
    """test the rendering time of a given template"""
    if makelogtemplater is None:
        raise error.Abort((b"perftemplating not available with this Mercurial"),
                          hint=b"use 4.3 or later")

    opts = _byteskwargs(opts)

    nullui = ui.copy()
    nullui.fout = open(os.devnull, r'wb')
    nullui.disablepager()
    revs = opts.get(b'rev')
    if not revs:
        revs = [b'all()']
    revs = list(scmutil.revrange(repo, revs))

    defaulttemplate = (b'{date|shortdate} [{rev}:{node|short}]'
                       b' {author|person}: {desc|firstline}\n')
    if testedtemplate is None:
        testedtemplate = defaulttemplate
    displayer = makelogtemplater(nullui, repo, testedtemplate)
    def format():
        for r in revs:
            ctx = repo[r]
            displayer.show(ctx)
            displayer.flush(ctx)

    timer, fm = gettimer(ui, opts)
    timer(format)
    fm.end()

@command(b'perfhelper-mergecopies', formatteropts +
         [
             (b'r', b'revs', [], b'restrict search to these revisions'),
             (b'', b'timing', False, b'provides extra data (costly)'),
         ])
def perfhelpermergecopies(ui, repo, revs=[], **opts):
    """find statistics about potential parameters for `perfmergecopies`

    This command finds (base, p1, p2) triplets relevant for copytracing
    benchmarking in the context of a merge. It reports values for some of
    the parameters that impact merge copy tracing time during a merge.

    If `--timing` is set, rename detection is run and the associated timing
    will be reported. The extra details come at the cost of slower command
    execution.

    Since rename detection is only run once, other factors might easily
    affect the precision of the timing. However it should give a good
    approximation of which revision triplets are very costly.
    """
    opts = _byteskwargs(opts)
    fm = ui.formatter(b'perf', opts)
    dotiming = opts[b'timing']

    output_template = [
        ("base", "%(base)12s"),
        ("p1", "%(p1.node)12s"),
        ("p2", "%(p2.node)12s"),
        ("p1.nb-revs", "%(p1.nbrevs)12d"),
        ("p1.nb-files", "%(p1.nbmissingfiles)12d"),
        ("p1.renames", "%(p1.renamedfiles)12d"),
        ("p1.time", "%(p1.time)12.3f"),
        ("p2.nb-revs", "%(p2.nbrevs)12d"),
        ("p2.nb-files", "%(p2.nbmissingfiles)12d"),
        ("p2.renames", "%(p2.renamedfiles)12d"),
        ("p2.time", "%(p2.time)12.3f"),
        ("renames", "%(nbrenamedfiles)12d"),
        ("total.time", "%(time)12.3f"),
    ]
    if not dotiming:
        output_template = [i for i in output_template
                           if not ('time' in i[0] or 'renames' in i[0])]
    header_names = [h for (h, v) in output_template]
    output = ' '.join([v for (h, v) in output_template]) + '\n'
    header = ' '.join(['%12s'] * len(header_names)) + '\n'
    fm.plain(header % tuple(header_names))

    if not revs:
        revs = ['all()']
    revs = scmutil.revrange(repo, revs)

    roi = repo.revs('merge() and %ld', revs)
    for r in roi:
        ctx = repo[r]
        p1 = ctx.p1()
        p2 = ctx.p2()
        bases = repo.changelog._commonancestorsheads(p1.rev(), p2.rev())
        for b in bases:
            b = repo[b]
            p1missing = copies._computeforwardmissing(b, p1)
            p2missing = copies._computeforwardmissing(b, p2)
            data = {
                b'base': b.hex(),
                b'p1.node': p1.hex(),
                b'p1.nbrevs': len(repo.revs('%d::%d', b.rev(), p1.rev())),
                b'p1.nbmissingfiles': len(p1missing),
                b'p2.node': p2.hex(),
                b'p2.nbrevs': len(repo.revs('%d::%d', b.rev(), p2.rev())),
                b'p2.nbmissingfiles': len(p2missing),
            }
            if dotiming:
                begin = util.timer()
                mergedata = copies.mergecopies(repo, p1, p2, b)
                end = util.timer()
                # not very stable timing since we did only one run
                data['time'] = end - begin
                # mergedata contains five dicts: "copy", "movewithdir",
                # "diverge", "renamedelete" and "dirmove".
                # The first 4 are about renamed files, so let's count those.
                renames = len(mergedata[0])
                renames += len(mergedata[1])
                renames += len(mergedata[2])
                renames += len(mergedata[3])
                data['nbrenamedfiles'] = renames
                begin = util.timer()
                p1renames = copies.pathcopies(b, p1)
                end = util.timer()
                data['p1.time'] = end - begin
                begin = util.timer()
                p2renames = copies.pathcopies(b, p2)
                end = util.timer()
                data['p2.time'] = end - begin
                data['p1.renamedfiles'] = len(p1renames)
                data['p2.renamedfiles'] = len(p2renames)
            fm.startitem()
            fm.data(**data)
            # make node pretty for the human output
            out = data.copy()
            out['base'] = fm.hexfunc(b.node())
            out['p1.node'] = fm.hexfunc(p1.node())
            out['p2.node'] = fm.hexfunc(p2.node())
            fm.plain(output % out)

    fm.end()
1578 1578
1579 1579 @command(b'perfhelper-pathcopies', formatteropts +
1580 1580 [
1581 1581 (b'r', b'revs', [], b'restrict search to these revisions'),
1582 1582 (b'', b'timing', False, b'provides extra data (costly)'),
1583 1583 ])
1584 1584 def perfhelperpathcopies(ui, repo, revs=[], **opts):
1585 1585 """find statistic about potential parameters for the `perftracecopies`
1586 1586
1587 1587 This command find source-destination pair relevant for copytracing testing.
1588 1588 It report value for some of the parameters that impact copy tracing time.
1589 1589
1590 1590 If `--timing` is set, rename detection is run and the associated timing
1591 1591 will be reported. The extra details come at the cost of slower command
1592 1592 execution.
1593 1593
1594 1594 Since the rename detection is only run once, other factors might easily
1595 1595 affect the precision of the timing. However, it should give a good
1596 1596 approximation of which revision pairs are very costly.
1597 1597 """
1598 1598 opts = _byteskwargs(opts)
1599 1599 fm = ui.formatter(b'perf', opts)
1600 1600 dotiming = opts[b'timing']
1601 1601
1602 1602 if dotiming:
1603 1603 header = '%12s %12s %12s %12s %12s %12s\n'
1604 1604 output = ("%(source)12s %(destination)12s "
1605 1605 "%(nbrevs)12d %(nbmissingfiles)12d "
1606 1606 "%(nbrenamedfiles)12d %(time)18.5f\n")
1607 1607 header_names = ("source", "destination", "nb-revs", "nb-files",
1608 1608 "nb-renames", "time")
1609 1609 fm.plain(header % header_names)
1610 1610 else:
1611 1611 header = '%12s %12s %12s %12s\n'
1612 1612 output = ("%(source)12s %(destination)12s "
1613 1613 "%(nbrevs)12d %(nbmissingfiles)12d\n")
1614 1614 fm.plain(header % ("source", "destination", "nb-revs", "nb-files"))
1615 1615
1616 1616 if not revs:
1617 1617 revs = ['all()']
1618 1618 revs = scmutil.revrange(repo, revs)
1619 1619
1620 1620 roi = repo.revs('merge() and %ld', revs)
1621 1621 for r in roi:
1622 1622 ctx = repo[r]
1623 1623 p1 = ctx.p1().rev()
1624 1624 p2 = ctx.p2().rev()
1625 1625 bases = repo.changelog._commonancestorsheads(p1, p2)
1626 1626 for p in (p1, p2):
1627 1627 for b in bases:
1628 1628 base = repo[b]
1629 1629 parent = repo[p]
1630 1630 missing = copies._computeforwardmissing(base, parent)
1631 1631 if not missing:
1632 1632 continue
1633 1633 data = {
1634 1634 b'source': base.hex(),
1635 1635 b'destination': parent.hex(),
1636 1636 b'nbrevs': len(repo.revs('%d::%d', b, p)),
1637 1637 b'nbmissingfiles': len(missing),
1638 1638 }
1639 1639 if dotiming:
1640 1640 begin = util.timer()
1641 1641 renames = copies.pathcopies(base, parent)
1642 1642 end = util.timer()
1643 1643 # timing is not very stable since we only did one run
1644 1644 data['time'] = end - begin
1645 1645 data['nbrenamedfiles'] = len(renames)
1646 1646 fm.startitem()
1647 1647 fm.data(**data)
1648 1648 out = data.copy()
1649 1649 out['source'] = fm.hexfunc(base.node())
1650 1650 out['destination'] = fm.hexfunc(parent.node())
1651 1651 fm.plain(output % out)
1652 1652
1653 1653 fm.end()
1654 1654
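As a point of reference, here is a minimal standalone sketch of the measurement this command automates: timing a single copies.pathcopies() call between two contexts. The repository path and revisions below are illustrative assumptions, not values from this file:

    from mercurial import copies, hg, ui as uimod, util

    repo = hg.repository(uimod.ui.load(), b'/path/to/repo')  # hypothetical path
    base, dest = repo[b'1000'], repo[b'tip']                  # hypothetical revs
    begin = util.timer()
    renames = copies.pathcopies(base, dest)  # maps destination -> source
    print('%d renames in %.3fs' % (len(renames), util.timer() - begin))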
1655 1655 @command(b'perfcca', formatteropts)
1656 1656 def perfcca(ui, repo, **opts):
1657 1657 opts = _byteskwargs(opts)
1658 1658 timer, fm = gettimer(ui, opts)
1659 1659 timer(lambda: scmutil.casecollisionauditor(ui, False, repo.dirstate))
1660 1660 fm.end()
1661 1661
1662 1662 @command(b'perffncacheload', formatteropts)
1663 1663 def perffncacheload(ui, repo, **opts):
1664 1664 opts = _byteskwargs(opts)
1665 1665 timer, fm = gettimer(ui, opts)
1666 1666 s = repo.store
1667 1667 def d():
1668 1668 s.fncache._load()
1669 1669 timer(d)
1670 1670 fm.end()
1671 1671
1672 1672 @command(b'perffncachewrite', formatteropts)
1673 1673 def perffncachewrite(ui, repo, **opts):
1674 1674 opts = _byteskwargs(opts)
1675 1675 timer, fm = gettimer(ui, opts)
1676 1676 s = repo.store
1677 1677 lock = repo.lock()
1678 1678 s.fncache._load()
1679 1679 tr = repo.transaction(b'perffncachewrite')
1680 1680 tr.addbackup(b'fncache')
1681 1681 def d():
1682 1682 s.fncache._dirty = True
1683 1683 s.fncache.write(tr)
1684 1684 timer(d)
1685 1685 tr.close()
1686 1686 lock.release()
1687 1687 fm.end()
1688 1688
1689 1689 @command(b'perffncacheencode', formatteropts)
1690 1690 def perffncacheencode(ui, repo, **opts):
1691 1691 opts = _byteskwargs(opts)
1692 1692 timer, fm = gettimer(ui, opts)
1693 1693 s = repo.store
1694 1694 s.fncache._load()
1695 1695 def d():
1696 1696 for p in s.fncache.entries:
1697 1697 s.encode(p)
1698 1698 timer(d)
1699 1699 fm.end()
1700 1700
1701 1701 def _bdiffworker(q, blocks, xdiff, ready, done):
1702 1702 while not done.is_set():
1703 1703 pair = q.get()
1704 1704 while pair is not None:
1705 1705 if xdiff:
1706 1706 mdiff.bdiff.xdiffblocks(*pair)
1707 1707 elif blocks:
1708 1708 mdiff.bdiff.blocks(*pair)
1709 1709 else:
1710 1710 mdiff.textdiff(*pair)
1711 1711 q.task_done()
1712 1712 pair = q.get()
1713 1713 q.task_done() # for the None one
1714 1714 with ready:
1715 1715 ready.wait()
1716 1716
1717 1717 def _manifestrevision(repo, mnode):
1718 1718 ml = repo.manifestlog
1719 1719
1720 1720 if util.safehasattr(ml, b'getstorage'):
1721 1721 store = ml.getstorage(b'')
1722 1722 else:
1723 1723 store = ml._revlog
1724 1724
1725 1725 return store.revision(mnode)
1726 1726
1727 1727 @command(b'perfbdiff', revlogopts + formatteropts + [
1728 1728 (b'', b'count', 1, b'number of revisions to test (when using --startrev)'),
1729 1729 (b'', b'alldata', False, b'test bdiffs for all associated revisions'),
1730 1730 (b'', b'threads', 0, b'number of thread to use (disable with 0)'),
1731 1731 (b'', b'blocks', False, b'test computing diffs into blocks'),
1732 1732 (b'', b'xdiff', False, b'use xdiff algorithm'),
1733 1733 ],
1734 1734
1735 1735 b'-c|-m|FILE REV')
1736 1736 def perfbdiff(ui, repo, file_, rev=None, count=None, threads=0, **opts):
1737 1737 """benchmark a bdiff between revisions
1738 1738
1739 1739 By default, benchmark a bdiff between its delta parent and itself.
1740 1740
1741 1741 With ``--count``, benchmark bdiffs between delta parents and self for N
1742 1742 revisions starting at the specified revision.
1743 1743
1744 1744 With ``--alldata``, assume the requested revision is a changeset and
1745 1745 measure bdiffs for all changes related to that changeset (manifest
1746 1746 and filelogs).
1747 1747 """
1748 1748 opts = _byteskwargs(opts)
1749 1749
1750 1750 if opts[b'xdiff'] and not opts[b'blocks']:
1751 1751 raise error.CommandError(b'perfbdiff', b'--xdiff requires --blocks')
1752 1752
1753 1753 if opts[b'alldata']:
1754 1754 opts[b'changelog'] = True
1755 1755
1756 1756 if opts.get(b'changelog') or opts.get(b'manifest'):
1757 1757 file_, rev = None, file_
1758 1758 elif rev is None:
1759 1759 raise error.CommandError(b'perfbdiff', b'invalid arguments')
1760 1760
1761 1761 blocks = opts[b'blocks']
1762 1762 xdiff = opts[b'xdiff']
1763 1763 textpairs = []
1764 1764
1765 1765 r = cmdutil.openrevlog(repo, b'perfbdiff', file_, opts)
1766 1766
1767 1767 startrev = r.rev(r.lookup(rev))
1768 1768 for rev in range(startrev, min(startrev + count, len(r) - 1)):
1769 1769 if opts[b'alldata']:
1770 1770 # Load revisions associated with changeset.
1771 1771 ctx = repo[rev]
1772 1772 mtext = _manifestrevision(repo, ctx.manifestnode())
1773 1773 for pctx in ctx.parents():
1774 1774 pman = _manifestrevision(repo, pctx.manifestnode())
1775 1775 textpairs.append((pman, mtext))
1776 1776
1777 1777 # Load filelog revisions by iterating manifest delta.
1778 1778 man = ctx.manifest()
1779 1779 pman = ctx.p1().manifest()
1780 1780 for filename, change in pman.diff(man).items():
1781 1781 fctx = repo.file(filename)
1782 1782 f1 = fctx.revision(change[0][0] or -1)
1783 1783 f2 = fctx.revision(change[1][0] or -1)
1784 1784 textpairs.append((f1, f2))
1785 1785 else:
1786 1786 dp = r.deltaparent(rev)
1787 1787 textpairs.append((r.revision(dp), r.revision(rev)))
1788 1788
1789 1789 withthreads = threads > 0
1790 1790 if not withthreads:
1791 1791 def d():
1792 1792 for pair in textpairs:
1793 1793 if xdiff:
1794 1794 mdiff.bdiff.xdiffblocks(*pair)
1795 1795 elif blocks:
1796 1796 mdiff.bdiff.blocks(*pair)
1797 1797 else:
1798 1798 mdiff.textdiff(*pair)
1799 1799 else:
1800 1800 q = queue()
1801 1801 for i in _xrange(threads):
1802 1802 q.put(None)
1803 1803 ready = threading.Condition()
1804 1804 done = threading.Event()
1805 1805 for i in _xrange(threads):
1806 1806 threading.Thread(target=_bdiffworker,
1807 1807 args=(q, blocks, xdiff, ready, done)).start()
1808 1808 q.join()
1809 1809 def d():
1810 1810 for pair in textpairs:
1811 1811 q.put(pair)
1812 1812 for i in _xrange(threads):
1813 1813 q.put(None)
1814 1814 with ready:
1815 1815 ready.notify_all()
1816 1816 q.join()
1817 1817 timer, fm = gettimer(ui, opts)
1818 1818 timer(d)
1819 1819 fm.end()
1820 1820
1821 1821 if withthreads:
1822 1822 done.set()
1823 1823 for i in _xrange(threads):
1824 1824 q.put(None)
1825 1825 with ready:
1826 1826 ready.notify_all()
1827 1827
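For orientation, a tiny sketch of what each worker executes per text pair; both calls are the same mdiff APIs used above, and the sample texts are assumptions:

    from mercurial import mdiff

    old, new = b'a\nb\nc\n', b'a\nB\nc\nd\n'
    delta = mdiff.textdiff(old, new)       # binary delta, as stored in revlogs
    blocks = mdiff.bdiff.blocks(old, new)  # (a1, a2, b1, b2) matching blocks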
1828 1828 @command(b'perfunidiff', revlogopts + formatteropts + [
1829 1829 (b'', b'count', 1, b'number of revisions to test (when using --startrev)'),
1830 1830 (b'', b'alldata', False, b'test unidiffs for all associated revisions'),
1831 1831 ], b'-c|-m|FILE REV')
1832 1832 def perfunidiff(ui, repo, file_, rev=None, count=None, **opts):
1833 1833 """benchmark a unified diff between revisions
1834 1834
1835 1835 This doesn't include any copy tracing - it's just a unified diff
1836 1836 of the texts.
1837 1837
1838 1838 By default, benchmark a diff between its delta parent and itself.
1839 1839
1840 1840 With ``--count``, benchmark diffs between delta parents and self for N
1841 1841 revisions starting at the specified revision.
1842 1842
1843 1843 With ``--alldata``, assume the requested revision is a changeset and
1844 1844 measure diffs for all changes related to that changeset (manifest
1845 1845 and filelogs).
1846 1846 """
1847 1847 opts = _byteskwargs(opts)
1848 1848 if opts[b'alldata']:
1849 1849 opts[b'changelog'] = True
1850 1850
1851 1851 if opts.get(b'changelog') or opts.get(b'manifest'):
1852 1852 file_, rev = None, file_
1853 1853 elif rev is None:
1854 1854 raise error.CommandError(b'perfunidiff', b'invalid arguments')
1855 1855
1856 1856 textpairs = []
1857 1857
1858 1858 r = cmdutil.openrevlog(repo, b'perfunidiff', file_, opts)
1859 1859
1860 1860 startrev = r.rev(r.lookup(rev))
1861 1861 for rev in range(startrev, min(startrev + count, len(r) - 1)):
1862 1862 if opts[b'alldata']:
1863 1863 # Load revisions associated with changeset.
1864 1864 ctx = repo[rev]
1865 1865 mtext = _manifestrevision(repo, ctx.manifestnode())
1866 1866 for pctx in ctx.parents():
1867 1867 pman = _manifestrevision(repo, pctx.manifestnode())
1868 1868 textpairs.append((pman, mtext))
1869 1869
1870 1870 # Load filelog revisions by iterating manifest delta.
1871 1871 man = ctx.manifest()
1872 1872 pman = ctx.p1().manifest()
1873 1873 for filename, change in pman.diff(man).items():
1874 1874 fctx = repo.file(filename)
1875 1875 f1 = fctx.revision(change[0][0] or -1)
1876 1876 f2 = fctx.revision(change[1][0] or -1)
1877 1877 textpairs.append((f1, f2))
1878 1878 else:
1879 1879 dp = r.deltaparent(rev)
1880 1880 textpairs.append((r.revision(dp), r.revision(rev)))
1881 1881
1882 1882 def d():
1883 1883 for left, right in textpairs:
1884 1884 # The date strings don't matter, so we pass empty strings.
1885 1885 headerlines, hunks = mdiff.unidiff(
1886 1886 left, b'', right, b'', b'left', b'right', binary=False)
1887 1887 # consume iterators in roughly the way patch.py does
1888 1888 b'\n'.join(headerlines)
1889 1889 b''.join(sum((list(hlines) for hrange, hlines in hunks), []))
1890 1890 timer, fm = gettimer(ui, opts)
1891 1891 timer(d)
1892 1892 fm.end()
1893 1893
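The timed closure consumes mdiff.unidiff() the same way a caller would; a minimal sketch with assumed sample texts:

    from mercurial import mdiff

    headerlines, hunks = mdiff.unidiff(b'a\n', b'', b'b\n', b'',
                                       b'left', b'right', binary=False)
    # consume the iterators, roughly as patch.py does
    b'\n'.join(headerlines)
    b''.join(l for hrange, hlines in hunks for l in hlines)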
1894 1894 @command(b'perfdiffwd', formatteropts)
1895 1895 def perfdiffwd(ui, repo, **opts):
1896 1896 """Profile diff of working directory changes"""
1897 1897 opts = _byteskwargs(opts)
1898 1898 timer, fm = gettimer(ui, opts)
1899 1899 options = {
1900 1900 'w': 'ignore_all_space',
1901 1901 'b': 'ignore_space_change',
1902 1902 'B': 'ignore_blank_lines',
1903 1903 }
1904 1904
1905 1905 for diffopt in ('', 'w', 'b', 'B', 'wB'):
1906 1906 opts = dict((options[c], b'1') for c in diffopt)
1907 1907 def d():
1908 1908 ui.pushbuffer()
1909 1909 commands.diff(ui, repo, **opts)
1910 1910 ui.popbuffer()
1911 1911 diffopt = diffopt.encode('ascii')
1912 1912 title = b'diffopts: %s' % (diffopt and (b'-' + diffopt) or b'none')
1913 1913 timer(d, title=title)
1914 1914 fm.end()
1915 1915
1916 1916 @command(b'perfrevlogindex', revlogopts + formatteropts,
1917 1917 b'-c|-m|FILE')
1918 1918 def perfrevlogindex(ui, repo, file_=None, **opts):
1919 1919 """Benchmark operations against a revlog index.
1920 1920
1921 1921 This tests constructing a revlog instance, reading index data,
1922 1922 parsing index data, and performing various operations related to
1923 1923 index data.
1924 1924 """
1925 1925
1926 1926 opts = _byteskwargs(opts)
1927 1927
1928 1928 rl = cmdutil.openrevlog(repo, b'perfrevlogindex', file_, opts)
1929 1929
1930 1930 opener = getattr(rl, 'opener') # trick linter
1931 1931 indexfile = rl.indexfile
1932 1932 data = opener.read(indexfile)
1933 1933
1934 1934 header = struct.unpack(b'>I', data[0:4])[0]
1935 1935 version = header & 0xFFFF
1936 1936 if version == 1:
1937 1937 revlogio = revlog.revlogio()
1938 1938 inline = header & (1 << 16)
1939 1939 else:
1940 1940 raise error.Abort(b'unsupported revlog version: %d' % version)
1941 1941
1942 1942 rllen = len(rl)
1943 1943
1944 1944 node0 = rl.node(0)
1945 1945 node25 = rl.node(rllen // 4)
1946 1946 node50 = rl.node(rllen // 2)
1947 1947 node75 = rl.node(rllen // 4 * 3)
1948 1948 node100 = rl.node(rllen - 1)
1949 1949
1950 1950 allrevs = range(rllen)
1951 1951 allrevsrev = list(reversed(allrevs))
1952 1952 allnodes = [rl.node(rev) for rev in range(rllen)]
1953 1953 allnodesrev = list(reversed(allnodes))
1954 1954
1955 1955 def constructor():
1956 1956 revlog.revlog(opener, indexfile)
1957 1957
1958 1958 def read():
1959 1959 with opener(indexfile) as fh:
1960 1960 fh.read()
1961 1961
1962 1962 def parseindex():
1963 1963 revlogio.parseindex(data, inline)
1964 1964
1965 1965 def getentry(revornode):
1966 1966 index = revlogio.parseindex(data, inline)[0]
1967 1967 index[revornode]
1968 1968
1969 1969 def getentries(revs, count=1):
1970 1970 index = revlogio.parseindex(data, inline)[0]
1971 1971
1972 1972 for i in range(count):
1973 1973 for rev in revs:
1974 1974 index[rev]
1975 1975
1976 1976 def resolvenode(node):
1977 1977 nodemap = revlogio.parseindex(data, inline)[1]
1978 1978 # This only works for the C code.
1979 1979 if nodemap is None:
1980 1980 return
1981 1981
1982 1982 try:
1983 1983 nodemap[node]
1984 1984 except error.RevlogError:
1985 1985 pass
1986 1986
1987 1987 def resolvenodes(nodes, count=1):
1988 1988 nodemap = revlogio.parseindex(data, inline)[1]
1989 1989 if nodemap is None:
1990 1990 return
1991 1991
1992 1992 for i in range(count):
1993 1993 for node in nodes:
1994 1994 try:
1995 1995 nodemap[node]
1996 1996 except error.RevlogError:
1997 1997 pass
1998 1998
1999 1999 benches = [
2000 2000 (constructor, b'revlog constructor'),
2001 2001 (read, b'read'),
2002 2002 (parseindex, b'create index object'),
2003 2003 (lambda: getentry(0), b'retrieve index entry for rev 0'),
2004 2004 (lambda: resolvenode(b'a' * 20), b'look up missing node'),
2005 2005 (lambda: resolvenode(node0), b'look up node at rev 0'),
2006 2006 (lambda: resolvenode(node25), b'look up node at 1/4 len'),
2007 2007 (lambda: resolvenode(node50), b'look up node at 1/2 len'),
2008 2008 (lambda: resolvenode(node75), b'look up node at 3/4 len'),
2009 2009 (lambda: resolvenode(node100), b'look up node at tip'),
2010 2010 # 2x variation is to measure caching impact.
2011 2011 (lambda: resolvenodes(allnodes),
2012 2012 b'look up all nodes (forward)'),
2013 2013 (lambda: resolvenodes(allnodes, 2),
2014 2014 b'look up all nodes 2x (forward)'),
2015 2015 (lambda: resolvenodes(allnodesrev),
2016 2016 b'look up all nodes (reverse)'),
2017 2017 (lambda: resolvenodes(allnodesrev, 2),
2018 2018 b'look up all nodes 2x (reverse)'),
2019 2019 (lambda: getentries(allrevs),
2020 2020 b'retrieve all index entries (forward)'),
2021 2021 (lambda: getentries(allrevs, 2),
2022 2022 b'retrieve all index entries 2x (forward)'),
2023 2023 (lambda: getentries(allrevsrev),
2024 2024 b'retrieve all index entries (reverse)'),
2025 2025 (lambda: getentries(allrevsrev, 2),
2026 2026 b'retrieve all index entries 2x (reverse)'),
2027 2027 ]
2028 2028
2029 2029 for fn, title in benches:
2030 2030 timer, fm = gettimer(ui, opts)
2031 2031 timer(fn, title=title)
2032 2032 fm.end()
2033 2033
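A condensed sketch of the parsing path these benchmarks exercise, assuming `data` already holds the raw bytes of a version-1 ``.i`` file as read above:

    from mercurial import revlog

    revlogio = revlog.revlogio()
    index = revlogio.parseindex(data, False)[0]    # index entries, by rev
    nodemap = revlogio.parseindex(data, False)[1]  # node -> rev (C parser only)
    entry = index[0]                               # tuple for revision 0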
2034 2034 @command(b'perfrevlogrevisions', revlogopts + formatteropts +
2035 2035 [(b'd', b'dist', 100, b'distance between the revisions'),
2036 2036 (b's', b'startrev', 0, b'revision to start reading at'),
2037 2037 (b'', b'reverse', False, b'read in reverse')],
2038 2038 b'-c|-m|FILE')
2039 2039 def perfrevlogrevisions(ui, repo, file_=None, startrev=0, reverse=False,
2040 2040 **opts):
2041 2041 """Benchmark reading a series of revisions from a revlog.
2042 2042
2043 2043 By default, we read every ``-d/--dist`` revision from 0 to tip of
2044 2044 the specified revlog.
2045 2045
2046 2046 The start revision can be defined via ``-s/--startrev``.
2047 2047 """
2048 2048 opts = _byteskwargs(opts)
2049 2049
2050 2050 rl = cmdutil.openrevlog(repo, b'perfrevlogrevisions', file_, opts)
2051 2051 rllen = getlen(ui)(rl)
2052 2052
2053 2053 if startrev < 0:
2054 2054 startrev = rllen + startrev
2055 2055
2056 2056 def d():
2057 2057 rl.clearcaches()
2058 2058
2059 2059 beginrev = startrev
2060 2060 endrev = rllen
2061 2061 dist = opts[b'dist']
2062 2062
2063 2063 if reverse:
2064 2064 beginrev, endrev = endrev - 1, beginrev - 1
2065 2065 dist = -1 * dist
2066 2066
2067 2067 for x in _xrange(beginrev, endrev, dist):
2068 2068 # Old revisions don't support passing int.
2069 2069 n = rl.node(x)
2070 2070 rl.revision(n)
2071 2071
2072 2072 timer, fm = gettimer(ui, opts)
2073 2073 timer(d)
2074 2074 fm.end()
2075 2075
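In essence, the timed closure walks the revlog like this (a sketch using the default --dist of 100 against the changelog):

    cl = repo.changelog
    for x in range(0, len(cl), 100):
        node = cl.node(x)   # old revlog versions do not accept an int here
        cl.revision(node)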
2076 2076 @command(b'perfrevlogwrite', revlogopts + formatteropts +
2077 2077 [(b's', b'startrev', 1000, b'revision to start writing at'),
2078 2078 (b'', b'stoprev', -1, b'last revision to write'),
2079 2079 (b'', b'count', 3, b'number of passes to perform'),
2080 2080 (b'', b'details', False, b'print timing for every revision tested'),
2081 2081 (b'', b'source', b'full', b'the kind of data fed into the revlog'),
2082 2082 (b'', b'lazydeltabase', True, b'try the provided delta first'),
2083 2083 (b'', b'clear-caches', True, b'clear revlog cache between calls'),
2084 2084 ],
2085 2085 b'-c|-m|FILE')
2086 2086 def perfrevlogwrite(ui, repo, file_=None, startrev=1000, stoprev=-1, **opts):
2087 2087 """Benchmark writing a series of revisions to a revlog.
2088 2088
2089 2089 Possible source values are:
2090 2090 * `full`: add from a full text (default).
2091 2091 * `parent-1`: add from a delta to the first parent.
2092 2092 * `parent-2`: add from a delta to the second parent if it exists
2093 2093 (use a delta from the first parent otherwise).
2094 2094 * `parent-smallest`: add from the smallest delta (either p1 or p2).
2095 2095 * `storage`: add from the existing precomputed deltas.
2096 2096
2097 2097 Note: This command measures performance in a custom way. As a
2098 2098 result, some of the global configuration of the 'perf' command does not
2099 2099 apply to it:
2100 2100
2101 2101 * ``pre-run``: disabled
2102 2102
2103 2103 * ``profile-benchmark``: disabled
2104 2104
2105 2105 * ``run-limits``: disabled, use --count instead
2106 2106 """
2107 2107 opts = _byteskwargs(opts)
2108 2108
2109 2109 rl = cmdutil.openrevlog(repo, b'perfrevlogwrite', file_, opts)
2110 2110 rllen = getlen(ui)(rl)
2111 2111 if startrev < 0:
2112 2112 startrev = rllen + startrev
2113 2113 if stoprev < 0:
2114 2114 stoprev = rllen + stoprev
2115 2115
2116 2116 lazydeltabase = opts['lazydeltabase']
2117 2117 source = opts['source']
2118 2118 clearcaches = opts['clear_caches']
2119 2119 validsource = (b'full', b'parent-1', b'parent-2', b'parent-smallest',
2120 2120 b'storage')
2121 2121 if source not in validsource:
2122 2122 raise error.Abort('invalid source type: %s' % source)
2123 2123
2124 2124 ### actually gather results
2125 2125 count = opts['count']
2126 2126 if count <= 0:
2127 2127 raise error.Abort('invalid run count: %d' % count)
2128 2128 allresults = []
2129 2129 for c in range(count):
2130 2130 timing = _timeonewrite(ui, rl, source, startrev, stoprev, c + 1,
2131 2131 lazydeltabase=lazydeltabase,
2132 2132 clearcaches=clearcaches)
2133 2133 allresults.append(timing)
2134 2134
2135 2135 ### consolidate the results in a single list
2136 2136 results = []
2137 2137 for idx, (rev, t) in enumerate(allresults[0]):
2138 2138 ts = [t]
2139 2139 for other in allresults[1:]:
2140 2140 orev, ot = other[idx]
2141 2141 assert orev == rev
2142 2142 ts.append(ot)
2143 2143 results.append((rev, ts))
2144 2144 resultcount = len(results)
2145 2145
2146 2146 ### Compute and display relevant statistics
2147 2147
2148 2148 # get a formatter
2149 2149 fm = ui.formatter(b'perf', opts)
2150 2150 displayall = ui.configbool(b"perf", b"all-timing", False)
2151 2151
2152 2152 # print individual details if requested
2153 2153 if opts['details']:
2154 2154 for idx, item in enumerate(results, 1):
2155 2155 rev, data = item
2156 2156 title = 'revision #%d of %d, rev %d' % (idx, resultcount, rev)
2157 2157 formatone(fm, data, title=title, displayall=displayall)
2158 2158
2159 2159 # sorts results by median time
2160 2160 results.sort(key=lambda x: sorted(x[1])[len(x[1]) // 2])
2161 2161 # list of (name, index) to display
2162 2162 relevants = [
2163 2163 ("min", 0),
2164 2164 ("10%", resultcount * 10 // 100),
2165 2165 ("25%", resultcount * 25 // 100),
2166 2166 ("50%", resultcount * 50 // 100),
2167 2167 ("75%", resultcount * 75 // 100),
2168 2168 ("90%", resultcount * 90 // 100),
2169 2169 ("95%", resultcount * 95 // 100),
2170 2170 ("99%", resultcount * 99 // 100),
2171 2171 ("99.9%", resultcount * 999 // 1000),
2172 2172 ("99.99%", resultcount * 9999 // 10000),
2173 2173 ("99.999%", resultcount * 99999 // 100000),
2174 2174 ("max", -1),
2175 2175 ]
2176 2176 if not ui.quiet:
2177 2177 for name, idx in relevants:
2178 2178 data = results[idx]
2179 2179 title = '%s of %d, rev %d' % (name, resultcount, data[0])
2180 2180 formatone(fm, data[1], title=title, displayall=displayall)
2181 2181
2182 2182 # XXX summing that many floats will not be very precise; we ignore this fact
2183 2183 # for now
2184 2184 totaltime = []
2185 2185 for item in allresults:
2186 2186 totaltime.append((sum(x[1][0] for x in item),
2187 2187 sum(x[1][1] for x in item),
2188 2188 sum(x[1][2] for x in item),)
2189 2189 )
2190 2190 formatone(fm, totaltime, title="total time (%d revs)" % resultcount,
2191 2191 displayall=displayall)
2192 2192 fm.end()
2193 2193
2194 2194 class _faketr(object):
2195 2195 def add(s, x, y, z=None):
2196 2196 return None
2197 2197
2198 2198 def _timeonewrite(ui, orig, source, startrev, stoprev, runidx=None,
2199 2199 lazydeltabase=True, clearcaches=True):
2200 2200 timings = []
2201 2201 tr = _faketr()
2202 2202 with _temprevlog(ui, orig, startrev) as dest:
2203 2203 dest._lazydeltabase = lazydeltabase
2204 2204 revs = list(orig.revs(startrev, stoprev))
2205 2205 total = len(revs)
2206 2206 topic = 'adding'
2207 2207 if runidx is not None:
2208 2208 topic += ' (run #%d)' % runidx
2209 2209 # Support both old and new progress API
2210 2210 if util.safehasattr(ui, 'makeprogress'):
2211 2211 progress = ui.makeprogress(topic, unit='revs', total=total)
2212 2212 def updateprogress(pos):
2213 2213 progress.update(pos)
2214 2214 def completeprogress():
2215 2215 progress.complete()
2216 2216 else:
2217 2217 def updateprogress(pos):
2218 2218 ui.progress(topic, pos, unit='revs', total=total)
2219 2219 def completeprogress():
2220 2220 ui.progress(topic, None, unit='revs', total=total)
2221 2221
2222 2222 for idx, rev in enumerate(revs):
2223 2223 updateprogress(idx)
2224 2224 addargs, addkwargs = _getrevisionseed(orig, rev, tr, source)
2225 2225 if clearcaches:
2226 2226 dest.index.clearcaches()
2227 2227 dest.clearcaches()
2228 2228 with timeone() as r:
2229 2229 dest.addrawrevision(*addargs, **addkwargs)
2230 2230 timings.append((rev, r[0]))
2231 2231 updateprogress(total)
2232 2232 completeprogress()
2233 2233 return timings
2234 2234
2235 2235 def _getrevisionseed(orig, rev, tr, source):
2236 2236 from mercurial.node import nullid
2237 2237
2238 2238 linkrev = orig.linkrev(rev)
2239 2239 node = orig.node(rev)
2240 2240 p1, p2 = orig.parents(node)
2241 2241 flags = orig.flags(rev)
2242 2242 cachedelta = None
2243 2243 text = None
2244 2244
2245 2245 if source == b'full':
2246 2246 text = orig.revision(rev)
2247 2247 elif source == b'parent-1':
2248 2248 baserev = orig.rev(p1)
2249 2249 cachedelta = (baserev, orig.revdiff(p1, rev))
2250 2250 elif source == b'parent-2':
2251 2251 parent = p2
2252 2252 if p2 == nullid:
2253 2253 parent = p1
2254 2254 baserev = orig.rev(parent)
2255 2255 cachedelta = (baserev, orig.revdiff(parent, rev))
2256 2256 elif source == b'parent-smallest':
2257 2257 p1diff = orig.revdiff(p1, rev)
2258 2258 parent = p1
2259 2259 diff = p1diff
2260 2260 if p2 != nullid:
2261 2261 p2diff = orig.revdiff(p2, rev)
2262 2262 if len(p1diff) > len(p2diff):
2263 2263 parent = p2
2264 2264 diff = p2diff
2265 2265 baserev = orig.rev(parent)
2266 2266 cachedelta = (baserev, diff)
2267 2267 elif source == b'storage':
2268 2268 baserev = orig.deltaparent(rev)
2269 2269 cachedelta = (baserev, orig.revdiff(orig.node(baserev), rev))
2270 2270
2271 2271 return ((text, tr, linkrev, p1, p2),
2272 2272 {'node': node, 'flags': flags, 'cachedelta': cachedelta})
2273 2273
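Tying the two helpers together, one iteration of the write benchmark essentially boils down to the following sketch, with `orig`, `dest`, `tr` and `rev` set up as in _timeonewrite():

    addargs, addkwargs = _getrevisionseed(orig, rev, tr, b'parent-smallest')
    dest.addrawrevision(*addargs, **addkwargs)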
2274 2274 @contextlib.contextmanager
2275 2275 def _temprevlog(ui, orig, truncaterev):
2276 2276 from mercurial import vfs as vfsmod
2277 2277
2278 2278 if orig._inline:
2279 2279 raise error.Abort('not supporting inline revlog (yet)')
2280 revlogkwargs = {}
2281 k = 'upperboundcomp'
2282 if util.safehasattr(orig, k):
2283 revlogkwargs[k] = getattr(orig, k)
2280 2284
2281 2285 origindexpath = orig.opener.join(orig.indexfile)
2282 2286 origdatapath = orig.opener.join(orig.datafile)
2283 2287 indexname = 'revlog.i'
2284 2288 dataname = 'revlog.d'
2285 2289
2286 2290 tmpdir = tempfile.mkdtemp(prefix='tmp-hgperf-')
2287 2291 try:
2288 2292 # copy the data file in a temporary directory
2289 2293 ui.debug('copying data in %s\n' % tmpdir)
2290 2294 destindexpath = os.path.join(tmpdir, 'revlog.i')
2291 2295 destdatapath = os.path.join(tmpdir, 'revlog.d')
2292 2296 shutil.copyfile(origindexpath, destindexpath)
2293 2297 shutil.copyfile(origdatapath, destdatapath)
2294 2298
2295 2299 # remove the data we want to add again
2296 2300 ui.debug('truncating data to be rewritten\n')
2297 2301 with open(destindexpath, 'ab') as index:
2298 2302 index.seek(0)
2299 2303 index.truncate(truncaterev * orig._io.size)
2300 2304 with open(destdatapath, 'ab') as data:
2301 2305 data.seek(0)
2302 2306 data.truncate(orig.start(truncaterev))
2303 2307
2304 2308 # instantiate a new revlog from the temporary copy
2305 2309 ui.debug('instantiating a new revlog from the truncated copy\n')
2306 2310 vfs = vfsmod.vfs(tmpdir)
2307 2311 vfs.options = getattr(orig.opener, 'options', None)
2308 2312
2309 2313 dest = revlog.revlog(vfs,
2310 2314 indexfile=indexname,
2311 datafile=dataname)
2315 datafile=dataname, **revlogkwargs)
2312 2316 if dest._inline:
2313 2317 raise error.Abort('not supporting inline revlog (yet)')
2314 2318 # make sure internals are initialized
2315 2319 dest.revision(len(dest) - 1)
2316 2320 yield dest
2317 2321 del dest, vfs
2318 2322 finally:
2319 2323 shutil.rmtree(tmpdir, True)
2320 2324
2321 2325 @command(b'perfrevlogchunks', revlogopts + formatteropts +
2322 2326 [(b'e', b'engines', b'', b'compression engines to use'),
2323 2327 (b's', b'startrev', 0, b'revision to start at')],
2324 2328 b'-c|-m|FILE')
2325 2329 def perfrevlogchunks(ui, repo, file_=None, engines=None, startrev=0, **opts):
2326 2330 """Benchmark operations on revlog chunks.
2327 2331
2328 2332 Logically, each revlog is a collection of fulltext revisions. However,
2329 2333 stored within each revlog are "chunks" of possibly compressed data. This
2330 2334 data needs to be read and decompressed or compressed and written.
2331 2335
2332 2336 This command measures the time it takes to read+decompress and recompress
2333 2337 chunks in a revlog. It effectively isolates I/O and compression performance.
2334 2338 For measurements of higher-level operations like resolving revisions,
2335 2339 see ``perfrevlogrevisions`` and ``perfrevlogrevision``.
2336 2340 """
2337 2341 opts = _byteskwargs(opts)
2338 2342
2339 2343 rl = cmdutil.openrevlog(repo, b'perfrevlogchunks', file_, opts)
2340 2344
2341 2345 # _chunkraw was renamed to _getsegmentforrevs.
2342 2346 try:
2343 2347 segmentforrevs = rl._getsegmentforrevs
2344 2348 except AttributeError:
2345 2349 segmentforrevs = rl._chunkraw
2346 2350
2347 2351 # Verify engines argument.
2348 2352 if engines:
2349 2353 engines = set(e.strip() for e in engines.split(b','))
2350 2354 for engine in engines:
2351 2355 try:
2352 2356 util.compressionengines[engine]
2353 2357 except KeyError:
2354 2358 raise error.Abort(b'unknown compression engine: %s' % engine)
2355 2359 else:
2356 2360 engines = []
2357 2361 for e in util.compengines:
2358 2362 engine = util.compengines[e]
2359 2363 try:
2360 2364 if engine.available():
2361 2365 engine.revlogcompressor().compress(b'dummy')
2362 2366 engines.append(e)
2363 2367 except NotImplementedError:
2364 2368 pass
2365 2369
2366 2370 revs = list(rl.revs(startrev, len(rl) - 1))
2367 2371
2368 2372 def rlfh(rl):
2369 2373 if rl._inline:
2370 2374 return getsvfs(repo)(rl.indexfile)
2371 2375 else:
2372 2376 return getsvfs(repo)(rl.datafile)
2373 2377
2374 2378 def doread():
2375 2379 rl.clearcaches()
2376 2380 for rev in revs:
2377 2381 segmentforrevs(rev, rev)
2378 2382
2379 2383 def doreadcachedfh():
2380 2384 rl.clearcaches()
2381 2385 fh = rlfh(rl)
2382 2386 for rev in revs:
2383 2387 segmentforrevs(rev, rev, df=fh)
2384 2388
2385 2389 def doreadbatch():
2386 2390 rl.clearcaches()
2387 2391 segmentforrevs(revs[0], revs[-1])
2388 2392
2389 2393 def doreadbatchcachedfh():
2390 2394 rl.clearcaches()
2391 2395 fh = rlfh(rl)
2392 2396 segmentforrevs(revs[0], revs[-1], df=fh)
2393 2397
2394 2398 def dochunk():
2395 2399 rl.clearcaches()
2396 2400 fh = rlfh(rl)
2397 2401 for rev in revs:
2398 2402 rl._chunk(rev, df=fh)
2399 2403
2400 2404 chunks = [None]
2401 2405
2402 2406 def dochunkbatch():
2403 2407 rl.clearcaches()
2404 2408 fh = rlfh(rl)
2405 2409 # Save chunks as a side-effect.
2406 2410 chunks[0] = rl._chunks(revs, df=fh)
2407 2411
2408 2412 def docompress(compressor):
2409 2413 rl.clearcaches()
2410 2414
2411 2415 try:
2412 2416 # Swap in the requested compression engine.
2413 2417 oldcompressor = rl._compressor
2414 2418 rl._compressor = compressor
2415 2419 for chunk in chunks[0]:
2416 2420 rl.compress(chunk)
2417 2421 finally:
2418 2422 rl._compressor = oldcompressor
2419 2423
2420 2424 benches = [
2421 2425 (lambda: doread(), b'read'),
2422 2426 (lambda: doreadcachedfh(), b'read w/ reused fd'),
2423 2427 (lambda: doreadbatch(), b'read batch'),
2424 2428 (lambda: doreadbatchcachedfh(), b'read batch w/ reused fd'),
2425 2429 (lambda: dochunk(), b'chunk'),
2426 2430 (lambda: dochunkbatch(), b'chunk batch'),
2427 2431 ]
2428 2432
2429 2433 for engine in sorted(engines):
2430 2434 compressor = util.compengines[engine].revlogcompressor()
2431 2435 benches.append((functools.partial(docompress, compressor),
2432 2436 b'compress w/ %s' % engine))
2433 2437
2434 2438 for fn, title in benches:
2435 2439 timer, fm = gettimer(ui, opts)
2436 2440 timer(fn, title=title)
2437 2441 fm.end()
2438 2442
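A short sketch of the engine plumbing the compression benchmarks rely on; the payload is an assumption, and the engine-level compressor may return None when compression would not shrink the data:

    from mercurial import util

    compressor = util.compengines[b'zlib'].revlogcompressor()
    data = compressor.compress(b'some revlog chunk ' * 64)  # bytes or None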
2439 2443 @command(b'perfrevlogrevision', revlogopts + formatteropts +
2440 2444 [(b'', b'cache', False, b'use caches instead of clearing')],
2441 2445 b'-c|-m|FILE REV')
2442 2446 def perfrevlogrevision(ui, repo, file_, rev=None, cache=None, **opts):
2443 2447 """Benchmark obtaining a revlog revision.
2444 2448
2445 2449 Obtaining a revlog revision consists of roughly the following steps:
2446 2450
2447 2451 1. Compute the delta chain
2448 2452 2. Slice the delta chain if applicable
2449 2453 3. Obtain the raw chunks for that delta chain
2450 2454 4. Decompress each raw chunk
2451 2455 5. Apply binary patches to obtain fulltext
2452 2456 6. Verify hash of fulltext
2453 2457
2454 2458 This command measures the time spent in each of these phases.
2455 2459 """
2456 2460 opts = _byteskwargs(opts)
2457 2461
2458 2462 if opts.get(b'changelog') or opts.get(b'manifest'):
2459 2463 file_, rev = None, file_
2460 2464 elif rev is None:
2461 2465 raise error.CommandError(b'perfrevlogrevision', b'invalid arguments')
2462 2466
2463 2467 r = cmdutil.openrevlog(repo, b'perfrevlogrevision', file_, opts)
2464 2468
2465 2469 # _chunkraw was renamed to _getsegmentforrevs.
2466 2470 try:
2467 2471 segmentforrevs = r._getsegmentforrevs
2468 2472 except AttributeError:
2469 2473 segmentforrevs = r._chunkraw
2470 2474
2471 2475 node = r.lookup(rev)
2472 2476 rev = r.rev(node)
2473 2477
2474 2478 def getrawchunks(data, chain):
2475 2479 start = r.start
2476 2480 length = r.length
2477 2481 inline = r._inline
2478 2482 iosize = r._io.size
2479 2483 buffer = util.buffer
2480 2484
2481 2485 chunks = []
2482 2486 ladd = chunks.append
2483 2487 for idx, item in enumerate(chain):
2484 2488 offset = start(item[0])
2485 2489 bits = data[idx]
2486 2490 for rev in item:
2487 2491 chunkstart = start(rev)
2488 2492 if inline:
2489 2493 chunkstart += (rev + 1) * iosize
2490 2494 chunklength = length(rev)
2491 2495 ladd(buffer(bits, chunkstart - offset, chunklength))
2492 2496
2493 2497 return chunks
2494 2498
2495 2499 def dodeltachain(rev):
2496 2500 if not cache:
2497 2501 r.clearcaches()
2498 2502 r._deltachain(rev)
2499 2503
2500 2504 def doread(chain):
2501 2505 if not cache:
2502 2506 r.clearcaches()
2503 2507 for item in slicedchain:
2504 2508 segmentforrevs(item[0], item[-1])
2505 2509
2506 2510 def doslice(r, chain, size):
2507 2511 for s in slicechunk(r, chain, targetsize=size):
2508 2512 pass
2509 2513
2510 2514 def dorawchunks(data, chain):
2511 2515 if not cache:
2512 2516 r.clearcaches()
2513 2517 getrawchunks(data, chain)
2514 2518
2515 2519 def dodecompress(chunks):
2516 2520 decomp = r.decompress
2517 2521 for chunk in chunks:
2518 2522 decomp(chunk)
2519 2523
2520 2524 def dopatch(text, bins):
2521 2525 if not cache:
2522 2526 r.clearcaches()
2523 2527 mdiff.patches(text, bins)
2524 2528
2525 2529 def dohash(text):
2526 2530 if not cache:
2527 2531 r.clearcaches()
2528 2532 r.checkhash(text, node, rev=rev)
2529 2533
2530 2534 def dorevision():
2531 2535 if not cache:
2532 2536 r.clearcaches()
2533 2537 r.revision(node)
2534 2538
2535 2539 try:
2536 2540 from mercurial.revlogutils.deltas import slicechunk
2537 2541 except ImportError:
2538 2542 slicechunk = getattr(revlog, '_slicechunk', None)
2539 2543
2540 2544 size = r.length(rev)
2541 2545 chain = r._deltachain(rev)[0]
2542 2546 if not getattr(r, '_withsparseread', False):
2543 2547 slicedchain = (chain,)
2544 2548 else:
2545 2549 slicedchain = tuple(slicechunk(r, chain, targetsize=size))
2546 2550 data = [segmentforrevs(seg[0], seg[-1])[1] for seg in slicedchain]
2547 2551 rawchunks = getrawchunks(data, slicedchain)
2548 2552 bins = r._chunks(chain)
2549 2553 text = bytes(bins[0])
2550 2554 bins = bins[1:]
2551 2555 text = mdiff.patches(text, bins)
2552 2556
2553 2557 benches = [
2554 2558 (lambda: dorevision(), b'full'),
2555 2559 (lambda: dodeltachain(rev), b'deltachain'),
2556 2560 (lambda: doread(chain), b'read'),
2557 2561 ]
2558 2562
2559 2563 if getattr(r, '_withsparseread', False):
2560 2564 slicing = (lambda: doslice(r, chain, size), b'slice-sparse-chain')
2561 2565 benches.append(slicing)
2562 2566
2563 2567 benches.extend([
2564 2568 (lambda: dorawchunks(data, slicedchain), b'rawchunks'),
2565 2569 (lambda: dodecompress(rawchunks), b'decompress'),
2566 2570 (lambda: dopatch(text, bins), b'patch'),
2567 2571 (lambda: dohash(text), b'hash'),
2568 2572 ])
2569 2573
2570 2574 timer, fm = gettimer(ui, opts)
2571 2575 for fn, title in benches:
2572 2576 timer(fn, title=title)
2573 2577 fm.end()
2574 2578
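Those phases compose into the fulltext reconstruction that the 'full' benchmark times; a condensed sketch mirroring the setup code above:

    chain = r._deltachain(rev)[0]                   # 1. compute the delta chain
    bins = r._chunks(chain)                         # 3+4. read and decompress
    text = mdiff.patches(bytes(bins[0]), bins[1:])  # 5. apply binary patches
    r.checkhash(text, node, rev=rev)                # 6. verify the hash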
2575 2579 @command(b'perfrevset',
2576 2580 [(b'C', b'clear', False, b'clear volatile cache between each call.'),
2577 2581 (b'', b'contexts', False, b'obtain changectx for each revision')]
2578 2582 + formatteropts, b"REVSET")
2579 2583 def perfrevset(ui, repo, expr, clear=False, contexts=False, **opts):
2580 2584 """benchmark the execution time of a revset
2581 2585
2582 2586 Use the --clear option if you need to evaluate the impact of building the
2583 2587 volatile revision set caches on revset execution. The volatile caches hold
2584 2588 filtering- and obsolescence-related data."""
2585 2589 opts = _byteskwargs(opts)
2586 2590
2587 2591 timer, fm = gettimer(ui, opts)
2588 2592 def d():
2589 2593 if clear:
2590 2594 repo.invalidatevolatilesets()
2591 2595 if contexts:
2592 2596 for ctx in repo.set(expr): pass
2593 2597 else:
2594 2598 for r in repo.revs(expr): pass
2595 2599 timer(d)
2596 2600 fm.end()
2597 2601
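Both modes reduce to the standard revset entry points; a sketch with an illustrative expression:

    for r in repo.revs(b'heads(all())'):    # integer revisions
        pass
    for ctx in repo.set(b'heads(all())'):   # full changectx objects
        pass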
2598 2602 @command(b'perfvolatilesets',
2599 2603 [(b'', b'clear-obsstore', False, b'drop obsstore between each call.'),
2600 2604 ] + formatteropts)
2601 2605 def perfvolatilesets(ui, repo, *names, **opts):
2602 2606 """benchmark the computation of various volatile sets
2603 2607
2604 2608 Volatile sets compute elements related to filtering and obsolescence."""
2605 2609 opts = _byteskwargs(opts)
2606 2610 timer, fm = gettimer(ui, opts)
2607 2611 repo = repo.unfiltered()
2608 2612
2609 2613 def getobs(name):
2610 2614 def d():
2611 2615 repo.invalidatevolatilesets()
2612 2616 if opts[b'clear_obsstore']:
2613 2617 clearfilecache(repo, b'obsstore')
2614 2618 obsolete.getrevs(repo, name)
2615 2619 return d
2616 2620
2617 2621 allobs = sorted(obsolete.cachefuncs)
2618 2622 if names:
2619 2623 allobs = [n for n in allobs if n in names]
2620 2624
2621 2625 for name in allobs:
2622 2626 timer(getobs(name), title=name)
2623 2627
2624 2628 def getfiltered(name):
2625 2629 def d():
2626 2630 repo.invalidatevolatilesets()
2627 2631 if opts[b'clear_obsstore']:
2628 2632 clearfilecache(repo, b'obsstore')
2629 2633 repoview.filterrevs(repo, name)
2630 2634 return d
2631 2635
2632 2636 allfilter = sorted(repoview.filtertable)
2633 2637 if names:
2634 2638 allfilter = [n for n in allfilter if n in names]
2635 2639
2636 2640 for name in allfilter:
2637 2641 timer(getfiltered(name), title=name)
2638 2642 fm.end()
2639 2643
2640 2644 @command(b'perfbranchmap',
2641 2645 [(b'f', b'full', False,
2642 2646 b'Includes build time of subset'),
2643 2647 (b'', b'clear-revbranch', False,
2644 2648 b'purge the revbranch cache between computation'),
2645 2649 ] + formatteropts)
2646 2650 def perfbranchmap(ui, repo, *filternames, **opts):
2647 2651 """benchmark the update of a branchmap
2648 2652
2649 2653 This benchmarks the full repo.branchmap() call with read and write disabled
2650 2654 """
2651 2655 opts = _byteskwargs(opts)
2652 2656 full = opts.get(b"full", False)
2653 2657 clear_revbranch = opts.get(b"clear_revbranch", False)
2654 2658 timer, fm = gettimer(ui, opts)
2655 2659 def getbranchmap(filtername):
2656 2660 """generate a benchmark function for the filtername"""
2657 2661 if filtername is None:
2658 2662 view = repo
2659 2663 else:
2660 2664 view = repo.filtered(filtername)
2661 2665 if util.safehasattr(view._branchcaches, '_per_filter'):
2662 2666 filtered = view._branchcaches._per_filter
2663 2667 else:
2664 2668 # older versions
2665 2669 filtered = view._branchcaches
2666 2670 def d():
2667 2671 if clear_revbranch:
2668 2672 repo.revbranchcache()._clear()
2669 2673 if full:
2670 2674 view._branchcaches.clear()
2671 2675 else:
2672 2676 filtered.pop(filtername, None)
2673 2677 view.branchmap()
2674 2678 return d
2675 2679 # add filter in smaller subset to bigger subset
2676 2680 possiblefilters = set(repoview.filtertable)
2677 2681 if filternames:
2678 2682 possiblefilters &= set(filternames)
2679 2683 subsettable = getbranchmapsubsettable()
2680 2684 allfilters = []
2681 2685 while possiblefilters:
2682 2686 for name in possiblefilters:
2683 2687 subset = subsettable.get(name)
2684 2688 if subset not in possiblefilters:
2685 2689 break
2686 2690 else:
2687 2691 assert False, b'subset cycle %s!' % possiblefilters
2688 2692 allfilters.append(name)
2689 2693 possiblefilters.remove(name)
2690 2694
2691 2695 # warm the cache
2692 2696 if not full:
2693 2697 for name in allfilters:
2694 2698 repo.filtered(name).branchmap()
2695 2699 if not filternames or b'unfiltered' in filternames:
2696 2700 # add unfiltered
2697 2701 allfilters.append(None)
2698 2702
2699 2703 if util.safehasattr(branchmap.branchcache, 'fromfile'):
2700 2704 branchcacheread = safeattrsetter(branchmap.branchcache, b'fromfile')
2701 2705 branchcacheread.set(classmethod(lambda *args: None))
2702 2706 else:
2703 2707 # older versions
2704 2708 branchcacheread = safeattrsetter(branchmap, b'read')
2705 2709 branchcacheread.set(lambda *args: None)
2706 2710 branchcachewrite = safeattrsetter(branchmap.branchcache, b'write')
2707 2711 branchcachewrite.set(lambda *args: None)
2708 2712 try:
2709 2713 for name in allfilters:
2710 2714 printname = name
2711 2715 if name is None:
2712 2716 printname = b'unfiltered'
2713 2717 timer(getbranchmap(name), title=str(printname))
2714 2718 finally:
2715 2719 branchcacheread.restore()
2716 2720 branchcachewrite.restore()
2717 2721 fm.end()
2718 2722
2719 2723 @command(b'perfbranchmapupdate', [
2720 2724 (b'', b'base', [], b'subset of revision to start from'),
2721 2725 (b'', b'target', [], b'subset of revision to end with'),
2722 2726 (b'', b'clear-caches', False, b'clear cache between each runs')
2723 2727 ] + formatteropts)
2724 2728 def perfbranchmapupdate(ui, repo, base=(), target=(), **opts):
2725 2729 """benchmark branchmap update from <base> revs to <target> revs
2726 2730
2727 2731 If `--clear-caches` is passed, the following items will be reset before
2728 2732 each update:
2729 2733 * the changelog instance and associated indexes
2730 2734 * the rev-branch-cache instance
2731 2735
2732 2736 Examples:
2733 2737
2734 2738 # update for the last revision
2735 2739 $ hg perfbranchmapupdate --base 'not tip' --target 'tip'
2736 2740
2737 2741 # update for a change coming with a new branch
2738 2742 $ hg perfbranchmapupdate --base 'stable' --target 'default'
2739 2743 """
2740 2744 from mercurial import branchmap
2741 2745 from mercurial import repoview
2742 2746 opts = _byteskwargs(opts)
2743 2747 timer, fm = gettimer(ui, opts)
2744 2748 clearcaches = opts[b'clear_caches']
2745 2749 unfi = repo.unfiltered()
2746 2750 x = [None] # used to pass data between closure
2747 2751
2748 2752 # we use a `list` here to avoid possible side effects from smartset
2749 2753 baserevs = list(scmutil.revrange(repo, base))
2750 2754 targetrevs = list(scmutil.revrange(repo, target))
2751 2755 if not baserevs:
2752 2756 raise error.Abort(b'no revisions selected for --base')
2753 2757 if not targetrevs:
2754 2758 raise error.Abort(b'no revisions selected for --target')
2755 2759
2756 2760 # make sure the target branchmap also contains the one in the base
2757 2761 targetrevs = list(set(baserevs) | set(targetrevs))
2758 2762 targetrevs.sort()
2759 2763
2760 2764 cl = repo.changelog
2761 2765 allbaserevs = list(cl.ancestors(baserevs, inclusive=True))
2762 2766 allbaserevs.sort()
2763 2767 alltargetrevs = frozenset(cl.ancestors(targetrevs, inclusive=True))
2764 2768
2765 2769 newrevs = list(alltargetrevs.difference(allbaserevs))
2766 2770 newrevs.sort()
2767 2771
2768 2772 allrevs = frozenset(unfi.changelog.revs())
2769 2773 basefilterrevs = frozenset(allrevs.difference(allbaserevs))
2770 2774 targetfilterrevs = frozenset(allrevs.difference(alltargetrevs))
2771 2775
2772 2776 def basefilter(repo, visibilityexceptions=None):
2773 2777 return basefilterrevs
2774 2778
2775 2779 def targetfilter(repo, visibilityexceptions=None):
2776 2780 return targetfilterrevs
2777 2781
2778 2782 msg = b'benchmark of branchmap with %d revisions with %d new ones\n'
2779 2783 ui.status(msg % (len(allbaserevs), len(newrevs)))
2780 2784 if targetfilterrevs:
2781 2785 msg = b'(%d revisions still filtered)\n'
2782 2786 ui.status(msg % len(targetfilterrevs))
2783 2787
2784 2788 try:
2785 2789 repoview.filtertable[b'__perf_branchmap_update_base'] = basefilter
2786 2790 repoview.filtertable[b'__perf_branchmap_update_target'] = targetfilter
2787 2791
2788 2792 baserepo = repo.filtered(b'__perf_branchmap_update_base')
2789 2793 targetrepo = repo.filtered(b'__perf_branchmap_update_target')
2790 2794
2791 2795 # try to find an existing branchmap to reuse
2792 2796 subsettable = getbranchmapsubsettable()
2793 2797 candidatefilter = subsettable.get(None)
2794 2798 while candidatefilter is not None:
2795 2799 candidatebm = repo.filtered(candidatefilter).branchmap()
2796 2800 if candidatebm.validfor(baserepo):
2797 2801 filtered = repoview.filterrevs(repo, candidatefilter)
2798 2802 missing = [r for r in allbaserevs if r in filtered]
2799 2803 base = candidatebm.copy()
2800 2804 base.update(baserepo, missing)
2801 2805 break
2802 2806 candidatefilter = subsettable.get(candidatefilter)
2803 2807 else:
2804 2808 # no suitable subset was found
2805 2809 base = branchmap.branchcache()
2806 2810 base.update(baserepo, allbaserevs)
2807 2811
2808 2812 def setup():
2809 2813 x[0] = base.copy()
2810 2814 if clearcaches:
2811 2815 unfi._revbranchcache = None
2812 2816 clearchangelog(repo)
2813 2817
2814 2818 def bench():
2815 2819 x[0].update(targetrepo, newrevs)
2816 2820
2817 2821 timer(bench, setup=setup)
2818 2822 fm.end()
2819 2823 finally:
2820 2824 repoview.filtertable.pop(b'__perf_branchmap_update_base', None)
2821 2825 repoview.filtertable.pop(b'__perf_branchmap_update_target', None)
2822 2826
2823 2827 @command(b'perfbranchmapload', [
2824 2828 (b'f', b'filter', b'', b'Specify repoview filter'),
2825 2829 (b'', b'list', False, b'List branchmap filter caches'),
2826 2830 (b'', b'clear-revlogs', False, b'refresh changelog and manifest'),
2827 2831
2828 2832 ] + formatteropts)
2829 2833 def perfbranchmapload(ui, repo, filter=b'', list=False, **opts):
2830 2834 """benchmark reading the branchmap"""
2831 2835 opts = _byteskwargs(opts)
2832 2836 clearrevlogs = opts[b'clear_revlogs']
2833 2837
2834 2838 if list:
2835 2839 for name, kind, st in repo.cachevfs.readdir(stat=True):
2836 2840 if name.startswith(b'branch2'):
2837 2841 filtername = name.partition(b'-')[2] or b'unfiltered'
2838 2842 ui.status(b'%s - %s\n'
2839 2843 % (filtername, util.bytecount(st.st_size)))
2840 2844 return
2841 2845 if not filter:
2842 2846 filter = None
2843 2847 subsettable = getbranchmapsubsettable()
2844 2848 if filter is None:
2845 2849 repo = repo.unfiltered()
2846 2850 else:
2847 2851 repo = repoview.repoview(repo, filter)
2848 2852
2849 2853 repo.branchmap() # make sure we have a relevant, up-to-date branchmap
2850 2854
2851 2855 try:
2852 2856 fromfile = branchmap.branchcache.fromfile
2853 2857 except AttributeError:
2854 2858 # older versions
2855 2859 fromfile = branchmap.read
2856 2860
2857 2861 currentfilter = filter
2858 2862 # try once without the timer; the filter may not be cached
2859 2863 while fromfile(repo) is None:
2860 2864 currentfilter = subsettable.get(currentfilter)
2861 2865 if currentfilter is None:
2862 2866 raise error.Abort(b'No branchmap cached for %s repo'
2863 2867 % (filter or b'unfiltered'))
2864 2868 repo = repo.filtered(currentfilter)
2865 2869 timer, fm = gettimer(ui, opts)
2866 2870 def setup():
2867 2871 if clearrevlogs:
2868 2872 clearchangelog(repo)
2869 2873 def bench():
2870 2874 fromfile(repo)
2871 2875 timer(bench, setup=setup)
2872 2876 fm.end()
2873 2877
2874 2878 @command(b'perfloadmarkers')
2875 2879 def perfloadmarkers(ui, repo):
2876 2880 """benchmark the time to parse the on-disk markers for a repo
2877 2881
2878 2882 Result is the number of markers in the repo."""
2879 2883 timer, fm = gettimer(ui)
2880 2884 svfs = getsvfs(repo)
2881 2885 timer(lambda: len(obsolete.obsstore(svfs)))
2882 2886 fm.end()
2883 2887
2884 2888 @command(b'perflrucachedict', formatteropts +
2885 2889 [(b'', b'costlimit', 0, b'maximum total cost of items in cache'),
2886 2890 (b'', b'mincost', 0, b'smallest cost of items in cache'),
2887 2891 (b'', b'maxcost', 100, b'maximum cost of items in cache'),
2888 2892 (b'', b'size', 4, b'size of cache'),
2889 2893 (b'', b'gets', 10000, b'number of key lookups'),
2890 2894 (b'', b'sets', 10000, b'number of key sets'),
2891 2895 (b'', b'mixed', 10000, b'number of mixed mode operations'),
2892 2896 (b'', b'mixedgetfreq', 50, b'frequency of get vs set ops in mixed mode')],
2893 2897 norepo=True)
2894 2898 def perflrucache(ui, mincost=0, maxcost=100, costlimit=0, size=4,
2895 2899 gets=10000, sets=10000, mixed=10000, mixedgetfreq=50, **opts):
2896 2900 opts = _byteskwargs(opts)
2897 2901
2898 2902 def doinit():
2899 2903 for i in _xrange(10000):
2900 2904 util.lrucachedict(size)
2901 2905
2902 2906 costrange = list(range(mincost, maxcost + 1))
2903 2907
2904 2908 values = []
2905 2909 for i in _xrange(size):
2906 2910 values.append(random.randint(0, _maxint))
2907 2911
2908 2912 # Get mode fills the cache and tests raw lookup performance with no
2909 2913 # eviction.
2910 2914 getseq = []
2911 2915 for i in _xrange(gets):
2912 2916 getseq.append(random.choice(values))
2913 2917
2914 2918 def dogets():
2915 2919 d = util.lrucachedict(size)
2916 2920 for v in values:
2917 2921 d[v] = v
2918 2922 for key in getseq:
2919 2923 value = d[key]
2920 2924 value # silence pyflakes warning
2921 2925
2922 2926 def dogetscost():
2923 2927 d = util.lrucachedict(size, maxcost=costlimit)
2924 2928 for i, v in enumerate(values):
2925 2929 d.insert(v, v, cost=costs[i])
2926 2930 for key in getseq:
2927 2931 try:
2928 2932 value = d[key]
2929 2933 value # silence pyflakes warning
2930 2934 except KeyError:
2931 2935 pass
2932 2936
2933 2937 # Set mode tests insertion speed with cache eviction.
2934 2938 setseq = []
2935 2939 costs = []
2936 2940 for i in _xrange(sets):
2937 2941 setseq.append(random.randint(0, _maxint))
2938 2942 costs.append(random.choice(costrange))
2939 2943
2940 2944 def doinserts():
2941 2945 d = util.lrucachedict(size)
2942 2946 for v in setseq:
2943 2947 d.insert(v, v)
2944 2948
2945 2949 def doinsertscost():
2946 2950 d = util.lrucachedict(size, maxcost=costlimit)
2947 2951 for i, v in enumerate(setseq):
2948 2952 d.insert(v, v, cost=costs[i])
2949 2953
2950 2954 def dosets():
2951 2955 d = util.lrucachedict(size)
2952 2956 for v in setseq:
2953 2957 d[v] = v
2954 2958
2955 2959 # Mixed mode randomly performs gets and sets with eviction.
2956 2960 mixedops = []
2957 2961 for i in _xrange(mixed):
2958 2962 r = random.randint(0, 100)
2959 2963 if r < mixedgetfreq:
2960 2964 op = 0
2961 2965 else:
2962 2966 op = 1
2963 2967
2964 2968 mixedops.append((op,
2965 2969 random.randint(0, size * 2),
2966 2970 random.choice(costrange)))
2967 2971
2968 2972 def domixed():
2969 2973 d = util.lrucachedict(size)
2970 2974
2971 2975 for op, v, cost in mixedops:
2972 2976 if op == 0:
2973 2977 try:
2974 2978 d[v]
2975 2979 except KeyError:
2976 2980 pass
2977 2981 else:
2978 2982 d[v] = v
2979 2983
2980 2984 def domixedcost():
2981 2985 d = util.lrucachedict(size, maxcost=costlimit)
2982 2986
2983 2987 for op, v, cost in mixedops:
2984 2988 if op == 0:
2985 2989 try:
2986 2990 d[v]
2987 2991 except KeyError:
2988 2992 pass
2989 2993 else:
2990 2994 d.insert(v, v, cost=cost)
2991 2995
2992 2996 benches = [
2993 2997 (doinit, b'init'),
2994 2998 ]
2995 2999
2996 3000 if costlimit:
2997 3001 benches.extend([
2998 3002 (dogetscost, b'gets w/ cost limit'),
2999 3003 (doinsertscost, b'inserts w/ cost limit'),
3000 3004 (domixedcost, b'mixed w/ cost limit'),
3001 3005 ])
3002 3006 else:
3003 3007 benches.extend([
3004 3008 (dogets, b'gets'),
3005 3009 (doinserts, b'inserts'),
3006 3010 (dosets, b'sets'),
3007 3011 (domixed, b'mixed')
3008 3012 ])
3009 3013
3010 3014 for fn, title in benches:
3011 3015 timer, fm = gettimer(ui, opts)
3012 3016 timer(fn, title=title)
3013 3017 fm.end()
3014 3018
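For orientation, the cache under test behaves like a dict with a bounded number of entries and, optionally, a bounded total cost; a short sketch:

    from mercurial import util

    d = util.lrucachedict(4)       # keeps at most four entries
    for i in range(6):
        d[i] = i                   # 0 and 1 get evicted, oldest first
    c = util.lrucachedict(4, maxcost=100)
    c.insert(b'k', b'v', cost=40)  # eviction can also trigger on total cost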
3015 3019 @command(b'perfwrite', formatteropts)
3016 3020 def perfwrite(ui, repo, **opts):
3017 3021 """microbenchmark ui.write
3018 3022 """
3019 3023 opts = _byteskwargs(opts)
3020 3024
3021 3025 timer, fm = gettimer(ui, opts)
3022 3026 def write():
3023 3027 for i in range(100000):
3024 3028 ui.write((b'Testing write performance\n'))
3025 3029 timer(write)
3026 3030 fm.end()
3027 3031
3028 3032 def uisetup(ui):
3029 3033 if (util.safehasattr(cmdutil, b'openrevlog') and
3030 3034 not util.safehasattr(commands, b'debugrevlogopts')):
3031 3035 # for "historical portability":
3032 3036 # In this case, Mercurial should be 1.9 (or a79fea6b3e77) -
3033 3037 # 3.7 (or 5606f7d0d063). Therefore, '--dir' option for
3034 3038 # openrevlog() should cause failure, because it has been
3035 3039 # available since 3.5 (or 49c583ca48c4).
3036 3040 def openrevlog(orig, repo, cmd, file_, opts):
3037 3041 if opts.get(b'dir') and not util.safehasattr(repo, b'dirlog'):
3038 3042 raise error.Abort(b"This version doesn't support --dir option",
3039 3043 hint=b"use 3.5 or later")
3040 3044 return orig(repo, cmd, file_, opts)
3041 3045 extensions.wrapfunction(cmdutil, b'openrevlog', openrevlog)
3042 3046
3043 3047 @command(b'perfprogress', formatteropts + [
3044 3048 (b'', b'topic', b'topic', b'topic for progress messages'),
3045 3049 (b'c', b'total', 1000000, b'total value we are progressing to'),
3046 3050 ], norepo=True)
3047 3051 def perfprogress(ui, topic=None, total=None, **opts):
3048 3052 """printing of progress bars"""
3049 3053 opts = _byteskwargs(opts)
3050 3054
3051 3055 timer, fm = gettimer(ui, opts)
3052 3056
3053 3057 def doprogress():
3054 3058 with ui.makeprogress(topic, total=total) as progress:
3055 3059 for i in pycompat.xrange(total):
3056 3060 progress.increment()
3057 3061
3058 3062 timer(doprogress)
3059 3063 fm.end()
@@ -1,2095 +1,2100 b''
1 1 # manifest.py - manifest revision class for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import heapq
11 11 import itertools
12 12 import struct
13 13 import weakref
14 14
15 15 from .i18n import _
16 16 from .node import (
17 17 bin,
18 18 hex,
19 19 nullid,
20 20 nullrev,
21 21 )
22 22 from . import (
23 23 error,
24 24 mdiff,
25 25 policy,
26 26 pycompat,
27 27 repository,
28 28 revlog,
29 29 util,
30 30 )
31 31 from .utils import (
32 32 interfaceutil,
33 33 )
34 34
35 35 parsers = policy.importmod(r'parsers')
36 36 propertycache = util.propertycache
37 37
38 38 # Allow tests to more easily test the alternate path in manifestdict.fastdelta()
39 39 FASTDELTA_TEXTDIFF_THRESHOLD = 1000
40 40
41 41 def _parse(data):
42 42 # This method does a little bit of excessive-looking
43 43 # precondition checking. This is so that the behavior of this
44 44 # class exactly matches its C counterpart to try and help
45 45 # prevent surprise breakage for anyone that develops against
46 46 # the pure version.
47 47 if data and data[-1:] != '\n':
48 48 raise ValueError('Manifest did not end in a newline.')
49 49 prev = None
50 50 for l in data.splitlines():
51 51 if prev is not None and prev > l:
52 52 raise ValueError('Manifest lines not in sorted order.')
53 53 prev = l
54 54 f, n = l.split('\0')
55 55 if len(n) > 40:
56 56 yield f, bin(n[:40]), n[40:]
57 57 else:
58 58 yield f, bin(n), ''
59 59
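Each manifest line encodes one file as ``<filename>\0<40 hex digits>[flag]\n``, where the optional trailing flag is a single character such as 'x' (executable) or 'l' (symlink). An illustrative sketch of feeding one line through the parser (assuming byte-string semantics for the module's literals, as under Python 2):

    line = b'foo.txt\x00' + b'1' * 40 + b'x\n'  # 'x' marks an executable file
    for f, n, fl in _parse(line):
        assert f == b'foo.txt' and fl == b'x'   # n is the 20-byte binary node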
60 60 def _text(it):
61 61 files = []
62 62 lines = []
63 63 for f, n, fl in it:
64 64 files.append(f)
65 65 # if this is changed to support newlines in filenames,
66 66 # be sure to check the templates/ dir again (especially *-raw.tmpl)
67 67 lines.append("%s\0%s%s\n" % (f, hex(n), fl))
68 68
69 69 _checkforbidden(files)
70 70 return ''.join(lines)
71 71
72 72 class lazymanifestiter(object):
73 73 def __init__(self, lm):
74 74 self.pos = 0
75 75 self.lm = lm
76 76
77 77 def __iter__(self):
78 78 return self
79 79
80 80 def next(self):
81 81 try:
82 82 data, pos = self.lm._get(self.pos)
83 83 except IndexError:
84 84 raise StopIteration
85 85 if pos == -1:
86 86 self.pos += 1
87 87 return data[0]
88 88 self.pos += 1
89 89 zeropos = data.find('\x00', pos)
90 90 return data[pos:zeropos]
91 91
92 92 __next__ = next
93 93
94 94 class lazymanifestiterentries(object):
95 95 def __init__(self, lm):
96 96 self.lm = lm
97 97 self.pos = 0
98 98
99 99 def __iter__(self):
100 100 return self
101 101
102 102 def next(self):
103 103 try:
104 104 data, pos = self.lm._get(self.pos)
105 105 except IndexError:
106 106 raise StopIteration
107 107 if pos == -1:
108 108 self.pos += 1
109 109 return data
110 110 zeropos = data.find('\x00', pos)
111 111 hashval = unhexlify(data, self.lm.extrainfo[self.pos],
112 112 zeropos + 1, 40)
113 113 flags = self.lm._getflags(data, self.pos, zeropos)
114 114 self.pos += 1
115 115 return (data[pos:zeropos], hashval, flags)
116 116
117 117 __next__ = next
118 118
119 119 def unhexlify(data, extra, pos, length):
120 120 s = bin(data[pos:pos + length])
121 121 if extra:
122 122 s += chr(extra & 0xff)
123 123 return s
124 124
125 125 def _cmp(a, b):
126 126 return (a > b) - (a < b)
127 127
128 128 class _lazymanifest(object):
129 129 """A pure python manifest backed by a byte string. It is supplimented with
130 130 internal lists as it is modified, until it is compacted back to a pure byte
131 131 string.
132 132
133 133 ``data`` is the initial manifest data.
134 134
135 135 ``positions`` is a list of offsets, one per manifest entry. Positive
136 136 values are offsets into ``data``, negative values are offsets into the
137 137 ``extradata`` list. When an entry is removed, its entry is dropped from
138 138 ``positions``. The values are encoded such that when walking the list and
139 139 indexing into ``data`` or ``extradata`` as appropriate, the entries are
140 140 sorted by filename.
141 141
142 142 ``extradata`` is a list of (key, hash, flags) for entries that were added or
143 143 modified since the manifest was created or compacted.
144 144 """
145 145 def __init__(self, data, positions=None, extrainfo=None, extradata=None,
146 146 hasremovals=False):
147 147 if positions is None:
148 148 self.positions = self.findlines(data)
149 149 self.extrainfo = [0] * len(self.positions)
150 150 self.data = data
151 151 self.extradata = []
152 152 self.hasremovals = False
153 153 else:
154 154 self.positions = positions[:]
155 155 self.extrainfo = extrainfo[:]
156 156 self.extradata = extradata[:]
157 157 self.data = data
158 158 self.hasremovals = hasremovals
159 159
160 160 def findlines(self, data):
161 161 if not data:
162 162 return []
163 163 pos = data.find("\n")
164 164 if pos == -1 or data[-1:] != '\n':
165 165 raise ValueError("Manifest did not end in a newline.")
166 166 positions = [0]
167 167 prev = data[:data.find('\x00')]
168 168 while pos < len(data) - 1 and pos != -1:
169 169 positions.append(pos + 1)
170 170 nexts = data[pos + 1:data.find('\x00', pos + 1)]
171 171 if nexts < prev:
172 172 raise ValueError("Manifest lines not in sorted order.")
173 173 prev = nexts
174 174 pos = data.find("\n", pos + 1)
175 175 return positions
176 176
177 177 def _get(self, index):
178 178 # get the position encoded in pos:
179 179 # positive number is an index in 'data'
180 180 # negative number is in extrapieces
181 181 pos = self.positions[index]
182 182 if pos >= 0:
183 183 return self.data, pos
184 184 return self.extradata[-pos - 1], -1
185 185
186 186 def _getkey(self, pos):
187 187 if pos >= 0:
188 188 return self.data[pos:self.data.find('\x00', pos + 1)]
189 189 return self.extradata[-pos - 1][0]
190 190
191 191 def bsearch(self, key):
192 192 first = 0
193 193 last = len(self.positions) - 1
194 194
195 195 while first <= last:
196 196 midpoint = (first + last)//2
197 197 nextpos = self.positions[midpoint]
198 198 candidate = self._getkey(nextpos)
199 199 r = _cmp(key, candidate)
200 200 if r == 0:
201 201 return midpoint
202 202 else:
203 203 if r < 0:
204 204 last = midpoint - 1
205 205 else:
206 206 first = midpoint + 1
207 207 return -1
208 208
209 209 def bsearch2(self, key):
210 210 # same as the above, but will always return the position
211 211 # done for performance reasons
212 212 first = 0
213 213 last = len(self.positions) - 1
214 214
215 215 while first <= last:
216 216 midpoint = (first + last)//2
217 217 nextpos = self.positions[midpoint]
218 218 candidate = self._getkey(nextpos)
219 219 r = _cmp(key, candidate)
220 220 if r == 0:
221 221 return (midpoint, True)
222 222 else:
223 223 if r < 0:
224 224 last = midpoint - 1
225 225 else:
226 226 first = midpoint + 1
227 227 return (first, False)
228 228
229 229 def __contains__(self, key):
230 230 return self.bsearch(key) != -1
231 231
232 232 def _getflags(self, data, needle, pos):
233 233 start = pos + 41
234 234 end = data.find("\n", start)
235 235 if end == -1:
236 236 end = len(data) - 1
237 237 if start == end:
238 238 return ''
239 239 return data[start:end]
240 240
241 241 def __getitem__(self, key):
242 242 if not isinstance(key, bytes):
243 243 raise TypeError("getitem: manifest keys must be a bytes.")
244 244 needle = self.bsearch(key)
245 245 if needle == -1:
246 246 raise KeyError
247 247 data, pos = self._get(needle)
248 248 if pos == -1:
249 249 return (data[1], data[2])
250 250 zeropos = data.find('\x00', pos)
251 251 assert 0 <= needle <= len(self.positions)
252 252 assert len(self.extrainfo) == len(self.positions)
253 253 hashval = unhexlify(data, self.extrainfo[needle], zeropos + 1, 40)
254 254 flags = self._getflags(data, needle, zeropos)
255 255 return (hashval, flags)
256 256
257 257 def __delitem__(self, key):
258 258 needle, found = self.bsearch2(key)
259 259 if not found:
260 260 raise KeyError
261 261 cur = self.positions[needle]
262 262 self.positions = self.positions[:needle] + self.positions[needle + 1:]
263 263 self.extrainfo = self.extrainfo[:needle] + self.extrainfo[needle + 1:]
264 264 if cur >= 0:
265 265 # This does NOT unsort the list as far as the search functions are
266 266 # concerned, as they only examine lines mapped by self.positions.
267 267 self.data = self.data[:cur] + '\x00' + self.data[cur + 1:]
268 268 self.hasremovals = True
269 269
270 270 def __setitem__(self, key, value):
271 271 if not isinstance(key, bytes):
272 272 raise TypeError("setitem: manifest keys must be a byte string.")
273 273 if not isinstance(value, tuple) or len(value) != 2:
274 274 raise TypeError("Manifest values must be a tuple of (node, flags).")
275 275 hashval = value[0]
276 276 if not isinstance(hashval, bytes) or not 20 <= len(hashval) <= 22:
277 277 raise TypeError("node must be a 20-byte byte string")
278 278 flags = value[1]
279 279 if len(hashval) == 22:
280 280 hashval = hashval[:-1]
281 281 if not isinstance(flags, bytes) or len(flags) > 1:
282 282 raise TypeError("flags must a 0 or 1 byte string, got %r", flags)
283 283 needle, found = self.bsearch2(key)
284 284 if found:
285 285 # put the item
286 286 pos = self.positions[needle]
287 287 if pos < 0:
288 288 self.extradata[-pos - 1] = (key, hashval, value[1])
289 289 else:
290 290 # don't bother patching 'data' in place; shadow the old entry via extradata
291 291 self.extradata.append((key, hashval, value[1]))
292 292 self.positions[needle] = -len(self.extradata)
293 293 else:
294 294 # not found, put it in with extra positions
295 295 self.extradata.append((key, hashval, value[1]))
296 296 self.positions = (self.positions[:needle] + [-len(self.extradata)]
297 297 + self.positions[needle:])
298 298 self.extrainfo = (self.extrainfo[:needle] + [0] +
299 299 self.extrainfo[needle:])
300 300
301 301 def copy(self):
302 302 # XXX call _compact like in C?
303 303 return _lazymanifest(self.data, self.positions, self.extrainfo,
304 304 self.extradata, self.hasremovals)
305 305
306 306 def _compact(self):
307 307 # hopefully not called TOO often
308 308 if len(self.extradata) == 0 and not self.hasremovals:
309 309 return
310 310 l = []
311 311 i = 0
312 312 offset = 0
313 313 self.extrainfo = [0] * len(self.positions)
314 314 while i < len(self.positions):
315 315 if self.positions[i] >= 0:
316 316 cur = self.positions[i]
317 317 last_cut = cur
318 318
319 319 # Collect all contiguous entries in the buffer at the current
320 320 # offset, breaking out only for added/modified items held in
321 321 # extradata, or a deleted line prior to the next position.
322 322 while True:
323 323 self.positions[i] = offset
324 324 i += 1
325 325 if i == len(self.positions) or self.positions[i] < 0:
326 326 break
327 327
328 328 # A removed file has no positions[] entry, but does have an
329 329 # overwritten first byte. Break out and find the end of the
330 330 # current good entry/entries if there is a removed file
331 331 # before the next position.
332 332 if (self.hasremovals
333 333 and self.data.find('\n\x00', cur,
334 334 self.positions[i]) != -1):
335 335 break
336 336
337 337 offset += self.positions[i] - cur
338 338 cur = self.positions[i]
339 339 end_cut = self.data.find('\n', cur)
340 340 if end_cut != -1:
341 341 end_cut += 1
342 342 offset += end_cut - cur
343 343 l.append(self.data[last_cut:end_cut])
344 344 else:
345 345 while i < len(self.positions) and self.positions[i] < 0:
346 346 cur = self.positions[i]
347 347 t = self.extradata[-cur - 1]
348 348 l.append(self._pack(t))
349 349 self.positions[i] = offset
350 350 if len(t[1]) > 20:
351 351 self.extrainfo[i] = ord(t[1][21])
352 352 offset += len(l[-1])
353 353 i += 1
354 354 self.data = ''.join(l)
355 355 self.hasremovals = False
356 356 self.extradata = []
357 357
358 358 def _pack(self, d):
359 359 return d[0] + '\x00' + hex(d[1][:20]) + d[2] + '\n'
360 360
361 361 def text(self):
362 362 self._compact()
363 363 return self.data
364 364
365 365 def diff(self, m2, clean=False):
366 366 '''Finds changes between the current manifest and m2.'''
367 367 # XXX think whether efficiency matters here
368 368 diff = {}
369 369
370 370 for fn, e1, flags in self.iterentries():
371 371 if fn not in m2:
372 372 diff[fn] = (e1, flags), (None, '')
373 373 else:
374 374 e2 = m2[fn]
375 375 if (e1, flags) != e2:
376 376 diff[fn] = (e1, flags), e2
377 377 elif clean:
378 378 diff[fn] = None
379 379
380 380 for fn, e2, flags in m2.iterentries():
381 381 if fn not in self:
382 382 diff[fn] = (None, ''), (e2, flags)
383 383
384 384 return diff
385 385
386 386 def iterentries(self):
387 387 return lazymanifestiterentries(self)
388 388
389 389 def iterkeys(self):
390 390 return lazymanifestiter(self)
391 391
392 392 def __iter__(self):
393 393 return lazymanifestiter(self)
394 394
395 395 def __len__(self):
396 396 return len(self.positions)
397 397
398 398 def filtercopy(self, filterfn):
399 399 # XXX should be optimized
400 400 c = _lazymanifest('')
401 401 for f, n, fl in self.iterentries():
402 402 if filterfn(f):
403 403 c[f] = n, fl
404 404 return c
405 405
406 406 try:
407 407 _lazymanifest = parsers.lazymanifest
408 408 except AttributeError:
409 409 pass
410 410
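Whether the pure Python class above or its C replacement is in use, the mapping behaves the same way. A small hedged round-trip sketch with made-up 20-byte nodes:

def _lazymanifestdemo():
    lm = _lazymanifest('')
    lm['foo'] = ('\x01' * 20, '')       # value is a (node, flags) tuple
    lm['bar/baz'] = ('\x02' * 20, 'x')
    assert 'foo' in lm and lm['bar/baz'][1] == 'x'
    # text() compacts the entries back into the sorted wire form:
    # 'bar/baz\x00<40-hex-node>x\nfoo\x00<40-hex-node>\n'
    return lm.text()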
411 411 @interfaceutil.implementer(repository.imanifestdict)
412 412 class manifestdict(object):
413 413 def __init__(self, data=''):
414 414 self._lm = _lazymanifest(data)
415 415
416 416 def __getitem__(self, key):
417 417 return self._lm[key][0]
418 418
419 419 def find(self, key):
420 420 return self._lm[key]
421 421
422 422 def __len__(self):
423 423 return len(self._lm)
424 424
425 425 def __nonzero__(self):
426 426 # nonzero is covered by the __len__ function, but implementing it here
427 427 # makes it easier for extensions to override.
428 428 return len(self._lm) != 0
429 429
430 430 __bool__ = __nonzero__
431 431
432 432 def __setitem__(self, key, node):
433 433 self._lm[key] = node, self.flags(key, '')
434 434
435 435 def __contains__(self, key):
436 436 if key is None:
437 437 return False
438 438 return key in self._lm
439 439
440 440 def __delitem__(self, key):
441 441 del self._lm[key]
442 442
443 443 def __iter__(self):
444 444 return self._lm.__iter__()
445 445
446 446 def iterkeys(self):
447 447 return self._lm.iterkeys()
448 448
449 449 def keys(self):
450 450 return list(self.iterkeys())
451 451
452 452 def filesnotin(self, m2, match=None):
453 453 '''Set of files in this manifest that are not in the other'''
454 454 if match:
455 455 m1 = self.matches(match)
456 456 m2 = m2.matches(match)
457 457 return m1.filesnotin(m2)
458 458 diff = self.diff(m2)
459 459 files = set(filepath
460 460 for filepath, hashflags in diff.iteritems()
461 461 if hashflags[1][0] is None)
462 462 return files
463 463
464 464 @propertycache
465 465 def _dirs(self):
466 466 return util.dirs(self)
467 467
468 468 def dirs(self):
469 469 return self._dirs
470 470
471 471 def hasdir(self, dir):
472 472 return dir in self._dirs
473 473
474 474 def _filesfastpath(self, match):
475 475 '''Checks whether we can correctly and quickly iterate over matcher
476 476 files instead of over manifest files.'''
477 477 files = match.files()
478 478 return (len(files) < 100 and (match.isexact() or
479 479 (match.prefix() and all(fn in self for fn in files))))
480 480
481 481 def walk(self, match):
482 482 '''Generates matching file names.
483 483
484 484 Equivalent to manifest.matches(match).iterkeys(), but without creating
485 485 an entirely new manifest.
486 486
487 487 It also reports nonexistent files by marking them bad with match.bad().
488 488 '''
489 489 if match.always():
490 490 for f in iter(self):
491 491 yield f
492 492 return
493 493
494 494 fset = set(match.files())
495 495
496 496 # avoid the entire walk if we're only looking for specific files
497 497 if self._filesfastpath(match):
498 498 for fn in sorted(fset):
499 499 yield fn
500 500 return
501 501
502 502 for fn in self:
503 503 if fn in fset:
504 504 # specified pattern is the exact name
505 505 fset.remove(fn)
506 506 if match(fn):
507 507 yield fn
508 508
509 509 # for dirstate.walk, files=[''] means "walk the whole tree".
510 510 # follow that here, too
511 511 fset.discard('')
512 512
513 513 for fn in sorted(fset):
514 514 if not self.hasdir(fn):
515 515 match.bad(fn, None)
516 516
517 517 def matches(self, match):
518 518 '''generate a new manifest filtered by the match argument'''
519 519 if match.always():
520 520 return self.copy()
521 521
522 522 if self._filesfastpath(match):
523 523 m = manifestdict()
524 524 lm = self._lm
525 525 for fn in match.files():
526 526 if fn in lm:
527 527 m._lm[fn] = lm[fn]
528 528 return m
529 529
530 530 m = manifestdict()
531 531 m._lm = self._lm.filtercopy(match)
532 532 return m
533 533
534 534 def diff(self, m2, match=None, clean=False):
535 535 '''Finds changes between the current manifest and m2.
536 536
537 537 Args:
538 538 m2: the manifest to which this manifest should be compared.
539 539 clean: if true, include files unchanged between these manifests
540 540 with a None value in the returned dictionary.
541 541
542 542 The result is returned as a dict with filename as key and
543 543 values of the form ((n1,fl1),(n2,fl2)), where n1/n2 is the
544 544 nodeid in the current/other manifest and fl1/fl2 is the flag
545 545 in the current/other manifest. Where the file does not exist,
546 546 the nodeid will be None and the flags will be the empty
547 547 string.
548 548 '''
549 549 if match:
550 550 m1 = self.matches(match)
551 551 m2 = m2.matches(match)
552 552 return m1.diff(m2, clean=clean)
553 553 return self._lm.diff(m2._lm, clean)
554 554
555 555 def setflag(self, key, flag):
556 556 self._lm[key] = self[key], flag
557 557
558 558 def get(self, key, default=None):
559 559 try:
560 560 return self._lm[key][0]
561 561 except KeyError:
562 562 return default
563 563
564 564 def flags(self, key, default=''):
565 565 try:
566 566 return self._lm[key][1]
567 567 except KeyError:
568 568 return default
569 569
570 570 def copy(self):
571 571 c = manifestdict()
572 572 c._lm = self._lm.copy()
573 573 return c
574 574
575 575 def items(self):
576 576 return (x[:2] for x in self._lm.iterentries())
577 577
578 578 def iteritems(self):
579 579 return (x[:2] for x in self._lm.iterentries())
580 580
581 581 def iterentries(self):
582 582 return self._lm.iterentries()
583 583
584 584 def text(self):
585 585 # most likely uses native version
586 586 return self._lm.text()
587 587
588 588 def fastdelta(self, base, changes):
589 589 """Given a base manifest text as a bytearray and a list of changes
590 590 relative to that text, compute a delta that can be used by revlog.
591 591 """
592 592 delta = []
593 593 dstart = None
594 594 dend = None
595 595 dline = [""]
596 596 start = 0
597 597 # zero copy representation of base as a buffer
598 598 addbuf = util.buffer(base)
599 599
600 600 changes = list(changes)
601 601 if len(changes) < FASTDELTA_TEXTDIFF_THRESHOLD:
602 602 # start with a readonly loop that finds the offset of
603 603 # each line and creates the deltas
604 604 for f, todelete in changes:
605 605 # bs will either be the index of the item or the insert point
606 606 start, end = _msearch(addbuf, f, start)
607 607 if not todelete:
608 608 h, fl = self._lm[f]
609 609 l = "%s\0%s%s\n" % (f, hex(h), fl)
610 610 else:
611 611 if start == end:
612 612 # item we want to delete was not found, error out
613 613 raise AssertionError(
614 614 _("failed to remove %s from manifest") % f)
615 615 l = ""
616 616 if dstart is not None and dstart <= start and dend >= start:
617 617 if dend < end:
618 618 dend = end
619 619 if l:
620 620 dline.append(l)
621 621 else:
622 622 if dstart is not None:
623 623 delta.append([dstart, dend, "".join(dline)])
624 624 dstart = start
625 625 dend = end
626 626 dline = [l]
627 627
628 628 if dstart is not None:
629 629 delta.append([dstart, dend, "".join(dline)])
630 630 # apply the delta to the base, and get a delta for addrevision
631 631 deltatext, arraytext = _addlistdelta(base, delta)
632 632 else:
633 633 # For large changes, it's much cheaper to just build the text and
634 634 # diff it.
635 635 arraytext = bytearray(self.text())
636 636 deltatext = mdiff.textdiff(
637 637 util.buffer(base), util.buffer(arraytext))
638 638
639 639 return arraytext, deltatext
640 640
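To make the diff() contract documented above concrete, a hedged sketch with invented nodes showing the ((n1, fl1), (n2, fl2)) result shape:

def _diffdemo():
    m1 = manifestdict('a\x00' + '1' * 40 + '\n')
    m2 = manifestdict('a\x00' + '2' * 40 + '\n'
                      'b\x00' + '3' * 40 + '\n')
    return m1.diff(m2)
    # {'a': ((bin('1' * 40), ''), (bin('2' * 40), '')),
    #  'b': ((None, ''), (bin('3' * 40), ''))}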
641 641 def _msearch(m, s, lo=0, hi=None):
642 642 '''return a tuple (start, end) that says where to find s within m.
643 643
644 644 If the string is found m[start:end] are the line containing
645 645 that string. If start == end the string was not found and
646 646 they indicate the proper sorted insertion point.
647 647
648 648 m should be a buffer, a memoryview or a byte string.
649 649 s is a byte string'''
650 650 def advance(i, c):
651 651 while i < lenm and m[i:i + 1] != c:
652 652 i += 1
653 653 return i
654 654 if not s:
655 655 return (lo, lo)
656 656 lenm = len(m)
657 657 if not hi:
658 658 hi = lenm
659 659 while lo < hi:
660 660 mid = (lo + hi) // 2
661 661 start = mid
662 662 while start > 0 and m[start - 1:start] != '\n':
663 663 start -= 1
664 664 end = advance(start, '\0')
665 665 if bytes(m[start:end]) < s:
666 666 # we know that after the null there are 40 bytes of sha1
667 667 # this translates to the bisect lo = mid + 1
668 668 lo = advance(end + 40, '\n') + 1
669 669 else:
670 670 # this translates to the bisect hi = mid
671 671 hi = start
672 672 end = advance(lo, '\0')
673 673 found = m[lo:end]
674 674 if s == found:
675 675 # we know that after the null there are 40 bytes of sha1
676 676 end = advance(end + 40, '\n')
677 677 return (lo, end + 1)
678 678 else:
679 679 return (lo, lo)
680 680
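A worked example of the bisection above, using two fabricated records of 45 bytes each ('<3-byte name>\0' + 40 hex digits + '\n'):

def _msearchdemo():
    m = ('bar\x00' + '1' * 40 + '\n'
         'foo\x00' + '2' * 40 + '\n')
    assert _msearch(m, 'foo') == (45, 90)  # the 'foo' line is m[45:90]
    assert _msearch(m, 'baz') == (45, 45)  # absent: sorted insertion point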
681 681 def _checkforbidden(l):
682 682 """Check filenames for illegal characters."""
683 683 for f in l:
684 684 if '\n' in f or '\r' in f:
685 685 raise error.StorageError(
686 686 _("'\\n' and '\\r' disallowed in filenames: %r")
687 687 % pycompat.bytestr(f))
688 688
689 689
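A quick hedged illustration of the check above:

def _checkforbiddendemo():
    _checkforbidden(['ok.txt'])        # silently passes
    try:
        _checkforbidden(['bad\nname'])
    except error.StorageError:
        return True                    # '\n' and '\r' are rejected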
690 690 # apply the changes collected during the bisect loop to our addlist
691 691 # return a delta suitable for addrevision
692 692 def _addlistdelta(addlist, x):
693 693 # for large addlist arrays, building a new array is cheaper
694 694 # than repeatedly modifying the existing one
695 695 currentposition = 0
696 696 newaddlist = bytearray()
697 697
698 698 for start, end, content in x:
699 699 newaddlist += addlist[currentposition:start]
700 700 if content:
701 701 newaddlist += bytearray(content)
702 702
703 703 currentposition = end
704 704
705 705 newaddlist += addlist[currentposition:]
706 706
707 707 deltatext = "".join(struct.pack(">lll", start, end, len(content))
708 708 + content for start, end, content in x)
709 709 return deltatext, newaddlist
710 710
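The deltatext built above (and returned by fastdelta()) is the revlog delta format: repeated big-endian (start, end, length) 32-bit triples, each followed by ``length`` replacement bytes. A hedged decoder sketch using the struct module this file already imports:

def _decodedelta(deltatext):
    # yield (start, end, content) triples back out of a delta text
    pos = 0
    while pos < len(deltatext):
        start, end, length = struct.unpack('>lll', deltatext[pos:pos + 12])
        yield start, end, deltatext[pos + 12:pos + 12 + length]
        pos += 12 + length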
711 711 def _splittopdir(f):
712 712 if '/' in f:
713 713 dir, subpath = f.split('/', 1)
714 714 return dir + '/', subpath
715 715 else:
716 716 return '', f
717 717
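For example, the helper above splits only at the first slash:

_splittopdir('foo/bar/baz')   # -> ('foo/', 'bar/baz')
_splittopdir('baz')           # -> ('', 'baz')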
718 718 _noop = lambda s: None
719 719
720 720 class treemanifest(object):
721 721 def __init__(self, dir='', text=''):
722 722 self._dir = dir
723 723 self._node = nullid
724 724 self._loadfunc = _noop
725 725 self._copyfunc = _noop
726 726 self._dirty = False
727 727 self._dirs = {}
728 728 self._lazydirs = {}
729 729 # Using _lazymanifest here is a little slower than plain old dicts
730 730 self._files = {}
731 731 self._flags = {}
732 732 if text:
733 733 def readsubtree(subdir, subm):
734 734 raise AssertionError('treemanifest constructor only accepts '
735 735 'flat manifests')
736 736 self.parse(text, readsubtree)
737 737 self._dirty = True # Mark flat manifest dirty after parsing
738 738
739 739 def _subpath(self, path):
740 740 return self._dir + path
741 741
742 742 def _loadalllazy(self):
743 743 selfdirs = self._dirs
744 744 for d, (path, node, readsubtree, docopy) in self._lazydirs.iteritems():
745 745 if docopy:
746 746 selfdirs[d] = readsubtree(path, node).copy()
747 747 else:
748 748 selfdirs[d] = readsubtree(path, node)
749 749 self._lazydirs = {}
750 750
751 751 def _loadlazy(self, d):
752 752 v = self._lazydirs.get(d)
753 753 if v:
754 754 path, node, readsubtree, docopy = v
755 755 if docopy:
756 756 self._dirs[d] = readsubtree(path, node).copy()
757 757 else:
758 758 self._dirs[d] = readsubtree(path, node)
759 759 del self._lazydirs[d]
760 760
761 761 def _loadchildrensetlazy(self, visit):
762 762 if not visit:
763 763 return None
764 764 if visit == 'all' or visit == 'this':
765 765 self._loadalllazy()
766 766 return None
767 767
768 768 loadlazy = self._loadlazy
769 769 for k in visit:
770 770 loadlazy(k + '/')
771 771 return visit
772 772
773 773 def _loaddifflazy(self, t1, t2):
774 774 """load items in t1 and t2 if they're needed for diffing.
775 775
776 776 The criteria currently is:
777 777 - if it's not present in _lazydirs in either t1 or t2, load it in the
778 778 other (it may already be loaded or it may not exist, doesn't matter)
779 779 - if it's present in _lazydirs in both, compare the nodeid; if it
780 780 differs, load it in both
781 781 """
782 782 toloadlazy = []
783 783 for d, v1 in t1._lazydirs.iteritems():
784 784 v2 = t2._lazydirs.get(d)
785 785 if not v2 or v2[1] != v1[1]:
786 786 toloadlazy.append(d)
787 787 for d, v1 in t2._lazydirs.iteritems():
788 788 if d not in t1._lazydirs:
789 789 toloadlazy.append(d)
790 790
791 791 for d in toloadlazy:
792 792 t1._loadlazy(d)
793 793 t2._loadlazy(d)
794 794
795 795 def __len__(self):
796 796 self._load()
797 797 size = len(self._files)
798 798 self._loadalllazy()
799 799 for m in self._dirs.values():
800 800 size += m.__len__()
801 801 return size
802 802
803 803 def __nonzero__(self):
804 804 # Faster than "__len__() != 0" since it avoids loading sub-manifests
805 805 return not self._isempty()
806 806
807 807 __bool__ = __nonzero__
808 808
809 809 def _isempty(self):
810 810 self._load() # for consistency; already loaded by all callers
811 811 # See if we can skip loading everything.
812 812 if self._files or (self._dirs and
813 813 any(not m._isempty() for m in self._dirs.values())):
814 814 return False
815 815 self._loadalllazy()
816 816 return (not self._dirs or
817 817 all(m._isempty() for m in self._dirs.values()))
818 818
819 819 def __repr__(self):
820 820 return ('<treemanifest dir=%s, node=%s, loaded=%s, dirty=%s at 0x%x>' %
821 821 (self._dir, hex(self._node),
822 822 bool(self._loadfunc is _noop),
823 823 self._dirty, id(self)))
824 824
825 825 def dir(self):
826 826 '''The directory that this tree manifest represents, including a
827 827 trailing '/'. Empty string for the repo root directory.'''
828 828 return self._dir
829 829
830 830 def node(self):
831 831 '''The node of this instance. nullid for unsaved instances. Should
832 832 be updated when the instance is read or written from a revlog.
833 833 '''
834 834 assert not self._dirty
835 835 return self._node
836 836
837 837 def setnode(self, node):
838 838 self._node = node
839 839 self._dirty = False
840 840
841 841 def iterentries(self):
842 842 self._load()
843 843 self._loadalllazy()
844 844 for p, n in sorted(itertools.chain(self._dirs.items(),
845 845 self._files.items())):
846 846 if p in self._files:
847 847 yield self._subpath(p), n, self._flags.get(p, '')
848 848 else:
849 849 for x in n.iterentries():
850 850 yield x
851 851
852 852 def items(self):
853 853 self._load()
854 854 self._loadalllazy()
855 855 for p, n in sorted(itertools.chain(self._dirs.items(),
856 856 self._files.items())):
857 857 if p in self._files:
858 858 yield self._subpath(p), n
859 859 else:
860 860 for f, sn in n.iteritems():
861 861 yield f, sn
862 862
863 863 iteritems = items
864 864
865 865 def iterkeys(self):
866 866 self._load()
867 867 self._loadalllazy()
868 868 for p in sorted(itertools.chain(self._dirs, self._files)):
869 869 if p in self._files:
870 870 yield self._subpath(p)
871 871 else:
872 872 for f in self._dirs[p]:
873 873 yield f
874 874
875 875 def keys(self):
876 876 return list(self.iterkeys())
877 877
878 878 def __iter__(self):
879 879 return self.iterkeys()
880 880
881 881 def __contains__(self, f):
882 882 if f is None:
883 883 return False
884 884 self._load()
885 885 dir, subpath = _splittopdir(f)
886 886 if dir:
887 887 self._loadlazy(dir)
888 888
889 889 if dir not in self._dirs:
890 890 return False
891 891
892 892 return self._dirs[dir].__contains__(subpath)
893 893 else:
894 894 return f in self._files
895 895
896 896 def get(self, f, default=None):
897 897 self._load()
898 898 dir, subpath = _splittopdir(f)
899 899 if dir:
900 900 self._loadlazy(dir)
901 901
902 902 if dir not in self._dirs:
903 903 return default
904 904 return self._dirs[dir].get(subpath, default)
905 905 else:
906 906 return self._files.get(f, default)
907 907
908 908 def __getitem__(self, f):
909 909 self._load()
910 910 dir, subpath = _splittopdir(f)
911 911 if dir:
912 912 self._loadlazy(dir)
913 913
914 914 return self._dirs[dir].__getitem__(subpath)
915 915 else:
916 916 return self._files[f]
917 917
918 918 def flags(self, f):
919 919 self._load()
920 920 dir, subpath = _splittopdir(f)
921 921 if dir:
922 922 self._loadlazy(dir)
923 923
924 924 if dir not in self._dirs:
925 925 return ''
926 926 return self._dirs[dir].flags(subpath)
927 927 else:
928 928 if f in self._lazydirs or f in self._dirs:
929 929 return ''
930 930 return self._flags.get(f, '')
931 931
932 932 def find(self, f):
933 933 self._load()
934 934 dir, subpath = _splittopdir(f)
935 935 if dir:
936 936 self._loadlazy(dir)
937 937
938 938 return self._dirs[dir].find(subpath)
939 939 else:
940 940 return self._files[f], self._flags.get(f, '')
941 941
942 942 def __delitem__(self, f):
943 943 self._load()
944 944 dir, subpath = _splittopdir(f)
945 945 if dir:
946 946 self._loadlazy(dir)
947 947
948 948 self._dirs[dir].__delitem__(subpath)
949 949 # If the directory is now empty, remove it
950 950 if self._dirs[dir]._isempty():
951 951 del self._dirs[dir]
952 952 else:
953 953 del self._files[f]
954 954 if f in self._flags:
955 955 del self._flags[f]
956 956 self._dirty = True
957 957
958 958 def __setitem__(self, f, n):
959 959 assert n is not None
960 960 self._load()
961 961 dir, subpath = _splittopdir(f)
962 962 if dir:
963 963 self._loadlazy(dir)
964 964 if dir not in self._dirs:
965 965 self._dirs[dir] = treemanifest(self._subpath(dir))
966 966 self._dirs[dir].__setitem__(subpath, n)
967 967 else:
968 968 self._files[f] = n[:21] # to match manifestdict's behavior
969 969 self._dirty = True
970 970
971 971 def _load(self):
972 972 if self._loadfunc is not _noop:
973 973 lf, self._loadfunc = self._loadfunc, _noop
974 974 lf(self)
975 975 elif self._copyfunc is not _noop:
976 976 cf, self._copyfunc = self._copyfunc, _noop
977 977 cf(self)
978 978
979 979 def setflag(self, f, flags):
980 980 """Set the flags (symlink, executable) for path f."""
981 981 self._load()
982 982 dir, subpath = _splittopdir(f)
983 983 if dir:
984 984 self._loadlazy(dir)
985 985 if dir not in self._dirs:
986 986 self._dirs[dir] = treemanifest(self._subpath(dir))
987 987 self._dirs[dir].setflag(subpath, flags)
988 988 else:
989 989 self._flags[f] = flags
990 990 self._dirty = True
991 991
992 992 def copy(self):
993 993 copy = treemanifest(self._dir)
994 994 copy._node = self._node
995 995 copy._dirty = self._dirty
996 996 if self._copyfunc is _noop:
997 997 def _copyfunc(s):
998 998 self._load()
999 999 s._lazydirs = {d: (p, n, r, True) for
1000 1000 d, (p, n, r, c) in self._lazydirs.iteritems()}
1001 1001 sdirs = s._dirs
1002 1002 for d, v in self._dirs.iteritems():
1003 1003 sdirs[d] = v.copy()
1004 1004 s._files = dict.copy(self._files)
1005 1005 s._flags = dict.copy(self._flags)
1006 1006 if self._loadfunc is _noop:
1007 1007 _copyfunc(copy)
1008 1008 else:
1009 1009 copy._copyfunc = _copyfunc
1010 1010 else:
1011 1011 copy._copyfunc = self._copyfunc
1012 1012 return copy
1013 1013
1014 1014 def filesnotin(self, m2, match=None):
1015 1015 '''Set of files in this manifest that are not in the other'''
1016 1016 if match and not match.always():
1017 1017 m1 = self.matches(match)
1018 1018 m2 = m2.matches(match)
1019 1019 return m1.filesnotin(m2)
1020 1020
1021 1021 files = set()
1022 1022 def _filesnotin(t1, t2):
1023 1023 if t1._node == t2._node and not t1._dirty and not t2._dirty:
1024 1024 return
1025 1025 t1._load()
1026 1026 t2._load()
1027 1027 self._loaddifflazy(t1, t2)
1028 1028 for d, m1 in t1._dirs.iteritems():
1029 1029 if d in t2._dirs:
1030 1030 m2 = t2._dirs[d]
1031 1031 _filesnotin(m1, m2)
1032 1032 else:
1033 1033 files.update(m1.iterkeys())
1034 1034
1035 1035 for fn in t1._files:
1036 1036 if fn not in t2._files:
1037 1037 files.add(t1._subpath(fn))
1038 1038
1039 1039 _filesnotin(self, m2)
1040 1040 return files
1041 1041
1042 1042 @propertycache
1043 1043 def _alldirs(self):
1044 1044 return util.dirs(self)
1045 1045
1046 1046 def dirs(self):
1047 1047 return self._alldirs
1048 1048
1049 1049 def hasdir(self, dir):
1050 1050 self._load()
1051 1051 topdir, subdir = _splittopdir(dir)
1052 1052 if topdir:
1053 1053 self._loadlazy(topdir)
1054 1054 if topdir in self._dirs:
1055 1055 return self._dirs[topdir].hasdir(subdir)
1056 1056 return False
1057 1057 dirslash = dir + '/'
1058 1058 return dirslash in self._dirs or dirslash in self._lazydirs
1059 1059
1060 1060 def walk(self, match):
1061 1061 '''Generates matching file names.
1062 1062
1063 1063 Equivalent to manifest.matches(match).iterkeys(), but without creating
1064 1064 an entirely new manifest.
1065 1065
1066 1066 It also reports nonexistent files by marking them bad with match.bad().
1067 1067 '''
1068 1068 if match.always():
1069 1069 for f in iter(self):
1070 1070 yield f
1071 1071 return
1072 1072
1073 1073 fset = set(match.files())
1074 1074
1075 1075 for fn in self._walk(match):
1076 1076 if fn in fset:
1077 1077 # specified pattern is the exact name
1078 1078 fset.remove(fn)
1079 1079 yield fn
1080 1080
1081 1081 # for dirstate.walk, files=[''] means "walk the whole tree".
1082 1082 # follow that here, too
1083 1083 fset.discard('')
1084 1084
1085 1085 for fn in sorted(fset):
1086 1086 if not self.hasdir(fn):
1087 1087 match.bad(fn, None)
1088 1088
1089 1089 def _walk(self, match):
1090 1090 '''Recursively generates matching file names for walk().'''
1091 1091 visit = match.visitchildrenset(self._dir[:-1])
1092 1092 if not visit:
1093 1093 return
1094 1094
1095 1095 # yield this dir's files and walk its submanifests
1096 1096 self._load()
1097 1097 visit = self._loadchildrensetlazy(visit)
1098 1098 for p in sorted(list(self._dirs) + list(self._files)):
1099 1099 if p in self._files:
1100 1100 fullp = self._subpath(p)
1101 1101 if match(fullp):
1102 1102 yield fullp
1103 1103 else:
1104 1104 if not visit or p[:-1] in visit:
1105 1105 for f in self._dirs[p]._walk(match):
1106 1106 yield f
1107 1107
1108 1108 def matches(self, match):
1109 1109 '''generate a new manifest filtered by the match argument'''
1110 1110 if match.always():
1111 1111 return self.copy()
1112 1112
1113 1113 return self._matches(match)
1114 1114
1115 1115 def _matches(self, match):
1116 1116 '''recursively generate a new manifest filtered by the match argument.
1117 1117 '''
1118 1118
1119 1119 visit = match.visitchildrenset(self._dir[:-1])
1120 1120 if visit == 'all':
1121 1121 return self.copy()
1122 1122 ret = treemanifest(self._dir)
1123 1123 if not visit:
1124 1124 return ret
1125 1125
1126 1126 self._load()
1127 1127 for fn in self._files:
1128 1128 # While visitchildrenset *usually* lists only subdirs, this is
1129 1129 # actually up to the matcher and may have some files in the set().
1130 1130 # If visit == 'this', we should obviously look at the files in this
1131 1131 # directory; if visit is a set, and fn is in it, we should inspect
1132 1132 # fn (but no need to inspect things not in the set).
1133 1133 if visit != 'this' and fn not in visit:
1134 1134 continue
1135 1135 fullp = self._subpath(fn)
1136 1136 # visitchildrenset isn't perfect, we still need to call the regular
1137 1137 # matcher code to further filter results.
1138 1138 if not match(fullp):
1139 1139 continue
1140 1140 ret._files[fn] = self._files[fn]
1141 1141 if fn in self._flags:
1142 1142 ret._flags[fn] = self._flags[fn]
1143 1143
1144 1144 visit = self._loadchildrensetlazy(visit)
1145 1145 for dir, subm in self._dirs.iteritems():
1146 1146 if visit and dir[:-1] not in visit:
1147 1147 continue
1148 1148 m = subm._matches(match)
1149 1149 if not m._isempty():
1150 1150 ret._dirs[dir] = m
1151 1151
1152 1152 if not ret._isempty():
1153 1153 ret._dirty = True
1154 1154 return ret
1155 1155
1156 1156 def diff(self, m2, match=None, clean=False):
1157 1157 '''Finds changes between the current manifest and m2.
1158 1158
1159 1159 Args:
1160 1160 m2: the manifest to which this manifest should be compared.
1161 1161 clean: if true, include files unchanged between these manifests
1162 1162 with a None value in the returned dictionary.
1163 1163
1164 1164 The result is returned as a dict with filename as key and
1165 1165 values of the form ((n1,fl1),(n2,fl2)), where n1/n2 is the
1166 1166 nodeid in the current/other manifest and fl1/fl2 is the flag
1167 1167 in the current/other manifest. Where the file does not exist,
1168 1168 the nodeid will be None and the flags will be the empty
1169 1169 string.
1170 1170 '''
1171 1171 if match and not match.always():
1172 1172 m1 = self.matches(match)
1173 1173 m2 = m2.matches(match)
1174 1174 return m1.diff(m2, clean=clean)
1175 1175 result = {}
1176 1176 emptytree = treemanifest()
1177 1177
1178 1178 def _iterativediff(t1, t2, stack):
1179 1179 """compares two tree manifests and append new tree-manifests which
1180 1180 needs to be compared to stack"""
1181 1181 if t1._node == t2._node and not t1._dirty and not t2._dirty:
1182 1182 return
1183 1183 t1._load()
1184 1184 t2._load()
1185 1185 self._loaddifflazy(t1, t2)
1186 1186
1187 1187 for d, m1 in t1._dirs.iteritems():
1188 1188 m2 = t2._dirs.get(d, emptytree)
1189 1189 stack.append((m1, m2))
1190 1190
1191 1191 for d, m2 in t2._dirs.iteritems():
1192 1192 if d not in t1._dirs:
1193 1193 stack.append((emptytree, m2))
1194 1194
1195 1195 for fn, n1 in t1._files.iteritems():
1196 1196 fl1 = t1._flags.get(fn, '')
1197 1197 n2 = t2._files.get(fn, None)
1198 1198 fl2 = t2._flags.get(fn, '')
1199 1199 if n1 != n2 or fl1 != fl2:
1200 1200 result[t1._subpath(fn)] = ((n1, fl1), (n2, fl2))
1201 1201 elif clean:
1202 1202 result[t1._subpath(fn)] = None
1203 1203
1204 1204 for fn, n2 in t2._files.iteritems():
1205 1205 if fn not in t1._files:
1206 1206 fl2 = t2._flags.get(fn, '')
1207 1207 result[t2._subpath(fn)] = ((None, ''), (n2, fl2))
1208 1208
1209 1209 stackls = []
1210 1210 _iterativediff(self, m2, stackls)
1211 1211 while stackls:
1212 1212 t1, t2 = stackls.pop()
1213 1213 # stackls is populated in the function call
1214 1214 _iterativediff(t1, t2, stackls)
1215 1215 return result
1216 1216
1217 1217 def unmodifiedsince(self, m2):
1218 1218 return not self._dirty and not m2._dirty and self._node == m2._node
1219 1219
1220 1220 def parse(self, text, readsubtree):
1221 1221 selflazy = self._lazydirs
1222 1222 subpath = self._subpath
1223 1223 for f, n, fl in _parse(text):
1224 1224 if fl == 't':
1225 1225 f = f + '/'
1226 1226 # False below means "doesn't need to be copied" and can use the
1227 1227 # cached value from readsubtree directly.
1228 1228 selflazy[f] = (subpath(f), n, readsubtree, False)
1229 1229 elif '/' in f:
1230 1230 # This is a flat manifest, so use __setitem__ and setflag rather
1231 1231 # than assigning directly to _files and _flags, so we can
1232 1232 # assign a path in a subdirectory, and to mark dirty (compared
1233 1233 # to nullid).
1234 1234 self[f] = n
1235 1235 if fl:
1236 1236 self.setflag(f, fl)
1237 1237 else:
1238 1238 # Assigning to _files and _flags avoids marking as dirty,
1239 1239 # and should be a little faster.
1240 1240 self._files[f] = n
1241 1241 if fl:
1242 1242 self._flags[f] = fl
1243 1243
1244 1244 def text(self):
1245 1245 """Get the full data of this manifest as a bytestring."""
1246 1246 self._load()
1247 1247 return _text(self.iterentries())
1248 1248
1249 1249 def dirtext(self):
1250 1250 """Get the full data of this directory as a bytestring. Make sure that
1251 1251 any submanifests have been written first, so their nodeids are correct.
1252 1252 """
1253 1253 self._load()
1254 1254 flags = self.flags
1255 1255 lazydirs = [(d[:-1], v[1], 't') for d, v in self._lazydirs.iteritems()]
1256 1256 dirs = [(d[:-1], self._dirs[d]._node, 't') for d in self._dirs]
1257 1257 files = [(f, self._files[f], flags(f)) for f in self._files]
1258 1258 return _text(sorted(dirs + files + lazydirs))
1259 1259
1260 1260 def read(self, gettext, readsubtree):
1261 1261 def _load_for_read(s):
1262 1262 s.parse(gettext(), readsubtree)
1263 1263 s._dirty = False
1264 1264 self._loadfunc = _load_for_read
1265 1265
1266 1266 def writesubtrees(self, m1, m2, writesubtree, match):
1267 1267 self._load() # for consistency; should never have any effect here
1268 1268 m1._load()
1269 1269 m2._load()
1270 1270 emptytree = treemanifest()
1271 1271 def getnode(m, d):
1272 1272 ld = m._lazydirs.get(d)
1273 1273 if ld:
1274 1274 return ld[1]
1275 1275 return m._dirs.get(d, emptytree)._node
1276 1276
1277 1277 # let's skip investigating things that `match` says we do not need.
1278 1278 visit = match.visitchildrenset(self._dir[:-1])
1279 1279 visit = self._loadchildrensetlazy(visit)
1280 1280 if visit == 'this' or visit == 'all':
1281 1281 visit = None
1282 1282 for d, subm in self._dirs.iteritems():
1283 1283 if visit and d[:-1] not in visit:
1284 1284 continue
1285 1285 subp1 = getnode(m1, d)
1286 1286 subp2 = getnode(m2, d)
1287 1287 if subp1 == nullid:
1288 1288 subp1, subp2 = subp2, subp1
1289 1289 writesubtree(subm, subp1, subp2, match)
1290 1290
1291 1291 def walksubtrees(self, matcher=None):
1292 1292 """Returns an iterator of the subtrees of this manifest, including this
1293 1293 manifest itself.
1294 1294
1295 1295 If `matcher` is provided, it only returns subtrees that match.
1296 1296 """
1297 1297 if matcher and not matcher.visitdir(self._dir[:-1]):
1298 1298 return
1299 1299 if not matcher or matcher(self._dir[:-1]):
1300 1300 yield self
1301 1301
1302 1302 self._load()
1303 1303 # OPT: use visitchildrenset to avoid loading everything.
1304 1304 self._loadalllazy()
1305 1305 for d, subm in self._dirs.iteritems():
1306 1306 for subtree in subm.walksubtrees(matcher=matcher):
1307 1307 yield subtree
1308 1308
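A hedged usage sketch of the class above, with a fabricated node, showing how paths are routed into lazily created sub-manifests:

def _treedemo():
    tm = treemanifest()
    tm['dir/file'] = '\x11' * 20        # lands in the 'dir/' submanifest
    assert 'dir/file' in tm and tm.hasdir('dir')
    assert tm.flags('dir/file') == ''
    return tm.text()                    # flattens back to manifest form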
1309 1309 class manifestfulltextcache(util.lrucachedict):
1310 1310 """File-backed LRU cache for the manifest cache
1311 1311
1312 1312 File consists of entries, up to EOF:
1313 1313
1314 1314 - 20 bytes node, 4 bytes length, <length> manifest data
1315 1315
1316 1316 These are written in reverse cache order (oldest to newest).
1317 1317
1318 1318 """
1319 1319
1320 1320 _file = 'manifestfulltextcache'
1321 1321
1322 1322 def __init__(self, max):
1323 1323 super(manifestfulltextcache, self).__init__(max)
1324 1324 self._dirty = False
1325 1325 self._read = False
1326 1326 self._opener = None
1327 1327
1328 1328 def read(self):
1329 1329 if self._read or self._opener is None:
1330 1330 return
1331 1331
1332 1332 try:
1333 1333 with self._opener(self._file) as fp:
1334 1334 set = super(manifestfulltextcache, self).__setitem__
1335 1335 # ignore trailing data, this is a cache, corruption is skipped
1336 1336 while True:
1337 1337 node = fp.read(20)
1338 1338 if len(node) < 20:
1339 1339 break
1340 1340 try:
1341 1341 size = struct.unpack('>L', fp.read(4))[0]
1342 1342 except struct.error:
1343 1343 break
1344 1344 value = bytearray(fp.read(size))
1345 1345 if len(value) != size:
1346 1346 break
1347 1347 set(node, value)
1348 1348 except IOError:
1349 1349 # the file is allowed to be missing
1350 1350 pass
1351 1351
1352 1352 self._read = True
1353 1353 self._dirty = False
1354 1354
1355 1355 def write(self):
1356 1356 if not self._dirty or self._opener is None:
1357 1357 return
1358 1358 # rotate backwards to the first used node
1359 1359 with self._opener(self._file, 'w', atomictemp=True, checkambig=True
1360 1360 ) as fp:
1361 1361 node = self._head.prev
1362 1362 while True:
1363 1363 if node.key in self._cache:
1364 1364 fp.write(node.key)
1365 1365 fp.write(struct.pack('>L', len(node.value)))
1366 1366 fp.write(node.value)
1367 1367 if node is self._head:
1368 1368 break
1369 1369 node = node.prev
1370 1370
1371 1371 def __len__(self):
1372 1372 if not self._read:
1373 1373 self.read()
1374 1374 return super(manifestfulltextcache, self).__len__()
1375 1375
1376 1376 def __contains__(self, k):
1377 1377 if not self._read:
1378 1378 self.read()
1379 1379 return super(manifestfulltextcache, self).__contains__(k)
1380 1380
1381 1381 def __iter__(self):
1382 1382 if not self._read:
1383 1383 self.read()
1384 1384 return super(manifestfulltextcache, self).__iter__()
1385 1385
1386 1386 def __getitem__(self, k):
1387 1387 if not self._read:
1388 1388 self.read()
1389 1389 # the cache lru order can change on read
1390 1390 setdirty = self._cache.get(k) is not self._head
1391 1391 value = super(manifestfulltextcache, self).__getitem__(k)
1392 1392 if setdirty:
1393 1393 self._dirty = True
1394 1394 return value
1395 1395
1396 1396 def __setitem__(self, k, v):
1397 1397 if not self._read:
1398 1398 self.read()
1399 1399 super(manifestfulltextcache, self).__setitem__(k, v)
1400 1400 self._dirty = True
1401 1401
1402 1402 def __delitem__(self, k):
1403 1403 if not self._read:
1404 1404 self.read()
1405 1405 super(manifestfulltextcache, self).__delitem__(k)
1406 1406 self._dirty = True
1407 1407
1408 1408 def get(self, k, default=None):
1409 1409 if not self._read:
1410 1410 self.read()
1411 1411 return super(manifestfulltextcache, self).get(k, default=default)
1412 1412
1413 1413 def clear(self, clear_persisted_data=False):
1414 1414 super(manifestfulltextcache, self).clear()
1415 1415 if clear_persisted_data:
1416 1416 self._dirty = True
1417 1417 self.write()
1418 1418 self._read = False
1419 1419
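Per the docstring above, each on-disk cache entry is a 20-byte node, a 4-byte big-endian length, then that many bytes of manifest text. A standalone reader sketch for that layout, mirroring read() above:

def _itercacheentries(fp):
    # yield (node, manifesttext) pairs until EOF or truncated data
    while True:
        node = fp.read(20)
        if len(node) < 20:
            break
        try:
            size = struct.unpack('>L', fp.read(4))[0]
        except struct.error:
            break
        value = fp.read(size)
        if len(value) != size:
            break
        yield node, value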
1420 # An upper bound on what we expect from compression
1421 # (the real-life value seems to be about 3)
1422 MAXCOMPRESSION = 10
1423
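Why track an upper bound: it gives delta-search code a cheap lower bound on how small any candidate can possibly be stored, so hopeless candidates can be skipped before compressing them. A hypothetical sketch of such pruning (illustrative only, not the revlog implementation):

def _worthcompressing(rawdeltalen, bestlen, upperboundcomp=MAXCOMPRESSION):
    # even at the best-case ratio, this delta cannot be stored in fewer
    # than rawdeltalen // upperboundcomp bytes; skip it if that best
    # case still loses to the current best candidate
    return rawdeltalen // upperboundcomp < bestlen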
1420 1424 @interfaceutil.implementer(repository.imanifeststorage)
1421 1425 class manifestrevlog(object):
1422 1426 '''A revlog that stores manifest texts. This is responsible for caching the
1423 1427 full-text manifest contents.
1424 1428 '''
1425 1429 def __init__(self, opener, tree='', dirlogcache=None, indexfile=None,
1426 1430 treemanifest=False):
1427 1431 """Constructs a new manifest revlog
1428 1432
1429 1433 `indexfile` - used by extensions to have two manifests at once, like
1430 1434 when transitioning between flat manifests and tree manifests.
1431 1435
1432 1436 `treemanifest` - used to indicate this is a tree manifest revlog. Opener
1433 1437 options can also be used to make this a tree manifest revlog. The opener
1434 1438 option takes precedence, so if it is set to True, we ignore whatever
1435 1439 value is passed in to the constructor.
1436 1440 """
1437 1441 # During normal operations, we expect to deal with not more than four
1438 1442 # revs at a time (such as during commit --amend). When rebasing large
1439 1443 # stacks of commits, the number can go up, hence the config knob below.
1440 1444 cachesize = 4
1441 1445 optiontreemanifest = False
1442 1446 opts = getattr(opener, 'options', None)
1443 1447 if opts is not None:
1444 1448 cachesize = opts.get('manifestcachesize', cachesize)
1445 1449 optiontreemanifest = opts.get('treemanifest', False)
1446 1450
1447 1451 self._treeondisk = optiontreemanifest or treemanifest
1448 1452
1449 1453 self._fulltextcache = manifestfulltextcache(cachesize)
1450 1454
1451 1455 if tree:
1452 1456 assert self._treeondisk, 'opts is %r' % opts
1453 1457
1454 1458 if indexfile is None:
1455 1459 indexfile = '00manifest.i'
1456 1460 if tree:
1457 1461 indexfile = "meta/" + tree + indexfile
1458 1462
1459 1463 self.tree = tree
1460 1464
1461 1465 # The dirlogcache is kept on the root manifest log
1462 1466 if tree:
1463 1467 self._dirlogcache = dirlogcache
1464 1468 else:
1465 1469 self._dirlogcache = {'': self}
1466 1470
1467 1471 self._revlog = revlog.revlog(opener, indexfile,
1468 1472 # only root indexfile is cached
1469 1473 checkambig=not bool(tree),
1470 mmaplargeindex=True)
1474 mmaplargeindex=True,
1475 upperboundcomp=MAXCOMPRESSION)
1471 1476
1472 1477 self.index = self._revlog.index
1473 1478 self.version = self._revlog.version
1474 1479 self._generaldelta = self._revlog._generaldelta
1475 1480
1476 1481 def _setupmanifestcachehooks(self, repo):
1477 1482 """Persist the manifestfulltextcache on lock release"""
1478 1483 if not util.safehasattr(repo, '_wlockref'):
1479 1484 return
1480 1485
1481 1486 self._fulltextcache._opener = repo.wcachevfs
1482 1487 if repo._currentlock(repo._wlockref) is None:
1483 1488 return
1484 1489
1485 1490 reporef = weakref.ref(repo)
1486 1491 manifestrevlogref = weakref.ref(self)
1487 1492
1488 1493 def persistmanifestcache():
1489 1494 repo = reporef()
1490 1495 self = manifestrevlogref()
1491 1496 if repo is None or self is None:
1492 1497 return
1493 1498 if repo.manifestlog.getstorage(b'') is not self:
1494 1499 # there's a different manifest in play now, abort
1495 1500 return
1496 1501 self._fulltextcache.write()
1497 1502
1498 1503 repo._afterlock(persistmanifestcache)
1499 1504
1500 1505 @property
1501 1506 def fulltextcache(self):
1502 1507 return self._fulltextcache
1503 1508
1504 1509 def clearcaches(self, clear_persisted_data=False):
1505 1510 self._revlog.clearcaches()
1506 1511 self._fulltextcache.clear(clear_persisted_data=clear_persisted_data)
1507 1512 self._dirlogcache = {self.tree: self}
1508 1513
1509 1514 def dirlog(self, d):
1510 1515 if d:
1511 1516 assert self._treeondisk
1512 1517 if d not in self._dirlogcache:
1513 1518 mfrevlog = manifestrevlog(self.opener, d,
1514 1519 self._dirlogcache,
1515 1520 treemanifest=self._treeondisk)
1516 1521 self._dirlogcache[d] = mfrevlog
1517 1522 return self._dirlogcache[d]
1518 1523
1519 1524 def add(self, m, transaction, link, p1, p2, added, removed, readtree=None,
1520 1525 match=None):
1521 1526 if p1 in self.fulltextcache and util.safehasattr(m, 'fastdelta'):
1522 1527 # If our first parent is in the manifest cache, we can
1523 1528 # compute a delta here using properties we know about the
1524 1529 # manifest up-front, which may save time later for the
1525 1530 # revlog layer.
1526 1531
1527 1532 _checkforbidden(added)
1528 1533 # combine the changed lists into one sorted iterator
1529 1534 work = heapq.merge([(x, False) for x in sorted(added)],
1530 1535 [(x, True) for x in sorted(removed)])
1531 1536
1532 1537 arraytext, deltatext = m.fastdelta(self.fulltextcache[p1], work)
1533 1538 cachedelta = self._revlog.rev(p1), deltatext
1534 1539 text = util.buffer(arraytext)
1535 1540 n = self._revlog.addrevision(text, transaction, link, p1, p2,
1536 1541 cachedelta)
1537 1542 else:
1538 1543 # The first parent manifest isn't already loaded, so we'll
1539 1544 # just encode a fulltext of the manifest and pass that
1540 1545 # through to the revlog layer, and let it handle the delta
1541 1546 # process.
1542 1547 if self._treeondisk:
1543 1548 assert readtree, "readtree must be set for treemanifest writes"
1544 1549 assert match, "match must be specified for treemanifest writes"
1545 1550 m1 = readtree(self.tree, p1)
1546 1551 m2 = readtree(self.tree, p2)
1547 1552 n = self._addtree(m, transaction, link, m1, m2, readtree,
1548 1553 match=match)
1549 1554 arraytext = None
1550 1555 else:
1551 1556 text = m.text()
1552 1557 n = self._revlog.addrevision(text, transaction, link, p1, p2)
1553 1558 arraytext = bytearray(text)
1554 1559
1555 1560 if arraytext is not None:
1556 1561 self.fulltextcache[n] = arraytext
1557 1562
1558 1563 return n
1559 1564
1560 1565 def _addtree(self, m, transaction, link, m1, m2, readtree, match):
1561 1566 # If the manifest is unchanged compared to one parent,
1562 1567 # don't write a new revision
1563 1568 if self.tree != '' and (m.unmodifiedsince(m1) or m.unmodifiedsince(
1564 1569 m2)):
1565 1570 return m.node()
1566 1571 def writesubtree(subm, subp1, subp2, match):
1567 1572 sublog = self.dirlog(subm.dir())
1568 1573 sublog.add(subm, transaction, link, subp1, subp2, None, None,
1569 1574 readtree=readtree, match=match)
1570 1575 m.writesubtrees(m1, m2, writesubtree, match)
1571 1576 text = m.dirtext()
1572 1577 n = None
1573 1578 if self.tree != '':
1574 1579 # Double-check whether contents are unchanged to one parent
1575 1580 if text == m1.dirtext():
1576 1581 n = m1.node()
1577 1582 elif text == m2.dirtext():
1578 1583 n = m2.node()
1579 1584
1580 1585 if not n:
1581 1586 n = self._revlog.addrevision(text, transaction, link, m1.node(),
1582 1587 m2.node())
1583 1588
1584 1589 # Save nodeid so parent manifest can calculate its nodeid
1585 1590 m.setnode(n)
1586 1591 return n
1587 1592
1588 1593 def __len__(self):
1589 1594 return len(self._revlog)
1590 1595
1591 1596 def __iter__(self):
1592 1597 return self._revlog.__iter__()
1593 1598
1594 1599 def rev(self, node):
1595 1600 return self._revlog.rev(node)
1596 1601
1597 1602 def node(self, rev):
1598 1603 return self._revlog.node(rev)
1599 1604
1600 1605 def lookup(self, value):
1601 1606 return self._revlog.lookup(value)
1602 1607
1603 1608 def parentrevs(self, rev):
1604 1609 return self._revlog.parentrevs(rev)
1605 1610
1606 1611 def parents(self, node):
1607 1612 return self._revlog.parents(node)
1608 1613
1609 1614 def linkrev(self, rev):
1610 1615 return self._revlog.linkrev(rev)
1611 1616
1612 1617 def checksize(self):
1613 1618 return self._revlog.checksize()
1614 1619
1615 1620 def revision(self, node, _df=None, raw=False):
1616 1621 return self._revlog.revision(node, _df=_df, raw=raw)
1617 1622
1618 1623 def revdiff(self, rev1, rev2):
1619 1624 return self._revlog.revdiff(rev1, rev2)
1620 1625
1621 1626 def cmp(self, node, text):
1622 1627 return self._revlog.cmp(node, text)
1623 1628
1624 1629 def deltaparent(self, rev):
1625 1630 return self._revlog.deltaparent(rev)
1626 1631
1627 1632 def emitrevisions(self, nodes, nodesorder=None,
1628 1633 revisiondata=False, assumehaveparentrevisions=False,
1629 1634 deltamode=repository.CG_DELTAMODE_STD):
1630 1635 return self._revlog.emitrevisions(
1631 1636 nodes, nodesorder=nodesorder, revisiondata=revisiondata,
1632 1637 assumehaveparentrevisions=assumehaveparentrevisions,
1633 1638 deltamode=deltamode)
1634 1639
1635 1640 def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
1636 1641 return self._revlog.addgroup(deltas, linkmapper, transaction,
1637 1642 addrevisioncb=addrevisioncb)
1638 1643
1639 1644 def rawsize(self, rev):
1640 1645 return self._revlog.rawsize(rev)
1641 1646
1642 1647 def getstrippoint(self, minlink):
1643 1648 return self._revlog.getstrippoint(minlink)
1644 1649
1645 1650 def strip(self, minlink, transaction):
1646 1651 return self._revlog.strip(minlink, transaction)
1647 1652
1648 1653 def files(self):
1649 1654 return self._revlog.files()
1650 1655
1651 1656 def clone(self, tr, destrevlog, **kwargs):
1652 1657 if not isinstance(destrevlog, manifestrevlog):
1653 1658 raise error.ProgrammingError('expected manifestrevlog to clone()')
1654 1659
1655 1660 return self._revlog.clone(tr, destrevlog._revlog, **kwargs)
1656 1661
1657 1662 def storageinfo(self, exclusivefiles=False, sharedfiles=False,
1658 1663 revisionscount=False, trackedsize=False,
1659 1664 storedsize=False):
1660 1665 return self._revlog.storageinfo(
1661 1666 exclusivefiles=exclusivefiles, sharedfiles=sharedfiles,
1662 1667 revisionscount=revisionscount, trackedsize=trackedsize,
1663 1668 storedsize=storedsize)
1664 1669
1665 1670 @property
1666 1671 def indexfile(self):
1667 1672 return self._revlog.indexfile
1668 1673
1669 1674 @indexfile.setter
1670 1675 def indexfile(self, value):
1671 1676 self._revlog.indexfile = value
1672 1677
1673 1678 @property
1674 1679 def opener(self):
1675 1680 return self._revlog.opener
1676 1681
1677 1682 @opener.setter
1678 1683 def opener(self, value):
1679 1684 self._revlog.opener = value
1680 1685
1681 1686 @interfaceutil.implementer(repository.imanifestlog)
1682 1687 class manifestlog(object):
1683 1688 """A collection class representing the collection of manifest snapshots
1684 1689 referenced by commits in the repository.
1685 1690
1686 1691 In this situation, 'manifest' refers to the abstract concept of a snapshot
1687 1692 of the list of files in the given commit. Consumers of the output of this
1688 1693 class do not care about the implementation details of the actual manifests
1689 1694 they receive (i.e. tree or flat or lazily loaded, etc)."""
1690 1695 def __init__(self, opener, repo, rootstore, narrowmatch):
1691 1696 usetreemanifest = False
1692 1697 cachesize = 4
1693 1698
1694 1699 opts = getattr(opener, 'options', None)
1695 1700 if opts is not None:
1696 1701 usetreemanifest = opts.get('treemanifest', usetreemanifest)
1697 1702 cachesize = opts.get('manifestcachesize', cachesize)
1698 1703
1699 1704 self._treemanifests = usetreemanifest
1700 1705
1701 1706 self._rootstore = rootstore
1702 1707 self._rootstore._setupmanifestcachehooks(repo)
1703 1708 self._narrowmatch = narrowmatch
1704 1709
1705 1710 # A cache of the manifestctx or treemanifestctx for each directory
1706 1711 self._dirmancache = {}
1707 1712 self._dirmancache[''] = util.lrucachedict(cachesize)
1708 1713
1709 1714 self._cachesize = cachesize
1710 1715
1711 1716 def __getitem__(self, node):
1712 1717 """Retrieves the manifest instance for the given node. Throws a
1713 1718 LookupError if not found.
1714 1719 """
1715 1720 return self.get('', node)
1716 1721
1717 1722 def get(self, tree, node, verify=True):
1718 1723 """Retrieves the manifest instance for the given node. Throws a
1719 1724 LookupError if not found.
1720 1725
1721 1726 `verify` - if True an exception will be thrown if the node is not in
1722 1727 the revlog
1723 1728 """
1724 1729 if node in self._dirmancache.get(tree, ()):
1725 1730 return self._dirmancache[tree][node]
1726 1731
1727 1732 if not self._narrowmatch.always():
1728 1733 if not self._narrowmatch.visitdir(tree[:-1]):
1729 1734 return excludeddirmanifestctx(tree, node)
1730 1735 if tree:
1731 1736 if self._rootstore._treeondisk:
1732 1737 if verify:
1733 1738 # Side-effect is LookupError is raised if node doesn't
1734 1739 # exist.
1735 1740 self.getstorage(tree).rev(node)
1736 1741
1737 1742 m = treemanifestctx(self, tree, node)
1738 1743 else:
1739 1744 raise error.Abort(
1740 1745 _("cannot ask for manifest directory '%s' in a flat "
1741 1746 "manifest") % tree)
1742 1747 else:
1743 1748 if verify:
1744 1749 # Side-effect is LookupError is raised if node doesn't exist.
1745 1750 self._rootstore.rev(node)
1746 1751
1747 1752 if self._treemanifests:
1748 1753 m = treemanifestctx(self, '', node)
1749 1754 else:
1750 1755 m = manifestctx(self, node)
1751 1756
1752 1757 if node != nullid:
1753 1758 mancache = self._dirmancache.get(tree)
1754 1759 if not mancache:
1755 1760 mancache = util.lrucachedict(self._cachesize)
1756 1761 self._dirmancache[tree] = mancache
1757 1762 mancache[node] = m
1758 1763 return m
1759 1764
1760 1765 def getstorage(self, tree):
1761 1766 return self._rootstore.dirlog(tree)
1762 1767
1763 1768 def clearcaches(self, clear_persisted_data=False):
1764 1769 self._dirmancache.clear()
1765 1770 self._rootstore.clearcaches(clear_persisted_data=clear_persisted_data)
1766 1771
1767 1772 def rev(self, node):
1768 1773 return self._rootstore.rev(node)
1769 1774
1770 1775 @interfaceutil.implementer(repository.imanifestrevisionwritable)
1771 1776 class memmanifestctx(object):
1772 1777 def __init__(self, manifestlog):
1773 1778 self._manifestlog = manifestlog
1774 1779 self._manifestdict = manifestdict()
1775 1780
1776 1781 def _storage(self):
1777 1782 return self._manifestlog.getstorage(b'')
1778 1783
1779 1784 def new(self):
1780 1785 return memmanifestctx(self._manifestlog)
1781 1786
1782 1787 def copy(self):
1783 1788 memmf = memmanifestctx(self._manifestlog)
1784 1789 memmf._manifestdict = self.read().copy()
1785 1790 return memmf
1786 1791
1787 1792 def read(self):
1788 1793 return self._manifestdict
1789 1794
1790 1795 def write(self, transaction, link, p1, p2, added, removed, match=None):
1791 1796 return self._storage().add(self._manifestdict, transaction, link,
1792 1797 p1, p2, added, removed, match=match)
1793 1798
1794 1799 @interfaceutil.implementer(repository.imanifestrevisionstored)
1795 1800 class manifestctx(object):
1796 1801 """A class representing a single revision of a manifest, including its
1797 1802 contents, its parent revs, and its linkrev.
1798 1803 """
1799 1804 def __init__(self, manifestlog, node):
1800 1805 self._manifestlog = manifestlog
1801 1806 self._data = None
1802 1807
1803 1808 self._node = node
1804 1809
1805 1810 # TODO: We eventually want p1, p2, and linkrev exposed on this class,
1806 1811 # but let's add it later when something needs it and we can load it
1807 1812 # lazily.
1808 1813 #self.p1, self.p2 = store.parents(node)
1809 1814 #rev = store.rev(node)
1810 1815 #self.linkrev = store.linkrev(rev)
1811 1816
1812 1817 def _storage(self):
1813 1818 return self._manifestlog.getstorage(b'')
1814 1819
1815 1820 def node(self):
1816 1821 return self._node
1817 1822
1818 1823 def new(self):
1819 1824 return memmanifestctx(self._manifestlog)
1820 1825
1821 1826 def copy(self):
1822 1827 memmf = memmanifestctx(self._manifestlog)
1823 1828 memmf._manifestdict = self.read().copy()
1824 1829 return memmf
1825 1830
1826 1831 @propertycache
1827 1832 def parents(self):
1828 1833 return self._storage().parents(self._node)
1829 1834
1830 1835 def read(self):
1831 1836 if self._data is None:
1832 1837 if self._node == nullid:
1833 1838 self._data = manifestdict()
1834 1839 else:
1835 1840 store = self._storage()
1836 1841 if self._node in store.fulltextcache:
1837 1842 text = pycompat.bytestr(store.fulltextcache[self._node])
1838 1843 else:
1839 1844 text = store.revision(self._node)
1840 1845 arraytext = bytearray(text)
1841 1846 store.fulltextcache[self._node] = arraytext
1842 1847 self._data = manifestdict(text)
1843 1848 return self._data
1844 1849
1845 1850 def readfast(self, shallow=False):
1846 1851 '''Calls either readdelta or read, based on which would be less work.
1847 1852 readdelta is called if the delta is against the p1, and therefore can be
1848 1853 read quickly.
1849 1854
1850 1855 If `shallow` is True, nothing changes since this is a flat manifest.
1851 1856 '''
1852 1857 store = self._storage()
1853 1858 r = store.rev(self._node)
1854 1859 deltaparent = store.deltaparent(r)
1855 1860 if deltaparent != nullrev and deltaparent in store.parentrevs(r):
1856 1861 return self.readdelta()
1857 1862 return self.read()
1858 1863
1859 1864 def readdelta(self, shallow=False):
1860 1865 '''Returns a manifest containing just the entries that are present
1861 1866 in this manifest, but not in its p1 manifest. This is efficient to read
1862 1867 if the revlog delta is already p1.
1863 1868
1864 1869 Changing the value of `shallow` has no effect on flat manifests.
1865 1870 '''
1866 1871 store = self._storage()
1867 1872 r = store.rev(self._node)
1868 1873 d = mdiff.patchtext(store.revdiff(store.deltaparent(r), r))
1869 1874 return manifestdict(d)
1870 1875
1871 1876 def find(self, key):
1872 1877 return self.read().find(key)
1873 1878
1874 1879 @interfaceutil.implementer(repository.imanifestrevisionwritable)
1875 1880 class memtreemanifestctx(object):
1876 1881 def __init__(self, manifestlog, dir=''):
1877 1882 self._manifestlog = manifestlog
1878 1883 self._dir = dir
1879 1884 self._treemanifest = treemanifest()
1880 1885
1881 1886 def _storage(self):
1882 1887 return self._manifestlog.getstorage(b'')
1883 1888
1884 1889 def new(self, dir=''):
1885 1890 return memtreemanifestctx(self._manifestlog, dir=dir)
1886 1891
1887 1892 def copy(self):
1888 1893 memmf = memtreemanifestctx(self._manifestlog, dir=self._dir)
1889 1894 memmf._treemanifest = self._treemanifest.copy()
1890 1895 return memmf
1891 1896
1892 1897 def read(self):
1893 1898 return self._treemanifest
1894 1899
1895 1900 def write(self, transaction, link, p1, p2, added, removed, match=None):
1896 1901 def readtree(dir, node):
1897 1902 return self._manifestlog.get(dir, node).read()
1898 1903 return self._storage().add(self._treemanifest, transaction, link,
1899 1904 p1, p2, added, removed, readtree=readtree,
1900 1905 match=match)
1901 1906
1902 1907 @interfaceutil.implementer(repository.imanifestrevisionstored)
1903 1908 class treemanifestctx(object):
1904 1909 def __init__(self, manifestlog, dir, node):
1905 1910 self._manifestlog = manifestlog
1906 1911 self._dir = dir
1907 1912 self._data = None
1908 1913
1909 1914 self._node = node
1910 1915
1911 1916 # TODO: Load p1/p2/linkrev lazily. They need to be lazily loaded so that
1912 1917 # we can instantiate treemanifestctx objects for directories we don't
1913 1918 # have on disk.
1914 1919 #self.p1, self.p2 = store.parents(node)
1915 1920 #rev = store.rev(node)
1916 1921 #self.linkrev = store.linkrev(rev)
1917 1922
1918 1923 def _storage(self):
1919 1924 narrowmatch = self._manifestlog._narrowmatch
1920 1925 if not narrowmatch.always():
1921 1926 if not narrowmatch.visitdir(self._dir[:-1]):
1922 1927 return excludedmanifestrevlog(self._dir)
1923 1928 return self._manifestlog.getstorage(self._dir)
1924 1929
1925 1930 def read(self):
1926 1931 if self._data is None:
1927 1932 store = self._storage()
1928 1933 if self._node == nullid:
1929 1934 self._data = treemanifest()
1930 1935 # TODO accessing non-public API
1931 1936 elif store._treeondisk:
1932 1937 m = treemanifest(dir=self._dir)
1933 1938 def gettext():
1934 1939 return store.revision(self._node)
1935 1940 def readsubtree(dir, subm):
1936 1941 # Set verify to False since we need to be able to create
1937 1942 # subtrees for trees that don't exist on disk.
1938 1943 return self._manifestlog.get(dir, subm, verify=False).read()
1939 1944 m.read(gettext, readsubtree)
1940 1945 m.setnode(self._node)
1941 1946 self._data = m
1942 1947 else:
1943 1948 if self._node in store.fulltextcache:
1944 1949 text = pycompat.bytestr(store.fulltextcache[self._node])
1945 1950 else:
1946 1951 text = store.revision(self._node)
1947 1952 arraytext = bytearray(text)
1948 1953 store.fulltextcache[self._node] = arraytext
1949 1954 self._data = treemanifest(dir=self._dir, text=text)
1950 1955
1951 1956 return self._data
1952 1957
1953 1958 def node(self):
1954 1959 return self._node
1955 1960
1956 1961 def new(self, dir=''):
1957 1962 return memtreemanifestctx(self._manifestlog, dir=dir)
1958 1963
1959 1964 def copy(self):
1960 1965 memmf = memtreemanifestctx(self._manifestlog, dir=self._dir)
1961 1966 memmf._treemanifest = self.read().copy()
1962 1967 return memmf
1963 1968
1964 1969 @propertycache
1965 1970 def parents(self):
1966 1971 return self._storage().parents(self._node)
1967 1972
1968 1973 def readdelta(self, shallow=False):
1969 1974 '''Returns a manifest containing just the entries that are present
1970 1975 in this manifest, but not in its p1 manifest. This is efficient to read
1971 1976 if the revlog delta is already p1.
1972 1977
1973 1978 If `shallow` is True, this will read the delta for this directory,
1974 1979 without recursively reading subdirectory manifests. Instead, any
1975 1980 subdirectory entry will be reported as it appears in the manifest, i.e.
1976 1981 the subdirectory will be reported among files and distinguished only by
1977 1982 its 't' flag.
1978 1983 '''
1979 1984 store = self._storage()
1980 1985 if shallow:
1981 1986 r = store.rev(self._node)
1982 1987 d = mdiff.patchtext(store.revdiff(store.deltaparent(r), r))
1983 1988 return manifestdict(d)
1984 1989 else:
1985 1990 # Need to perform a slow delta
1986 1991 r0 = store.deltaparent(store.rev(self._node))
1987 1992 m0 = self._manifestlog.get(self._dir, store.node(r0)).read()
1988 1993 m1 = self.read()
1989 1994 md = treemanifest(dir=self._dir)
1990 1995 for f, ((n0, fl0), (n1, fl1)) in m0.diff(m1).iteritems():
1991 1996 if n1:
1992 1997 md[f] = n1
1993 1998 if fl1:
1994 1999 md.setflag(f, fl1)
1995 2000 return md
1996 2001
1997 2002 def readfast(self, shallow=False):
1998 2003 '''Calls either readdelta or read, based on which would be less work.
1999 2004 readdelta is called if the delta is against the p1, and therefore can be
2000 2005 read quickly.
2001 2006
2002 2007 If `shallow` is True, it only returns the entries from this manifest,
2003 2008 and not any submanifests.
2004 2009 '''
2005 2010 store = self._storage()
2006 2011 r = store.rev(self._node)
2007 2012 deltaparent = store.deltaparent(r)
2008 2013 if (deltaparent != nullrev and
2009 2014 deltaparent in store.parentrevs(r)):
2010 2015 return self.readdelta(shallow=shallow)
2011 2016
2012 2017 if shallow:
2013 2018 return manifestdict(store.revision(self._node))
2014 2019 else:
2015 2020 return self.read()
2016 2021
2017 2022 def find(self, key):
2018 2023 return self.read().find(key)
2019 2024
2020 2025 class excludeddir(treemanifest):
2021 2026 """Stand-in for a directory that is excluded from the repository.
2022 2027
2023 2028 With narrowing active on a repository that uses treemanifests,
2024 2029 some of the directory revlogs will be excluded from the resulting
2025 2030 clone. This is a huge storage win for clients, but means we need
2026 2031 some sort of pseudo-manifest to surface to internals so we can
2027 2032 detect a merge conflict outside the narrowspec. That's what this
2028 2033 class is: it stands in for a directory whose node is known, but
2029 2034 whose contents are unknown.
2030 2035 """
2031 2036 def __init__(self, dir, node):
2032 2037 super(excludeddir, self).__init__(dir)
2033 2038 self._node = node
2034 2039 # Add an empty file, which will be included by iterators and such,
2035 2040 # appearing as the directory itself (i.e. something like "dir/")
2036 2041 self._files[''] = node
2037 2042 self._flags[''] = 't'
2038 2043
2039 2044 # Manifests outside the narrowspec should never be modified, so avoid
2040 2045 # copying. This makes a noticeable difference when there are very many
2041 2046 # directories outside the narrowspec. Also, it makes sense for the copy to
2042 2047 # be of the same type as the original, which would not happen with the
2043 2048 # super type's copy().
2044 2049 def copy(self):
2045 2050 return self
2046 2051
2047 2052 class excludeddirmanifestctx(treemanifestctx):
2048 2053 """context wrapper for excludeddir - see that docstring for rationale"""
2049 2054 def __init__(self, dir, node):
2050 2055 self._dir = dir
2051 2056 self._node = node
2052 2057
2053 2058 def read(self):
2054 2059 return excludeddir(self._dir, self._node)
2055 2060
2056 2061 def write(self, *args):
2057 2062 raise error.ProgrammingError(
2058 2063 'attempt to write manifest from excluded dir %s' % self._dir)
2059 2064
2060 2065 class excludedmanifestrevlog(manifestrevlog):
2061 2066 """Stand-in for excluded treemanifest revlogs.
2062 2067
2063 2068 When narrowing is active on a treemanifest repository, we'll have
2064 2069 references to directories we can't see due to the revlog being
2065 2070 skipped. This class exists to conform to the manifestrevlog
2066 2071 interface for those directories and proactively prevent writes to
2067 2072 outside the narrowspec.
2068 2073 """
2069 2074
2070 2075 def __init__(self, dir):
2071 2076 self._dir = dir
2072 2077
2073 2078 def __len__(self):
2074 2079 raise error.ProgrammingError(
2075 2080 'attempt to get length of excluded dir %s' % self._dir)
2076 2081
2077 2082 def rev(self, node):
2078 2083 raise error.ProgrammingError(
2079 2084 'attempt to get rev from excluded dir %s' % self._dir)
2080 2085
2081 2086 def linkrev(self, node):
2082 2087 raise error.ProgrammingError(
2083 2088 'attempt to get linkrev from excluded dir %s' % self._dir)
2084 2089
2085 2090 def node(self, rev):
2086 2091 raise error.ProgrammingError(
2087 2092 'attempt to get node from excluded dir %s' % self._dir)
2088 2093
2089 2094 def add(self, *args, **kwargs):
2090 2095 # We should never write entries in dirlogs outside the narrow clone.
2091 2096 # However, the method still gets called from writesubtree() in
2092 2097 # _addtree(), so we need to handle it. We should possibly make
2093 2098 # writesubtree() avoid calling add() with a clean manifest (_dirty
2094 2099 # is always False in excludeddir instances).
2095 2100 pass
@@ -1,2684 +1,2690 b''
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 from __future__ import absolute_import
15 15
16 16 import collections
17 17 import contextlib
18 18 import errno
19 19 import os
20 20 import struct
21 21 import zlib
22 22
23 23 # import stuff from node for others to import from revlog
24 24 from .node import (
25 25 bin,
26 26 hex,
27 27 nullhex,
28 28 nullid,
29 29 nullrev,
30 30 short,
31 31 wdirfilenodeids,
32 32 wdirhex,
33 33 wdirid,
34 34 wdirrev,
35 35 )
36 36 from .i18n import _
37 37 from .revlogutils.constants import (
38 38 FLAG_GENERALDELTA,
39 39 FLAG_INLINE_DATA,
40 40 REVIDX_DEFAULT_FLAGS,
41 41 REVIDX_ELLIPSIS,
42 42 REVIDX_EXTSTORED,
43 43 REVIDX_FLAGS_ORDER,
44 44 REVIDX_ISCENSORED,
45 45 REVIDX_KNOWN_FLAGS,
46 46 REVIDX_RAWTEXT_CHANGING_FLAGS,
47 47 REVLOGV0,
48 48 REVLOGV1,
49 49 REVLOGV1_FLAGS,
50 50 REVLOGV2,
51 51 REVLOGV2_FLAGS,
52 52 REVLOG_DEFAULT_FLAGS,
53 53 REVLOG_DEFAULT_FORMAT,
54 54 REVLOG_DEFAULT_VERSION,
55 55 )
56 56 from .thirdparty import (
57 57 attr,
58 58 )
59 59 from . import (
60 60 ancestor,
61 61 dagop,
62 62 error,
63 63 mdiff,
64 64 policy,
65 65 pycompat,
66 66 repository,
67 67 templatefilters,
68 68 util,
69 69 )
70 70 from .revlogutils import (
71 71 deltas as deltautil,
72 72 )
73 73 from .utils import (
74 74 interfaceutil,
75 75 storageutil,
76 76 stringutil,
77 77 )
78 78
79 79 # blanket usage of all the names to prevent pyflakes complaints
80 80 # We need these names available in the module for extensions.
81 81 REVLOGV0
82 82 REVLOGV1
83 83 REVLOGV2
84 84 FLAG_INLINE_DATA
85 85 FLAG_GENERALDELTA
86 86 REVLOG_DEFAULT_FLAGS
87 87 REVLOG_DEFAULT_FORMAT
88 88 REVLOG_DEFAULT_VERSION
89 89 REVLOGV1_FLAGS
90 90 REVLOGV2_FLAGS
91 91 REVIDX_ISCENSORED
92 92 REVIDX_ELLIPSIS
93 93 REVIDX_EXTSTORED
94 94 REVIDX_DEFAULT_FLAGS
95 95 REVIDX_FLAGS_ORDER
96 96 REVIDX_KNOWN_FLAGS
97 97 REVIDX_RAWTEXT_CHANGING_FLAGS
98 98
99 99 parsers = policy.importmod(r'parsers')
100 100 rustancestor = policy.importrust(r'ancestor')
101 101 rustdagop = policy.importrust(r'dagop')
102 102
103 103 # Aliased for performance.
104 104 _zlibdecompress = zlib.decompress
105 105
106 106 # max size of revlog with inline data
107 107 _maxinline = 131072
108 108 _chunksize = 1048576
109 109
110 110 # Store flag processors (cf. 'addflagprocessor()' to register)
111 111 _flagprocessors = {
112 112 REVIDX_ISCENSORED: None,
113 113 }
114 114
115 115 # Flag processors for REVIDX_ELLIPSIS.
116 116 def ellipsisreadprocessor(rl, text):
117 117 return text, False
118 118
119 119 def ellipsiswriteprocessor(rl, text):
120 120 return text, False
121 121
122 122 def ellipsisrawprocessor(rl, text):
123 123 return False
124 124
125 125 ellipsisprocessor = (
126 126 ellipsisreadprocessor,
127 127 ellipsiswriteprocessor,
128 128 ellipsisrawprocessor,
129 129 )
130 130
131 131 def addflagprocessor(flag, processor):
132 132 """Register a flag processor on a revision data flag.
133 133
134 134 Invariant:
135 135 - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER,
136 136 and REVIDX_RAWTEXT_CHANGING_FLAGS if they can alter rawtext.
137 137 - Only one flag processor can be registered on a specific flag.
138 138 - flagprocessors must be 3-tuples of functions (read, write, raw) with the
139 139 following signatures:
140 140 - (read) f(self, rawtext) -> text, bool
141 141 - (write) f(self, text) -> rawtext, bool
142 142 - (raw) f(self, rawtext) -> bool
143 143 "text" is presented to the user. "rawtext" is stored in revlog data, not
144 144 directly visible to the user.
145 145 The boolean returned by these transforms is used to determine whether
146 146 the returned text can be used for hash integrity checking. For example,
147 147 if "write" returns False, then "text" is used to generate the hash. If
148 148 "write" returns True, that basically means the "rawtext" returned by "write"
149 149 should be used to generate the hash. Usually, "write" and "read" return
150 150 different booleans, and "raw" returns the same boolean as "write".
151 151
152 152 Note: The 'raw' transform is used for changegroup generation and in some
153 153 debug commands. In this case the transform only indicates whether the
154 154 contents can be used for hash integrity checks.
155 155 """
156 156 _insertflagprocessor(flag, processor, _flagprocessors)
157 157
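To make the 3-tuple contract concrete, here is a hedged sketch of a no-op processor for a hypothetical `REVIDX_MYFLAG` (not a real flag; a real one must also appear in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER):

    def _myread(self, rawtext):
        return rawtext, False      # returned text is not hash-checkable
    def _mywrite(self, text):
        return text, False         # False: the hash is computed from "text"
    def _myraw(self, rawtext):
        return False               # rawtext unusable for hash checks as-is
    # addflagprocessor(REVIDX_MYFLAG, (_myread, _mywrite, _myraw))
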
158 158 def _insertflagprocessor(flag, processor, flagprocessors):
159 159 if not flag & REVIDX_KNOWN_FLAGS:
160 160 msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
161 161 raise error.ProgrammingError(msg)
162 162 if flag not in REVIDX_FLAGS_ORDER:
163 163 msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
164 164 raise error.ProgrammingError(msg)
165 165 if flag in flagprocessors:
166 166 msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
167 167 raise error.Abort(msg)
168 168 flagprocessors[flag] = processor
169 169
170 170 def getoffset(q):
171 171 return int(q >> 16)
172 172
173 173 def gettype(q):
174 174 return int(q & 0xFFFF)
175 175
176 176 def offset_type(offset, type):
177 177 if (type & ~REVIDX_KNOWN_FLAGS) != 0:
178 178 raise ValueError('unknown revlog index flags')
179 179 return int(int(offset) << 16 | type)
180 180
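A small worked example of the bit layout handled by the three helpers above (values chosen arbitrarily):

    packed = offset_type(4096, 0)      # (4096 << 16) | 0 == 268435456
    assert getoffset(packed) == 4096   # upper bits: offset into the data file
    assert gettype(packed) == 0        # lower 16 bits: revision flags
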
181 181 @attr.s(slots=True, frozen=True)
182 182 class _revisioninfo(object):
183 183 """Information about a revision that allows building its fulltext
184 184 node: expected hash of the revision
185 185 p1, p2: parent revs of the revision
186 186 btext: built text cache consisting of a one-element list
187 187 cachedelta: (baserev, uncompressed_delta) or None
188 188 flags: flags associated to the revision storage
189 189
190 190 One of btext[0] or cachedelta must be set.
191 191 """
192 192 node = attr.ib()
193 193 p1 = attr.ib()
194 194 p2 = attr.ib()
195 195 btext = attr.ib()
196 196 textlen = attr.ib()
197 197 cachedelta = attr.ib()
198 198 flags = attr.ib()
199 199
200 200 @interfaceutil.implementer(repository.irevisiondelta)
201 201 @attr.s(slots=True)
202 202 class revlogrevisiondelta(object):
203 203 node = attr.ib()
204 204 p1node = attr.ib()
205 205 p2node = attr.ib()
206 206 basenode = attr.ib()
207 207 flags = attr.ib()
208 208 baserevisionsize = attr.ib()
209 209 revision = attr.ib()
210 210 delta = attr.ib()
211 211 linknode = attr.ib(default=None)
212 212
213 213 @interfaceutil.implementer(repository.iverifyproblem)
214 214 @attr.s(frozen=True)
215 215 class revlogproblem(object):
216 216 warning = attr.ib(default=None)
217 217 error = attr.ib(default=None)
218 218 node = attr.ib(default=None)
219 219
220 220 # index v0:
221 221 # 4 bytes: offset
222 222 # 4 bytes: compressed length
223 223 # 4 bytes: base rev
224 224 # 4 bytes: link rev
225 225 # 20 bytes: parent 1 nodeid
226 226 # 20 bytes: parent 2 nodeid
227 227 # 20 bytes: nodeid
228 228 indexformatv0 = struct.Struct(">4l20s20s20s")
229 229 indexformatv0_pack = indexformatv0.pack
230 230 indexformatv0_unpack = indexformatv0.unpack
231 231
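The v0 record described above is fixed-size; a quick sanity check of the layout (sketch, not part of the module):

    import struct
    # 4 * 4-byte ints + 3 * 20-byte nodeids == 76 bytes per index entry
    assert struct.calcsize(">4l20s20s20s") == 76
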
232 232 class revlogoldindex(list):
233 233 def __getitem__(self, i):
234 234 if i == -1:
235 235 return (0, 0, 0, -1, -1, -1, -1, nullid)
236 236 return list.__getitem__(self, i)
237 237
238 238 class revlogoldio(object):
239 239 def __init__(self):
240 240 self.size = indexformatv0.size
241 241
242 242 def parseindex(self, data, inline):
243 243 s = self.size
244 244 index = []
245 245 nodemap = {nullid: nullrev}
246 246 n = off = 0
247 247 l = len(data)
248 248 while off + s <= l:
249 249 cur = data[off:off + s]
250 250 off += s
251 251 e = indexformatv0_unpack(cur)
252 252 # transform to revlogv1 format
253 253 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
254 254 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
255 255 index.append(e2)
256 256 nodemap[e[6]] = n
257 257 n += 1
258 258
259 259 return revlogoldindex(index), nodemap, None
260 260
261 261 def packentry(self, entry, node, version, rev):
262 262 if gettype(entry[0]):
263 263 raise error.RevlogError(_('index entry flags need revlog '
264 264 'version 1'))
265 265 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
266 266 node(entry[5]), node(entry[6]), entry[7])
267 267 return indexformatv0_pack(*e2)
268 268
269 269 # index ng:
270 270 # 6 bytes: offset
271 271 # 2 bytes: flags
272 272 # 4 bytes: compressed length
273 273 # 4 bytes: uncompressed length
274 274 # 4 bytes: base rev
275 275 # 4 bytes: link rev
276 276 # 4 bytes: parent 1 rev
277 277 # 4 bytes: parent 2 rev
278 278 # 32 bytes: nodeid
279 279 indexformatng = struct.Struct(">Qiiiiii20s12x")
280 280 indexformatng_pack = indexformatng.pack
281 281 versionformat = struct.Struct(">I")
282 282 versionformat_pack = versionformat.pack
283 283 versionformat_unpack = versionformat.unpack
284 284
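Likewise the 'ng' layout above is padded to a round 64 bytes per entry; a quick check (sketch):

    import struct
    # 8 (offset|flags) + 6 * 4 (ints) + 20 (nodeid) + 12 (padding) == 64
    assert struct.calcsize(">Qiiiiii20s12x") == 64
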
285 285 # maximum value of the uncompressed-length field of indexformatng
286 286 # (2 gigs, stored as a 4-byte signed integer)
287 287 _maxentrysize = 0x7fffffff
288 288
289 289 class revlogio(object):
290 290 def __init__(self):
291 291 self.size = indexformatng.size
292 292
293 293 def parseindex(self, data, inline):
294 294 # call the C implementation to parse the index data
295 295 index, cache = parsers.parse_index2(data, inline)
296 296 return index, getattr(index, 'nodemap', None), cache
297 297
298 298 def packentry(self, entry, node, version, rev):
299 299 p = indexformatng_pack(*entry)
300 300 if rev == 0:
301 301 p = versionformat_pack(version) + p[4:]
302 302 return p
303 303
304 304 class revlog(object):
305 305 """
306 306 the underlying revision storage object
307 307
308 308 A revlog consists of two parts, an index and the revision data.
309 309
310 310 The index is a file with a fixed record size containing
311 311 information on each revision, including its nodeid (hash), the
312 312 nodeids of its parents, the position and offset of its data within
313 313 the data file, and the revision it's based on. Finally, each entry
314 314 contains a linkrev entry that can serve as a pointer to external
315 315 data.
316 316
317 317 The revision data itself is a linear collection of data chunks.
318 318 Each chunk represents a revision and is usually represented as a
319 319 delta against the previous chunk. To bound lookup time, runs of
320 320 deltas are limited to about 2 times the length of the original
321 321 version data. This makes retrieval of a version proportional to
322 322 its size, or O(1) relative to the number of revisions.
323 323
324 324 Both pieces of the revlog are written to in an append-only
325 325 fashion, which means we never need to rewrite a file to insert or
326 326 remove data, and can use some simple techniques to avoid the need
327 327 for locking while reading.
328 328
329 329 If checkambig, indexfile is opened with checkambig=True at
330 330 writing, to avoid file stat ambiguity.
331 331
332 332 If mmaplargeindex is True, and an mmapindexthreshold is set, the
333 333 index will be mmapped rather than read if it is larger than the
334 334 configured threshold.
335 335
336 336 If censorable is True, the revlog can have censored revisions.
337
338 If `upperboundcomp` is not None, this is the expected maximal gain from
339 compression for the data content.
337 340 """
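A hedged sketch of the new knob: a caller that knows its payload rarely compresses better than, say, 10x could pass that ratio, presumably letting later delta heuristics derive a cheap lower bound on compressed size (illustrative only; `opener` and `indexfile` are assumed in scope):

    rl = revlog(opener, indexfile, upperboundcomp=10)
    # e.g. a conservative lower bound for a compressed chunk:
    # compressed_size >= uncompressed_len // rl.upperboundcomp
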
338 341 def __init__(self, opener, indexfile, datafile=None, checkambig=False,
339 mmaplargeindex=False, censorable=False):
342 mmaplargeindex=False, censorable=False,
343 upperboundcomp=None):
340 344 """
341 345 create a revlog object
342 346
343 347 opener is a function that abstracts the file opening operation
344 348 and can be used to implement COW semantics or the like.
349
345 350 """
351 self.upperboundcomp = upperboundcomp
346 352 self.indexfile = indexfile
347 353 self.datafile = datafile or (indexfile[:-2] + ".d")
348 354 self.opener = opener
349 355 # When True, indexfile is opened with checkambig=True at writing, to
350 356 # avoid file stat ambiguity.
351 357 self._checkambig = checkambig
352 358 self._mmaplargeindex = mmaplargeindex
353 359 self._censorable = censorable
354 360 # 3-tuple of (node, rev, text) for a raw revision.
355 361 self._revisioncache = None
356 362 # Maps rev to chain base rev.
357 363 self._chainbasecache = util.lrucachedict(100)
358 364 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
359 365 self._chunkcache = (0, '')
360 366 # How much data to read and cache into the raw revlog data cache.
361 367 self._chunkcachesize = 65536
362 368 self._maxchainlen = None
363 369 self._deltabothparents = True
364 370 self.index = []
365 371 # Mapping of partial identifiers to full nodes.
366 372 self._pcache = {}
367 373 # Mapping of revision integer to full node.
368 374 self._nodecache = {nullid: nullrev}
369 375 self._nodepos = None
370 376 self._compengine = 'zlib'
371 377 self._compengineopts = {}
372 378 self._maxdeltachainspan = -1
373 379 self._withsparseread = False
374 380 self._sparserevlog = False
375 381 self._srdensitythreshold = 0.50
376 382 self._srmingapsize = 262144
377 383
378 384 # Make copy of flag processors so each revlog instance can support
379 385 # custom flags.
380 386 self._flagprocessors = dict(_flagprocessors)
381 387
382 388 # 2-tuple of file handles being used for active writing.
383 389 self._writinghandles = None
384 390
385 391 self._loadindex()
386 392
387 393 def _loadindex(self):
388 394 mmapindexthreshold = None
389 395 opts = getattr(self.opener, 'options', {}) or {}
390 396
391 397 if 'revlogv2' in opts:
392 398 newversionflags = REVLOGV2 | FLAG_INLINE_DATA
393 399 elif 'revlogv1' in opts:
394 400 newversionflags = REVLOGV1 | FLAG_INLINE_DATA
395 401 if 'generaldelta' in opts:
396 402 newversionflags |= FLAG_GENERALDELTA
397 403 elif getattr(self.opener, 'options', None) is not None:
398 404 # If options provided but no 'revlog*' found, the repository
399 405 # would have no 'requires' file in it, which means we have to
400 406 # stick to the old format.
401 407 newversionflags = REVLOGV0
402 408 else:
403 409 newversionflags = REVLOG_DEFAULT_VERSION
404 410
405 411 if 'chunkcachesize' in opts:
406 412 self._chunkcachesize = opts['chunkcachesize']
407 413 if 'maxchainlen' in opts:
408 414 self._maxchainlen = opts['maxchainlen']
409 415 if 'deltabothparents' in opts:
410 416 self._deltabothparents = opts['deltabothparents']
411 417 self._lazydelta = bool(opts.get('lazydelta', True))
412 418 self._lazydeltabase = False
413 419 if self._lazydelta:
414 420 self._lazydeltabase = bool(opts.get('lazydeltabase', False))
415 421 if 'compengine' in opts:
416 422 self._compengine = opts['compengine']
417 423 if 'zlib.level' in opts:
418 424 self._compengineopts['zlib.level'] = opts['zlib.level']
419 425 if 'zstd.level' in opts:
420 426 self._compengineopts['zstd.level'] = opts['zstd.level']
421 427 if 'maxdeltachainspan' in opts:
422 428 self._maxdeltachainspan = opts['maxdeltachainspan']
423 429 if self._mmaplargeindex and 'mmapindexthreshold' in opts:
424 430 mmapindexthreshold = opts['mmapindexthreshold']
425 431 self._sparserevlog = bool(opts.get('sparse-revlog', False))
426 432 withsparseread = bool(opts.get('with-sparse-read', False))
427 433 # sparse-revlog forces sparse-read
428 434 self._withsparseread = self._sparserevlog or withsparseread
429 435 if 'sparse-read-density-threshold' in opts:
430 436 self._srdensitythreshold = opts['sparse-read-density-threshold']
431 437 if 'sparse-read-min-gap-size' in opts:
432 438 self._srmingapsize = opts['sparse-read-min-gap-size']
433 439 if opts.get('enableellipsis'):
434 440 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
435 441
436 442 # revlog v0 doesn't have flag processors
437 443 for flag, processor in opts.get(b'flagprocessors', {}).iteritems():
438 444 _insertflagprocessor(flag, processor, self._flagprocessors)
439 445
440 446 if self._chunkcachesize <= 0:
441 447 raise error.RevlogError(_('revlog chunk cache size %r is not '
442 448 'greater than 0') % self._chunkcachesize)
443 449 elif self._chunkcachesize & (self._chunkcachesize - 1):
444 450 raise error.RevlogError(_('revlog chunk cache size %r is not a '
445 451 'power of 2') % self._chunkcachesize)
446 452
447 453 indexdata = ''
448 454 self._initempty = True
449 455 try:
450 456 with self._indexfp() as f:
451 457 if (mmapindexthreshold is not None and
452 458 self.opener.fstat(f).st_size >= mmapindexthreshold):
453 459 # TODO: should .close() to release resources without
454 460 # relying on Python GC
455 461 indexdata = util.buffer(util.mmapread(f))
456 462 else:
457 463 indexdata = f.read()
458 464 if len(indexdata) > 0:
459 465 versionflags = versionformat_unpack(indexdata[:4])[0]
460 466 self._initempty = False
461 467 else:
462 468 versionflags = newversionflags
463 469 except IOError as inst:
464 470 if inst.errno != errno.ENOENT:
465 471 raise
466 472
467 473 versionflags = newversionflags
468 474
469 475 self.version = versionflags
470 476
471 477 flags = versionflags & ~0xFFFF
472 478 fmt = versionflags & 0xFFFF
473 479
474 480 if fmt == REVLOGV0:
475 481 if flags:
476 482 raise error.RevlogError(_('unknown flags (%#04x) in version %d '
477 483 'revlog %s') %
478 484 (flags >> 16, fmt, self.indexfile))
479 485
480 486 self._inline = False
481 487 self._generaldelta = False
482 488
483 489 elif fmt == REVLOGV1:
484 490 if flags & ~REVLOGV1_FLAGS:
485 491 raise error.RevlogError(_('unknown flags (%#04x) in version %d '
486 492 'revlog %s') %
487 493 (flags >> 16, fmt, self.indexfile))
488 494
489 495 self._inline = versionflags & FLAG_INLINE_DATA
490 496 self._generaldelta = versionflags & FLAG_GENERALDELTA
491 497
492 498 elif fmt == REVLOGV2:
493 499 if flags & ~REVLOGV2_FLAGS:
494 500 raise error.RevlogError(_('unknown flags (%#04x) in version %d '
495 501 'revlog %s') %
496 502 (flags >> 16, fmt, self.indexfile))
497 503
498 504 self._inline = versionflags & FLAG_INLINE_DATA
499 505 # generaldelta implied by version 2 revlogs.
500 506 self._generaldelta = True
501 507
502 508 else:
503 509 raise error.RevlogError(_('unknown version (%d) in revlog %s') %
504 510 (fmt, self.indexfile))
505 511 # sparse-revlog can't be on without general-delta (issue6056)
506 512 if not self._generaldelta:
507 513 self._sparserevlog = False
508 514
509 515 self._storedeltachains = True
510 516
511 517 self._io = revlogio()
512 518 if self.version == REVLOGV0:
513 519 self._io = revlogoldio()
514 520 try:
515 521 d = self._io.parseindex(indexdata, self._inline)
516 522 except (ValueError, IndexError):
517 523 raise error.RevlogError(_("index %s is corrupted") %
518 524 self.indexfile)
519 525 self.index, nodemap, self._chunkcache = d
520 526 if nodemap is not None:
521 527 self.nodemap = self._nodecache = nodemap
522 528 if not self._chunkcache:
523 529 self._chunkclear()
524 530 # revnum -> (chain-length, sum-delta-length)
525 531 self._chaininfocache = {}
526 532 # revlog header -> revlog compressor
527 533 self._decompressors = {}
528 534
529 535 @util.propertycache
530 536 def _compressor(self):
531 537 engine = util.compengines[self._compengine]
532 538 return engine.revlogcompressor(self._compengineopts)
533 539
534 540 def _indexfp(self, mode='r'):
535 541 """file object for the revlog's index file"""
536 542 args = {r'mode': mode}
537 543 if mode != 'r':
538 544 args[r'checkambig'] = self._checkambig
539 545 if mode == 'w':
540 546 args[r'atomictemp'] = True
541 547 return self.opener(self.indexfile, **args)
542 548
543 549 def _datafp(self, mode='r'):
544 550 """file object for the revlog's data file"""
545 551 return self.opener(self.datafile, mode=mode)
546 552
547 553 @contextlib.contextmanager
548 554 def _datareadfp(self, existingfp=None):
549 555 """file object suitable to read data"""
550 556 # Use explicit file handle, if given.
551 557 if existingfp is not None:
552 558 yield existingfp
553 559
554 560 # Use a file handle being actively used for writes, if available.
555 561 # There is some danger to doing this because reads will seek the
556 562 # file. However, _writeentry() performs a SEEK_END before all writes,
557 563 # so we should be safe.
558 564 elif self._writinghandles:
559 565 if self._inline:
560 566 yield self._writinghandles[0]
561 567 else:
562 568 yield self._writinghandles[1]
563 569
564 570 # Otherwise open a new file handle.
565 571 else:
566 572 if self._inline:
567 573 func = self._indexfp
568 574 else:
569 575 func = self._datafp
570 576 with func() as fp:
571 577 yield fp
572 578
573 579 def tip(self):
574 580 return self.node(len(self.index) - 1)
575 581 def __contains__(self, rev):
576 582 return 0 <= rev < len(self)
577 583 def __len__(self):
578 584 return len(self.index)
579 585 def __iter__(self):
580 586 return iter(pycompat.xrange(len(self)))
581 587 def revs(self, start=0, stop=None):
582 588 """iterate over all rev in this revlog (from start to stop)"""
583 589 return storageutil.iterrevs(len(self), start=start, stop=stop)
584 590
585 591 @util.propertycache
586 592 def nodemap(self):
587 593 if self.index:
588 594 # populate mapping down to the initial node
589 595 node0 = self.index[0][7] # get around changelog filtering
590 596 self.rev(node0)
591 597 return self._nodecache
592 598
593 599 def hasnode(self, node):
594 600 try:
595 601 self.rev(node)
596 602 return True
597 603 except KeyError:
598 604 return False
599 605
600 606 def candelta(self, baserev, rev):
601 607 """whether two revisions (baserev, rev) can be delta-ed or not"""
602 608 # Disable delta if either rev requires a content-changing flag
603 609 # processor (ex. LFS). This is because such flag processor can alter
604 610 # the rawtext content that the delta will be based on, and two clients
605 611 # could have a same revlog node with different flags (i.e. different
606 612 # rawtext contents) and the delta could be incompatible.
607 613 if ((self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS)
608 614 or (self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS)):
609 615 return False
610 616 return True
611 617
612 618 def clearcaches(self):
613 619 self._revisioncache = None
614 620 self._chainbasecache.clear()
615 621 self._chunkcache = (0, '')
616 622 self._pcache = {}
617 623
618 624 try:
619 625 # If we are using the native C version, we are in the fun case
620 626 # where self.index, self.nodemap and self._nodecache are the same
621 627 # object.
622 628 self._nodecache.clearcaches()
623 629 except AttributeError:
624 630 self._nodecache = {nullid: nullrev}
625 631 self._nodepos = None
626 632
627 633 def rev(self, node):
628 634 try:
629 635 return self._nodecache[node]
630 636 except TypeError:
631 637 raise
632 638 except error.RevlogError:
633 639 # parsers.c radix tree lookup failed
634 640 if node == wdirid or node in wdirfilenodeids:
635 641 raise error.WdirUnsupported
636 642 raise error.LookupError(node, self.indexfile, _('no node'))
637 643 except KeyError:
638 644 # pure python cache lookup failed
639 645 n = self._nodecache
640 646 i = self.index
641 647 p = self._nodepos
642 648 if p is None:
643 649 p = len(i) - 1
644 650 else:
645 651 assert p < len(i)
646 652 for r in pycompat.xrange(p, -1, -1):
647 653 v = i[r][7]
648 654 n[v] = r
649 655 if v == node:
650 656 self._nodepos = r - 1
651 657 return r
652 658 if node == wdirid or node in wdirfilenodeids:
653 659 raise error.WdirUnsupported
654 660 raise error.LookupError(node, self.indexfile, _('no node'))
655 661
656 662 # Accessors for index entries.
657 663
658 664 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
659 665 # are flags.
660 666 def start(self, rev):
661 667 return int(self.index[rev][0] >> 16)
662 668
663 669 def flags(self, rev):
664 670 return self.index[rev][0] & 0xFFFF
665 671
666 672 def length(self, rev):
667 673 return self.index[rev][1]
668 674
669 675 def rawsize(self, rev):
670 676 """return the length of the uncompressed text for a given revision"""
671 677 l = self.index[rev][2]
672 678 if l >= 0:
673 679 return l
674 680
675 681 t = self.revision(rev, raw=True)
676 682 return len(t)
677 683
678 684 def size(self, rev):
679 685 """length of non-raw text (processed by a "read" flag processor)"""
680 686 # fast path: if no "read" flag processor could change the content,
681 687 # size is rawsize. note: ELLIPSIS is known to not change the content.
682 688 flags = self.flags(rev)
683 689 if flags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
684 690 return self.rawsize(rev)
685 691
686 692 return len(self.revision(rev, raw=False))
687 693
688 694 def chainbase(self, rev):
689 695 base = self._chainbasecache.get(rev)
690 696 if base is not None:
691 697 return base
692 698
693 699 index = self.index
694 700 iterrev = rev
695 701 base = index[iterrev][3]
696 702 while base != iterrev:
697 703 iterrev = base
698 704 base = index[iterrev][3]
699 705
700 706 self._chainbasecache[rev] = base
701 707 return base
702 708
703 709 def linkrev(self, rev):
704 710 return self.index[rev][4]
705 711
706 712 def parentrevs(self, rev):
707 713 try:
708 714 entry = self.index[rev]
709 715 except IndexError:
710 716 if rev == wdirrev:
711 717 raise error.WdirUnsupported
712 718 raise
713 719
714 720 return entry[5], entry[6]
715 721
716 722 # fast parentrevs(rev) where rev isn't filtered
717 723 _uncheckedparentrevs = parentrevs
718 724
719 725 def node(self, rev):
720 726 try:
721 727 return self.index[rev][7]
722 728 except IndexError:
723 729 if rev == wdirrev:
724 730 raise error.WdirUnsupported
725 731 raise
726 732
727 733 # Derived from index values.
728 734
729 735 def end(self, rev):
730 736 return self.start(rev) + self.length(rev)
731 737
732 738 def parents(self, node):
733 739 i = self.index
734 740 d = i[self.rev(node)]
735 741 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
736 742
737 743 def chainlen(self, rev):
738 744 return self._chaininfo(rev)[0]
739 745
740 746 def _chaininfo(self, rev):
741 747 chaininfocache = self._chaininfocache
742 748 if rev in chaininfocache:
743 749 return chaininfocache[rev]
744 750 index = self.index
745 751 generaldelta = self._generaldelta
746 752 iterrev = rev
747 753 e = index[iterrev]
748 754 clen = 0
749 755 compresseddeltalen = 0
750 756 while iterrev != e[3]:
751 757 clen += 1
752 758 compresseddeltalen += e[1]
753 759 if generaldelta:
754 760 iterrev = e[3]
755 761 else:
756 762 iterrev -= 1
757 763 if iterrev in chaininfocache:
758 764 t = chaininfocache[iterrev]
759 765 clen += t[0]
760 766 compresseddeltalen += t[1]
761 767 break
762 768 e = index[iterrev]
763 769 else:
764 770 # Add text length of base since decompressing that also takes
765 771 # work. For cache hits the length is already included.
766 772 compresseddeltalen += e[1]
767 773 r = (clen, compresseddeltalen)
768 774 chaininfocache[rev] = r
769 775 return r
770 776
771 777 def _deltachain(self, rev, stoprev=None):
772 778 """Obtain the delta chain for a revision.
773 779
774 780 ``stoprev`` specifies a revision to stop at. If not specified, we
775 781 stop at the base of the chain.
776 782
777 783 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
778 784 revs in ascending order and ``stopped`` is a bool indicating whether
779 785 ``stoprev`` was hit.
780 786 """
781 787 # Try C implementation.
782 788 try:
783 789 return self.index.deltachain(rev, stoprev, self._generaldelta)
784 790 except AttributeError:
785 791 pass
786 792
787 793 chain = []
788 794
789 795 # Alias to prevent attribute lookup in tight loop.
790 796 index = self.index
791 797 generaldelta = self._generaldelta
792 798
793 799 iterrev = rev
794 800 e = index[iterrev]
795 801 while iterrev != e[3] and iterrev != stoprev:
796 802 chain.append(iterrev)
797 803 if generaldelta:
798 804 iterrev = e[3]
799 805 else:
800 806 iterrev -= 1
801 807 e = index[iterrev]
802 808
803 809 if iterrev == stoprev:
804 810 stopped = True
805 811 else:
806 812 chain.append(iterrev)
807 813 stopped = False
808 814
809 815 chain.reverse()
810 816 return chain, stopped
811 817
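A usage sketch for the walk above (assuming `rl` is a revlog instance; the chain values are illustrative):

    chain, stopped = rl._deltachain(9)
    # chain is base-first, e.g. [2, 5, 9]: rev 9 deltas against 5, which
    # deltas against the full text stored at rev 2; `stopped` is True only
    # when the walk ended at an explicit `stoprev` instead of a chain base.
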
812 818 def ancestors(self, revs, stoprev=0, inclusive=False):
813 819 """Generate the ancestors of 'revs' in reverse revision order.
814 820 Does not generate revs lower than stoprev.
815 821
816 822 See the documentation for ancestor.lazyancestors for more details."""
817 823
818 824 # first, make sure start revisions aren't filtered
819 825 revs = list(revs)
820 826 checkrev = self.node
821 827 for r in revs:
822 828 checkrev(r)
823 829 # and we're sure ancestors aren't filtered as well
824 830
825 831 if rustancestor is not None:
826 832 lazyancestors = rustancestor.LazyAncestors
827 833 arg = self.index
828 834 elif util.safehasattr(parsers, 'rustlazyancestors'):
829 835 lazyancestors = ancestor.rustlazyancestors
830 836 arg = self.index
831 837 else:
832 838 lazyancestors = ancestor.lazyancestors
833 839 arg = self._uncheckedparentrevs
834 840 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
835 841
836 842 def descendants(self, revs):
837 843 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
838 844
839 845 def findcommonmissing(self, common=None, heads=None):
840 846 """Return a tuple of the ancestors of common and the ancestors of heads
841 847 that are not ancestors of common. In revset terminology, we return the
842 848 tuple:
843 849
844 850 ::common, (::heads) - (::common)
845 851
846 852 The list is sorted by revision number, meaning it is
847 853 topologically sorted.
848 854
849 855 'heads' and 'common' are both lists of node IDs. If heads is
850 856 not supplied, uses all of the revlog's heads. If common is not
851 857 supplied, uses nullid."""
852 858 if common is None:
853 859 common = [nullid]
854 860 if heads is None:
855 861 heads = self.heads()
856 862
857 863 common = [self.rev(n) for n in common]
858 864 heads = [self.rev(n) for n in heads]
859 865
860 866 # we want the ancestors, but inclusive
861 867 class lazyset(object):
862 868 def __init__(self, lazyvalues):
863 869 self.addedvalues = set()
864 870 self.lazyvalues = lazyvalues
865 871
866 872 def __contains__(self, value):
867 873 return value in self.addedvalues or value in self.lazyvalues
868 874
869 875 def __iter__(self):
870 876 added = self.addedvalues
871 877 for r in added:
872 878 yield r
873 879 for r in self.lazyvalues:
874 880 if r not in added:
875 881 yield r
876 882
877 883 def add(self, value):
878 884 self.addedvalues.add(value)
879 885
880 886 def update(self, values):
881 887 self.addedvalues.update(values)
882 888
883 889 has = lazyset(self.ancestors(common))
884 890 has.add(nullrev)
885 891 has.update(common)
886 892
887 893 # take all ancestors from heads that aren't in has
888 894 missing = set()
889 895 visit = collections.deque(r for r in heads if r not in has)
890 896 while visit:
891 897 r = visit.popleft()
892 898 if r in missing:
893 899 continue
894 900 else:
895 901 missing.add(r)
896 902 for p in self.parentrevs(r):
897 903 if p not in has:
898 904 visit.append(p)
899 905 missing = list(missing)
900 906 missing.sort()
901 907 return has, [self.node(miss) for miss in missing]
902 908
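A usage sketch of the return value described above (`c`, `h1`, `h2` are assumed node IDs):

    has, missing = rl.findcommonmissing(common=[c], heads=[h1, h2])
    # `has` answers membership tests lazily (is a rev an ancestor of common?);
    # `missing` is a topologically sorted node list: (::heads) - (::common)
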
903 909 def incrementalmissingrevs(self, common=None):
904 910 """Return an object that can be used to incrementally compute the
905 911 revision numbers of the ancestors of arbitrary sets that are not
906 912 ancestors of common. This is an ancestor.incrementalmissingancestors
907 913 object.
908 914
909 915 'common' is a list of revision numbers. If common is not supplied, uses
910 916 nullrev.
911 917 """
912 918 if common is None:
913 919 common = [nullrev]
914 920
915 921 if rustancestor is not None:
916 922 return rustancestor.MissingAncestors(self.index, common)
917 923 return ancestor.incrementalmissingancestors(self.parentrevs, common)
918 924
919 925 def findmissingrevs(self, common=None, heads=None):
920 926 """Return the revision numbers of the ancestors of heads that
921 927 are not ancestors of common.
922 928
923 929 More specifically, return a list of revision numbers corresponding to
924 930 nodes N such that every N satisfies the following constraints:
925 931
926 932 1. N is an ancestor of some node in 'heads'
927 933 2. N is not an ancestor of any node in 'common'
928 934
929 935 The list is sorted by revision number, meaning it is
930 936 topologically sorted.
931 937
932 938 'heads' and 'common' are both lists of revision numbers. If heads is
933 939 not supplied, uses all of the revlog's heads. If common is not
934 940 supplied, uses nullid."""
935 941 if common is None:
936 942 common = [nullrev]
937 943 if heads is None:
938 944 heads = self.headrevs()
939 945
940 946 inc = self.incrementalmissingrevs(common=common)
941 947 return inc.missingancestors(heads)
942 948
943 949 def findmissing(self, common=None, heads=None):
944 950 """Return the ancestors of heads that are not ancestors of common.
945 951
946 952 More specifically, return a list of nodes N such that every N
947 953 satisfies the following constraints:
948 954
949 955 1. N is an ancestor of some node in 'heads'
950 956 2. N is not an ancestor of any node in 'common'
951 957
952 958 The list is sorted by revision number, meaning it is
953 959 topologically sorted.
954 960
955 961 'heads' and 'common' are both lists of node IDs. If heads is
956 962 not supplied, uses all of the revlog's heads. If common is not
957 963 supplied, uses nullid."""
958 964 if common is None:
959 965 common = [nullid]
960 966 if heads is None:
961 967 heads = self.heads()
962 968
963 969 common = [self.rev(n) for n in common]
964 970 heads = [self.rev(n) for n in heads]
965 971
966 972 inc = self.incrementalmissingrevs(common=common)
967 973 return [self.node(r) for r in inc.missingancestors(heads)]
968 974
969 975 def nodesbetween(self, roots=None, heads=None):
970 976 """Return a topological path from 'roots' to 'heads'.
971 977
972 978 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
973 979 topologically sorted list of all nodes N that satisfy both of
974 980 these constraints:
975 981
976 982 1. N is a descendant of some node in 'roots'
977 983 2. N is an ancestor of some node in 'heads'
978 984
979 985 Every node is considered to be both a descendant and an ancestor
980 986 of itself, so every reachable node in 'roots' and 'heads' will be
981 987 included in 'nodes'.
982 988
983 989 'outroots' is the list of reachable nodes in 'roots', i.e., the
984 990 subset of 'roots' that is returned in 'nodes'. Likewise,
985 991 'outheads' is the subset of 'heads' that is also in 'nodes'.
986 992
987 993 'roots' and 'heads' are both lists of node IDs. If 'roots' is
988 994 unspecified, uses nullid as the only root. If 'heads' is
989 995 unspecified, uses list of all of the revlog's heads."""
990 996 nonodes = ([], [], [])
991 997 if roots is not None:
992 998 roots = list(roots)
993 999 if not roots:
994 1000 return nonodes
995 1001 lowestrev = min([self.rev(n) for n in roots])
996 1002 else:
997 1003 roots = [nullid] # Everybody's a descendant of nullid
998 1004 lowestrev = nullrev
999 1005 if (lowestrev == nullrev) and (heads is None):
1000 1006 # We want _all_ the nodes!
1001 1007 return ([self.node(r) for r in self], [nullid], list(self.heads()))
1002 1008 if heads is None:
1003 1009 # All nodes are ancestors, so the latest ancestor is the last
1004 1010 # node.
1005 1011 highestrev = len(self) - 1
1006 1012 # Set ancestors to None to signal that every node is an ancestor.
1007 1013 ancestors = None
1008 1014 # Set heads to an empty dictionary for later discovery of heads
1009 1015 heads = {}
1010 1016 else:
1011 1017 heads = list(heads)
1012 1018 if not heads:
1013 1019 return nonodes
1014 1020 ancestors = set()
1015 1021 # Turn heads into a dictionary so we can remove 'fake' heads.
1016 1022 # Also, later we will be using it to filter out the heads we can't
1017 1023 # find from roots.
1018 1024 heads = dict.fromkeys(heads, False)
1019 1025 # Start at the top and keep marking parents until we're done.
1020 1026 nodestotag = set(heads)
1021 1027 # Remember where the top was so we can use it as a limit later.
1022 1028 highestrev = max([self.rev(n) for n in nodestotag])
1023 1029 while nodestotag:
1024 1030 # grab a node to tag
1025 1031 n = nodestotag.pop()
1026 1032 # Never tag nullid
1027 1033 if n == nullid:
1028 1034 continue
1029 1035 # A node's revision number represents its place in a
1030 1036 # topologically sorted list of nodes.
1031 1037 r = self.rev(n)
1032 1038 if r >= lowestrev:
1033 1039 if n not in ancestors:
1034 1040 # If we are possibly a descendant of one of the roots
1035 1041 # and we haven't already been marked as an ancestor
1036 1042 ancestors.add(n) # Mark as ancestor
1037 1043 # Add non-nullid parents to list of nodes to tag.
1038 1044 nodestotag.update([p for p in self.parents(n) if
1039 1045 p != nullid])
1040 1046 elif n in heads: # We've seen it before, is it a fake head?
1041 1047 # So it is; real heads should not be the ancestors of
1042 1048 # any other heads.
1043 1049 heads.pop(n)
1044 1050 if not ancestors:
1045 1051 return nonodes
1046 1052 # Now that we have our set of ancestors, we want to remove any
1047 1053 # roots that are not ancestors.
1048 1054
1049 1055 # If one of the roots was nullid, everything is included anyway.
1050 1056 if lowestrev > nullrev:
1051 1057 # But, since we weren't, let's recompute the lowest rev to not
1052 1058 # include roots that aren't ancestors.
1053 1059
1054 1060 # Filter out roots that aren't ancestors of heads
1055 1061 roots = [root for root in roots if root in ancestors]
1056 1062 # Recompute the lowest revision
1057 1063 if roots:
1058 1064 lowestrev = min([self.rev(root) for root in roots])
1059 1065 else:
1060 1066 # No more roots? Return empty list
1061 1067 return nonodes
1062 1068 else:
1063 1069 # We are descending from nullid, and don't need to care about
1064 1070 # any other roots.
1065 1071 lowestrev = nullrev
1066 1072 roots = [nullid]
1067 1073 # Transform our roots list into a set.
1068 1074 descendants = set(roots)
1069 1075 # Also, keep the original roots so we can filter out roots that aren't
1070 1076 # 'real' roots (i.e. are descended from other roots).
1071 1077 roots = descendants.copy()
1072 1078 # Our topologically sorted list of output nodes.
1073 1079 orderedout = []
1074 1080 # Don't start at nullid since we don't want nullid in our output list,
1075 1081 # and if nullid shows up in descendants, empty parents will look like
1076 1082 # they're descendants.
1077 1083 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1078 1084 n = self.node(r)
1079 1085 isdescendant = False
1080 1086 if lowestrev == nullrev: # Everybody is a descendant of nullid
1081 1087 isdescendant = True
1082 1088 elif n in descendants:
1083 1089 # n is already a descendant
1084 1090 isdescendant = True
1085 1091 # This check only needs to be done here because all the roots
1086 1092 # will start being marked as descendants before the loop.
1087 1093 if n in roots:
1088 1094 # If n was a root, check if it's a 'real' root.
1089 1095 p = tuple(self.parents(n))
1090 1096 # If any of its parents are descendants, it's not a root.
1091 1097 if (p[0] in descendants) or (p[1] in descendants):
1092 1098 roots.remove(n)
1093 1099 else:
1094 1100 p = tuple(self.parents(n))
1095 1101 # A node is a descendant if either of its parents are
1096 1102 # descendants. (We seeded the descendants set with the roots
1097 1103 # up there, remember?)
1098 1104 if (p[0] in descendants) or (p[1] in descendants):
1099 1105 descendants.add(n)
1100 1106 isdescendant = True
1101 1107 if isdescendant and ((ancestors is None) or (n in ancestors)):
1102 1108 # Only include nodes that are both descendants and ancestors.
1103 1109 orderedout.append(n)
1104 1110 if (ancestors is not None) and (n in heads):
1105 1111 # We're trying to figure out which heads are reachable
1106 1112 # from roots.
1107 1113 # Mark this head as having been reached
1108 1114 heads[n] = True
1109 1115 elif ancestors is None:
1110 1116 # Otherwise, we're trying to discover the heads.
1111 1117 # Assume this is a head because if it isn't, the next step
1112 1118 # will eventually remove it.
1113 1119 heads[n] = True
1114 1120 # But, obviously its parents aren't.
1115 1121 for p in self.parents(n):
1116 1122 heads.pop(p, None)
1117 1123 heads = [head for head, flag in heads.iteritems() if flag]
1118 1124 roots = list(roots)
1119 1125 assert orderedout
1120 1126 assert roots
1121 1127 assert heads
1122 1128 return (orderedout, roots, heads)
1123 1129
1124 1130 def headrevs(self, revs=None):
1125 1131 if revs is None:
1126 1132 try:
1127 1133 return self.index.headrevs()
1128 1134 except AttributeError:
1129 1135 return self._headrevs()
1130 1136 if rustdagop is not None:
1131 1137 return rustdagop.headrevs(self.index, revs)
1132 1138 return dagop.headrevs(revs, self._uncheckedparentrevs)
1133 1139
1134 1140 def computephases(self, roots):
1135 1141 return self.index.computephasesmapsets(roots)
1136 1142
1137 1143 def _headrevs(self):
1138 1144 count = len(self)
1139 1145 if not count:
1140 1146 return [nullrev]
1141 1147 # we won't iter over filtered rev so nobody is a head at start
1142 1148 ishead = [0] * (count + 1)
1143 1149 index = self.index
1144 1150 for r in self:
1145 1151 ishead[r] = 1 # I may be a head
1146 1152 e = index[r]
1147 1153 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1148 1154 return [r for r, val in enumerate(ishead) if val]
1149 1155
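The marking pass above, replayed on a tiny hand-built history (pure-Python sketch, independent of the real index format):

    parents = {0: (-1, -1), 1: (0, -1), 2: (1, -1), 3: (1, -1)}
    ishead = [1] * 4                 # everyone may be a head at start
    for r in range(4):
        for p in parents[r]:
            if p >= 0:
                ishead[p] = 0        # a rev with a child is not a head
    assert [r for r, v in enumerate(ishead) if v] == [2, 3]
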
1150 1156 def heads(self, start=None, stop=None):
1151 1157 """return the list of all nodes that have no children
1152 1158
1153 1159 if start is specified, only heads that are descendants of
1154 1160 start will be returned
1155 1161 if stop is specified, it will consider all the revs from stop
1156 1162 as if they had no children
1157 1163 """
1158 1164 if start is None and stop is None:
1159 1165 if not len(self):
1160 1166 return [nullid]
1161 1167 return [self.node(r) for r in self.headrevs()]
1162 1168
1163 1169 if start is None:
1164 1170 start = nullrev
1165 1171 else:
1166 1172 start = self.rev(start)
1167 1173
1168 1174 stoprevs = set(self.rev(n) for n in stop or [])
1169 1175
1170 1176 revs = dagop.headrevssubset(self.revs, self.parentrevs, startrev=start,
1171 1177 stoprevs=stoprevs)
1172 1178
1173 1179 return [self.node(rev) for rev in revs]
1174 1180
1175 1181 def children(self, node):
1176 1182 """find the children of a given node"""
1177 1183 c = []
1178 1184 p = self.rev(node)
1179 1185 for r in self.revs(start=p + 1):
1180 1186 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1181 1187 if prevs:
1182 1188 for pr in prevs:
1183 1189 if pr == p:
1184 1190 c.append(self.node(r))
1185 1191 elif p == nullrev:
1186 1192 c.append(self.node(r))
1187 1193 return c
1188 1194
1189 1195 def commonancestorsheads(self, a, b):
1190 1196 """calculate all the heads of the common ancestors of nodes a and b"""
1191 1197 a, b = self.rev(a), self.rev(b)
1192 1198 ancs = self._commonancestorsheads(a, b)
1193 1199 return pycompat.maplist(self.node, ancs)
1194 1200
1195 1201 def _commonancestorsheads(self, *revs):
1196 1202 """calculate all the heads of the common ancestors of revs"""
1197 1203 try:
1198 1204 ancs = self.index.commonancestorsheads(*revs)
1199 1205 except (AttributeError, OverflowError): # C implementation failed
1200 1206 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1201 1207 return ancs
1202 1208
1203 1209 def isancestor(self, a, b):
1204 1210 """return True if node a is an ancestor of node b
1205 1211
1206 1212 A revision is considered an ancestor of itself."""
1207 1213 a, b = self.rev(a), self.rev(b)
1208 1214 return self.isancestorrev(a, b)
1209 1215
1210 1216 def isancestorrev(self, a, b):
1211 1217 """return True if revision a is an ancestor of revision b
1212 1218
1213 1219 A revision is considered an ancestor of itself.
1214 1220
1215 1221 The implementation of this is trivial but the use of
1216 1222 reachableroots is not."""
1217 1223 if a == nullrev:
1218 1224 return True
1219 1225 elif a == b:
1220 1226 return True
1221 1227 elif a > b:
1222 1228 return False
1223 1229 return bool(self.reachableroots(a, [b], [a], includepath=False))
1224 1230
1225 1231 def reachableroots(self, minroot, heads, roots, includepath=False):
1226 1232 """return (heads(::<roots> and <roots>::<heads>))
1227 1233
1228 1234 If includepath is True, return (<roots>::<heads>)."""
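# Illustrative reading of the revset above (hypothetical values): with
# roots=[2] and heads=[5], includepath=True yields every revision in
# 2::5, i.e. descendants of 2 that are also ancestors of 5.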
1229 1235 try:
1230 1236 return self.index.reachableroots2(minroot, heads, roots,
1231 1237 includepath)
1232 1238 except AttributeError:
1233 1239 return dagop._reachablerootspure(self.parentrevs,
1234 1240 minroot, roots, heads, includepath)
1235 1241
1236 1242 def ancestor(self, a, b):
1237 1243 """calculate the "best" common ancestor of nodes a and b"""
1238 1244
1239 1245 a, b = self.rev(a), self.rev(b)
1240 1246 try:
1241 1247 ancs = self.index.ancestors(a, b)
1242 1248 except (AttributeError, OverflowError):
1243 1249 ancs = ancestor.ancestors(self.parentrevs, a, b)
1244 1250 if ancs:
1245 1251 # choose a consistent winner when there's a tie
1246 1252 return min(map(self.node, ancs))
1247 1253 return nullid
1248 1254
1249 1255 def _match(self, id):
1250 1256 if isinstance(id, int):
1251 1257 # rev
1252 1258 return self.node(id)
1253 1259 if len(id) == 20:
1254 1260 # possibly a binary node
1255 1261 # odds of a binary node being all hex in ASCII are 1 in 10**25
1256 1262 try:
1257 1263 node = id
1258 1264 self.rev(node) # quick search the index
1259 1265 return node
1260 1266 except error.LookupError:
1261 1267 pass # may be partial hex id
1262 1268 try:
1263 1269 # str(rev)
1264 1270 rev = int(id)
1265 1271 if "%d" % rev != id:
1266 1272 raise ValueError
1267 1273 if rev < 0:
1268 1274 rev = len(self) + rev
1269 1275 if rev < 0 or rev >= len(self):
1270 1276 raise ValueError
1271 1277 return self.node(rev)
1272 1278 except (ValueError, OverflowError):
1273 1279 pass
1274 1280 if len(id) == 40:
1275 1281 try:
1276 1282 # a full hex nodeid?
1277 1283 node = bin(id)
1278 1284 self.rev(node)
1279 1285 return node
1280 1286 except (TypeError, error.LookupError):
1281 1287 pass
1282 1288
1283 1289 def _partialmatch(self, id):
1284 1290 # we don't care about wdirfilenodeids as they should always be full hashes
1285 1291 maybewdir = wdirhex.startswith(id)
1286 1292 try:
1287 1293 partial = self.index.partialmatch(id)
1288 1294 if partial and self.hasnode(partial):
1289 1295 if maybewdir:
1290 1296 # single 'ff...' match in radix tree, ambiguous with wdir
1291 1297 raise error.RevlogError
1292 1298 return partial
1293 1299 if maybewdir:
1294 1300 # no 'ff...' match in radix tree, wdir identified
1295 1301 raise error.WdirUnsupported
1296 1302 return None
1297 1303 except error.RevlogError:
1298 1304 # parsers.c radix tree lookup gave multiple matches
1299 1305 # fast path: for unfiltered changelog, radix tree is accurate
1300 1306 if not getattr(self, 'filteredrevs', None):
1301 1307 raise error.AmbiguousPrefixLookupError(
1302 1308 id, self.indexfile, _('ambiguous identifier'))
1303 1309 # fall through to slow path that filters hidden revisions
1304 1310 except (AttributeError, ValueError):
1305 1311 # we are pure python, or key was too short to search radix tree
1306 1312 pass
1307 1313
1308 1314 if id in self._pcache:
1309 1315 return self._pcache[id]
1310 1316
1311 1317 if len(id) <= 40:
1312 1318 try:
1313 1319 # hex(node)[:...]
1314 1320 l = len(id) // 2 # grab an even number of digits
1315 1321 prefix = bin(id[:l * 2])
1316 1322 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1317 1323 nl = [n for n in nl if hex(n).startswith(id) and
1318 1324 self.hasnode(n)]
1319 1325 if nullhex.startswith(id):
1320 1326 nl.append(nullid)
1321 1327 if len(nl) > 0:
1322 1328 if len(nl) == 1 and not maybewdir:
1323 1329 self._pcache[id] = nl[0]
1324 1330 return nl[0]
1325 1331 raise error.AmbiguousPrefixLookupError(
1326 1332 id, self.indexfile, _('ambiguous identifier'))
1327 1333 if maybewdir:
1328 1334 raise error.WdirUnsupported
1329 1335 return None
1330 1336 except TypeError:
1331 1337 pass
1332 1338
1333 1339 def lookup(self, id):
1334 1340 """locate a node based on:
1335 1341 - revision number or str(revision number)
1336 1342 - nodeid or subset of hex nodeid
1337 1343 """
1338 1344 n = self._match(id)
1339 1345 if n is not None:
1340 1346 return n
1341 1347 n = self._partialmatch(id)
1342 1348 if n:
1343 1349 return n
1344 1350
1345 1351 raise error.LookupError(id, self.indexfile, _('no match found'))
1346 1352
1347 1353 def shortest(self, node, minlength=1):
1348 1354 """Find the shortest unambiguous prefix that matches node."""
1349 1355 def isvalid(prefix):
1350 1356 try:
1351 1357 node = self._partialmatch(prefix)
1352 1358 except error.AmbiguousPrefixLookupError:
1353 1359 return False
1354 1360 except error.WdirUnsupported:
1355 1361 # single 'ff...' match
1356 1362 return True
1357 1363 if node is None:
1358 1364 raise error.LookupError(node, self.indexfile, _('no node'))
1359 1365 return True
1360 1366
1361 1367 def maybewdir(prefix):
1362 1368 return all(c == 'f' for c in pycompat.iterbytestr(prefix))
1363 1369
1364 1370 hexnode = hex(node)
1365 1371
1366 1372 def disambiguate(hexnode, minlength):
1367 1373 """Disambiguate against wdirid."""
1368 1374 for length in range(minlength, 41):
1369 1375 prefix = hexnode[:length]
1370 1376 if not maybewdir(prefix):
1371 1377 return prefix
1372 1378
1373 1379 if not getattr(self, 'filteredrevs', None):
1374 1380 try:
1375 1381 length = max(self.index.shortest(node), minlength)
1376 1382 return disambiguate(hexnode, length)
1377 1383 except error.RevlogError:
1378 1384 if node != wdirid:
1379 1385 raise error.LookupError(node, self.indexfile, _('no node'))
1380 1386 except AttributeError:
1381 1387 # Fall through to pure code
1382 1388 pass
1383 1389
1384 1390 if node == wdirid:
1385 1391 for length in range(minlength, 41):
1386 1392 prefix = hexnode[:length]
1387 1393 if isvalid(prefix):
1388 1394 return prefix
1389 1395
1390 1396 for length in range(minlength, 41):
1391 1397 prefix = hexnode[:length]
1392 1398 if isvalid(prefix):
1393 1399 return disambiguate(hexnode, length)
1394 1400
1395 1401 def cmp(self, node, text):
1396 1402 """compare text with a given file revision
1397 1403
1398 1404 returns True if text is different from what is stored.
1399 1405 """
1400 1406 p1, p2 = self.parents(node)
1401 1407 return storageutil.hashrevisionsha1(text, p1, p2) != node
1402 1408
1403 1409 def _cachesegment(self, offset, data):
1404 1410 """Add a segment to the revlog cache.
1405 1411
1406 1412 Accepts an absolute offset and the data that is at that location.
1407 1413 """
1408 1414 o, d = self._chunkcache
1409 1415 # try to add to existing cache
1410 1416 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1411 1417 self._chunkcache = o, d + data
1412 1418 else:
1413 1419 self._chunkcache = offset, data
1414 1420
1415 1421 def _readsegment(self, offset, length, df=None):
1416 1422 """Load a segment of raw data from the revlog.
1417 1423
1418 1424 Accepts an absolute offset, length to read, and an optional existing
1419 1425 file handle to read from.
1420 1426
1421 1427 If an existing file handle is passed, it will be seeked and the
1422 1428 original seek position will NOT be restored.
1423 1429
1424 1430 Returns a str or buffer of raw byte data.
1425 1431
1426 1432 Raises if the requested number of bytes could not be read.
1427 1433 """
1428 1434 # Cache data both forward and backward around the requested
1429 1435 # data, in a fixed size window. This helps speed up operations
1430 1436 # involving reading the revlog backwards.
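# A worked example (illustrative): with offset=300, length=10 and
# cachesize=256, realoffset = 300 & ~255 = 256 and
# reallength = ((300 + 10 + 256) & ~255) - 256 = 256, so bytes
# [256, 512) are read and cached around the requested range.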
1431 1437 cachesize = self._chunkcachesize
1432 1438 realoffset = offset & ~(cachesize - 1)
1433 1439 reallength = (((offset + length + cachesize) & ~(cachesize - 1))
1434 1440 - realoffset)
1435 1441 with self._datareadfp(df) as df:
1436 1442 df.seek(realoffset)
1437 1443 d = df.read(reallength)
1438 1444
1439 1445 self._cachesegment(realoffset, d)
1440 1446 if offset != realoffset or reallength != length:
1441 1447 startoffset = offset - realoffset
1442 1448 if len(d) - startoffset < length:
1443 1449 raise error.RevlogError(
1444 1450 _('partial read of revlog %s; expected %d bytes from '
1445 1451 'offset %d, got %d') %
1446 1452 (self.indexfile if self._inline else self.datafile,
1447 1453 length, realoffset, len(d) - startoffset))
1448 1454
1449 1455 return util.buffer(d, startoffset, length)
1450 1456
1451 1457 if len(d) < length:
1452 1458 raise error.RevlogError(
1453 1459 _('partial read of revlog %s; expected %d bytes from offset '
1454 1460 '%d, got %d') %
1455 1461 (self.indexfile if self._inline else self.datafile,
1456 1462 length, offset, len(d)))
1457 1463
1458 1464 return d
1459 1465
1460 1466 def _getsegment(self, offset, length, df=None):
1461 1467 """Obtain a segment of raw data from the revlog.
1462 1468
1463 1469 Accepts an absolute offset, length of bytes to obtain, and an
1464 1470 optional file handle to the already-opened revlog. If the file
1465 1471 handle is used, its original seek position will not be preserved.
1466 1472
1467 1473 Requests for data may be returned from a cache.
1468 1474
1469 1475 Returns a str or a buffer instance of raw byte data.
1470 1476 """
1471 1477 o, d = self._chunkcache
1472 1478 l = len(d)
1473 1479
1474 1480 # is it in the cache?
1475 1481 cachestart = offset - o
1476 1482 cacheend = cachestart + length
1477 1483 if cachestart >= 0 and cacheend <= l:
1478 1484 if cachestart == 0 and cacheend == l:
1479 1485 return d # avoid a copy
1480 1486 return util.buffer(d, cachestart, cacheend - cachestart)
1481 1487
1482 1488 return self._readsegment(offset, length, df=df)
1483 1489
1484 1490 def _getsegmentforrevs(self, startrev, endrev, df=None):
1485 1491 """Obtain a segment of raw data corresponding to a range of revisions.
1486 1492
1487 1493 Accepts the start and end revisions and an optional already-open
1488 1494 file handle to be used for reading. If the file handle is used, its
1489 1495 seek position will not be preserved.
1490 1496
1491 1497 Requests for data may be satisfied by a cache.
1492 1498
1493 1499 Returns a 2-tuple of (offset, data) for the requested range of
1494 1500 revisions. Offset is the integer offset from the beginning of the
1495 1501 revlog and data is a str or buffer of the raw byte data.
1496 1502
1497 1503 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1498 1504 to determine where each revision's data begins and ends.
1499 1505 """
1500 1506 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1501 1507 # (functions are expensive).
1502 1508 index = self.index
1503 1509 istart = index[startrev]
1504 1510 start = int(istart[0] >> 16)
1505 1511 if startrev == endrev:
1506 1512 end = start + istart[1]
1507 1513 else:
1508 1514 iend = index[endrev]
1509 1515 end = int(iend[0] >> 16) + iend[1]
1510 1516
1511 1517 if self._inline:
1512 1518 start += (startrev + 1) * self._io.size
1513 1519 end += (endrev + 1) * self._io.size
1514 1520 length = end - start
1515 1521
1516 1522 return start, self._getsegment(start, length, df=df)
1517 1523
1518 1524 def _chunk(self, rev, df=None):
1519 1525 """Obtain a single decompressed chunk for a revision.
1520 1526
1521 1527 Accepts an integer revision and an optional already-open file handle
1522 1528 to be used for reading. If used, the seek position of the file will not
1523 1529 be preserved.
1524 1530
1525 1531 Returns a str holding uncompressed data for the requested revision.
1526 1532 """
1527 1533 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
1528 1534
1529 1535 def _chunks(self, revs, df=None, targetsize=None):
1530 1536 """Obtain decompressed chunks for the specified revisions.
1531 1537
1532 1538 Accepts an iterable of numeric revisions that are assumed to be in
1533 1539 ascending order. Also accepts an optional already-open file handle
1534 1540 to be used for reading. If used, the seek position of the file will
1535 1541 not be preserved.
1536 1542
1537 1543 This function is similar to calling ``self._chunk()`` multiple times,
1538 1544 but is faster.
1539 1545
1540 1546 Returns a list with decompressed data for each requested revision.
1541 1547 """
1542 1548 if not revs:
1543 1549 return []
1544 1550 start = self.start
1545 1551 length = self.length
1546 1552 inline = self._inline
1547 1553 iosize = self._io.size
1548 1554 buffer = util.buffer
1549 1555
1550 1556 l = []
1551 1557 ladd = l.append
1552 1558
1553 1559 if not self._withsparseread:
1554 1560 slicedchunks = (revs,)
1555 1561 else:
1556 1562 slicedchunks = deltautil.slicechunk(self, revs,
1557 1563 targetsize=targetsize)
1558 1564
1559 1565 for revschunk in slicedchunks:
1560 1566 firstrev = revschunk[0]
1561 1567 # Skip trailing revisions with empty diff
1562 1568 for lastrev in revschunk[::-1]:
1563 1569 if length(lastrev) != 0:
1564 1570 break
1565 1571
1566 1572 try:
1567 1573 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1568 1574 except OverflowError:
1569 1575 # issue4215 - we can't cache a run of chunks greater than
1570 1576 # 2G on Windows
1571 1577 return [self._chunk(rev, df=df) for rev in revschunk]
1572 1578
1573 1579 decomp = self.decompress
1574 1580 for rev in revschunk:
1575 1581 chunkstart = start(rev)
1576 1582 if inline:
1577 1583 chunkstart += (rev + 1) * iosize
1578 1584 chunklength = length(rev)
1579 1585 ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
1580 1586
1581 1587 return l
1582 1588
1583 1589 def _chunkclear(self):
1584 1590 """Clear the raw chunk cache."""
1585 1591 self._chunkcache = (0, '')
1586 1592
1587 1593 def deltaparent(self, rev):
1588 1594 """return deltaparent of the given revision"""
1589 1595 base = self.index[rev][3]
1590 1596 if base == rev:
1591 1597 return nullrev
1592 1598 elif self._generaldelta:
1593 1599 return base
1594 1600 else:
1595 1601 return rev - 1
1596 1602
1597 1603 def issnapshot(self, rev):
1598 1604 """tells whether rev is a snapshot
1599 1605 """
1600 1606 if not self._sparserevlog:
1601 1607 return self.deltaparent(rev) == nullrev
1602 1608 elif util.safehasattr(self.index, 'issnapshot'):
1603 1609 # directly assign the method to cache the testing and access
1604 1610 self.issnapshot = self.index.issnapshot
1605 1611 return self.issnapshot(rev)
1606 1612 if rev == nullrev:
1607 1613 return True
1608 1614 entry = self.index[rev]
1609 1615 base = entry[3]
1610 1616 if base == rev:
1611 1617 return True
1612 1618 if base == nullrev:
1613 1619 return True
1614 1620 p1 = entry[5]
1615 1621 p2 = entry[6]
1616 1622 if base == p1 or base == p2:
1617 1623 return False
1618 1624 return self.issnapshot(base)
1619 1625
1620 1626 def snapshotdepth(self, rev):
1621 1627 """number of snapshot in the chain before this one"""
1622 1628 if not self.issnapshot(rev):
1623 1629 raise error.ProgrammingError('revision %d not a snapshot' % rev)
1624 1630 return len(self._deltachain(rev)[0]) - 1
1625 1631
1626 1632 def revdiff(self, rev1, rev2):
1627 1633 """return or calculate a delta between two revisions
1628 1634
1629 1635 The delta calculated is in binary form and is intended to be written to
1630 1636 revlog data directly. So this function needs raw revision data.
1631 1637 """
1632 1638 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1633 1639 return bytes(self._chunk(rev2))
1634 1640
1635 1641 return mdiff.textdiff(self.revision(rev1, raw=True),
1636 1642 self.revision(rev2, raw=True))
1637 1643
1638 1644 def revision(self, nodeorrev, _df=None, raw=False):
1639 1645 """return an uncompressed revision of a given node or revision
1640 1646 number.
1641 1647
1642 1648 _df - an existing file handle to read from. (internal-only)
1643 1649 raw - an optional argument specifying if the revision data is to be
1644 1650 treated as raw data when applying flag transforms. 'raw' should be set
1645 1651 to True when generating changegroups or in debug commands.
1646 1652 """
1647 1653 if isinstance(nodeorrev, int):
1648 1654 rev = nodeorrev
1649 1655 node = self.node(rev)
1650 1656 else:
1651 1657 node = nodeorrev
1652 1658 rev = None
1653 1659
1654 1660 cachedrev = None
1655 1661 flags = None
1656 1662 rawtext = None
1657 1663 if node == nullid:
1658 1664 return ""
1659 1665 if self._revisioncache:
1660 1666 if self._revisioncache[0] == node:
1661 1667 # _cache only stores rawtext
1662 1668 if raw:
1663 1669 return self._revisioncache[2]
1664 1670 # duplicated, but good for perf
1665 1671 if rev is None:
1666 1672 rev = self.rev(node)
1667 1673 if flags is None:
1668 1674 flags = self.flags(rev)
1669 1675 # no extra flags set, no flag processor runs, text = rawtext
1670 1676 if flags == REVIDX_DEFAULT_FLAGS:
1671 1677 return self._revisioncache[2]
1672 1678 # rawtext is reusable. need to run flag processor
1673 1679 rawtext = self._revisioncache[2]
1674 1680
1675 1681 cachedrev = self._revisioncache[1]
1676 1682
1677 1683 # look up what we need to read
1678 1684 if rawtext is None:
1679 1685 if rev is None:
1680 1686 rev = self.rev(node)
1681 1687
1682 1688 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1683 1689 if stopped:
1684 1690 rawtext = self._revisioncache[2]
1685 1691
1686 1692 # drop cache to save memory
1687 1693 self._revisioncache = None
1688 1694
1689 1695 targetsize = None
1690 1696 rawsize = self.index[rev][2]
1691 1697 if 0 <= rawsize:
1692 1698 targetsize = 4 * rawsize
1693 1699
1694 1700 bins = self._chunks(chain, df=_df, targetsize=targetsize)
1695 1701 if rawtext is None:
1696 1702 rawtext = bytes(bins[0])
1697 1703 bins = bins[1:]
1698 1704
1699 1705 rawtext = mdiff.patches(rawtext, bins)
1700 1706 self._revisioncache = (node, rev, rawtext)
1701 1707
1702 1708 if flags is None:
1703 1709 if rev is None:
1704 1710 rev = self.rev(node)
1705 1711 flags = self.flags(rev)
1706 1712
1707 1713 text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
1708 1714 if validatehash:
1709 1715 self.checkhash(text, node, rev=rev)
1710 1716
1711 1717 return text
1712 1718
1713 1719 def hash(self, text, p1, p2):
1714 1720 """Compute a node hash.
1715 1721
1716 1722 Available as a function so that subclasses can replace the hash
1717 1723 as needed.
1718 1724 """
1719 1725 return storageutil.hashrevisionsha1(text, p1, p2)
1720 1726
1721 1727 def _processflags(self, text, flags, operation, raw=False):
1722 1728 """Inspect revision data flags and applies transforms defined by
1723 1729 registered flag processors.
1724 1730
1725 1731 ``text`` - the revision data to process
1726 1732 ``flags`` - the revision flags
1727 1733 ``operation`` - the operation being performed (read or write)
1728 1734 ``raw`` - an optional argument describing if the raw transform should be
1729 1735 applied.
1730 1736
1731 1737 This method processes the flags in the order (or reverse order if
1732 1738 ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
1733 1739 flag processors registered for present flags. The order of flags defined
1734 1740 in REVIDX_FLAGS_ORDER needs to be stable to allow non-commutativity.
1735 1741
1736 1742 Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
1737 1743 processed text and ``validatehash`` is a bool indicating whether the
1738 1744 returned text should be checked for hash integrity.
1739 1745
1740 1746 Note: If the ``raw`` argument is set, it has precedence over the
1741 1747 operation and will only update the value of ``validatehash``.
1742 1748 """
1743 1749 # fast path: no flag processors will run
1744 1750 if flags == 0:
1745 1751 return text, True
1746 1752 if operation not in ('read', 'write'):
1747 1753 raise error.ProgrammingError(_("invalid '%s' operation") %
1748 1754 operation)
1749 1755 # Check all flags are known.
1750 1756 if flags & ~REVIDX_KNOWN_FLAGS:
1751 1757 raise error.RevlogError(_("incompatible revision flag '%#x'") %
1752 1758 (flags & ~REVIDX_KNOWN_FLAGS))
1753 1759 validatehash = True
1754 1760 # Depending on the operation (read or write), the order might be
1755 1761 # reversed due to non-commutative transforms.
1756 1762 orderedflags = REVIDX_FLAGS_ORDER
1757 1763 if operation == 'write':
1758 1764 orderedflags = reversed(orderedflags)
1759 1765
1760 1766 for flag in orderedflags:
1761 1767 # If a flagprocessor has been registered for a known flag, apply the
1762 1768 # related operation transform and update result tuple.
1763 1769 if flag & flags:
1764 1770 vhash = True
1765 1771
1766 1772 if flag not in self._flagprocessors:
1767 1773 message = _("missing processor for flag '%#x'") % (flag)
1768 1774 raise error.RevlogError(message)
1769 1775
1770 1776 processor = self._flagprocessors[flag]
1771 1777 if processor is not None:
1772 1778 readtransform, writetransform, rawtransform = processor
1773 1779
1774 1780 if raw:
1775 1781 vhash = rawtransform(self, text)
1776 1782 elif operation == 'read':
1777 1783 text, vhash = readtransform(self, text)
1778 1784 else: # write operation
1779 1785 text, vhash = writetransform(self, text)
1780 1786 validatehash = validatehash and vhash
1781 1787
1782 1788 return text, validatehash
1783 1789
1784 1790 def checkhash(self, text, node, p1=None, p2=None, rev=None):
1785 1791 """Check node hash integrity.
1786 1792
1787 1793 Available as a function so that subclasses can extend hash mismatch
1788 1794 behaviors as needed.
1789 1795 """
1790 1796 try:
1791 1797 if p1 is None and p2 is None:
1792 1798 p1, p2 = self.parents(node)
1793 1799 if node != self.hash(text, p1, p2):
1794 1800 # Clear the revision cache on hash failure. The revision cache
1795 1801 # only stores the raw revision and clearing the cache does have
1796 1802 # the side-effect that we won't have a cache hit when the raw
1797 1803 # revision data is accessed. But this case should be rare and
1798 1804 # it is extra work to teach the cache about the hash
1799 1805 # verification state.
1800 1806 if self._revisioncache and self._revisioncache[0] == node:
1801 1807 self._revisioncache = None
1802 1808
1803 1809 revornode = rev
1804 1810 if revornode is None:
1805 1811 revornode = templatefilters.short(hex(node))
1806 1812 raise error.RevlogError(_("integrity check failed on %s:%s")
1807 1813 % (self.indexfile, pycompat.bytestr(revornode)))
1808 1814 except error.RevlogError:
1809 1815 if self._censorable and storageutil.iscensoredtext(text):
1810 1816 raise error.CensoredNodeError(self.indexfile, node, text)
1811 1817 raise
1812 1818
1813 1819 def _enforceinlinesize(self, tr, fp=None):
1814 1820 """Check if the revlog is too big for inline and convert if so.
1815 1821
1816 1822 This should be called after revisions are added to the revlog. If the
1817 1823 revlog has grown too large to be an inline revlog, it will convert it
1818 1824 to use multiple index and data files.
1819 1825 """
1820 1826 tiprev = len(self) - 1
1821 1827 if (not self._inline or
1822 1828 (self.start(tiprev) + self.length(tiprev)) < _maxinline):
1823 1829 return
1824 1830
1825 1831 trinfo = tr.find(self.indexfile)
1826 1832 if trinfo is None:
1827 1833 raise error.RevlogError(_("%s not found in the transaction")
1828 1834 % self.indexfile)
1829 1835
1830 1836 trindex = trinfo[2]
1831 1837 if trindex is not None:
1832 1838 dataoff = self.start(trindex)
1833 1839 else:
1834 1840 # revlog was stripped at start of transaction, use all leftover data
1835 1841 trindex = len(self) - 1
1836 1842 dataoff = self.end(tiprev)
1837 1843
1838 1844 tr.add(self.datafile, dataoff)
1839 1845
1840 1846 if fp:
1841 1847 fp.flush()
1842 1848 fp.close()
1843 1849 # We can't use the cached file handle after close(). So prevent
1844 1850 # its usage.
1845 1851 self._writinghandles = None
1846 1852
1847 1853 with self._indexfp('r') as ifh, self._datafp('w') as dfh:
1848 1854 for r in self:
1849 1855 dfh.write(self._getsegmentforrevs(r, r, df=ifh)[1])
1850 1856
1851 1857 with self._indexfp('w') as fp:
1852 1858 self.version &= ~FLAG_INLINE_DATA
1853 1859 self._inline = False
1854 1860 io = self._io
1855 1861 for i in self:
1856 1862 e = io.packentry(self.index[i], self.node, self.version, i)
1857 1863 fp.write(e)
1858 1864
1859 1865 # the temp file replaces the real index when we exit the context
1860 1866 # manager
1861 1867
1862 1868 tr.replace(self.indexfile, trindex * self._io.size)
1863 1869 self._chunkclear()
1864 1870
1865 1871 def _nodeduplicatecallback(self, transaction, node):
1866 1872 """called when trying to add a node already stored.
1867 1873 """
1868 1874
1869 1875 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
1870 1876 node=None, flags=REVIDX_DEFAULT_FLAGS, deltacomputer=None):
1871 1877 """add a revision to the log
1872 1878
1873 1879 text - the revision data to add
1874 1880 transaction - the transaction object used for rollback
1875 1881 link - the linkrev data to add
1876 1882 p1, p2 - the parent nodeids of the revision
1877 1883 cachedelta - an optional precomputed delta
1878 1884 node - nodeid of revision; typically node is not specified, and it is
1879 1885 computed by default as hash(text, p1, p2), however subclasses might
1880 1886 use different hashing method (and override checkhash() in such case)
1881 1887 flags - the known flags to set on the revision
1882 1888 deltacomputer - an optional deltacomputer instance shared between
1883 1889 multiple calls
1884 1890 """
1885 1891 if link == nullrev:
1886 1892 raise error.RevlogError(_("attempted to add linkrev -1 to %s")
1887 1893 % self.indexfile)
1888 1894
1889 1895 if flags:
1890 1896 node = node or self.hash(text, p1, p2)
1891 1897
1892 1898 rawtext, validatehash = self._processflags(text, flags, 'write')
1893 1899
1894 1900 # If the flag processor modifies the revision data, ignore any provided
1895 1901 # cachedelta.
1896 1902 if rawtext != text:
1897 1903 cachedelta = None
1898 1904
1899 1905 if len(rawtext) > _maxentrysize:
1900 1906 raise error.RevlogError(
1901 1907 _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
1902 1908 % (self.indexfile, len(rawtext)))
1903 1909
1904 1910 node = node or self.hash(rawtext, p1, p2)
1905 1911 if node in self.nodemap:
1906 1912 return node
1907 1913
1908 1914 if validatehash:
1909 1915 self.checkhash(rawtext, node, p1=p1, p2=p2)
1910 1916
1911 1917 return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
1912 1918 flags, cachedelta=cachedelta,
1913 1919 deltacomputer=deltacomputer)
1914 1920
1915 1921 def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
1916 1922 cachedelta=None, deltacomputer=None):
1917 1923 """add a raw revision with known flags, node and parents
1918 1924 useful when reusing a revision not stored in this revlog (e.g. received
1919 1925 over the wire, or read from an external bundle).
1920 1926 """
1921 1927 dfh = None
1922 1928 if not self._inline:
1923 1929 dfh = self._datafp("a+")
1924 1930 ifh = self._indexfp("a+")
1925 1931 try:
1926 1932 return self._addrevision(node, rawtext, transaction, link, p1, p2,
1927 1933 flags, cachedelta, ifh, dfh,
1928 1934 deltacomputer=deltacomputer)
1929 1935 finally:
1930 1936 if dfh:
1931 1937 dfh.close()
1932 1938 ifh.close()
1933 1939
1934 1940 def compress(self, data):
1935 1941 """Generate a possibly-compressed representation of data."""
1936 1942 if not data:
1937 1943 return '', data
1938 1944
1939 1945 compressed = self._compressor.compress(data)
1940 1946
1941 1947 if compressed:
1942 1948 # The revlog compressor added the header in the returned data.
1943 1949 return '', compressed
1944 1950
1945 1951 if data[0:1] == '\0':
1946 1952 return '', data
1947 1953 return 'u', data
1948 1954
1949 1955 def decompress(self, data):
1950 1956 """Decompress a revlog chunk.
1951 1957
1952 1958 The chunk is expected to begin with a header identifying the
1953 1959 format type so it can be routed to an appropriate decompressor.
1954 1960 """
1955 1961 if not data:
1956 1962 return data
1957 1963
1958 1964 # Revlogs are read much more frequently than they are written and many
1959 1965 # chunks only take microseconds to decompress, so performance is
1960 1966 # important here.
1961 1967 #
1962 1968 # We can make a few assumptions about revlogs:
1963 1969 #
1964 1970 # 1) the majority of chunks will be compressed (as opposed to inline
1965 1971 # raw data).
1966 1972 # 2) decompressing *any* data will likely be at least 10x slower than
1967 1973 # returning raw inline data.
1968 1974 # 3) we want to prioritize common and officially supported compression
1969 1975 # engines
1970 1976 #
1971 1977 # It follows that we want to optimize for "decompress compressed data
1972 1978 # when encoded with common and officially supported compression engines"
1973 1979 # case over "raw data" and "data encoded by less common or non-official
1974 1980 # compression engines." That is why we have the inline lookup first
1975 1981 # followed by the compengines lookup.
1976 1982 #
1977 1983 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
1978 1984 # compressed chunks. And this matters for changelog and manifest reads.
1979 1985 t = data[0:1]
1980 1986
1981 1987 if t == 'x':
1982 1988 try:
1983 1989 return _zlibdecompress(data)
1984 1990 except zlib.error as e:
1985 1991 raise error.RevlogError(_('revlog decompress error: %s') %
1986 1992 stringutil.forcebytestr(e))
1987 1993 # '\0' is more common than 'u' so it goes first.
1988 1994 elif t == '\0':
1989 1995 return data
1990 1996 elif t == 'u':
1991 1997 return util.buffer(data, 1)
1992 1998
1993 1999 try:
1994 2000 compressor = self._decompressors[t]
1995 2001 except KeyError:
1996 2002 try:
1997 2003 engine = util.compengines.forrevlogheader(t)
1998 2004 compressor = engine.revlogcompressor(self._compengineopts)
1999 2005 self._decompressors[t] = compressor
2000 2006 except KeyError:
2001 2007 raise error.RevlogError(_('unknown compression type %r') % t)
2002 2008
2003 2009 return compressor.decompress(data)
2004 2010
2005 2011 def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
2006 2012 cachedelta, ifh, dfh, alwayscache=False,
2007 2013 deltacomputer=None):
2008 2014 """internal function to add revisions to the log
2009 2015
2010 2016 see addrevision for argument descriptions.
2011 2017
2012 2018 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2013 2019
2014 2020 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2015 2021 be used.
2016 2022
2017 2023 invariants:
2018 2024 - rawtext is optional (can be None); if not set, cachedelta must be set.
2019 2025 If both are set, they must correspond to each other.
2020 2026 """
2021 2027 if node == nullid:
2022 2028 raise error.RevlogError(_("%s: attempt to add null revision") %
2023 2029 self.indexfile)
2024 2030 if node == wdirid or node in wdirfilenodeids:
2025 2031 raise error.RevlogError(_("%s: attempt to add wdir revision") %
2026 2032 self.indexfile)
2027 2033
2028 2034 if self._inline:
2029 2035 fh = ifh
2030 2036 else:
2031 2037 fh = dfh
2032 2038
2033 2039 btext = [rawtext]
2034 2040
2035 2041 curr = len(self)
2036 2042 prev = curr - 1
2037 2043 offset = self.end(prev)
2038 2044 p1r, p2r = self.rev(p1), self.rev(p2)
2039 2045
2040 2046 # full versions are inserted when the needed deltas
2041 2047 # become comparable to the uncompressed text
2042 2048 if rawtext is None:
2043 2049 # need rawtext size, before changed by flag processors, which is
2044 2050 # the non-raw size. use revlog explicitly to avoid filelog's extra
2045 2051 # logic that might remove metadata size.
2046 2052 textlen = mdiff.patchedsize(revlog.size(self, cachedelta[0]),
2047 2053 cachedelta[1])
2048 2054 else:
2049 2055 textlen = len(rawtext)
2050 2056
2051 2057 if deltacomputer is None:
2052 2058 deltacomputer = deltautil.deltacomputer(self)
2053 2059
2054 2060 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
2055 2061
2056 2062 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2057 2063
2058 2064 e = (offset_type(offset, flags), deltainfo.deltalen, textlen,
2059 2065 deltainfo.base, link, p1r, p2r, node)
2060 2066 self.index.append(e)
2061 2067 self.nodemap[node] = curr
2062 2068
2063 2069 # Reset the pure node cache start lookup offset to account for new
2064 2070 # revision.
2065 2071 if self._nodepos is not None:
2066 2072 self._nodepos = curr
2067 2073
2068 2074 entry = self._io.packentry(e, self.node, self.version, curr)
2069 2075 self._writeentry(transaction, ifh, dfh, entry, deltainfo.data,
2070 2076 link, offset)
2071 2077
2072 2078 rawtext = btext[0]
2073 2079
2074 2080 if alwayscache and rawtext is None:
2075 2081 rawtext = deltacomputer.buildtext(revinfo, fh)
2076 2082
2077 2083 if type(rawtext) == bytes: # only accept immutable objects
2078 2084 self._revisioncache = (node, curr, rawtext)
2079 2085 self._chainbasecache[curr] = deltainfo.chainbase
2080 2086 return node
2081 2087
2082 2088 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
2083 2089 # Files opened in a+ mode have inconsistent behavior on various
2084 2090 # platforms. Windows requires that a file positioning call be made
2085 2091 # when the file handle transitions between reads and writes. See
2086 2092 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2087 2093 # platforms, Python or the platform itself can be buggy. Some versions
2088 2094 # of Solaris have been observed to not append at the end of the file
2089 2095 # if the file was seeked to before the end. See issue4943 for more.
2090 2096 #
2091 2097 # We work around this issue by inserting a seek() before writing.
2092 2098 # Note: This is likely not necessary on Python 3. However, because
2093 2099 # the file handle is reused for reads and may be seeked there, we need
2094 2100 # to be careful before changing this.
2095 2101 ifh.seek(0, os.SEEK_END)
2096 2102 if dfh:
2097 2103 dfh.seek(0, os.SEEK_END)
2098 2104
2099 2105 curr = len(self) - 1
2100 2106 if not self._inline:
2101 2107 transaction.add(self.datafile, offset)
2102 2108 transaction.add(self.indexfile, curr * len(entry))
2103 2109 if data[0]:
2104 2110 dfh.write(data[0])
2105 2111 dfh.write(data[1])
2106 2112 ifh.write(entry)
2107 2113 else:
2108 2114 offset += curr * self._io.size
2109 2115 transaction.add(self.indexfile, offset, curr)
2110 2116 ifh.write(entry)
2111 2117 ifh.write(data[0])
2112 2118 ifh.write(data[1])
2113 2119 self._enforceinlinesize(transaction, ifh)
2114 2120
2115 2121 def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
2116 2122 """
2117 2123 add a delta group
2118 2124
2119 2125 Given a set of deltas, add them to the revision log. The
2120 2126 first delta is against its parent, which should be in our
2121 2127 log; the rest are against the previous delta.
2122 2128
2123 2129 If ``addrevisioncb`` is defined, it will be called with arguments of
2124 2130 this revlog and the node that was added.
2125 2131 """
2126 2132
2127 2133 if self._writinghandles:
2128 2134 raise error.ProgrammingError('cannot nest addgroup() calls')
2129 2135
2130 2136 nodes = []
2131 2137
2132 2138 r = len(self)
2133 2139 end = 0
2134 2140 if r:
2135 2141 end = self.end(r - 1)
2136 2142 ifh = self._indexfp("a+")
2137 2143 isize = r * self._io.size
2138 2144 if self._inline:
2139 2145 transaction.add(self.indexfile, end + isize, r)
2140 2146 dfh = None
2141 2147 else:
2142 2148 transaction.add(self.indexfile, isize, r)
2143 2149 transaction.add(self.datafile, end)
2144 2150 dfh = self._datafp("a+")
2145 2151 def flush():
2146 2152 if dfh:
2147 2153 dfh.flush()
2148 2154 ifh.flush()
2149 2155
2150 2156 self._writinghandles = (ifh, dfh)
2151 2157
2152 2158 try:
2153 2159 deltacomputer = deltautil.deltacomputer(self)
2154 2160 # loop through our set of deltas
2155 2161 for data in deltas:
2156 2162 node, p1, p2, linknode, deltabase, delta, flags = data
2157 2163 link = linkmapper(linknode)
2158 2164 flags = flags or REVIDX_DEFAULT_FLAGS
2159 2165
2160 2166 nodes.append(node)
2161 2167
2162 2168 if node in self.nodemap:
2163 2169 self._nodeduplicatecallback(transaction, node)
2164 2170 # this can happen if two branches make the same change
2165 2171 continue
2166 2172
2167 2173 for p in (p1, p2):
2168 2174 if p not in self.nodemap:
2169 2175 raise error.LookupError(p, self.indexfile,
2170 2176 _('unknown parent'))
2171 2177
2172 2178 if deltabase not in self.nodemap:
2173 2179 raise error.LookupError(deltabase, self.indexfile,
2174 2180 _('unknown delta base'))
2175 2181
2176 2182 baserev = self.rev(deltabase)
2177 2183
2178 2184 if baserev != nullrev and self.iscensored(baserev):
2179 2185 # if base is censored, delta must be full replacement in a
2180 2186 # single patch operation
2181 2187 hlen = struct.calcsize(">lll")
2182 2188 oldlen = self.rawsize(baserev)
2183 2189 newlen = len(delta) - hlen
2184 2190 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2185 2191 raise error.CensoredBaseError(self.indexfile,
2186 2192 self.node(baserev))
2187 2193
2188 2194 if not flags and self._peek_iscensored(baserev, delta, flush):
2189 2195 flags |= REVIDX_ISCENSORED
2190 2196
2191 2197 # We assume consumers of addrevisioncb will want to retrieve
2192 2198 # the added revision, which will require a call to
2193 2199 # revision(). revision() will fast path if there is a cache
2194 2200 # hit. So, we tell _addrevision() to always cache in this case.
2195 2201 # We're only using addgroup() in the context of changegroup
2196 2202 # generation so the revision data can always be handled as raw
2197 2203 # by the flagprocessor.
2198 2204 self._addrevision(node, None, transaction, link,
2199 2205 p1, p2, flags, (baserev, delta),
2200 2206 ifh, dfh,
2201 2207 alwayscache=bool(addrevisioncb),
2202 2208 deltacomputer=deltacomputer)
2203 2209
2204 2210 if addrevisioncb:
2205 2211 addrevisioncb(self, node)
2206 2212
2207 2213 if not dfh and not self._inline:
2208 2214 # addrevision switched from inline to conventional
2209 2215 # reopen the index
2210 2216 ifh.close()
2211 2217 dfh = self._datafp("a+")
2212 2218 ifh = self._indexfp("a+")
2213 2219 self._writinghandles = (ifh, dfh)
2214 2220 finally:
2215 2221 self._writinghandles = None
2216 2222
2217 2223 if dfh:
2218 2224 dfh.close()
2219 2225 ifh.close()
2220 2226
2221 2227 return nodes
2222 2228
2223 2229 def iscensored(self, rev):
2224 2230 """Check if a file revision is censored."""
2225 2231 if not self._censorable:
2226 2232 return False
2227 2233
2228 2234 return self.flags(rev) & REVIDX_ISCENSORED
2229 2235
2230 2236 def _peek_iscensored(self, baserev, delta, flush):
2231 2237 """Quickly check if a delta produces a censored revision."""
2232 2238 if not self._censorable:
2233 2239 return False
2234 2240
2235 2241 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
2236 2242
2237 2243 def getstrippoint(self, minlink):
2238 2244 """find the minimum rev that must be stripped to strip the linkrev
2239 2245
2240 2246 Returns a tuple containing the minimum rev and a set of all revs that
2241 2247 have linkrevs that will be broken by this strip.
2242 2248 """
2243 2249 return storageutil.resolvestripinfo(minlink, len(self) - 1,
2244 2250 self.headrevs(),
2245 2251 self.linkrev, self.parentrevs)
2246 2252
2247 2253 def strip(self, minlink, transaction):
2248 2254 """truncate the revlog on the first revision with a linkrev >= minlink
2249 2255
2250 2256 This function is called when we're stripping revision minlink and
2251 2257 its descendants from the repository.
2252 2258
2253 2259 We have to remove all revisions with linkrev >= minlink, because
2254 2260 the equivalent changelog revisions will be renumbered after the
2255 2261 strip.
2256 2262
2257 2263 So we truncate the revlog on the first of these revisions, and
2258 2264 trust that the caller has saved the revisions that shouldn't be
2259 2265 removed and that it'll re-add them after this truncation.
2260 2266 """
2261 2267 if len(self) == 0:
2262 2268 return
2263 2269
2264 2270 rev, _ = self.getstrippoint(minlink)
2265 2271 if rev == len(self):
2266 2272 return
2267 2273
2268 2274 # first truncate the files on disk
2269 2275 end = self.start(rev)
2270 2276 if not self._inline:
2271 2277 transaction.add(self.datafile, end)
2272 2278 end = rev * self._io.size
2273 2279 else:
2274 2280 end += rev * self._io.size
2275 2281
2276 2282 transaction.add(self.indexfile, end)
2277 2283
2278 2284 # then reset internal state in memory to forget those revisions
2279 2285 self._revisioncache = None
2280 2286 self._chaininfocache = {}
2281 2287 self._chunkclear()
2282 2288 for x in pycompat.xrange(rev, len(self)):
2283 2289 del self.nodemap[self.node(x)]
2284 2290
2285 2291 del self.index[rev:-1]
2286 2292 self._nodepos = None
2287 2293
2288 2294 def checksize(self):
2289 2295 """Check size of index and data files
2290 2296
2291 2297 return a (dd, di) tuple.
2292 2298 - dd: extra bytes for the "data" file
2293 2299 - di: extra bytes for the "index" file
2294 2300
2295 2301 A healthy revlog will return (0, 0).
2296 2302 """
2297 2303 expected = 0
2298 2304 if len(self):
2299 2305 expected = max(0, self.end(len(self) - 1))
2300 2306
2301 2307 try:
2302 2308 with self._datafp() as f:
2303 2309 f.seek(0, 2)
2304 2310 actual = f.tell()
2305 2311 dd = actual - expected
2306 2312 except IOError as inst:
2307 2313 if inst.errno != errno.ENOENT:
2308 2314 raise
2309 2315 dd = 0
2310 2316
2311 2317 try:
2312 2318 f = self.opener(self.indexfile)
2313 2319 f.seek(0, 2)
2314 2320 actual = f.tell()
2315 2321 f.close()
2316 2322 s = self._io.size
2317 2323 i = max(0, actual // s)
2318 2324 di = actual - (i * s)
2319 2325 if self._inline:
2320 2326 databytes = 0
2321 2327 for r in self:
2322 2328 databytes += max(0, self.length(r))
2323 2329 dd = 0
2324 2330 di = actual - len(self) * s - databytes
2325 2331 except IOError as inst:
2326 2332 if inst.errno != errno.ENOENT:
2327 2333 raise
2328 2334 di = 0
2329 2335
2330 2336 return (dd, di)
2331 2337
2332 2338 def files(self):
2333 2339 res = [self.indexfile]
2334 2340 if not self._inline:
2335 2341 res.append(self.datafile)
2336 2342 return res
2337 2343
2338 2344 def emitrevisions(self, nodes, nodesorder=None, revisiondata=False,
2339 2345 assumehaveparentrevisions=False,
2340 2346 deltamode=repository.CG_DELTAMODE_STD):
2341 2347 if nodesorder not in ('nodes', 'storage', 'linear', None):
2342 2348 raise error.ProgrammingError('unhandled value for nodesorder: %s' %
2343 2349 nodesorder)
2344 2350
2345 2351 if nodesorder is None and not self._generaldelta:
2346 2352 nodesorder = 'storage'
2347 2353
2348 2354 if (not self._storedeltachains and
2349 2355 deltamode != repository.CG_DELTAMODE_PREV):
2350 2356 deltamode = repository.CG_DELTAMODE_FULL
2351 2357
2352 2358 return storageutil.emitrevisions(
2353 2359 self, nodes, nodesorder, revlogrevisiondelta,
2354 2360 deltaparentfn=self.deltaparent,
2355 2361 candeltafn=self.candelta,
2356 2362 rawsizefn=self.rawsize,
2357 2363 revdifffn=self.revdiff,
2358 2364 flagsfn=self.flags,
2359 2365 deltamode=deltamode,
2360 2366 revisiondata=revisiondata,
2361 2367 assumehaveparentrevisions=assumehaveparentrevisions)
2362 2368
2363 2369 DELTAREUSEALWAYS = 'always'
2364 2370 DELTAREUSESAMEREVS = 'samerevs'
2365 2371 DELTAREUSENEVER = 'never'
2366 2372
2367 2373 DELTAREUSEFULLADD = 'fulladd'
2368 2374
2369 2375 DELTAREUSEALL = {'always', 'samerevs', 'never', 'fulladd'}
2370 2376
2371 2377 def clone(self, tr, destrevlog, addrevisioncb=None,
2372 2378 deltareuse=DELTAREUSESAMEREVS, forcedeltabothparents=None):
2373 2379 """Copy this revlog to another, possibly with format changes.
2374 2380
2375 2381 The destination revlog will contain the same revisions and nodes.
2376 2382 However, it may not be bit-for-bit identical due to e.g. delta encoding
2377 2383 differences.
2378 2384
2379 2385 The ``deltareuse`` argument controls how deltas from the existing revlog
2380 2386 are preserved in the destination revlog. The argument can have the
2381 2387 following values:
2382 2388
2383 2389 DELTAREUSEALWAYS
2384 2390 Deltas will always be reused (if possible), even if the destination
2385 2391 revlog would not select the same revisions for the delta. This is the
2386 2392 fastest mode of operation.
2387 2393 DELTAREUSESAMEREVS
2388 2394 Deltas will be reused if the destination revlog would pick the same
2389 2395 revisions for the delta. This mode strikes a balance between speed
2390 2396 and optimization.
2391 2397 DELTAREUSENEVER
2392 2398 Deltas will never be reused. This is the slowest mode of execution.
2393 2399 This mode can be used to recompute deltas (e.g. if the diff/delta
2394 2400 algorithm changes).
2395 2401
2396 2402 Delta computation can be slow, so the choice of delta reuse policy can
2397 2403 significantly affect run time.
2398 2404
2399 2405 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2400 2406 the two extremes. Deltas will be reused when they are appropriate. But
2401 2407 when the destination revlog would choose a better delta, the delta is
2402 2408 recomputed. This means if you are converting a non-generaldelta revlog
2403 2409 to a generaldelta revlog, deltas will be recomputed if the delta's
2404 2410 parent isn't a parent of the revision.
2405 2411
2406 2412 In addition to the delta policy, the ``forcedeltabothparents``
2407 2413 argument controls whether deltas are forcibly computed against both
2408 2414 parents for merges. By default, the destination revlog's setting is used.
2409 2415 """
2410 2416 if deltareuse not in self.DELTAREUSEALL:
2411 2417 raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)
2412 2418
2413 2419 if len(destrevlog):
2414 2420 raise ValueError(_('destination revlog is not empty'))
2415 2421
2416 2422 if getattr(self, 'filteredrevs', None):
2417 2423 raise ValueError(_('source revlog has filtered revisions'))
2418 2424 if getattr(destrevlog, 'filteredrevs', None):
2419 2425 raise ValueError(_('destination revlog has filtered revisions'))
2420 2426
2421 2427 # lazydelta and lazydeltabase controls whether to reuse a cached delta,
2422 2428 # if possible.
2423 2429 oldlazydelta = destrevlog._lazydelta
2424 2430 oldlazydeltabase = destrevlog._lazydeltabase
2425 2431 oldamd = destrevlog._deltabothparents
2426 2432
2427 2433 try:
2428 2434 if deltareuse == self.DELTAREUSEALWAYS:
2429 2435 destrevlog._lazydeltabase = True
2430 2436 destrevlog._lazydelta = True
2431 2437 elif deltareuse == self.DELTAREUSESAMEREVS:
2432 2438 destrevlog._lazydeltabase = False
2433 2439 destrevlog._lazydelta = True
2434 2440 elif deltareuse == self.DELTAREUSENEVER:
2435 2441 destrevlog._lazydeltabase = False
2436 2442 destrevlog._lazydelta = False
2437 2443
2438 2444 destrevlog._deltabothparents = forcedeltabothparents or oldamd
2439 2445
2440 2446 deltacomputer = deltautil.deltacomputer(destrevlog)
2441 2447 index = self.index
2442 2448 for rev in self:
2443 2449 entry = index[rev]
2444 2450
2445 2451 # Some classes override linkrev to take filtered revs into
2446 2452 # account. Use raw entry from index.
2447 2453 flags = entry[0] & 0xffff
2448 2454 linkrev = entry[4]
2449 2455 p1 = index[entry[5]][7]
2450 2456 p2 = index[entry[6]][7]
2451 2457 node = entry[7]
2452 2458
2453 2459 # (Possibly) reuse the delta from the revlog if allowed and
2454 2460 # the revlog chunk is a delta.
2455 2461 cachedelta = None
2456 2462 rawtext = None
2457 2463 if destrevlog._lazydelta:
2458 2464 dp = self.deltaparent(rev)
2459 2465 if dp != nullrev:
2460 2466 cachedelta = (dp, bytes(self._chunk(rev)))
2461 2467
2462 2468 if not cachedelta:
2463 2469 rawtext = self.revision(rev, raw=True)
2464 2470
2465 2471
2466 2472 if deltareuse == self.DELTAREUSEFULLADD:
2467 2473 destrevlog.addrevision(rawtext, tr, linkrev, p1, p2,
2468 2474 cachedelta=cachedelta,
2469 2475 node=node, flags=flags,
2470 2476 deltacomputer=deltacomputer)
2471 2477 else:
2472 2478 ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
2473 2479 checkambig=False)
2474 2480 dfh = None
2475 2481 if not destrevlog._inline:
2476 2482 dfh = destrevlog.opener(destrevlog.datafile, 'a+')
2477 2483 try:
2478 2484 destrevlog._addrevision(node, rawtext, tr, linkrev, p1,
2479 2485 p2, flags, cachedelta, ifh, dfh,
2480 2486 deltacomputer=deltacomputer)
2481 2487 finally:
2482 2488 if dfh:
2483 2489 dfh.close()
2484 2490 ifh.close()
2485 2491
2486 2492 if addrevisioncb:
2487 2493 addrevisioncb(self, rev, node)
2488 2494 finally:
2489 2495 destrevlog._lazydelta = oldlazydelta
2490 2496 destrevlog._lazydeltabase = oldlazydeltabase
2491 2497 destrevlog._deltabothparents = oldamd
2492 2498
2493 2499 def censorrevision(self, tr, censornode, tombstone=b''):
2494 2500 if (self.version & 0xFFFF) == REVLOGV0:
2495 2501 raise error.RevlogError(_('cannot censor with version %d revlogs') %
2496 2502 self.version)
2497 2503
2498 2504 censorrev = self.rev(censornode)
2499 2505 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
2500 2506
2501 2507 if len(tombstone) > self.rawsize(censorrev):
2502 2508 raise error.Abort(_('censor tombstone must be no longer than '
2503 2509 'censored data'))
2504 2510
2505 2511 # Rewriting the revlog in place is hard. Our strategy for censoring is
2506 2512 # to create a new revlog, copy all revisions to it, then replace the
2507 2513 # revlogs on transaction close.
2508 2514
2509 2515 newindexfile = self.indexfile + b'.tmpcensored'
2510 2516 newdatafile = self.datafile + b'.tmpcensored'
2511 2517
2512 2518 # This is a bit dangerous. We could easily have a mismatch of state.
2513 2519 newrl = revlog(self.opener, newindexfile, newdatafile,
2514 2520 censorable=True)
2515 2521 newrl.version = self.version
2516 2522 newrl._generaldelta = self._generaldelta
2517 2523 newrl._io = self._io
2518 2524
2519 2525 for rev in self.revs():
2520 2526 node = self.node(rev)
2521 2527 p1, p2 = self.parents(node)
2522 2528
2523 2529 if rev == censorrev:
2524 2530 newrl.addrawrevision(tombstone, tr, self.linkrev(censorrev),
2525 2531 p1, p2, censornode, REVIDX_ISCENSORED)
2526 2532
2527 2533 if newrl.deltaparent(rev) != nullrev:
2528 2534 raise error.Abort(_('censored revision stored as delta; '
2529 2535 'cannot censor'),
2530 2536 hint=_('censoring of revlogs is not '
2531 2537 'fully implemented; please report '
2532 2538 'this bug'))
2533 2539 continue
2534 2540
2535 2541 if self.iscensored(rev):
2536 2542 if self.deltaparent(rev) != nullrev:
2537 2543 raise error.Abort(_('cannot censor due to censored '
2538 2544 'revision having delta stored'))
2539 2545 rawtext = self._chunk(rev)
2540 2546 else:
2541 2547 rawtext = self.revision(rev, raw=True)
2542 2548
2543 2549 newrl.addrawrevision(rawtext, tr, self.linkrev(rev), p1, p2, node,
2544 2550 self.flags(rev))
2545 2551
2546 2552 tr.addbackup(self.indexfile, location='store')
2547 2553 if not self._inline:
2548 2554 tr.addbackup(self.datafile, location='store')
2549 2555
2550 2556 self.opener.rename(newrl.indexfile, self.indexfile)
2551 2557 if not self._inline:
2552 2558 self.opener.rename(newrl.datafile, self.datafile)
2553 2559
2554 2560 self.clearcaches()
2555 2561 self._loadindex()
2556 2562
2557 2563 def verifyintegrity(self, state):
2558 2564 """Verifies the integrity of the revlog.
2559 2565
2560 2566 Yields ``revlogproblem`` instances describing problems that are
2561 2567 found.
2562 2568 """
2563 2569 dd, di = self.checksize()
2564 2570 if dd:
2565 2571 yield revlogproblem(error=_('data length off by %d bytes') % dd)
2566 2572 if di:
2567 2573 yield revlogproblem(error=_('index contains %d extra bytes') % di)
2568 2574
2569 2575 version = self.version & 0xFFFF
2570 2576
2571 2577 # The verifier tells us what version revlog we should be.
2572 2578 if version != state['expectedversion']:
2573 2579 yield revlogproblem(
2574 2580 warning=_("warning: '%s' uses revlog format %d; expected %d") %
2575 2581 (self.indexfile, version, state['expectedversion']))
2576 2582
2577 2583 state['skipread'] = set()
2578 2584
2579 2585 for rev in self:
2580 2586 node = self.node(rev)
2581 2587
2582 2588 # Verify contents. 4 cases to care about:
2583 2589 #
2584 2590 # common: the most common case
2585 2591 # rename: with a rename
2586 2592 # meta: file content starts with b'\1\n', the metadata
2587 2593 # header defined in filelog.py, but without a rename
2588 2594 # ext: content stored externally
2589 2595 #
2590 2596 # More formally, their differences are shown below:
2591 2597 #
2592 2598 # | common | rename | meta | ext
2593 2599 # -------------------------------------------------------
2594 2600 # flags() | 0 | 0 | 0 | not 0
2595 2601 # renamed() | False | True | False | ?
2596 2602 # rawtext[0:2]=='\1\n'| False | True | True | ?
2597 2603 #
2598 2604 # "rawtext" means the raw text stored in revlog data, which
2599 2605 # could be retrieved by "revision(rev, raw=True)". "text"
2600 2606 # mentioned below is "revision(rev, raw=False)".
2601 2607 #
2602 2608 # There are 3 different lengths stored physically:
2603 2609 # 1. L1: rawsize, stored in revlog index
2604 2610 # 2. L2: len(rawtext), stored in revlog data
2605 2611 # 3. L3: len(text), stored in revlog data if flags==0, or
2606 2612 # possibly somewhere else if flags!=0
2607 2613 #
2608 2614 # L1 should be equal to L2. L3 could be different from them.
2609 2615 # "text" may or may not affect commit hash depending on flag
2610 2616 # processors (see revlog.addflagprocessor).
2611 2617 #
2612 2618 # | common | rename | meta | ext
2613 2619 # -------------------------------------------------
2614 2620 # rawsize() | L1 | L1 | L1 | L1
2615 2621 # size() | L1 | L2-LM | L1(*) | L1 (?)
2616 2622 # len(rawtext) | L2 | L2 | L2 | L2
2617 2623 # len(text) | L2 | L2 | L2 | L3
2618 2624 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
2619 2625 #
2620 2626 # LM: length of metadata, depending on rawtext
2621 2627 # (*): not ideal, see comment in filelog.size
2622 2628 # (?): could be "- len(meta)" if the resolved content has
2623 2629 # rename metadata
2624 2630 #
2625 2631 # Checks needed to be done:
2626 2632 # 1. length check: L1 == L2, in all cases.
2627 2633 # 2. hash check: depending on flag processor, we may need to
2628 2634 # use either "text" (external), or "rawtext" (in revlog).
2629 2635
2630 2636 try:
2631 2637 skipflags = state.get('skipflags', 0)
2632 2638 if skipflags:
2633 2639 skipflags &= self.flags(rev)
2634 2640
2635 2641 if skipflags:
2636 2642 state['skipread'].add(node)
2637 2643 else:
2638 2644 # Side-effect: read content and verify hash.
2639 2645 self.revision(node)
2640 2646
2641 2647 l1 = self.rawsize(rev)
2642 2648 l2 = len(self.revision(node, raw=True))
2643 2649
2644 2650 if l1 != l2:
2645 2651 yield revlogproblem(
2646 2652 error=_('unpacked size is %d, %d expected') % (l2, l1),
2647 2653 node=node)
2648 2654
2649 2655 except error.CensoredNodeError:
2650 2656 if state['erroroncensored']:
2651 2657 yield revlogproblem(error=_('censored file data'),
2652 2658 node=node)
2653 2659 state['skipread'].add(node)
2654 2660 except Exception as e:
2655 2661 yield revlogproblem(
2656 2662 error=_('unpacking %s: %s') % (short(node),
2657 2663 stringutil.forcebytestr(e)),
2658 2664 node=node)
2659 2665 state['skipread'].add(node)
2660 2666
2661 2667 def storageinfo(self, exclusivefiles=False, sharedfiles=False,
2662 2668 revisionscount=False, trackedsize=False,
2663 2669 storedsize=False):
2664 2670 d = {}
2665 2671
2666 2672 if exclusivefiles:
2667 2673 d['exclusivefiles'] = [(self.opener, self.indexfile)]
2668 2674 if not self._inline:
2669 2675 d['exclusivefiles'].append((self.opener, self.datafile))
2670 2676
2671 2677 if sharedfiles:
2672 2678 d['sharedfiles'] = []
2673 2679
2674 2680 if revisionscount:
2675 2681 d['revisionscount'] = len(self)
2676 2682
2677 2683 if trackedsize:
2678 2684 d['trackedsize'] = sum(map(self.rawsize, iter(self)))
2679 2685
2680 2686 if storedsize:
2681 2687 d['storedsize'] = sum(self.opener.stat(path).st_size
2682 2688 for path in self.files())
2683 2689
2684 2690 return d