revlog: move decompress() from module to revlog class (API)...
Gregory Szorc
r30793:b6f455a6 default
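This changeset moves revlog's module-level decompress() onto the revlog class, so callers now invoke it as a method on a revlog instance; the perf.py hunk below updates its one call site accordingly. A minimal sketch of the call-site change (the names 'rl' and 'chunk' are illustrative only):

    # before (removed by this changeset): module-level function
    #     from mercurial import revlog
    #     revlog.decompress(chunk)
    # after: a method on the revlog instance
    text = rl.decompress(chunk)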
@@ -1,1241 +1,1240 b''
1 1 # perf.py - performance test routines
2 2 '''helper extension to measure performance'''
3 3
4 4 # "historical portability" policy of perf.py:
5 5 #
6 6 # We have to do:
7 7 # - make perf.py "loadable" with as wide Mercurial version as possible
8 8 # This doesn't mean that perf commands work correctly with that Mercurial.
9 9 # BTW, perf.py itself has been available since 1.1 (or eb240755386d).
10 10 # - make historical perf command work correctly with as wide Mercurial
11 11 # version as possible
12 12 #
13 13 # We have to do, if possible with reasonable cost:
14 14 # - make recent perf command for historical feature work correctly
15 15 # with early Mercurial
16 16 #
17 17 # We don't have to do:
18 18 # - make perf command for recent feature work correctly with early
19 19 # Mercurial
20 20
21 21 from __future__ import absolute_import
22 22 import functools
23 23 import os
24 24 import random
25 25 import sys
26 26 import time
27 27 from mercurial import (
28 28 bdiff,
29 29 changegroup,
30 30 cmdutil,
31 31 commands,
32 32 copies,
33 33 error,
34 34 extensions,
35 35 mdiff,
36 36 merge,
37 revlog,
38 37 util,
39 38 )
40 39
41 40 # for "historical portability":
42 41 # try to import modules separately (in dict order), and ignore
43 42 # failure, because these aren't available with early Mercurial
44 43 try:
45 44 from mercurial import branchmap # since 2.5 (or bcee63733aad)
46 45 except ImportError:
47 46 pass
48 47 try:
49 48 from mercurial import obsolete # since 2.3 (or ad0d6c2b3279)
50 49 except ImportError:
51 50 pass
52 51 try:
53 52 from mercurial import repoview # since 2.5 (or 3a6ddacb7198)
54 53 except ImportError:
55 54 pass
56 55 try:
57 56 from mercurial import scmutil # since 1.9 (or 8b252e826c68)
58 57 except ImportError:
59 58 pass
60 59
61 60 # for "historical portability":
62 61 # define util.safehasattr forcibly, because util.safehasattr has been
63 62 # available since 1.9.3 (or 94b200a11cf7)
64 63 _undefined = object()
65 64 def safehasattr(thing, attr):
66 65 return getattr(thing, attr, _undefined) is not _undefined
67 66 setattr(util, 'safehasattr', safehasattr)
68 67
69 68 # for "historical portability":
70 69 # use locally defined empty option list, if formatteropts isn't
71 70 # available, because commands.formatteropts has been available since
72 71 # 3.2 (or 7a7eed5176a4), even though formatting itself has been
73 72 # available since 2.2 (or ae5f92e154d3)
74 73 formatteropts = getattr(commands, "formatteropts", [])
75 74
76 75 # for "historical portability":
77 76 # use locally defined option list, if debugrevlogopts isn't available,
78 77 # because commands.debugrevlogopts has been available since 3.7 (or
79 78 # 5606f7d0d063), even though cmdutil.openrevlog() has been available
80 79 # since 1.9 (or a79fea6b3e77).
81 80 revlogopts = getattr(commands, "debugrevlogopts", [
82 81 ('c', 'changelog', False, ('open changelog')),
83 82 ('m', 'manifest', False, ('open manifest')),
84 83 ('', 'dir', False, ('open directory manifest')),
85 84 ])
86 85
87 86 cmdtable = {}
88 87
89 88 # for "historical portability":
90 89 # define parsealiases locally, because cmdutil.parsealiases has been
91 90 # available since 1.5 (or 6252852b4332)
92 91 def parsealiases(cmd):
93 92 return cmd.lstrip("^").split("|")
94 93
95 94 if safehasattr(cmdutil, 'command'):
96 95 import inspect
97 96 command = cmdutil.command(cmdtable)
98 97 if 'norepo' not in inspect.getargspec(command)[0]:
99 98 # for "historical portability":
100 99 # wrap original cmdutil.command, because "norepo" option has
101 100 # been available since 3.1 (or 75a96326cecb)
102 101 _command = command
103 102 def command(name, options=(), synopsis=None, norepo=False):
104 103 if norepo:
105 104 commands.norepo += ' %s' % ' '.join(parsealiases(name))
106 105 return _command(name, list(options), synopsis)
107 106 else:
108 107 # for "historical portability":
109 108 # define "@command" annotation locally, because cmdutil.command
110 109 # has been available since 1.9 (or 2daa5179e73f)
111 110 def command(name, options=(), synopsis=None, norepo=False):
112 111 def decorator(func):
113 112 if synopsis:
114 113 cmdtable[name] = func, list(options), synopsis
115 114 else:
116 115 cmdtable[name] = func, list(options)
117 116 if norepo:
118 117 commands.norepo += ' %s' % ' '.join(parsealiases(name))
119 118 return func
120 119 return decorator
121 120
122 121 def getlen(ui):
123 122 if ui.configbool("perf", "stub"):
124 123 return lambda x: 1
125 124 return len
126 125
127 126 def gettimer(ui, opts=None):
128 127 """return a timer function and formatter: (timer, formatter)
129 128
130 129 This function exists to gather the creation of a formatter in a
131 130 single place instead of duplicating it in all performance commands."""
132 131
133 132 # enforce an idle period before execution to counteract power management
134 133 # experimental config: perf.presleep
135 134 time.sleep(getint(ui, "perf", "presleep", 1))
136 135
137 136 if opts is None:
138 137 opts = {}
139 138 # redirect all to stderr unless buffer api is in use
140 139 if not ui._buffers:
141 140 ui = ui.copy()
142 141 uifout = safeattrsetter(ui, 'fout', ignoremissing=True)
143 142 if uifout:
144 143 # for "historical portability":
145 144 # ui.fout/ferr have been available since 1.9 (or 4e1ccd4c2b6d)
146 145 uifout.set(ui.ferr)
147 146
148 147 # get a formatter
149 148 uiformatter = getattr(ui, 'formatter', None)
150 149 if uiformatter:
151 150 fm = uiformatter('perf', opts)
152 151 else:
153 152 # for "historical portability":
154 153 # define formatter locally, because ui.formatter has been
155 154 # available since 2.2 (or ae5f92e154d3)
156 155 from mercurial import node
157 156 class defaultformatter(object):
158 157 """Minimized composition of baseformatter and plainformatter
159 158 """
160 159 def __init__(self, ui, topic, opts):
161 160 self._ui = ui
162 161 if ui.debugflag:
163 162 self.hexfunc = node.hex
164 163 else:
165 164 self.hexfunc = node.short
166 165 def __nonzero__(self):
167 166 return False
168 167 def startitem(self):
169 168 pass
170 169 def data(self, **data):
171 170 pass
172 171 def write(self, fields, deftext, *fielddata, **opts):
173 172 self._ui.write(deftext % fielddata, **opts)
174 173 def condwrite(self, cond, fields, deftext, *fielddata, **opts):
175 174 if cond:
176 175 self._ui.write(deftext % fielddata, **opts)
177 176 def plain(self, text, **opts):
178 177 self._ui.write(text, **opts)
179 178 def end(self):
180 179 pass
181 180 fm = defaultformatter(ui, 'perf', opts)
182 181
183 182 # stub function, runs code only once instead of in a loop
184 183 # experimental config: perf.stub
185 184 if ui.configbool("perf", "stub"):
186 185 return functools.partial(stub_timer, fm), fm
187 186 return functools.partial(_timer, fm), fm
188 187
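Every perf command below follows the same calling convention around the pair returned by gettimer(); a condensed sketch of the pattern (the command body is illustrative):

    def perfexample(ui, repo, **opts):
        timer, fm = gettimer(ui, opts)      # timing closure + formatter
        timer(lambda: len(repo.changelog))  # benchmark one callable
        fm.end()                            # flush the formatter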
189 188 def stub_timer(fm, func, title=None):
190 189 func()
191 190
192 191 def _timer(fm, func, title=None):
193 192 results = []
194 193 begin = time.time()
195 194 count = 0
196 195 while True:
197 196 ostart = os.times()
198 197 cstart = time.time()
199 198 r = func()
200 199 cstop = time.time()
201 200 ostop = os.times()
202 201 count += 1
203 202 a, b = ostart, ostop
204 203 results.append((cstop - cstart, b[0] - a[0], b[1]-a[1]))
205 204 if cstop - begin > 3 and count >= 100:
206 205 break
207 206 if cstop - begin > 10 and count >= 3:
208 207 break
209 208
210 209 fm.startitem()
211 210
212 211 if title:
213 212 fm.write('title', '! %s\n', title)
214 213 if r:
215 214 fm.write('result', '! result: %s\n', r)
216 215 m = min(results)
217 216 fm.plain('!')
218 217 fm.write('wall', ' wall %f', m[0])
219 218 fm.write('comb', ' comb %f', m[1] + m[2])
220 219 fm.write('user', ' user %f', m[1])
221 220 fm.write('sys', ' sys %f', m[2])
222 221 fm.write('count', ' (best of %d)', count)
223 222 fm.plain('\n')
224 223
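With the format strings above, each measurement renders as a single line; an illustrative sample (numbers invented):

    ! wall 0.004012 comb 0.010000 user 0.010000 sys 0.000000 (best of 100)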
225 224 # utilities for historical portability
226 225
227 226 def getint(ui, section, name, default):
228 227 # for "historical portability":
229 228 # ui.configint has been available since 1.9 (or fa2b596db182)
230 229 v = ui.config(section, name, None)
231 230 if v is None:
232 231 return default
233 232 try:
234 233 return int(v)
235 234 except ValueError:
236 235 raise error.ConfigError(("%s.%s is not an integer ('%s')")
237 236 % (section, name, v))
238 237
239 238 def safeattrsetter(obj, name, ignoremissing=False):
240 239 """Ensure that 'obj' has 'name' attribute before subsequent setattr
241 240
242 241 This function aborts if 'obj' doesn't have the 'name' attribute
243 242 at runtime. This avoids silently overlooking a future removal of
244 243 an attribute, which would break an assumption of the measurement.
245 244 
246 245 This function returns an object that can (1) assign a new value
247 246 to the attribute and (2) restore the attribute's original value.
248 247 
249 248 If 'ignoremissing' is true, a missing 'name' attribute doesn't
250 249 cause an abort, and this function returns None. This is useful for
251 250 examining an attribute that isn't guaranteed to exist in all
252 251 Mercurial versions.
253 252 """
254 253 if not util.safehasattr(obj, name):
255 254 if ignoremissing:
256 255 return None
257 256 raise error.Abort(("missing attribute %s of %s might break assumption"
258 257 " of performance measurement") % (name, obj))
259 258
260 259 origvalue = getattr(obj, name)
261 260 class attrutil(object):
262 261 def set(self, newvalue):
263 262 setattr(obj, name, newvalue)
264 263 def restore(self):
265 264 setattr(obj, name, origvalue)
266 265
267 266 return attrutil()
268 267
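The returned attrutil object supports set() and restore(), which perfbranchmap further below uses to stub out branchmap I/O during timing; a condensed sketch of that pattern:

    branchcacheread = safeattrsetter(branchmap, 'read')
    branchcacheread.set(lambda repo: None)  # disable cache reads while timing
    try:
        repo.branchmap()                    # timed work goes here
    finally:
        branchcacheread.restore()           # put the original attribute back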
269 268 # utilities to examine each internal API changes
270 269
271 270 def getbranchmapsubsettable():
272 271 # for "historical portability":
273 272 # subsettable is defined in:
274 273 # - branchmap since 2.9 (or 175c6fd8cacc)
275 274 # - repoview since 2.5 (or 59a9f18d4587)
276 275 for mod in (branchmap, repoview):
277 276 subsettable = getattr(mod, 'subsettable', None)
278 277 if subsettable:
279 278 return subsettable
280 279
281 280 # bisecting in bcee63733aad::59a9f18d4587 can reach here (both
282 281 # branchmap and repoview modules exist, but subsettable attribute
283 282 # doesn't)
284 283 raise error.Abort(("perfbranchmap not available with this Mercurial"),
285 284 hint="use 2.5 or later")
286 285
287 286 def getsvfs(repo):
288 287 """Return appropriate object to access files under .hg/store
289 288 """
290 289 # for "historical portability":
291 290 # repo.svfs has been available since 2.3 (or 7034365089bf)
292 291 svfs = getattr(repo, 'svfs', None)
293 292 if svfs:
294 293 return svfs
295 294 else:
296 295 return getattr(repo, 'sopener')
297 296
298 297 def getvfs(repo):
299 298 """Return appropriate object to access files under .hg
300 299 """
301 300 # for "historical portability":
302 301 # repo.vfs has been available since 2.3 (or 7034365089bf)
303 302 vfs = getattr(repo, 'vfs', None)
304 303 if vfs:
305 304 return vfs
306 305 else:
307 306 return getattr(repo, 'opener')
308 307
309 308 def repocleartagscachefunc(repo):
310 309 """Return the function to clear tags cache according to repo internal API
311 310 """
312 311 if util.safehasattr(repo, '_tagscache'): # since 2.0 (or 9dca7653b525)
313 312 # in this case, setattr(repo, '_tagscache', None) or so isn't
314 313 # correct way to clear tags cache, because existing code paths
315 314 # expect _tagscache to be a structured object.
316 315 def clearcache():
317 316 # _tagscache has been filteredpropertycache since 2.5 (or
318 317 # 98c867ac1330), and delattr() can't work in such case
319 318 if '_tagscache' in vars(repo):
320 319 del repo.__dict__['_tagscache']
321 320 return clearcache
322 321
323 322 repotags = safeattrsetter(repo, '_tags', ignoremissing=True)
324 323 if repotags: # since 1.4 (or 5614a628d173)
325 324 return lambda : repotags.set(None)
326 325
327 326 repotagscache = safeattrsetter(repo, 'tagscache', ignoremissing=True)
328 327 if repotagscache: # since 0.6 (or d7df759d0e97)
329 328 return lambda : repotagscache.set(None)
330 329
331 330 # Mercurial earlier than 0.6 (or d7df759d0e97) logically reaches
332 331 # this point, but it isn't so problematic, because:
333 332 # - repo.tags of such Mercurial isn't "callable", and repo.tags()
334 333 # in perftags() causes failure soon
335 334 # - perf.py itself has been available since 1.1 (or eb240755386d)
336 335 raise error.Abort(("tags API of this hg command is unknown"))
337 336
338 337 # perf commands
339 338
340 339 @command('perfwalk', formatteropts)
341 340 def perfwalk(ui, repo, *pats, **opts):
342 341 timer, fm = gettimer(ui, opts)
343 342 try:
344 343 m = scmutil.match(repo[None], pats, {})
345 344 timer(lambda: len(list(repo.dirstate.walk(m, [], True, False))))
346 345 except Exception:
347 346 try:
348 347 m = scmutil.match(repo[None], pats, {})
349 348 timer(lambda: len([b for a, b, c in repo.dirstate.statwalk([], m)]))
350 349 except Exception:
351 350 timer(lambda: len(list(cmdutil.walk(repo, pats, {}))))
352 351 fm.end()
353 352
354 353 @command('perfannotate', formatteropts)
355 354 def perfannotate(ui, repo, f, **opts):
356 355 timer, fm = gettimer(ui, opts)
357 356 fc = repo['.'][f]
358 357 timer(lambda: len(fc.annotate(True)))
359 358 fm.end()
360 359
361 360 @command('perfstatus',
362 361 [('u', 'unknown', False,
363 362 'ask status to look for unknown files')] + formatteropts)
364 363 def perfstatus(ui, repo, **opts):
365 364 #m = match.always(repo.root, repo.getcwd())
366 365 #timer(lambda: sum(map(len, repo.dirstate.status(m, [], False, False,
367 366 # False))))
368 367 timer, fm = gettimer(ui, opts)
369 368 timer(lambda: sum(map(len, repo.status(unknown=opts['unknown']))))
370 369 fm.end()
371 370
372 371 @command('perfaddremove', formatteropts)
373 372 def perfaddremove(ui, repo, **opts):
374 373 timer, fm = gettimer(ui, opts)
375 374 try:
376 375 oldquiet = repo.ui.quiet
377 376 repo.ui.quiet = True
378 377 matcher = scmutil.match(repo[None])
379 378 timer(lambda: scmutil.addremove(repo, matcher, "", dry_run=True))
380 379 finally:
381 380 repo.ui.quiet = oldquiet
382 381 fm.end()
383 382
384 383 def clearcaches(cl):
385 384 # behave somewhat consistently across internal API changes
386 385 if util.safehasattr(cl, 'clearcaches'):
387 386 cl.clearcaches()
388 387 elif util.safehasattr(cl, '_nodecache'):
389 388 from mercurial.node import nullid, nullrev
390 389 cl._nodecache = {nullid: nullrev}
391 390 cl._nodepos = None
392 391
393 392 @command('perfheads', formatteropts)
394 393 def perfheads(ui, repo, **opts):
395 394 timer, fm = gettimer(ui, opts)
396 395 cl = repo.changelog
397 396 def d():
398 397 len(cl.headrevs())
399 398 clearcaches(cl)
400 399 timer(d)
401 400 fm.end()
402 401
403 402 @command('perftags', formatteropts)
404 403 def perftags(ui, repo, **opts):
405 404 import mercurial.changelog
406 405 import mercurial.manifest
407 406 timer, fm = gettimer(ui, opts)
408 407 svfs = getsvfs(repo)
409 408 repocleartagscache = repocleartagscachefunc(repo)
410 409 def t():
411 410 repo.changelog = mercurial.changelog.changelog(svfs)
412 411 repo.manifestlog = mercurial.manifest.manifestlog(svfs, repo)
413 412 repocleartagscache()
414 413 return len(repo.tags())
415 414 timer(t)
416 415 fm.end()
417 416
418 417 @command('perfancestors', formatteropts)
419 418 def perfancestors(ui, repo, **opts):
420 419 timer, fm = gettimer(ui, opts)
421 420 heads = repo.changelog.headrevs()
422 421 def d():
423 422 for a in repo.changelog.ancestors(heads):
424 423 pass
425 424 timer(d)
426 425 fm.end()
427 426
428 427 @command('perfancestorset', formatteropts)
429 428 def perfancestorset(ui, repo, revset, **opts):
430 429 timer, fm = gettimer(ui, opts)
431 430 revs = repo.revs(revset)
432 431 heads = repo.changelog.headrevs()
433 432 def d():
434 433 s = repo.changelog.ancestors(heads)
435 434 for rev in revs:
436 435 rev in s
437 436 timer(d)
438 437 fm.end()
439 438
440 439 @command('perfchangegroupchangelog', formatteropts +
441 440 [('', 'version', '02', 'changegroup version'),
442 441 ('r', 'rev', '', 'revisions to add to changegroup')])
443 442 def perfchangegroupchangelog(ui, repo, version='02', rev=None, **opts):
444 443 """Benchmark producing a changelog group for a changegroup.
445 444
446 445 This measures the time spent processing the changelog during a
447 446 bundle operation. This occurs during `hg bundle` and on a server
448 447 processing a `getbundle` wire protocol request (handles clones
449 448 and pull requests).
450 449
451 450 By default, all revisions are added to the changegroup.
452 451 """
453 452 cl = repo.changelog
454 453 revs = [cl.lookup(r) for r in repo.revs(rev or 'all()')]
455 454 bundler = changegroup.getbundler(version, repo)
456 455
457 456 def lookup(node):
458 457 # The real bundler reads the revision in order to access the
459 458 # manifest node and files list. Do that here.
460 459 cl.read(node)
461 460 return node
462 461
463 462 def d():
464 463 for chunk in bundler.group(revs, cl, lookup):
465 464 pass
466 465
467 466 timer, fm = gettimer(ui, opts)
468 467 timer(d)
469 468 fm.end()
470 469
471 470 @command('perfdirs', formatteropts)
472 471 def perfdirs(ui, repo, **opts):
473 472 timer, fm = gettimer(ui, opts)
474 473 dirstate = repo.dirstate
475 474 'a' in dirstate
476 475 def d():
477 476 dirstate.dirs()
478 477 del dirstate._dirs
479 478 timer(d)
480 479 fm.end()
481 480
482 481 @command('perfdirstate', formatteropts)
483 482 def perfdirstate(ui, repo, **opts):
484 483 timer, fm = gettimer(ui, opts)
485 484 "a" in repo.dirstate
486 485 def d():
487 486 repo.dirstate.invalidate()
488 487 "a" in repo.dirstate
489 488 timer(d)
490 489 fm.end()
491 490
492 491 @command('perfdirstatedirs', formatteropts)
493 492 def perfdirstatedirs(ui, repo, **opts):
494 493 timer, fm = gettimer(ui, opts)
495 494 "a" in repo.dirstate
496 495 def d():
497 496 "a" in repo.dirstate._dirs
498 497 del repo.dirstate._dirs
499 498 timer(d)
500 499 fm.end()
501 500
502 501 @command('perfdirstatefoldmap', formatteropts)
503 502 def perfdirstatefoldmap(ui, repo, **opts):
504 503 timer, fm = gettimer(ui, opts)
505 504 dirstate = repo.dirstate
506 505 'a' in dirstate
507 506 def d():
508 507 dirstate._filefoldmap.get('a')
509 508 del dirstate._filefoldmap
510 509 timer(d)
511 510 fm.end()
512 511
513 512 @command('perfdirfoldmap', formatteropts)
514 513 def perfdirfoldmap(ui, repo, **opts):
515 514 timer, fm = gettimer(ui, opts)
516 515 dirstate = repo.dirstate
517 516 'a' in dirstate
518 517 def d():
519 518 dirstate._dirfoldmap.get('a')
520 519 del dirstate._dirfoldmap
521 520 del dirstate._dirs
522 521 timer(d)
523 522 fm.end()
524 523
525 524 @command('perfdirstatewrite', formatteropts)
526 525 def perfdirstatewrite(ui, repo, **opts):
527 526 timer, fm = gettimer(ui, opts)
528 527 ds = repo.dirstate
529 528 "a" in ds
530 529 def d():
531 530 ds._dirty = True
532 531 ds.write(repo.currenttransaction())
533 532 timer(d)
534 533 fm.end()
535 534
536 535 @command('perfmergecalculate',
537 536 [('r', 'rev', '.', 'rev to merge against')] + formatteropts)
538 537 def perfmergecalculate(ui, repo, rev, **opts):
539 538 timer, fm = gettimer(ui, opts)
540 539 wctx = repo[None]
541 540 rctx = scmutil.revsingle(repo, rev, rev)
542 541 ancestor = wctx.ancestor(rctx)
543 542 # we don't want working dir files to be stat'd in the benchmark, so prime
544 543 # that cache
545 544 wctx.dirty()
546 545 def d():
547 546 # acceptremote is True because we don't want prompts in the middle of
548 547 # our benchmark
549 548 merge.calculateupdates(repo, wctx, rctx, [ancestor], False, False,
550 549 acceptremote=True, followcopies=True)
551 550 timer(d)
552 551 fm.end()
553 552
554 553 @command('perfpathcopies', [], "REV REV")
555 554 def perfpathcopies(ui, repo, rev1, rev2, **opts):
556 555 timer, fm = gettimer(ui, opts)
557 556 ctx1 = scmutil.revsingle(repo, rev1, rev1)
558 557 ctx2 = scmutil.revsingle(repo, rev2, rev2)
559 558 def d():
560 559 copies.pathcopies(ctx1, ctx2)
561 560 timer(d)
562 561 fm.end()
563 562
564 563 @command('perfmanifest', [], 'REV')
565 564 def perfmanifest(ui, repo, rev, **opts):
566 565 timer, fm = gettimer(ui, opts)
567 566 ctx = scmutil.revsingle(repo, rev, rev)
568 567 t = ctx.manifestnode()
569 568 def d():
570 569 repo.manifestlog.clearcaches()
571 570 repo.manifestlog[t].read()
572 571 timer(d)
573 572 fm.end()
574 573
575 574 @command('perfchangeset', formatteropts)
576 575 def perfchangeset(ui, repo, rev, **opts):
577 576 timer, fm = gettimer(ui, opts)
578 577 n = repo[rev].node()
579 578 def d():
580 579 repo.changelog.read(n)
581 580 #repo.changelog._cache = None
582 581 timer(d)
583 582 fm.end()
584 583
585 584 @command('perfindex', formatteropts)
586 585 def perfindex(ui, repo, **opts):
587 586 import mercurial.revlog
588 587 timer, fm = gettimer(ui, opts)
589 588 mercurial.revlog._prereadsize = 2**24 # disable lazy parser in old hg
590 589 n = repo["tip"].node()
591 590 svfs = getsvfs(repo)
592 591 def d():
593 592 cl = mercurial.revlog.revlog(svfs, "00changelog.i")
594 593 cl.rev(n)
595 594 timer(d)
596 595 fm.end()
597 596
598 597 @command('perfstartup', formatteropts)
599 598 def perfstartup(ui, repo, **opts):
600 599 timer, fm = gettimer(ui, opts)
601 600 cmd = sys.argv[0]
602 601 def d():
603 602 if os.name != 'nt':
604 603 os.system("HGRCPATH= %s version -q > /dev/null" % cmd)
605 604 else:
606 605 os.environ['HGRCPATH'] = ''
607 606 os.system("%s version -q > NUL" % cmd)
608 607 timer(d)
609 608 fm.end()
610 609
611 610 @command('perfparents', formatteropts)
612 611 def perfparents(ui, repo, **opts):
613 612 timer, fm = gettimer(ui, opts)
614 613 # control the number of commits perfparents iterates over
615 614 # experimental config: perf.parentscount
616 615 count = getint(ui, "perf", "parentscount", 1000)
617 616 if len(repo.changelog) < count:
618 617 raise error.Abort("repo needs %d commits for this test" % count)
619 618 repo = repo.unfiltered()
620 619 nl = [repo.changelog.node(i) for i in xrange(count)]
621 620 def d():
622 621 for n in nl:
623 622 repo.changelog.parents(n)
624 623 timer(d)
625 624 fm.end()
626 625
627 626 @command('perfctxfiles', formatteropts)
628 627 def perfctxfiles(ui, repo, x, **opts):
629 628 x = int(x)
630 629 timer, fm = gettimer(ui, opts)
631 630 def d():
632 631 len(repo[x].files())
633 632 timer(d)
634 633 fm.end()
635 634
636 635 @command('perfrawfiles', formatteropts)
637 636 def perfrawfiles(ui, repo, x, **opts):
638 637 x = int(x)
639 638 timer, fm = gettimer(ui, opts)
640 639 cl = repo.changelog
641 640 def d():
642 641 len(cl.read(x)[3])
643 642 timer(d)
644 643 fm.end()
645 644
646 645 @command('perflookup', formatteropts)
647 646 def perflookup(ui, repo, rev, **opts):
648 647 timer, fm = gettimer(ui, opts)
649 648 timer(lambda: len(repo.lookup(rev)))
650 649 fm.end()
651 650
652 651 @command('perfrevrange', formatteropts)
653 652 def perfrevrange(ui, repo, *specs, **opts):
654 653 timer, fm = gettimer(ui, opts)
655 654 revrange = scmutil.revrange
656 655 timer(lambda: len(revrange(repo, specs)))
657 656 fm.end()
658 657
659 658 @command('perfnodelookup', formatteropts)
660 659 def perfnodelookup(ui, repo, rev, **opts):
661 660 timer, fm = gettimer(ui, opts)
662 661 import mercurial.revlog
663 662 mercurial.revlog._prereadsize = 2**24 # disable lazy parser in old hg
664 663 n = repo[rev].node()
665 664 cl = mercurial.revlog.revlog(getsvfs(repo), "00changelog.i")
666 665 def d():
667 666 cl.rev(n)
668 667 clearcaches(cl)
669 668 timer(d)
670 669 fm.end()
671 670
672 671 @command('perflog',
673 672 [('', 'rename', False, 'ask log to follow renames')] + formatteropts)
674 673 def perflog(ui, repo, rev=None, **opts):
675 674 if rev is None:
676 675 rev=[]
677 676 timer, fm = gettimer(ui, opts)
678 677 ui.pushbuffer()
679 678 timer(lambda: commands.log(ui, repo, rev=rev, date='', user='',
680 679 copies=opts.get('rename')))
681 680 ui.popbuffer()
682 681 fm.end()
683 682
684 683 @command('perfmoonwalk', formatteropts)
685 684 def perfmoonwalk(ui, repo, **opts):
686 685 """benchmark walking the changelog backwards
687 686
688 687 This also loads the changelog data for each revision in the changelog.
689 688 """
690 689 timer, fm = gettimer(ui, opts)
691 690 def moonwalk():
692 691 for i in xrange(len(repo), -1, -1):
693 692 ctx = repo[i]
694 693 ctx.branch() # read changelog data (in addition to the index)
695 694 timer(moonwalk)
696 695 fm.end()
697 696
698 697 @command('perftemplating', formatteropts)
699 698 def perftemplating(ui, repo, rev=None, **opts):
700 699 if rev is None:
701 700 rev=[]
702 701 timer, fm = gettimer(ui, opts)
703 702 ui.pushbuffer()
704 703 timer(lambda: commands.log(ui, repo, rev=rev, date='', user='',
705 704 template='{date|shortdate} [{rev}:{node|short}]'
706 705 ' {author|person}: {desc|firstline}\n'))
707 706 ui.popbuffer()
708 707 fm.end()
709 708
710 709 @command('perfcca', formatteropts)
711 710 def perfcca(ui, repo, **opts):
712 711 timer, fm = gettimer(ui, opts)
713 712 timer(lambda: scmutil.casecollisionauditor(ui, False, repo.dirstate))
714 713 fm.end()
715 714
716 715 @command('perffncacheload', formatteropts)
717 716 def perffncacheload(ui, repo, **opts):
718 717 timer, fm = gettimer(ui, opts)
719 718 s = repo.store
720 719 def d():
721 720 s.fncache._load()
722 721 timer(d)
723 722 fm.end()
724 723
725 724 @command('perffncachewrite', formatteropts)
726 725 def perffncachewrite(ui, repo, **opts):
727 726 timer, fm = gettimer(ui, opts)
728 727 s = repo.store
729 728 s.fncache._load()
730 729 lock = repo.lock()
731 730 tr = repo.transaction('perffncachewrite')
732 731 def d():
733 732 s.fncache._dirty = True
734 733 s.fncache.write(tr)
735 734 timer(d)
736 735 tr.close()
737 736 lock.release()
738 737 fm.end()
739 738
740 739 @command('perffncacheencode', formatteropts)
741 740 def perffncacheencode(ui, repo, **opts):
742 741 timer, fm = gettimer(ui, opts)
743 742 s = repo.store
744 743 s.fncache._load()
745 744 def d():
746 745 for p in s.fncache.entries:
747 746 s.encode(p)
748 747 timer(d)
749 748 fm.end()
750 749
751 750 @command('perfbdiff', revlogopts + formatteropts + [
752 751 ('', 'count', 1, 'number of revisions to test (when using --startrev)'),
753 752 ('', 'alldata', False, 'test bdiffs for all associated revisions')],
754 753 '-c|-m|FILE REV')
755 754 def perfbdiff(ui, repo, file_, rev=None, count=None, **opts):
756 755 """benchmark a bdiff between revisions
757 756
758 757 By default, benchmark a bdiff between its delta parent and itself.
759 758
760 759 With ``--count``, benchmark bdiffs between delta parents and self for N
761 760 revisions starting at the specified revision.
762 761
763 762 With ``--alldata``, assume the requested revision is a changeset and
764 763 measure bdiffs for all changes related to that changeset (manifest
765 764 and filelogs).
766 765 """
767 766 if opts['alldata']:
768 767 opts['changelog'] = True
769 768
770 769 if opts.get('changelog') or opts.get('manifest'):
771 770 file_, rev = None, file_
772 771 elif rev is None:
773 772 raise error.CommandError('perfbdiff', 'invalid arguments')
774 773
775 774 textpairs = []
776 775
777 776 r = cmdutil.openrevlog(repo, 'perfbdiff', file_, opts)
778 777
779 778 startrev = r.rev(r.lookup(rev))
780 779 for rev in range(startrev, min(startrev + count, len(r) - 1)):
781 780 if opts['alldata']:
782 781 # Load revisions associated with changeset.
783 782 ctx = repo[rev]
784 783 mtext = repo.manifestlog._revlog.revision(ctx.manifestnode())
785 784 for pctx in ctx.parents():
786 785 pman = repo.manifestlog._revlog.revision(pctx.manifestnode())
787 786 textpairs.append((pman, mtext))
788 787
789 788 # Load filelog revisions by iterating manifest delta.
790 789 man = ctx.manifest()
791 790 pman = ctx.p1().manifest()
792 791 for filename, change in pman.diff(man).items():
793 792 fctx = repo.file(filename)
794 793 f1 = fctx.revision(change[0][0] or -1)
795 794 f2 = fctx.revision(change[1][0] or -1)
796 795 textpairs.append((f1, f2))
797 796 else:
798 797 dp = r.deltaparent(rev)
799 798 textpairs.append((r.revision(dp), r.revision(rev)))
800 799
801 800 def d():
802 801 for pair in textpairs:
803 802 bdiff.bdiff(*pair)
804 803
805 804 timer, fm = gettimer(ui, opts)
806 805 timer(d)
807 806 fm.end()
808 807
809 808 @command('perfdiffwd', formatteropts)
810 809 def perfdiffwd(ui, repo, **opts):
811 810 """Profile diff of working directory changes"""
812 811 timer, fm = gettimer(ui, opts)
813 812 options = {
814 813 'w': 'ignore_all_space',
815 814 'b': 'ignore_space_change',
816 815 'B': 'ignore_blank_lines',
817 816 }
818 817
819 818 for diffopt in ('', 'w', 'b', 'B', 'wB'):
820 819 opts = dict((options[c], '1') for c in diffopt)
821 820 def d():
822 821 ui.pushbuffer()
823 822 commands.diff(ui, repo, **opts)
824 823 ui.popbuffer()
825 824 title = 'diffopts: %s' % (diffopt and ('-' + diffopt) or 'none')
826 825 timer(d, title)
827 826 fm.end()
828 827
829 828 @command('perfrevlog', revlogopts + formatteropts +
830 829 [('d', 'dist', 100, 'distance between the revisions'),
831 830 ('s', 'startrev', 0, 'revision to start reading at'),
832 831 ('', 'reverse', False, 'read in reverse')],
833 832 '-c|-m|FILE')
834 833 def perfrevlog(ui, repo, file_=None, startrev=0, reverse=False, **opts):
835 834 """Benchmark reading a series of revisions from a revlog.
836 835
837 836 By default, we read every ``-d/--dist`` revision from 0 to tip of
838 837 the specified revlog.
839 838
840 839 The start revision can be defined via ``-s/--startrev``.
841 840 """
842 841 timer, fm = gettimer(ui, opts)
843 842 _len = getlen(ui)
844 843
845 844 def d():
846 845 r = cmdutil.openrevlog(repo, 'perfrevlog', file_, opts)
847 846
848 847 startrev = 0
849 848 endrev = _len(r)
850 849 dist = opts['dist']
851 850
852 851 if reverse:
853 852 startrev, endrev = endrev, startrev
854 853 dist = -1 * dist
855 854
856 855 for x in xrange(startrev, endrev, dist):
857 856 r.revision(r.node(x))
858 857
859 858 timer(d)
860 859 fm.end()
861 860
862 861 @command('perfrevlogchunks', revlogopts + formatteropts +
863 862 [('s', 'startrev', 0, 'revision to start at')],
864 863 '-c|-m|FILE')
865 864 def perfrevlogchunks(ui, repo, file_=None, startrev=0, **opts):
866 865 """Benchmark operations on revlog chunks.
867 866
868 867 Logically, each revlog is a collection of fulltext revisions. However,
869 868 stored within each revlog are "chunks" of possibly compressed data. This
870 869 data needs to be read and decompressed or compressed and written.
871 870
872 871 This command measures the time it takes to read+decompress and recompress
873 872 chunks in a revlog. It effectively isolates I/O and compression performance.
874 873 For measurements of higher-level operations like resolving revisions,
875 874 see ``perfrevlog`` and ``perfrevlogrevision``.
876 875 """
877 876 rl = cmdutil.openrevlog(repo, 'perfrevlogchunks', file_, opts)
878 877 revs = list(rl.revs(startrev, len(rl) - 1))
879 878
880 879 def rlfh(rl):
881 880 if rl._inline:
882 881 return getsvfs(repo)(rl.indexfile)
883 882 else:
884 883 return getsvfs(repo)(rl.datafile)
885 884
886 885 def doread():
887 886 rl.clearcaches()
888 887 for rev in revs:
889 888 rl._chunkraw(rev, rev)
890 889
891 890 def doreadcachedfh():
892 891 rl.clearcaches()
893 892 fh = rlfh(rl)
894 893 for rev in revs:
895 894 rl._chunkraw(rev, rev, df=fh)
896 895
897 896 def doreadbatch():
898 897 rl.clearcaches()
899 898 rl._chunkraw(revs[0], revs[-1])
900 899
901 900 def doreadbatchcachedfh():
902 901 rl.clearcaches()
903 902 fh = rlfh(rl)
904 903 rl._chunkraw(revs[0], revs[-1], df=fh)
905 904
906 905 def dochunk():
907 906 rl.clearcaches()
908 907 fh = rlfh(rl)
909 908 for rev in revs:
910 909 rl._chunk(rev, df=fh)
911 910
912 911 chunks = [None]
913 912
914 913 def dochunkbatch():
915 914 rl.clearcaches()
916 915 fh = rlfh(rl)
917 916 # Save chunks as a side-effect.
918 917 chunks[0] = rl._chunks(revs, df=fh)
919 918
920 919 def docompress():
921 920 rl.clearcaches()
922 921 for chunk in chunks[0]:
923 922 rl.compress(chunk)
924 923
925 924 benches = [
926 925 (lambda: doread(), 'read'),
927 926 (lambda: doreadcachedfh(), 'read w/ reused fd'),
928 927 (lambda: doreadbatch(), 'read batch'),
929 928 (lambda: doreadbatchcachedfh(), 'read batch w/ reused fd'),
930 929 (lambda: dochunk(), 'chunk'),
931 930 (lambda: dochunkbatch(), 'chunk batch'),
932 931 (lambda: docompress(), 'compress'),
933 932 ]
934 933
935 934 for fn, title in benches:
936 935 timer, fm = gettimer(ui, opts)
937 936 timer(fn, title=title)
938 937 fm.end()
939 938
940 939 @command('perfrevlogrevision', revlogopts + formatteropts +
941 940 [('', 'cache', False, 'use caches instead of clearing')],
942 941 '-c|-m|FILE REV')
943 942 def perfrevlogrevision(ui, repo, file_, rev=None, cache=None, **opts):
944 943 """Benchmark obtaining a revlog revision.
945 944
946 945 Obtaining a revlog revision consists of roughly the following steps:
947 946
948 947 1. Compute the delta chain
949 948 2. Obtain the raw chunks for that delta chain
950 949 3. Decompress each raw chunk
951 950 4. Apply binary patches to obtain fulltext
952 951 5. Verify hash of fulltext
953 952
954 953 This command measures the time spent in each of these phases.
955 954 """
956 955 if opts.get('changelog') or opts.get('manifest'):
957 956 file_, rev = None, file_
958 957 elif rev is None:
959 958 raise error.CommandError('perfrevlogrevision', 'invalid arguments')
960 959
961 960 r = cmdutil.openrevlog(repo, 'perfrevlogrevision', file_, opts)
962 961 node = r.lookup(rev)
963 962 rev = r.rev(node)
964 963
965 964 def dodeltachain(rev):
966 965 if not cache:
967 966 r.clearcaches()
968 967 r._deltachain(rev)
969 968
970 969 def doread(chain):
971 970 if not cache:
972 971 r.clearcaches()
973 972 r._chunkraw(chain[0], chain[-1])
974 973
975 974 def dodecompress(data, chain):
976 975 if not cache:
977 976 r.clearcaches()
978 977
979 978 start = r.start
980 979 length = r.length
981 980 inline = r._inline
982 981 iosize = r._io.size
983 982 buffer = util.buffer
984 983 offset = start(chain[0])
985 984
986 985 for rev in chain:
987 986 chunkstart = start(rev)
988 987 if inline:
989 988 chunkstart += (rev + 1) * iosize
990 989 chunklength = length(rev)
991 990 b = buffer(data, chunkstart - offset, chunklength)
992 revlog.decompress(b)
991 r.decompress(b)
993 992
994 993 def dopatch(text, bins):
995 994 if not cache:
996 995 r.clearcaches()
997 996 mdiff.patches(text, bins)
998 997
999 998 def dohash(text):
1000 999 if not cache:
1001 1000 r.clearcaches()
1002 1001 r.checkhash(text, node, rev=rev)
1003 1002
1004 1003 def dorevision():
1005 1004 if not cache:
1006 1005 r.clearcaches()
1007 1006 r.revision(node)
1008 1007
1009 1008 chain = r._deltachain(rev)[0]
1010 1009 data = r._chunkraw(chain[0], chain[-1])[1]
1011 1010 bins = r._chunks(chain)
1012 1011 text = str(bins[0])
1013 1012 bins = bins[1:]
1014 1013 text = mdiff.patches(text, bins)
1015 1014
1016 1015 benches = [
1017 1016 (lambda: dorevision(), 'full'),
1018 1017 (lambda: dodeltachain(rev), 'deltachain'),
1019 1018 (lambda: doread(chain), 'read'),
1020 1019 (lambda: dodecompress(data, chain), 'decompress'),
1021 1020 (lambda: dopatch(text, bins), 'patch'),
1022 1021 (lambda: dohash(text), 'hash'),
1023 1022 ]
1024 1023
1025 1024 for fn, title in benches:
1026 1025 timer, fm = gettimer(ui, opts)
1027 1026 timer(fn, title=title)
1028 1027 fm.end()
1029 1028
1030 1029 @command('perfrevset',
1031 1030 [('C', 'clear', False, 'clear volatile cache between each call.'),
1032 1031 ('', 'contexts', False, 'obtain changectx for each revision')]
1033 1032 + formatteropts, "REVSET")
1034 1033 def perfrevset(ui, repo, expr, clear=False, contexts=False, **opts):
1035 1034 """benchmark the execution time of a revset
1036 1035
1037 1036 Use the --clear option if you need to evaluate the impact of
1038 1037 building the volatile revision set caches on revset execution. The
1039 1038 volatile caches hold filtered and obsolescence-related data."""
1040 1039 timer, fm = gettimer(ui, opts)
1041 1040 def d():
1042 1041 if clear:
1043 1042 repo.invalidatevolatilesets()
1044 1043 if contexts:
1045 1044 for ctx in repo.set(expr): pass
1046 1045 else:
1047 1046 for r in repo.revs(expr): pass
1048 1047 timer(d)
1049 1048 fm.end()
1050 1049
1051 1050 @command('perfvolatilesets', formatteropts)
1052 1051 def perfvolatilesets(ui, repo, *names, **opts):
1053 1052 """benchmark the computation of various volatile set
1054 1053
1055 1054 Volatile set computes element related to filtering and obsolescence."""
1056 1055 timer, fm = gettimer(ui, opts)
1057 1056 repo = repo.unfiltered()
1058 1057
1059 1058 def getobs(name):
1060 1059 def d():
1061 1060 repo.invalidatevolatilesets()
1062 1061 obsolete.getrevs(repo, name)
1063 1062 return d
1064 1063
1065 1064 allobs = sorted(obsolete.cachefuncs)
1066 1065 if names:
1067 1066 allobs = [n for n in allobs if n in names]
1068 1067
1069 1068 for name in allobs:
1070 1069 timer(getobs(name), title=name)
1071 1070
1072 1071 def getfiltered(name):
1073 1072 def d():
1074 1073 repo.invalidatevolatilesets()
1075 1074 repoview.filterrevs(repo, name)
1076 1075 return d
1077 1076
1078 1077 allfilter = sorted(repoview.filtertable)
1079 1078 if names:
1080 1079 allfilter = [n for n in allfilter if n in names]
1081 1080
1082 1081 for name in allfilter:
1083 1082 timer(getfiltered(name), title=name)
1084 1083 fm.end()
1085 1084
1086 1085 @command('perfbranchmap',
1087 1086 [('f', 'full', False,
1088 1087 'Includes build time of subset'),
1089 1088 ] + formatteropts)
1090 1089 def perfbranchmap(ui, repo, full=False, **opts):
1091 1090 """benchmark the update of a branchmap
1092 1091
1093 1092 This benchmarks the full repo.branchmap() call with read and write disabled
1094 1093 """
1095 1094 timer, fm = gettimer(ui, opts)
1096 1095 def getbranchmap(filtername):
1097 1096 """generate a benchmark function for the filtername"""
1098 1097 if filtername is None:
1099 1098 view = repo
1100 1099 else:
1101 1100 view = repo.filtered(filtername)
1102 1101 def d():
1103 1102 if full:
1104 1103 view._branchcaches.clear()
1105 1104 else:
1106 1105 view._branchcaches.pop(filtername, None)
1107 1106 view.branchmap()
1108 1107 return d
1109 1108 # add filter in smaller subset to bigger subset
1110 1109 possiblefilters = set(repoview.filtertable)
1111 1110 subsettable = getbranchmapsubsettable()
1112 1111 allfilters = []
1113 1112 while possiblefilters:
1114 1113 for name in possiblefilters:
1115 1114 subset = subsettable.get(name)
1116 1115 if subset not in possiblefilters:
1117 1116 break
1118 1117 else:
1119 1118 assert False, 'subset cycle %s!' % possiblefilters
1120 1119 allfilters.append(name)
1121 1120 possiblefilters.remove(name)
1122 1121
1123 1122 # warm the cache
1124 1123 if not full:
1125 1124 for name in allfilters:
1126 1125 repo.filtered(name).branchmap()
1127 1126 # add unfiltered
1128 1127 allfilters.append(None)
1129 1128
1130 1129 branchcacheread = safeattrsetter(branchmap, 'read')
1131 1130 branchcachewrite = safeattrsetter(branchmap.branchcache, 'write')
1132 1131 branchcacheread.set(lambda repo: None)
1133 1132 branchcachewrite.set(lambda bc, repo: None)
1134 1133 try:
1135 1134 for name in allfilters:
1136 1135 timer(getbranchmap(name), title=str(name))
1137 1136 finally:
1138 1137 branchcacheread.restore()
1139 1138 branchcachewrite.restore()
1140 1139 fm.end()
1141 1140
1142 1141 @command('perfloadmarkers')
1143 1142 def perfloadmarkers(ui, repo):
1144 1143 """benchmark the time to parse the on-disk markers for a repo
1145 1144
1146 1145 Result is the number of markers in the repo."""
1147 1146 timer, fm = gettimer(ui)
1148 1147 svfs = getsvfs(repo)
1149 1148 timer(lambda: len(obsolete.obsstore(svfs)))
1150 1149 fm.end()
1151 1150
1152 1151 @command('perflrucachedict', formatteropts +
1153 1152 [('', 'size', 4, 'size of cache'),
1154 1153 ('', 'gets', 10000, 'number of key lookups'),
1155 1154 ('', 'sets', 10000, 'number of key sets'),
1156 1155 ('', 'mixed', 10000, 'number of mixed mode operations'),
1157 1156 ('', 'mixedgetfreq', 50, 'frequency of get vs set ops in mixed mode')],
1158 1157 norepo=True)
1159 1158 def perflrucache(ui, size=4, gets=10000, sets=10000, mixed=10000,
1160 1159 mixedgetfreq=50, **opts):
1161 1160 def doinit():
1162 1161 for i in xrange(10000):
1163 1162 util.lrucachedict(size)
1164 1163
1165 1164 values = []
1166 1165 for i in xrange(size):
1167 1166 values.append(random.randint(0, sys.maxint))
1168 1167
1169 1168 # Get mode fills the cache and tests raw lookup performance with no
1170 1169 # eviction.
1171 1170 getseq = []
1172 1171 for i in xrange(gets):
1173 1172 getseq.append(random.choice(values))
1174 1173
1175 1174 def dogets():
1176 1175 d = util.lrucachedict(size)
1177 1176 for v in values:
1178 1177 d[v] = v
1179 1178 for key in getseq:
1180 1179 value = d[key]
1181 1180 value # silence pyflakes warning
1182 1181
1183 1182 # Set mode tests insertion speed with cache eviction.
1184 1183 setseq = []
1185 1184 for i in xrange(sets):
1186 1185 setseq.append(random.randint(0, sys.maxint))
1187 1186
1188 1187 def dosets():
1189 1188 d = util.lrucachedict(size)
1190 1189 for v in setseq:
1191 1190 d[v] = v
1192 1191
1193 1192 # Mixed mode randomly performs gets and sets with eviction.
1194 1193 mixedops = []
1195 1194 for i in xrange(mixed):
1196 1195 r = random.randint(0, 100)
1197 1196 if r < mixedgetfreq:
1198 1197 op = 0
1199 1198 else:
1200 1199 op = 1
1201 1200
1202 1201 mixedops.append((op, random.randint(0, size * 2)))
1203 1202
1204 1203 def domixed():
1205 1204 d = util.lrucachedict(size)
1206 1205
1207 1206 for op, v in mixedops:
1208 1207 if op == 0:
1209 1208 try:
1210 1209 d[v]
1211 1210 except KeyError:
1212 1211 pass
1213 1212 else:
1214 1213 d[v] = v
1215 1214
1216 1215 benches = [
1217 1216 (doinit, 'init'),
1218 1217 (dogets, 'gets'),
1219 1218 (dosets, 'sets'),
1220 1219 (domixed, 'mixed')
1221 1220 ]
1222 1221
1223 1222 for fn, title in benches:
1224 1223 timer, fm = gettimer(ui, opts)
1225 1224 timer(fn, title=title)
1226 1225 fm.end()
1227 1226
1228 1227 def uisetup(ui):
1229 1228 if (util.safehasattr(cmdutil, 'openrevlog') and
1230 1229 not util.safehasattr(commands, 'debugrevlogopts')):
1231 1230 # for "historical portability":
1232 1231 # In this case, Mercurial should be 1.9 (or a79fea6b3e77) -
1233 1232 # 3.7 (or 5606f7d0d063). Therefore, '--dir' option for
1234 1233 # openrevlog() should cause failure, because it has been
1235 1234 # available since 3.5 (or 49c583ca48c4).
1236 1235 def openrevlog(orig, repo, cmd, file_, opts):
1237 1236 if opts.get('dir') and not util.safehasattr(repo, 'dirlog'):
1238 1237 raise error.Abort("This version doesn't support --dir option",
1239 1238 hint="use 3.5 or later")
1240 1239 return orig(repo, cmd, file_, opts)
1241 1240 extensions.wrapfunction(cmdutil, 'openrevlog', openrevlog)
@@ -1,2064 +1,2069 b''
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 from __future__ import absolute_import
15 15
16 16 import collections
17 17 import errno
18 18 import hashlib
19 19 import os
20 20 import struct
21 21 import zlib
22 22
23 23 # import stuff from node for others to import from revlog
24 24 from .node import (
25 25 bin,
26 26 hex,
27 27 nullid,
28 28 nullrev,
29 29 )
30 30 from .i18n import _
31 31 from . import (
32 32 ancestor,
33 33 error,
34 34 mdiff,
35 35 parsers,
36 36 templatefilters,
37 37 util,
38 38 )
39 39
40 40 _pack = struct.pack
41 41 _unpack = struct.unpack
42 42 _compress = zlib.compress
43 43 _decompress = zlib.decompress
44 44
45 45 # revlog header flags
46 46 REVLOGV0 = 0
47 47 REVLOGNG = 1
48 48 REVLOGNGINLINEDATA = (1 << 16)
49 49 REVLOGGENERALDELTA = (1 << 17)
50 50 REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
51 51 REVLOG_DEFAULT_FORMAT = REVLOGNG
52 52 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
53 53 REVLOGNG_FLAGS = REVLOGNGINLINEDATA | REVLOGGENERALDELTA
54 54
55 55 # revlog index flags
56 56 REVIDX_ISCENSORED = (1 << 15) # revision has censor metadata, must be verified
57 57 REVIDX_EXTSTORED = (1 << 14) # revision data is stored externally
58 58 REVIDX_DEFAULT_FLAGS = 0
59 59 # stable order in which flags need to be processed and their processors applied
60 60 REVIDX_FLAGS_ORDER = [
61 61 REVIDX_ISCENSORED,
62 62 REVIDX_EXTSTORED,
63 63 ]
64 64 REVIDX_KNOWN_FLAGS = util.bitsfrom(REVIDX_FLAGS_ORDER)
65 65
66 66 # max size of revlog with inline data
67 67 _maxinline = 131072
68 68 _chunksize = 1048576
69 69
70 70 RevlogError = error.RevlogError
71 71 LookupError = error.LookupError
72 72 CensoredNodeError = error.CensoredNodeError
73 73 ProgrammingError = error.ProgrammingError
74 74
75 75 # Store flag processors (cf. 'addflagprocessor()' to register)
76 76 _flagprocessors = {
77 77 REVIDX_ISCENSORED: None,
78 78 }
79 79
80 80 def addflagprocessor(flag, processor):
81 81 """Register a flag processor on a revision data flag.
82 82
83 83 Invariant:
84 84 - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER.
85 85 - Only one flag processor can be registered on a specific flag.
86 86 - flagprocessors must be 3-tuples of functions (read, write, raw) with the
87 87 following signatures:
88 88 - (read) f(self, text) -> newtext, bool
89 89 - (write) f(self, text) -> newtext, bool
90 90 - (raw) f(self, text) -> bool
91 91 The boolean returned by these transforms is used to determine whether
92 92 'newtext' can be used for hash integrity checking.
93 93
94 94 Note: The 'raw' transform is used for changegroup generation and in some
95 95 debug commands. In this case the transform only indicates whether the
96 96 contents can be used for hash integrity checks.
97 97 """
98 98 if not flag & REVIDX_KNOWN_FLAGS:
99 99 msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
100 100 raise ProgrammingError(msg)
101 101 if flag not in REVIDX_FLAGS_ORDER:
102 102 msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
103 103 raise ProgrammingError(msg)
104 104 if flag in _flagprocessors:
105 105 msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
106 106 raise error.Abort(msg)
107 107 _flagprocessors[flag] = processor
108 108
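Per the contract above, a processor is a (read, write, raw) 3-tuple of functions; a minimal sketch registering a no-op processor on REVIDX_EXTSTORED (the functions are hypothetical):

    def _readnoop(self, text):
        return text, True      # text unchanged, usable for hash checking

    def _writenoop(self, text):
        return text, True

    def _rawnoop(self, text):
        return True            # raw contents are valid for hash checks

    addflagprocessor(REVIDX_EXTSTORED, (_readnoop, _writenoop, _rawnoop))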
109 109 def getoffset(q):
110 110 return int(q >> 16)
111 111
112 112 def gettype(q):
113 113 return int(q & 0xFFFF)
114 114
115 115 def offset_type(offset, type):
116 116 if (type & ~REVIDX_KNOWN_FLAGS) != 0:
117 117 raise ValueError('unknown revlog index flags')
118 118 return long(long(offset) << 16 | type)
119 119
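offset_type() packs the byte offset and the flag bits into the single integer stored as the first element of each index entry; getoffset() and gettype() recover the two halves. A worked round trip:

    packed = offset_type(4096, REVIDX_ISCENSORED)  # 4096 << 16 | (1 << 15)
    assert getoffset(packed) == 4096
    assert gettype(packed) == REVIDX_ISCENSORED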
120 120 _nullhash = hashlib.sha1(nullid)
121 121
122 122 def hash(text, p1, p2):
123 123 """generate a hash from the given text and its parent hashes
124 124
125 125 This hash combines both the current file contents and its history
126 126 in a manner that makes it easy to distinguish nodes with the same
127 127 content in the revision graph.
128 128 """
129 129 # As of now, if one of the parent nodes is null, it is always p2
130 130 if p2 == nullid:
131 131 # deep copy of a hash is faster than creating one
132 132 s = _nullhash.copy()
133 133 s.update(p1)
134 134 else:
135 135 # none of the parent nodes are nullid
136 136 l = [p1, p2]
137 137 l.sort()
138 138 s = hashlib.sha1(l[0])
139 139 s.update(l[1])
140 140 s.update(text)
141 141 return s.digest()
142 142
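For the common single-parent case (p2 == nullid), the code above reduces to hashing nullid + p1 + text; an equivalent sketch:

    import hashlib
    from mercurial.node import nullid

    def hash_oneparent(text, p1):
        s = hashlib.sha1(nullid)  # same digest state as _nullhash.copy()
        s.update(p1)
        s.update(text)
        return s.digest()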
143 def decompress(bin):
144 """ decompress the given input """
145 if not bin:
146 return bin
147 t = bin[0]
148 if t == '\0':
149 return bin
150 if t == 'x':
151 try:
152 return _decompress(bin)
153 except zlib.error as e:
154 raise RevlogError(_("revlog decompress error: %s") % str(e))
155 if t == 'u':
156 return util.buffer(bin, 1)
157 raise RevlogError(_("unknown compression type %r") % t)
158
159 143 # index v0:
160 144 # 4 bytes: offset
161 145 # 4 bytes: compressed length
162 146 # 4 bytes: base rev
163 147 # 4 bytes: link rev
164 148 # 20 bytes: parent 1 nodeid
165 149 # 20 bytes: parent 2 nodeid
166 150 # 20 bytes: nodeid
167 151 indexformatv0 = ">4l20s20s20s"
168 152
169 153 class revlogoldio(object):
170 154 def __init__(self):
171 155 self.size = struct.calcsize(indexformatv0)
172 156
173 157 def parseindex(self, data, inline):
174 158 s = self.size
175 159 index = []
176 160 nodemap = {nullid: nullrev}
177 161 n = off = 0
178 162 l = len(data)
179 163 while off + s <= l:
180 164 cur = data[off:off + s]
181 165 off += s
182 166 e = _unpack(indexformatv0, cur)
183 167 # transform to revlogv1 format
184 168 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
185 169 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
186 170 index.append(e2)
187 171 nodemap[e[6]] = n
188 172 n += 1
189 173
190 174 # add the magic null revision at -1
191 175 index.append((0, 0, 0, -1, -1, -1, -1, nullid))
192 176
193 177 return index, nodemap, None
194 178
195 179 def packentry(self, entry, node, version, rev):
196 180 if gettype(entry[0]):
197 181 raise RevlogError(_("index entry flags need RevlogNG"))
198 182 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
199 183 node(entry[5]), node(entry[6]), entry[7])
200 184 return _pack(indexformatv0, *e2)
201 185
202 186 # index ng:
203 187 # 6 bytes: offset
204 188 # 2 bytes: flags
205 189 # 4 bytes: compressed length
206 190 # 4 bytes: uncompressed length
207 191 # 4 bytes: base rev
208 192 # 4 bytes: link rev
209 193 # 4 bytes: parent 1 rev
210 194 # 4 bytes: parent 2 rev
211 195 # 32 bytes: nodeid
212 196 indexformatng = ">Qiiiiii20s12x"
213 197 versionformat = ">I"
214 198
215 199 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
216 200 # signed integer)
217 201 _maxentrysize = 0x7fffffff
218 202
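With the format string above, each RevlogNG index record is a fixed 64 bytes (Q=8, six i=24, 20s=20, 12x padding=12); revlogio.size below computes the same value:

    import struct
    assert struct.calcsize(">Qiiiiii20s12x") == 64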
219 203 class revlogio(object):
220 204 def __init__(self):
221 205 self.size = struct.calcsize(indexformatng)
222 206
223 207 def parseindex(self, data, inline):
224 208 # call the C implementation to parse the index data
225 209 index, cache = parsers.parse_index2(data, inline)
226 210 return index, getattr(index, 'nodemap', None), cache
227 211
228 212 def packentry(self, entry, node, version, rev):
229 213 p = _pack(indexformatng, *entry)
230 214 if rev == 0:
231 215 p = _pack(versionformat, version) + p[4:]
232 216 return p
233 217
234 218 class revlog(object):
235 219 """
236 220 the underlying revision storage object
237 221
238 222 A revlog consists of two parts, an index and the revision data.
239 223
240 224 The index is a file with a fixed record size containing
241 225 information on each revision, including its nodeid (hash), the
242 226 nodeids of its parents, the position and offset of its data within
243 227 the data file, and the revision it's based on. Finally, each entry
244 228 contains a linkrev entry that can serve as a pointer to external
245 229 data.
246 230
247 231 The revision data itself is a linear collection of data chunks.
248 232 Each chunk represents a revision and is usually represented as a
249 233 delta against the previous chunk. To bound lookup time, runs of
250 234 deltas are limited to about 2 times the length of the original
251 235 version data. This makes retrieval of a version proportional to
252 236 its size, or O(1) relative to the number of revisions.
253 237
254 238 Both pieces of the revlog are written to in an append-only
255 239 fashion, which means we never need to rewrite a file to insert or
256 240 remove data, and can use some simple techniques to avoid the need
257 241 for locking while reading.
258 242
259 243 If checkambig, indexfile is opened with checkambig=True at
260 244 writing, to avoid file stat ambiguity.
261 245 """
262 246 def __init__(self, opener, indexfile, checkambig=False):
263 247 """
264 248 create a revlog object
265 249
266 250 opener is a function that abstracts the file opening operation
267 251 and can be used to implement COW semantics or the like.
268 252 """
269 253 self.indexfile = indexfile
270 254 self.datafile = indexfile[:-2] + ".d"
271 255 self.opener = opener
272 256 # When True, indexfile is opened with checkambig=True at writing, to
273 257 # avoid file stat ambiguity.
274 258 self._checkambig = checkambig
275 259 # 3-tuple of (node, rev, text) for a raw revision.
276 260 self._cache = None
277 261 # Maps rev to chain base rev.
278 262 self._chainbasecache = util.lrucachedict(100)
279 263 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
280 264 self._chunkcache = (0, '')
281 265 # How much data to read and cache into the raw revlog data cache.
282 266 self._chunkcachesize = 65536
283 267 self._maxchainlen = None
284 268 self._aggressivemergedeltas = False
285 269 self.index = []
286 270 # Mapping of partial identifiers to full nodes.
287 271 self._pcache = {}
288 272 # Mapping of revision integer to full node.
289 273 self._nodecache = {nullid: nullrev}
290 274 self._nodepos = None
291 275
292 276 v = REVLOG_DEFAULT_VERSION
293 277 opts = getattr(opener, 'options', None)
294 278 if opts is not None:
295 279 if 'revlogv1' in opts:
296 280 if 'generaldelta' in opts:
297 281 v |= REVLOGGENERALDELTA
298 282 else:
299 283 v = 0
300 284 if 'chunkcachesize' in opts:
301 285 self._chunkcachesize = opts['chunkcachesize']
302 286 if 'maxchainlen' in opts:
303 287 self._maxchainlen = opts['maxchainlen']
304 288 if 'aggressivemergedeltas' in opts:
305 289 self._aggressivemergedeltas = opts['aggressivemergedeltas']
306 290 self._lazydeltabase = bool(opts.get('lazydeltabase', False))
307 291
308 292 if self._chunkcachesize <= 0:
309 293 raise RevlogError(_('revlog chunk cache size %r is not greater '
310 294 'than 0') % self._chunkcachesize)
311 295 elif self._chunkcachesize & (self._chunkcachesize - 1):
312 296 raise RevlogError(_('revlog chunk cache size %r is not a power '
313 297 'of 2') % self._chunkcachesize)
314 298
315 299 indexdata = ''
316 300 self._initempty = True
317 301 try:
318 302 f = self.opener(self.indexfile)
319 303 indexdata = f.read()
320 304 f.close()
321 305 if len(indexdata) > 0:
322 306 v = struct.unpack(versionformat, indexdata[:4])[0]
323 307 self._initempty = False
324 308 except IOError as inst:
325 309 if inst.errno != errno.ENOENT:
326 310 raise
327 311
328 312 self.version = v
329 313 self._inline = v & REVLOGNGINLINEDATA
330 314 self._generaldelta = v & REVLOGGENERALDELTA
331 315 flags = v & ~0xFFFF
332 316 fmt = v & 0xFFFF
333 317 if fmt == REVLOGV0 and flags:
334 318 raise RevlogError(_("index %s unknown flags %#04x for format v0")
335 319 % (self.indexfile, flags >> 16))
336 320 elif fmt == REVLOGNG and flags & ~REVLOGNG_FLAGS:
337 321 raise RevlogError(_("index %s unknown flags %#04x for revlogng")
338 322 % (self.indexfile, flags >> 16))
339 323 elif fmt > REVLOGNG:
340 324 raise RevlogError(_("index %s unknown format %d")
341 325 % (self.indexfile, fmt))
342 326
343 327 self.storedeltachains = True
344 328
345 329 self._io = revlogio()
346 330 if self.version == REVLOGV0:
347 331 self._io = revlogoldio()
348 332 try:
349 333 d = self._io.parseindex(indexdata, self._inline)
350 334 except (ValueError, IndexError):
351 335 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
352 336 self.index, nodemap, self._chunkcache = d
353 337 if nodemap is not None:
354 338 self.nodemap = self._nodecache = nodemap
355 339 if not self._chunkcache:
356 340 self._chunkclear()
357 341 # revnum -> (chain-length, sum-delta-length)
358 342 self._chaininfocache = {}
359 343
360 344 def tip(self):
361 345 return self.node(len(self.index) - 2)
362 346 def __contains__(self, rev):
363 347 return 0 <= rev < len(self)
364 348 def __len__(self):
365 349 return len(self.index) - 1
366 350 def __iter__(self):
367 351 return iter(xrange(len(self)))
368 352 def revs(self, start=0, stop=None):
369 353 """iterate over all rev in this revlog (from start to stop)"""
370 354 step = 1
371 355 if stop is not None:
372 356 if start > stop:
373 357 step = -1
374 358 stop += step
375 359 else:
376 360 stop = len(self)
377 361 return xrange(start, stop, step)
378 362
379 363 @util.propertycache
380 364 def nodemap(self):
381 365 self.rev(self.node(0))
382 366 return self._nodecache
383 367
384 368 def hasnode(self, node):
385 369 try:
386 370 self.rev(node)
387 371 return True
388 372 except KeyError:
389 373 return False
390 374
391 375 def clearcaches(self):
392 376 self._cache = None
393 377 self._chainbasecache.clear()
394 378 self._chunkcache = (0, '')
395 379 self._pcache = {}
396 380
397 381 try:
398 382 self._nodecache.clearcaches()
399 383 except AttributeError:
400 384 self._nodecache = {nullid: nullrev}
401 385 self._nodepos = None
402 386
403 387 def rev(self, node):
404 388 try:
405 389 return self._nodecache[node]
406 390 except TypeError:
407 391 raise
408 392 except RevlogError:
409 393 # parsers.c radix tree lookup failed
410 394 raise LookupError(node, self.indexfile, _('no node'))
411 395 except KeyError:
412 396 # pure python cache lookup failed
413 397 n = self._nodecache
414 398 i = self.index
415 399 p = self._nodepos
416 400 if p is None:
417 401 p = len(i) - 2
418 402 for r in xrange(p, -1, -1):
419 403 v = i[r][7]
420 404 n[v] = r
421 405 if v == node:
422 406 self._nodepos = r - 1
423 407 return r
424 408 raise LookupError(node, self.indexfile, _('no node'))
425 409
426 410 # Accessors for index entries.
427 411
428 412 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
429 413 # are flags.
430 414 def start(self, rev):
431 415 return int(self.index[rev][0] >> 16)
432 416
433 417 def flags(self, rev):
434 418 return self.index[rev][0] & 0xFFFF
435 419
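    # Editor's worked sketch of the packing described above: the top 48 bits
    # of the first tuple entry hold the offset, the low 16 bits the flags.
    #
    #   >>> e0 = (7 << 16) | 0x0001    # offset 7, flags 0x0001
    #   >>> e0 >> 16, e0 & 0xFFFF
    #   (7, 1)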
436 420 def length(self, rev):
437 421 return self.index[rev][1]
438 422
439 423 def rawsize(self, rev):
440 424 """return the length of the uncompressed text for a given revision"""
441 425 l = self.index[rev][2]
442 426 if l >= 0:
443 427 return l
444 428
445 429 t = self.revision(self.node(rev))
446 430 return len(t)
447 431 size = rawsize
448 432
449 433 def chainbase(self, rev):
450 434 base = self._chainbasecache.get(rev)
451 435 if base is not None:
452 436 return base
453 437
454 438 index = self.index
455 439 base = index[rev][3]
456 440 while base != rev:
457 441 rev = base
458 442 base = index[rev][3]
459 443
460 444 self._chainbasecache[rev] = base
461 445 return base
462 446
463 447 def linkrev(self, rev):
464 448 return self.index[rev][4]
465 449
466 450 def parentrevs(self, rev):
467 451 return self.index[rev][5:7]
468 452
469 453 def node(self, rev):
470 454 return self.index[rev][7]
471 455
472 456 # Derived from index values.
473 457
474 458 def end(self, rev):
475 459 return self.start(rev) + self.length(rev)
476 460
477 461 def parents(self, node):
478 462 i = self.index
479 463 d = i[self.rev(node)]
480 464 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
481 465
482 466 def chainlen(self, rev):
483 467 return self._chaininfo(rev)[0]
484 468
485 469 def _chaininfo(self, rev):
486 470 chaininfocache = self._chaininfocache
487 471 if rev in chaininfocache:
488 472 return chaininfocache[rev]
489 473 index = self.index
490 474 generaldelta = self._generaldelta
491 475 iterrev = rev
492 476 e = index[iterrev]
493 477 clen = 0
494 478 compresseddeltalen = 0
495 479 while iterrev != e[3]:
496 480 clen += 1
497 481 compresseddeltalen += e[1]
498 482 if generaldelta:
499 483 iterrev = e[3]
500 484 else:
501 485 iterrev -= 1
502 486 if iterrev in chaininfocache:
503 487 t = chaininfocache[iterrev]
504 488 clen += t[0]
505 489 compresseddeltalen += t[1]
506 490 break
507 491 e = index[iterrev]
508 492 else:
509 493 # Add text length of base since decompressing that also takes
510 494 # work. For cache hits the length is already included.
511 495 compresseddeltalen += e[1]
512 496 r = (clen, compresseddeltalen)
513 497 chaininfocache[rev] = r
514 498 return r
515 499
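    # Editor's sketch (hypothetical revlog ``rl`` and revisions): for a delta
    # chain base -> r1 -> r2 whose stored entries are 10, 4 and 3 bytes,
    # _chaininfo() counts two delta links plus the base's stored length:
    #
    #   >>> rl._chaininfo(r2)
    #   (2, 17)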
516 500 def _deltachain(self, rev, stoprev=None):
517 501 """Obtain the delta chain for a revision.
518 502
519 503 ``stoprev`` specifies a revision to stop at. If not specified, we
520 504 stop at the base of the chain.
521 505
522 506 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
523 507 revs in ascending order and ``stopped`` is a bool indicating whether
524 508 ``stoprev`` was hit.
525 509 """
526 510 chain = []
527 511
528 512 # Alias to prevent attribute lookup in tight loop.
529 513 index = self.index
530 514 generaldelta = self._generaldelta
531 515
532 516 iterrev = rev
533 517 e = index[iterrev]
534 518 while iterrev != e[3] and iterrev != stoprev:
535 519 chain.append(iterrev)
536 520 if generaldelta:
537 521 iterrev = e[3]
538 522 else:
539 523 iterrev -= 1
540 524 e = index[iterrev]
541 525
542 526 if iterrev == stoprev:
543 527 stopped = True
544 528 else:
545 529 chain.append(iterrev)
546 530 stopped = False
547 531
548 532 chain.reverse()
549 533 return chain, stopped
550 534
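    # Editor's sketch, assuming the same hypothetical base -> r1 -> r2 chain
    # as above on a generaldelta revlog ``rl``:
    #
    #   >>> rl._deltachain(r2)
    #   ([base, r1, r2], False)
    #   >>> rl._deltachain(r2, stoprev=r1)
    #   ([r2], True)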
551 535 def ancestors(self, revs, stoprev=0, inclusive=False):
552 536 """Generate the ancestors of 'revs' in reverse topological order.
553 537 Does not generate revs lower than stoprev.
554 538
555 539 See the documentation for ancestor.lazyancestors for more details."""
556 540
557 541 return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
558 542 inclusive=inclusive)
559 543
560 544 def descendants(self, revs):
561 545 """Generate the descendants of 'revs' in revision order.
562 546
563 547 Yield a sequence of revision numbers starting with a child of
564 548 some rev in revs, i.e., each revision is *not* considered a
565 549 descendant of itself. Results are ordered by revision number (a
566 550 topological sort)."""
567 551 first = min(revs)
568 552 if first == nullrev:
569 553 for i in self:
570 554 yield i
571 555 return
572 556
573 557 seen = set(revs)
574 558 for i in self.revs(start=first + 1):
575 559 for x in self.parentrevs(i):
576 560 if x != nullrev and x in seen:
577 561 seen.add(i)
578 562 yield i
579 563 break
580 564
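    # Editor's sketch: in a hypothetical linear revlog ``rl`` (0 -> 1 -> 2),
    # descendants([0]) yields 1 and 2 but not 0 itself:
    #
    #   >>> list(rl.descendants([0]))
    #   [1, 2]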
581 565 def findcommonmissing(self, common=None, heads=None):
582 566 """Return a tuple of the ancestors of common and the ancestors of heads
583 567 that are not ancestors of common. In revset terminology, we return the
584 568 tuple:
585 569
586 570 ::common, (::heads) - (::common)
587 571
588 572 The list is sorted by revision number, meaning it is
589 573 topologically sorted.
590 574
591 575 'heads' and 'common' are both lists of node IDs. If heads is
592 576 not supplied, uses all of the revlog's heads. If common is not
593 577 supplied, uses nullid."""
594 578 if common is None:
595 579 common = [nullid]
596 580 if heads is None:
597 581 heads = self.heads()
598 582
599 583 common = [self.rev(n) for n in common]
600 584 heads = [self.rev(n) for n in heads]
601 585
602 586 # we want the ancestors, but inclusive
603 587 class lazyset(object):
604 588 def __init__(self, lazyvalues):
605 589 self.addedvalues = set()
606 590 self.lazyvalues = lazyvalues
607 591
608 592 def __contains__(self, value):
609 593 return value in self.addedvalues or value in self.lazyvalues
610 594
611 595 def __iter__(self):
612 596 added = self.addedvalues
613 597 for r in added:
614 598 yield r
615 599 for r in self.lazyvalues:
616 600 if r not in added:
617 601 yield r
618 602
619 603 def add(self, value):
620 604 self.addedvalues.add(value)
621 605
622 606 def update(self, values):
623 607 self.addedvalues.update(values)
624 608
625 609 has = lazyset(self.ancestors(common))
626 610 has.add(nullrev)
627 611 has.update(common)
628 612
629 613 # take all ancestors from heads that aren't in has
630 614 missing = set()
631 615 visit = collections.deque(r for r in heads if r not in has)
632 616 while visit:
633 617 r = visit.popleft()
634 618 if r in missing:
635 619 continue
636 620 else:
637 621 missing.add(r)
638 622 for p in self.parentrevs(r):
639 623 if p not in has:
640 624 visit.append(p)
641 625 missing = list(missing)
642 626 missing.sort()
643 627 return has, [self.node(miss) for miss in missing]
644 628
645 629 def incrementalmissingrevs(self, common=None):
646 630 """Return an object that can be used to incrementally compute the
647 631 revision numbers of the ancestors of arbitrary sets that are not
648 632 ancestors of common. This is an ancestor.incrementalmissingancestors
649 633 object.
650 634
651 635 'common' is a list of revision numbers. If common is not supplied, uses
652 636 nullrev.
653 637 """
654 638 if common is None:
655 639 common = [nullrev]
656 640
657 641 return ancestor.incrementalmissingancestors(self.parentrevs, common)
658 642
659 643 def findmissingrevs(self, common=None, heads=None):
660 644 """Return the revision numbers of the ancestors of heads that
661 645 are not ancestors of common.
662 646
663 647 More specifically, return a list of revision numbers corresponding to
664 648 nodes N such that every N satisfies the following constraints:
665 649
666 650 1. N is an ancestor of some node in 'heads'
667 651 2. N is not an ancestor of any node in 'common'
668 652
669 653 The list is sorted by revision number, meaning it is
670 654 topologically sorted.
671 655
672 656 'heads' and 'common' are both lists of revision numbers. If heads is
673 657 not supplied, uses all of the revlog's heads. If common is not
674 658 supplied, uses nullrev."""
675 659 if common is None:
676 660 common = [nullrev]
677 661 if heads is None:
678 662 heads = self.headrevs()
679 663
680 664 inc = self.incrementalmissingrevs(common=common)
681 665 return inc.missingancestors(heads)
682 666
683 667 def findmissing(self, common=None, heads=None):
684 668 """Return the ancestors of heads that are not ancestors of common.
685 669
686 670 More specifically, return a list of nodes N such that every N
687 671 satisfies the following constraints:
688 672
689 673 1. N is an ancestor of some node in 'heads'
690 674 2. N is not an ancestor of any node in 'common'
691 675
692 676 The list is sorted by revision number, meaning it is
693 677 topologically sorted.
694 678
695 679 'heads' and 'common' are both lists of node IDs. If heads is
696 680 not supplied, uses all of the revlog's heads. If common is not
697 681 supplied, uses nullid."""
698 682 if common is None:
699 683 common = [nullid]
700 684 if heads is None:
701 685 heads = self.heads()
702 686
703 687 common = [self.rev(n) for n in common]
704 688 heads = [self.rev(n) for n in heads]
705 689
706 690 inc = self.incrementalmissingrevs(common=common)
707 691 return [self.node(r) for r in inc.missingancestors(heads)]
708 692
709 693 def nodesbetween(self, roots=None, heads=None):
710 694 """Return a topological path from 'roots' to 'heads'.
711 695
712 696 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
713 697 topologically sorted list of all nodes N that satisfy both of
714 698 these constraints:
715 699
716 700 1. N is a descendant of some node in 'roots'
717 701 2. N is an ancestor of some node in 'heads'
718 702
719 703 Every node is considered to be both a descendant and an ancestor
720 704 of itself, so every reachable node in 'roots' and 'heads' will be
721 705 included in 'nodes'.
722 706
723 707 'outroots' is the list of reachable nodes in 'roots', i.e., the
724 708 subset of 'roots' that is returned in 'nodes'. Likewise,
725 709 'outheads' is the subset of 'heads' that is also in 'nodes'.
726 710
727 711 'roots' and 'heads' are both lists of node IDs. If 'roots' is
728 712 unspecified, uses nullid as the only root. If 'heads' is
729 713 unspecified, uses list of all of the revlog's heads."""
730 714 nonodes = ([], [], [])
731 715 if roots is not None:
732 716 roots = list(roots)
733 717 if not roots:
734 718 return nonodes
735 719 lowestrev = min([self.rev(n) for n in roots])
736 720 else:
737 721 roots = [nullid] # Everybody's a descendant of nullid
738 722 lowestrev = nullrev
739 723 if (lowestrev == nullrev) and (heads is None):
740 724 # We want _all_ the nodes!
741 725 return ([self.node(r) for r in self], [nullid], list(self.heads()))
742 726 if heads is None:
743 727 # All nodes are ancestors, so the latest ancestor is the last
744 728 # node.
745 729 highestrev = len(self) - 1
746 730 # Set ancestors to None to signal that every node is an ancestor.
747 731 ancestors = None
748 732 # Set heads to an empty dictionary for later discovery of heads
749 733 heads = {}
750 734 else:
751 735 heads = list(heads)
752 736 if not heads:
753 737 return nonodes
754 738 ancestors = set()
755 739 # Turn heads into a dictionary so we can remove 'fake' heads.
756 740 # Also, later we will be using it to filter out the heads we can't
757 741 # find from roots.
758 742 heads = dict.fromkeys(heads, False)
759 743 # Start at the top and keep marking parents until we're done.
760 744 nodestotag = set(heads)
761 745 # Remember where the top was so we can use it as a limit later.
762 746 highestrev = max([self.rev(n) for n in nodestotag])
763 747 while nodestotag:
764 748 # grab a node to tag
765 749 n = nodestotag.pop()
766 750 # Never tag nullid
767 751 if n == nullid:
768 752 continue
769 753 # A node's revision number represents its place in a
770 754 # topologically sorted list of nodes.
771 755 r = self.rev(n)
772 756 if r >= lowestrev:
773 757 if n not in ancestors:
774 758 # If we are possibly a descendant of one of the roots
775 759 # and we haven't already been marked as an ancestor
776 760 ancestors.add(n) # Mark as ancestor
777 761 # Add non-nullid parents to list of nodes to tag.
778 762 nodestotag.update([p for p in self.parents(n) if
779 763 p != nullid])
780 764 elif n in heads: # We've seen it before, is it a fake head?
781 765 # So it is, real heads should not be the ancestors of
782 766 # any other heads.
783 767 heads.pop(n)
784 768 if not ancestors:
785 769 return nonodes
786 770 # Now that we have our set of ancestors, we want to remove any
787 771 # roots that are not ancestors.
788 772
789 773 # If one of the roots was nullid, everything is included anyway.
790 774 if lowestrev > nullrev:
791 775 # But, since we weren't, let's recompute the lowest rev to not
792 776 # include roots that aren't ancestors.
793 777
794 778 # Filter out roots that aren't ancestors of heads
795 779 roots = [root for root in roots if root in ancestors]
796 780 # Recompute the lowest revision
797 781 if roots:
798 782 lowestrev = min([self.rev(root) for root in roots])
799 783 else:
800 784 # No more roots? Return empty list
801 785 return nonodes
802 786 else:
803 787 # We are descending from nullid, and don't need to care about
804 788 # any other roots.
805 789 lowestrev = nullrev
806 790 roots = [nullid]
807 791 # Transform our roots list into a set.
808 792 descendants = set(roots)
809 793 # Also, keep the original roots so we can filter out roots that aren't
810 794 # 'real' roots (i.e. are descended from other roots).
811 795 roots = descendants.copy()
812 796 # Our topologically sorted list of output nodes.
813 797 orderedout = []
814 798 # Don't start at nullid since we don't want nullid in our output list,
815 799 # and if nullid shows up in descendants, empty parents will look like
816 800 # they're descendants.
817 801 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
818 802 n = self.node(r)
819 803 isdescendant = False
820 804 if lowestrev == nullrev: # Everybody is a descendant of nullid
821 805 isdescendant = True
822 806 elif n in descendants:
823 807 # n is already a descendant
824 808 isdescendant = True
825 809 # This check only needs to be done here because all the roots
826 810 # will start being marked as descendants before the loop.
827 811 if n in roots:
828 812 # If n was a root, check if it's a 'real' root.
829 813 p = tuple(self.parents(n))
830 814 # If any of its parents are descendants, it's not a root.
831 815 if (p[0] in descendants) or (p[1] in descendants):
832 816 roots.remove(n)
833 817 else:
834 818 p = tuple(self.parents(n))
835 819 # A node is a descendant if either of its parents are
836 820 # descendants. (We seeded the descendants set with the roots
837 821 # up there, remember?)
838 822 if (p[0] in descendants) or (p[1] in descendants):
839 823 descendants.add(n)
840 824 isdescendant = True
841 825 if isdescendant and ((ancestors is None) or (n in ancestors)):
842 826 # Only include nodes that are both descendants and ancestors.
843 827 orderedout.append(n)
844 828 if (ancestors is not None) and (n in heads):
845 829 # We're trying to figure out which heads are reachable
846 830 # from roots.
847 831 # Mark this head as having been reached
848 832 heads[n] = True
849 833 elif ancestors is None:
850 834 # Otherwise, we're trying to discover the heads.
851 835 # Assume this is a head because if it isn't, the next step
852 836 # will eventually remove it.
853 837 heads[n] = True
854 838 # But, obviously its parents aren't.
855 839 for p in self.parents(n):
856 840 heads.pop(p, None)
857 841 heads = [head for head, flag in heads.iteritems() if flag]
858 842 roots = list(roots)
859 843 assert orderedout
860 844 assert roots
861 845 assert heads
862 846 return (orderedout, roots, heads)
863 847
864 848 def headrevs(self):
865 849 try:
866 850 return self.index.headrevs()
867 851 except AttributeError:
868 852 return self._headrevs()
869 853
870 854 def computephases(self, roots):
871 855 return self.index.computephasesmapsets(roots)
872 856
873 857 def _headrevs(self):
874 858 count = len(self)
875 859 if not count:
876 860 return [nullrev]
877 861 # we won't iter over filtered revs so nobody is a head at start
878 862 ishead = [0] * (count + 1)
879 863 index = self.index
880 864 for r in self:
881 865 ishead[r] = 1 # I may be a head
882 866 e = index[r]
883 867 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
884 868 return [r for r, val in enumerate(ishead) if val]
885 869
886 870 def heads(self, start=None, stop=None):
887 871 """return the list of all nodes that have no children
888 872
889 873 if start is specified, only heads that are descendants of
890 874 start will be returned
891 875 if stop is specified, it will consider all the revs from stop
892 876 as if they had no children
893 877 """
894 878 if start is None and stop is None:
895 879 if not len(self):
896 880 return [nullid]
897 881 return [self.node(r) for r in self.headrevs()]
898 882
899 883 if start is None:
900 884 start = nullid
901 885 if stop is None:
902 886 stop = []
903 887 stoprevs = set([self.rev(n) for n in stop])
904 888 startrev = self.rev(start)
905 889 reachable = set((startrev,))
906 890 heads = set((startrev,))
907 891
908 892 parentrevs = self.parentrevs
909 893 for r in self.revs(start=startrev + 1):
910 894 for p in parentrevs(r):
911 895 if p in reachable:
912 896 if r not in stoprevs:
913 897 reachable.add(r)
914 898 heads.add(r)
915 899 if p in heads and p not in stoprevs:
916 900 heads.remove(p)
917 901
918 902 return [self.node(r) for r in heads]
919 903
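    # Editor's sketch: in a hypothetical linear revlog ``rl`` (0 -> 1 -> 2),
    # the only head is the tip, whether or not a start node is given:
    #
    #   >>> rl.heads() == [rl.node(2)]
    #   True
    #   >>> rl.heads(start=rl.node(0)) == [rl.node(2)]
    #   True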
920 904 def children(self, node):
921 905 """find the children of a given node"""
922 906 c = []
923 907 p = self.rev(node)
924 908 for r in self.revs(start=p + 1):
925 909 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
926 910 if prevs:
927 911 for pr in prevs:
928 912 if pr == p:
929 913 c.append(self.node(r))
930 914 elif p == nullrev:
931 915 c.append(self.node(r))
932 916 return c
933 917
934 918 def descendant(self, start, end):
935 919 if start == nullrev:
936 920 return True
937 921 for i in self.descendants([start]):
938 922 if i == end:
939 923 return True
940 924 elif i > end:
941 925 break
942 926 return False
943 927
944 928 def commonancestorsheads(self, a, b):
945 929 """calculate all the heads of the common ancestors of nodes a and b"""
946 930 a, b = self.rev(a), self.rev(b)
947 931 try:
948 932 ancs = self.index.commonancestorsheads(a, b)
949 933 except (AttributeError, OverflowError): # C implementation failed
950 934 ancs = ancestor.commonancestorsheads(self.parentrevs, a, b)
951 935 return map(self.node, ancs)
952 936
953 937 def isancestor(self, a, b):
954 938 """return True if node a is an ancestor of node b
955 939
956 940 The implementation of this is trivial but the use of
957 941 commonancestorsheads is not."""
958 942 return a in self.commonancestorsheads(a, b)
959 943
960 944 def ancestor(self, a, b):
961 945 """calculate the "best" common ancestor of nodes a and b"""
962 946
963 947 a, b = self.rev(a), self.rev(b)
964 948 try:
965 949 ancs = self.index.ancestors(a, b)
966 950 except (AttributeError, OverflowError):
967 951 ancs = ancestor.ancestors(self.parentrevs, a, b)
968 952 if ancs:
969 953 # choose a consistent winner when there's a tie
970 954 return min(map(self.node, ancs))
971 955 return nullid
972 956
973 957 def _match(self, id):
974 958 if isinstance(id, int):
975 959 # rev
976 960 return self.node(id)
977 961 if len(id) == 20:
978 962 # possibly a binary node
979 963 # odds of a binary node being all hex in ASCII are 1 in 10**25
980 964 try:
981 965 node = id
982 966 self.rev(node) # quick search the index
983 967 return node
984 968 except LookupError:
985 969 pass # may be partial hex id
986 970 try:
987 971 # str(rev)
988 972 rev = int(id)
989 973 if str(rev) != id:
990 974 raise ValueError
991 975 if rev < 0:
992 976 rev = len(self) + rev
993 977 if rev < 0 or rev >= len(self):
994 978 raise ValueError
995 979 return self.node(rev)
996 980 except (ValueError, OverflowError):
997 981 pass
998 982 if len(id) == 40:
999 983 try:
1000 984 # a full hex nodeid?
1001 985 node = bin(id)
1002 986 self.rev(node)
1003 987 return node
1004 988 except (TypeError, LookupError):
1005 989 pass
1006 990
1007 991 def _partialmatch(self, id):
1008 992 try:
1009 993 partial = self.index.partialmatch(id)
1010 994 if partial and self.hasnode(partial):
1011 995 return partial
1012 996 return None
1013 997 except RevlogError:
1014 998 # parsers.c radix tree lookup gave multiple matches
1015 999 # fast path: for unfiltered changelog, radix tree is accurate
1016 1000 if not getattr(self, 'filteredrevs', None):
1017 1001 raise LookupError(id, self.indexfile,
1018 1002 _('ambiguous identifier'))
1019 1003 # fall through to slow path that filters hidden revisions
1020 1004 except (AttributeError, ValueError):
1021 1005 # we are pure python, or key was too short to search radix tree
1022 1006 pass
1023 1007
1024 1008 if id in self._pcache:
1025 1009 return self._pcache[id]
1026 1010
1027 1011 if len(id) < 40:
1028 1012 try:
1029 1013 # hex(node)[:...]
1030 1014 l = len(id) // 2 # grab an even number of digits
1031 1015 prefix = bin(id[:l * 2])
1032 1016 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1033 1017 nl = [n for n in nl if hex(n).startswith(id) and
1034 1018 self.hasnode(n)]
1035 1019 if len(nl) > 0:
1036 1020 if len(nl) == 1:
1037 1021 self._pcache[id] = nl[0]
1038 1022 return nl[0]
1039 1023 raise LookupError(id, self.indexfile,
1040 1024 _('ambiguous identifier'))
1041 1025 return None
1042 1026 except TypeError:
1043 1027 pass
1044 1028
1045 1029 def lookup(self, id):
1046 1030 """locate a node based on:
1047 1031 - revision number or str(revision number)
1048 1032 - nodeid or subset of hex nodeid
1049 1033 """
1050 1034 n = self._match(id)
1051 1035 if n is not None:
1052 1036 return n
1053 1037 n = self._partialmatch(id)
1054 1038 if n:
1055 1039 return n
1056 1040
1057 1041 raise LookupError(id, self.indexfile, _('no match found'))
1058 1042
1059 1043 def cmp(self, node, text):
1060 1044 """compare text with a given file revision
1061 1045
1062 1046 returns True if text is different than what is stored.
1063 1047 """
1064 1048 p1, p2 = self.parents(node)
1065 1049 return hash(text, p1, p2) != node
1066 1050
1067 1051 def _addchunk(self, offset, data):
1068 1052 """Add a segment to the revlog cache.
1069 1053
1070 1054 Accepts an absolute offset and the data that is at that location.
1071 1055 """
1072 1056 o, d = self._chunkcache
1073 1057 # try to add to existing cache
1074 1058 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1075 1059 self._chunkcache = o, d + data
1076 1060 else:
1077 1061 self._chunkcache = offset, data
1078 1062
1079 1063 def _loadchunk(self, offset, length, df=None):
1080 1064 """Load a segment of raw data from the revlog.
1081 1065
1082 1066 Accepts an absolute offset, length to read, and an optional existing
1083 1067 file handle to read from.
1084 1068
1085 1069 If an existing file handle is passed, it will be seeked and the
1086 1070 original seek position will NOT be restored.
1087 1071
1088 1072 Returns a str or buffer of raw byte data.
1089 1073 """
1090 1074 if df is not None:
1091 1075 closehandle = False
1092 1076 else:
1093 1077 if self._inline:
1094 1078 df = self.opener(self.indexfile)
1095 1079 else:
1096 1080 df = self.opener(self.datafile)
1097 1081 closehandle = True
1098 1082
1099 1083 # Cache data both forward and backward around the requested
1100 1084 # data, in a fixed size window. This helps speed up operations
1101 1085 # involving reading the revlog backwards.
1102 1086 cachesize = self._chunkcachesize
1103 1087 realoffset = offset & ~(cachesize - 1)
1104 1088 reallength = (((offset + length + cachesize) & ~(cachesize - 1))
1105 1089 - realoffset)
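        # Editor's worked example: with cachesize=65536, offset=70000 and
        # length=100, realoffset is 65536 and reallength is 65536, so the
        # whole 64k window around the requested bytes is read and cached.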
1106 1090 df.seek(realoffset)
1107 1091 d = df.read(reallength)
1108 1092 if closehandle:
1109 1093 df.close()
1110 1094 self._addchunk(realoffset, d)
1111 1095 if offset != realoffset or reallength != length:
1112 1096 return util.buffer(d, offset - realoffset, length)
1113 1097 return d
1114 1098
1115 1099 def _getchunk(self, offset, length, df=None):
1116 1100 """Obtain a segment of raw data from the revlog.
1117 1101
1118 1102 Accepts an absolute offset, length of bytes to obtain, and an
1119 1103 optional file handle to the already-opened revlog. If the file
1120 1104 handle is used, its original seek position will not be preserved.
1121 1105
1122 1106 Requests for data may be returned from a cache.
1123 1107
1124 1108 Returns a str or a buffer instance of raw byte data.
1125 1109 """
1126 1110 o, d = self._chunkcache
1127 1111 l = len(d)
1128 1112
1129 1113 # is it in the cache?
1130 1114 cachestart = offset - o
1131 1115 cacheend = cachestart + length
1132 1116 if cachestart >= 0 and cacheend <= l:
1133 1117 if cachestart == 0 and cacheend == l:
1134 1118 return d # avoid a copy
1135 1119 return util.buffer(d, cachestart, cacheend - cachestart)
1136 1120
1137 1121 return self._loadchunk(offset, length, df=df)
1138 1122
1139 1123 def _chunkraw(self, startrev, endrev, df=None):
1140 1124 """Obtain a segment of raw data corresponding to a range of revisions.
1141 1125
1142 1126 Accepts the start and end revisions and an optional already-open
1143 1127 file handle to be used for reading. If the file handle is used, its
1144 1128 seek position will not be preserved.
1145 1129
1146 1130 Requests for data may be satisfied by a cache.
1147 1131
1148 1132 Returns a 2-tuple of (offset, data) for the requested range of
1149 1133 revisions. Offset is the integer offset from the beginning of the
1150 1134 revlog and data is a str or buffer of the raw byte data.
1151 1135
1152 1136 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1153 1137 to determine where each revision's data begins and ends.
1154 1138 """
1155 1139 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1156 1140 # (functions are expensive).
1157 1141 index = self.index
1158 1142 istart = index[startrev]
1159 1143 start = int(istart[0] >> 16)
1160 1144 if startrev == endrev:
1161 1145 end = start + istart[1]
1162 1146 else:
1163 1147 iend = index[endrev]
1164 1148 end = int(iend[0] >> 16) + iend[1]
1165 1149
1166 1150 if self._inline:
1167 1151 start += (startrev + 1) * self._io.size
1168 1152 end += (endrev + 1) * self._io.size
1169 1153 length = end - start
1170 1154
1171 1155 return start, self._getchunk(start, length, df=df)
1172 1156
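    # Editor's note: for inline revlogs the data interleaves with index
    # entries, hence the ``(rev + 1) * self._io.size`` adjustments above;
    # e.g. with the 64-byte v1 index entry size, revision 0's data starts
    # at start(0) + 64 within the index file.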
1173 1157 def _chunk(self, rev, df=None):
1174 1158 """Obtain a single decompressed chunk for a revision.
1175 1159
1176 1160 Accepts an integer revision and an optional already-open file handle
1177 1161 to be used for reading. If used, the seek position of the file will not
1178 1162 be preserved.
1179 1163
1180 1164 Returns a str holding uncompressed data for the requested revision.
1181 1165 """
1182 return decompress(self._chunkraw(rev, rev, df=df)[1])
1166 return self.decompress(self._chunkraw(rev, rev, df=df)[1])
1183 1167
1184 1168 def _chunks(self, revs, df=None):
1185 1169 """Obtain decompressed chunks for the specified revisions.
1186 1170
1187 1171 Accepts an iterable of numeric revisions that are assumed to be in
1188 1172 ascending order. Also accepts an optional already-open file handle
1189 1173 to be used for reading. If used, the seek position of the file will
1190 1174 not be preserved.
1191 1175
1192 1176 This function is similar to calling ``self._chunk()`` multiple times,
1193 1177 but is faster.
1194 1178
1195 1179 Returns a list with decompressed data for each requested revision.
1196 1180 """
1197 1181 if not revs:
1198 1182 return []
1199 1183 start = self.start
1200 1184 length = self.length
1201 1185 inline = self._inline
1202 1186 iosize = self._io.size
1203 1187 buffer = util.buffer
1204 1188
1205 1189 l = []
1206 1190 ladd = l.append
1207 1191
1208 1192 try:
1209 1193 offset, data = self._chunkraw(revs[0], revs[-1], df=df)
1210 1194 except OverflowError:
1211 1195 # issue4215 - we can't cache a run of chunks greater than
1212 1196 # 2G on Windows
1213 1197 return [self._chunk(rev, df=df) for rev in revs]
1214 1198
1199 decomp = self.decompress
1215 1200 for rev in revs:
1216 1201 chunkstart = start(rev)
1217 1202 if inline:
1218 1203 chunkstart += (rev + 1) * iosize
1219 1204 chunklength = length(rev)
1220 ladd(decompress(buffer(data, chunkstart - offset, chunklength)))
1205 ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
1221 1206
1222 1207 return l
1223 1208
1224 1209 def _chunkclear(self):
1225 1210 """Clear the raw chunk cache."""
1226 1211 self._chunkcache = (0, '')
1227 1212
1228 1213 def deltaparent(self, rev):
1229 1214 """return deltaparent of the given revision"""
1230 1215 base = self.index[rev][3]
1231 1216 if base == rev:
1232 1217 return nullrev
1233 1218 elif self._generaldelta:
1234 1219 return base
1235 1220 else:
1236 1221 return rev - 1
1237 1222
1238 1223 def revdiff(self, rev1, rev2):
1239 1224 """return or calculate a delta between two revisions"""
1240 1225 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1241 1226 return str(self._chunk(rev2))
1242 1227
1243 1228 return mdiff.textdiff(self.revision(rev1),
1244 1229 self.revision(rev2))
1245 1230
1246 1231 def revision(self, nodeorrev, _df=None, raw=False):
1247 1232 """return an uncompressed revision of a given node or revision
1248 1233 number.
1249 1234
1250 1235 _df - an existing file handle to read from. (internal-only)
1251 1236 raw - an optional argument specifying if the revision data is to be
1252 1237 treated as raw data when applying flag transforms. 'raw' should be set
1253 1238 to True when generating changegroups or in debug commands.
1254 1239 """
1255 1240 if isinstance(nodeorrev, int):
1256 1241 rev = nodeorrev
1257 1242 node = self.node(rev)
1258 1243 else:
1259 1244 node = nodeorrev
1260 1245 rev = None
1261 1246
1262 1247 cachedrev = None
1263 1248 if node == nullid:
1264 1249 return ""
1265 1250 if self._cache:
1266 1251 if self._cache[0] == node:
1267 1252 return self._cache[2]
1268 1253 cachedrev = self._cache[1]
1269 1254
1270 1255 # look up what we need to read
1271 1256 text = None
1272 1257 if rev is None:
1273 1258 rev = self.rev(node)
1274 1259
1275 1260 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1276 1261 if stopped:
1277 1262 text = self._cache[2]
1278 1263
1279 1264 # drop cache to save memory
1280 1265 self._cache = None
1281 1266
1282 1267 bins = self._chunks(chain, df=_df)
1283 1268 if text is None:
1284 1269 text = str(bins[0])
1285 1270 bins = bins[1:]
1286 1271
1287 1272 text = mdiff.patches(text, bins)
1288 1273
1289 1274 text, validatehash = self._processflags(text, self.flags(rev), 'read',
1290 1275 raw=raw)
1291 1276 if validatehash:
1292 1277 self.checkhash(text, node, rev=rev)
1293 1278
1294 1279 self._cache = (node, rev, text)
1295 1280 return text
1296 1281
1297 1282 def hash(self, text, p1, p2):
1298 1283 """Compute a node hash.
1299 1284
1300 1285 Available as a function so that subclasses can replace the hash
1301 1286 as needed.
1302 1287 """
1303 1288 return hash(text, p1, p2)
1304 1289
1305 1290 def _processflags(self, text, flags, operation, raw=False):
1306 1291 """Inspect revision data flags and applies transforms defined by
1307 1292 registered flag processors.
1308 1293
1309 1294 ``text`` - the revision data to process
1310 1295 ``flags`` - the revision flags
1311 1296 ``operation`` - the operation being performed (read or write)
1312 1297 ``raw`` - an optional argument describing if the raw transform should be
1313 1298 applied.
1314 1299
1315 1300 This method processes the flags in the order (or reverse order if
1316 1301 ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
1317 1302 flag processors registered for present flags. The order of flags defined
1318 1303 in REVIDX_FLAGS_ORDER needs to be stable because the transforms do not commute.
1319 1304
1320 1305 Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
1321 1306 processed text and ``validatehash`` is a bool indicating whether the
1322 1307 returned text should be checked for hash integrity.
1323 1308
1324 1309 Note: If the ``raw`` argument is set, it has precedence over the
1325 1310 operation and will only update the value of ``validatehash``.
1326 1311 """
1327 1312 if operation not in ('read', 'write'):
1328 1313 raise ProgrammingError(_("invalid '%s' operation") % operation)
1329 1314 # Check all flags are known.
1330 1315 if flags & ~REVIDX_KNOWN_FLAGS:
1331 1316 raise RevlogError(_("incompatible revision flag '%#x'") %
1332 1317 (flags & ~REVIDX_KNOWN_FLAGS))
1333 1318 validatehash = True
1334 1319 # Depending on the operation (read or write), the order might be
1335 1320 # reversed due to non-commutative transforms.
1336 1321 orderedflags = REVIDX_FLAGS_ORDER
1337 1322 if operation == 'write':
1338 1323 orderedflags = reversed(orderedflags)
1339 1324
1340 1325 for flag in orderedflags:
1341 1326 # If a flagprocessor has been registered for a known flag, apply the
1342 1327 # related operation transform and update result tuple.
1343 1328 if flag & flags:
1344 1329 vhash = True
1345 1330
1346 1331 if flag not in _flagprocessors:
1347 1332 message = _("missing processor for flag '%#x'") % (flag)
1348 1333 raise RevlogError(message)
1349 1334
1350 1335 processor = _flagprocessors[flag]
1351 1336 if processor is not None:
1352 1337 readtransform, writetransform, rawtransform = processor
1353 1338
1354 1339 if raw:
1355 1340 vhash = rawtransform(self, text)
1356 1341 elif operation == 'read':
1357 1342 text, vhash = readtransform(self, text)
1358 1343 else: # write operation
1359 1344 text, vhash = writetransform(self, text)
1360 1345 validatehash = validatehash and vhash
1361 1346
1362 1347 return text, validatehash
1363 1348
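    # Editor's sketch (hypothetical flag, not part of this change): a
    # processor is a (read, write, raw) triple keyed by flag; a no-op
    # processor that always trusts the stored hash could look like:
    #
    #   _flagprocessors[REVIDX_SOMEFLAG] = (
    #       lambda rl, text: (text, True),   # read transform
    #       lambda rl, text: (text, True),   # write transform
    #       lambda rl, text: True,           # raw: returns vhash only
    #   )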
1364 1349 def checkhash(self, text, node, p1=None, p2=None, rev=None):
1365 1350 """Check node hash integrity.
1366 1351
1367 1352 Available as a function so that subclasses can extend hash mismatch
1368 1353 behaviors as needed.
1369 1354 """
1370 1355 if p1 is None and p2 is None:
1371 1356 p1, p2 = self.parents(node)
1372 1357 if node != self.hash(text, p1, p2):
1373 1358 revornode = rev
1374 1359 if revornode is None:
1375 1360 revornode = templatefilters.short(hex(node))
1376 1361 raise RevlogError(_("integrity check failed on %s:%s")
1377 1362 % (self.indexfile, revornode))
1378 1363
1379 1364 def checkinlinesize(self, tr, fp=None):
1380 1365 """Check if the revlog is too big for inline and convert if so.
1381 1366
1382 1367 This should be called after revisions are added to the revlog. If the
1383 1368 revlog has grown too large to be an inline revlog, it will convert it
1384 1369 to use multiple index and data files.
1385 1370 """
1386 1371 if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
1387 1372 return
1388 1373
1389 1374 trinfo = tr.find(self.indexfile)
1390 1375 if trinfo is None:
1391 1376 raise RevlogError(_("%s not found in the transaction")
1392 1377 % self.indexfile)
1393 1378
1394 1379 trindex = trinfo[2]
1395 1380 if trindex is not None:
1396 1381 dataoff = self.start(trindex)
1397 1382 else:
1398 1383 # revlog was stripped at start of transaction, use all leftover data
1399 1384 trindex = len(self) - 1
1400 1385 dataoff = self.end(-2)
1401 1386
1402 1387 tr.add(self.datafile, dataoff)
1403 1388
1404 1389 if fp:
1405 1390 fp.flush()
1406 1391 fp.close()
1407 1392
1408 1393 df = self.opener(self.datafile, 'w')
1409 1394 try:
1410 1395 for r in self:
1411 1396 df.write(self._chunkraw(r, r)[1])
1412 1397 finally:
1413 1398 df.close()
1414 1399
1415 1400 fp = self.opener(self.indexfile, 'w', atomictemp=True,
1416 1401 checkambig=self._checkambig)
1417 1402 self.version &= ~(REVLOGNGINLINEDATA)
1418 1403 self._inline = False
1419 1404 for i in self:
1420 1405 e = self._io.packentry(self.index[i], self.node, self.version, i)
1421 1406 fp.write(e)
1422 1407
1423 1408 # if we don't call close, the temp file will never replace the
1424 1409 # real index
1425 1410 fp.close()
1426 1411
1427 1412 tr.replace(self.indexfile, trindex * self._io.size)
1428 1413 self._chunkclear()
1429 1414
1430 1415 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
1431 1416 node=None, flags=REVIDX_DEFAULT_FLAGS):
1432 1417 """add a revision to the log
1433 1418
1434 1419 text - the revision data to add
1435 1420 transaction - the transaction object used for rollback
1436 1421 link - the linkrev data to add
1437 1422 p1, p2 - the parent nodeids of the revision
1438 1423 cachedelta - an optional precomputed delta
1439 1424 node - nodeid of revision; typically node is not specified, and it is
1440 1425 computed by default as hash(text, p1, p2), however subclasses might
1441 1426 use a different hashing method (and override checkhash() in that case)
1442 1427 flags - the known flags to set on the revision
1443 1428 """
1444 1429 if link == nullrev:
1445 1430 raise RevlogError(_("attempted to add linkrev -1 to %s")
1446 1431 % self.indexfile)
1447 1432
1448 1433 if flags:
1449 1434 node = node or self.hash(text, p1, p2)
1450 1435
1451 1436 newtext, validatehash = self._processflags(text, flags, 'write')
1452 1437
1453 1438 # If the flag processor modifies the revision data, ignore any provided
1454 1439 # cachedelta.
1455 1440 if newtext != text:
1456 1441 cachedelta = None
1457 1442 text = newtext
1458 1443
1459 1444 if len(text) > _maxentrysize:
1460 1445 raise RevlogError(
1461 1446 _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
1462 1447 % (self.indexfile, len(text)))
1463 1448
1464 1449 node = node or self.hash(text, p1, p2)
1465 1450 if node in self.nodemap:
1466 1451 return node
1467 1452
1468 1453 if validatehash:
1469 1454 self.checkhash(text, node, p1=p1, p2=p2)
1470 1455
1471 1456 dfh = None
1472 1457 if not self._inline:
1473 1458 dfh = self.opener(self.datafile, "a+")
1474 1459 ifh = self.opener(self.indexfile, "a+", checkambig=self._checkambig)
1475 1460 try:
1476 1461 return self._addrevision(node, text, transaction, link, p1, p2,
1477 1462 flags, cachedelta, ifh, dfh)
1478 1463 finally:
1479 1464 if dfh:
1480 1465 dfh.close()
1481 1466 ifh.close()
1482 1467
1483 1468 def compress(self, text):
1484 1469 """ generate a possibly-compressed representation of text """
1485 1470 if not text:
1486 1471 return ("", text)
1487 1472 l = len(text)
1488 1473 bin = None
1489 1474 if l < 44:
1490 1475 pass
1491 1476 elif l > 1000000:
1492 1477 # zlib makes an internal copy, thus doubling memory usage for
1493 1478 # large files, so let's do this in pieces
1494 1479 z = zlib.compressobj()
1495 1480 p = []
1496 1481 pos = 0
1497 1482 while pos < l:
1498 1483 pos2 = pos + 2**20
1499 1484 p.append(z.compress(text[pos:pos2]))
1500 1485 pos = pos2
1501 1486 p.append(z.flush())
1502 1487 if sum(map(len, p)) < l:
1503 1488 bin = "".join(p)
1504 1489 else:
1505 1490 bin = _compress(text)
1506 1491 if bin is None or len(bin) >= l:
1507 1492 if text[0] == '\0':
1508 1493 return ("", text)
1509 1494 return ('u', text)
1510 1495 return ("", bin)
1511 1496
1497 def decompress(self, data):
1498 """Decompress a revlog chunk.
1499
1500 The chunk is expected to begin with a header identifying the
1501 format type so it can be routed to an appropriate decompressor.
1502 """
1503 if not data:
1504 return data
1505 t = data[0]
1506 if t == '\0':
1507 return data
1508 if t == 'x':
1509 try:
1510 return _decompress(data)
1511 except zlib.error as e:
1512 raise RevlogError(_('revlog decompress error: %s') % str(e))
1513 if t == 'u':
1514 return util.buffer(data, 1)
1515 raise RevlogError(_('unknown compression type %r') % t)
1516
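    # Editor's sketch: compress() and the decompress() method above are
    # inverses keyed on the first byte -- '\0' and zlib's 'x' round-trip
    # through the decompressor, while 'u' marks stored-uncompressed text.
    # Assuming a hypothetical revlog ``rl``:
    #
    #   >>> h, d = rl.compress('some fulltext long enough to deflate' * 10)
    #   >>> str(rl.decompress(h + d)) == 'some fulltext long enough to deflate' * 10
    #   True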
1512 1517 def _isgooddelta(self, d, textlen):
1513 1518 """Returns True if the given delta is good. Good means that it is within
1514 1519 the disk span, disk size, and chain length bounds that we know to be
1515 1520 performant."""
1516 1521 if d is None:
1517 1522 return False
1518 1523
1519 1524 # - 'dist' is the distance from the base revision -- bounding it limits
1520 1525 # the amount of I/O we need to do.
1521 1526 # - 'compresseddeltalen' is the sum of the total size of deltas we need
1522 1527 # to apply -- bounding it limits the amount of CPU we consume.
1523 1528 dist, l, data, base, chainbase, chainlen, compresseddeltalen = d
1524 1529 if (dist > textlen * 4 or l > textlen or
1525 1530 compresseddeltalen > textlen * 2 or
1526 1531 (self._maxchainlen and chainlen > self._maxchainlen)):
1527 1532 return False
1528 1533
1529 1534 return True
1530 1535
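    # Editor's worked example of the bounds above: for a 1000-byte fulltext,
    # a candidate delta is rejected if the read span exceeds 4000 bytes, the
    # delta itself exceeds 1000 bytes, the chain's compressed deltas exceed
    # 2000 bytes, or the chain would grow past the configured maxchainlen.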
1531 1536 def _addrevision(self, node, text, transaction, link, p1, p2, flags,
1532 1537 cachedelta, ifh, dfh, alwayscache=False, raw=False):
1533 1538 """internal function to add revisions to the log
1534 1539
1535 1540 see addrevision for argument descriptions.
1536 1541 invariants:
1537 1542 - text is optional (can be None); if not set, cachedelta must be set.
1538 1543 if both are set, they must correspond to each other.
1539 1544 - raw is optional; if set to True, it indicates the revision data is to
1540 1545 be treated by _processflags() as raw. It is usually set by changegroup
1541 1546 generation and debug commands.
1542 1547 """
1543 1548 btext = [text]
1544 1549 def buildtext():
1545 1550 if btext[0] is not None:
1546 1551 return btext[0]
1547 1552 baserev = cachedelta[0]
1548 1553 delta = cachedelta[1]
1549 1554 # special case deltas which replace entire base; no need to decode
1550 1555 # base revision. this neatly avoids censored bases, which throw when
1551 1556 # they're decoded.
1552 1557 hlen = struct.calcsize(">lll")
1553 1558 if delta[:hlen] == mdiff.replacediffheader(self.rawsize(baserev),
1554 1559 len(delta) - hlen):
1555 1560 btext[0] = delta[hlen:]
1556 1561 else:
1557 1562 if self._inline:
1558 1563 fh = ifh
1559 1564 else:
1560 1565 fh = dfh
1561 1566 basetext = self.revision(self.node(baserev), _df=fh, raw=raw)
1562 1567 btext[0] = mdiff.patch(basetext, delta)
1563 1568
1564 1569 try:
1565 1570 res = self._processflags(btext[0], flags, 'read', raw=raw)
1566 1571 btext[0], validatehash = res
1567 1572 if validatehash:
1568 1573 self.checkhash(btext[0], node, p1=p1, p2=p2)
1569 1574 if flags & REVIDX_ISCENSORED:
1570 1575 raise RevlogError(_('node %s is not censored') % node)
1571 1576 except CensoredNodeError:
1572 1577 # must pass the censored index flag to add censored revisions
1573 1578 if not flags & REVIDX_ISCENSORED:
1574 1579 raise
1575 1580 return btext[0]
1576 1581
1577 1582 def builddelta(rev):
1578 1583 # can we use the cached delta?
1579 1584 if cachedelta and cachedelta[0] == rev:
1580 1585 delta = cachedelta[1]
1581 1586 else:
1582 1587 t = buildtext()
1583 1588 if self.iscensored(rev):
1584 1589 # deltas based on a censored revision must replace the
1585 1590 # full content in one patch, so delta works everywhere
1586 1591 header = mdiff.replacediffheader(self.rawsize(rev), len(t))
1587 1592 delta = header + t
1588 1593 else:
1589 1594 if self._inline:
1590 1595 fh = ifh
1591 1596 else:
1592 1597 fh = dfh
1593 1598 ptext = self.revision(self.node(rev), _df=fh)
1594 1599 delta = mdiff.textdiff(ptext, t)
1595 1600 header, data = self.compress(delta)
1596 1601 deltalen = len(header) + len(data)
1597 1602 chainbase = self.chainbase(rev)
1598 1603 dist = deltalen + offset - self.start(chainbase)
1599 1604 if self._generaldelta:
1600 1605 base = rev
1601 1606 else:
1602 1607 base = chainbase
1603 1608 chainlen, compresseddeltalen = self._chaininfo(rev)
1604 1609 chainlen += 1
1605 1610 compresseddeltalen += deltalen
1606 1611 return (dist, deltalen, (header, data), base,
1607 1612 chainbase, chainlen, compresseddeltalen)
1608 1613
1609 1614 curr = len(self)
1610 1615 prev = curr - 1
1611 1616 offset = self.end(prev)
1612 1617 delta = None
1613 1618 p1r, p2r = self.rev(p1), self.rev(p2)
1614 1619
1615 1620 # full versions are inserted when the needed deltas
1616 1621 # become comparable to the uncompressed text
1617 1622 if text is None:
1618 1623 textlen = mdiff.patchedsize(self.rawsize(cachedelta[0]),
1619 1624 cachedelta[1])
1620 1625 else:
1621 1626 textlen = len(text)
1622 1627
1623 1628 # should we try to build a delta?
1624 1629 if prev != nullrev and self.storedeltachains:
1625 1630 tested = set()
1626 1631 # This condition is true most of the time when processing
1627 1632 # changegroup data into a generaldelta repo. The only time it
1628 1633 # isn't true is if this is the first revision in a delta chain
1629 1634 # or if ``format.generaldelta=true`` disabled ``lazydeltabase``.
1630 1635 if cachedelta and self._generaldelta and self._lazydeltabase:
1631 1636 # Assume what we received from the server is a good choice
1632 1637 # build delta will reuse the cache
1633 1638 candidatedelta = builddelta(cachedelta[0])
1634 1639 tested.add(cachedelta[0])
1635 1640 if self._isgooddelta(candidatedelta, textlen):
1636 1641 delta = candidatedelta
1637 1642 if delta is None and self._generaldelta:
1638 1643 # exclude already lazy tested base if any
1639 1644 parents = [p for p in (p1r, p2r)
1640 1645 if p != nullrev and p not in tested]
1641 1646 if parents and not self._aggressivemergedeltas:
1642 1647 # Pick whichever parent is closer to us (to minimize the
1643 1648 # chance of having to build a fulltext).
1644 1649 parents = [max(parents)]
1645 1650 tested.update(parents)
1646 1651 pdeltas = []
1647 1652 for p in parents:
1648 1653 pd = builddelta(p)
1649 1654 if self._isgooddelta(pd, textlen):
1650 1655 pdeltas.append(pd)
1651 1656 if pdeltas:
1652 1657 delta = min(pdeltas, key=lambda x: x[1])
1653 1658 if delta is None and prev not in tested:
1654 1659 # other approaches failed; try against prev to hopefully save us a
1655 1660 # fulltext.
1656 1661 candidatedelta = builddelta(prev)
1657 1662 if self._isgooddelta(candidatedelta, textlen):
1658 1663 delta = candidatedelta
1659 1664 if delta is not None:
1660 1665 dist, l, data, base, chainbase, chainlen, compresseddeltalen = delta
1661 1666 else:
1662 1667 text = buildtext()
1663 1668 data = self.compress(text)
1664 1669 l = len(data[1]) + len(data[0])
1665 1670 base = chainbase = curr
1666 1671
1667 1672 e = (offset_type(offset, flags), l, textlen,
1668 1673 base, link, p1r, p2r, node)
1669 1674 self.index.insert(-1, e)
1670 1675 self.nodemap[node] = curr
1671 1676
1672 1677 entry = self._io.packentry(e, self.node, self.version, curr)
1673 1678 self._writeentry(transaction, ifh, dfh, entry, data, link, offset)
1674 1679
1675 1680 if alwayscache and text is None:
1676 1681 text = buildtext()
1677 1682
1678 1683 if type(text) == str: # only accept immutable objects
1679 1684 self._cache = (node, curr, text)
1680 1685 self._chainbasecache[curr] = chainbase
1681 1686 return node
1682 1687
1683 1688 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
1684 1689 # Files opened in a+ mode have inconsistent behavior on various
1685 1690 # platforms. Windows requires that a file positioning call be made
1686 1691 # when the file handle transitions between reads and writes. See
1687 1692 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
1688 1693 # platforms, Python or the platform itself can be buggy. Some versions
1689 1694 # of Solaris have been observed to not append at the end of the file
1690 1695 # if the file was seeked to before the end. See issue4943 for more.
1691 1696 #
1692 1697 # We work around this issue by inserting a seek() before writing.
1693 1698 # Note: This is likely not necessary on Python 3.
1694 1699 ifh.seek(0, os.SEEK_END)
1695 1700 if dfh:
1696 1701 dfh.seek(0, os.SEEK_END)
1697 1702
1698 1703 curr = len(self) - 1
1699 1704 if not self._inline:
1700 1705 transaction.add(self.datafile, offset)
1701 1706 transaction.add(self.indexfile, curr * len(entry))
1702 1707 if data[0]:
1703 1708 dfh.write(data[0])
1704 1709 dfh.write(data[1])
1705 1710 ifh.write(entry)
1706 1711 else:
1707 1712 offset += curr * self._io.size
1708 1713 transaction.add(self.indexfile, offset, curr)
1709 1714 ifh.write(entry)
1710 1715 ifh.write(data[0])
1711 1716 ifh.write(data[1])
1712 1717 self.checkinlinesize(transaction, ifh)
1713 1718
1714 1719 def addgroup(self, cg, linkmapper, transaction, addrevisioncb=None):
1715 1720 """
1716 1721 add a delta group
1717 1722
1718 1723 given a set of deltas, add them to the revision log. the
1719 1724 first delta is against its parent, which should be in our
1720 1725 log, the rest are against the previous delta.
1721 1726
1722 1727 If ``addrevisioncb`` is defined, it will be called with arguments of
1723 1728 this revlog and the node that was added.
1724 1729 """
1725 1730
1726 1731 # track the base of the current delta log
1727 1732 content = []
1728 1733 node = None
1729 1734
1730 1735 r = len(self)
1731 1736 end = 0
1732 1737 if r:
1733 1738 end = self.end(r - 1)
1734 1739 ifh = self.opener(self.indexfile, "a+", checkambig=self._checkambig)
1735 1740 isize = r * self._io.size
1736 1741 if self._inline:
1737 1742 transaction.add(self.indexfile, end + isize, r)
1738 1743 dfh = None
1739 1744 else:
1740 1745 transaction.add(self.indexfile, isize, r)
1741 1746 transaction.add(self.datafile, end)
1742 1747 dfh = self.opener(self.datafile, "a+")
1743 1748 def flush():
1744 1749 if dfh:
1745 1750 dfh.flush()
1746 1751 ifh.flush()
1747 1752 try:
1748 1753 # loop through our set of deltas
1749 1754 chain = None
1750 1755 for chunkdata in iter(lambda: cg.deltachunk(chain), {}):
1751 1756 node = chunkdata['node']
1752 1757 p1 = chunkdata['p1']
1753 1758 p2 = chunkdata['p2']
1754 1759 cs = chunkdata['cs']
1755 1760 deltabase = chunkdata['deltabase']
1756 1761 delta = chunkdata['delta']
1757 1762 flags = chunkdata['flags'] or REVIDX_DEFAULT_FLAGS
1758 1763
1759 1764 content.append(node)
1760 1765
1761 1766 link = linkmapper(cs)
1762 1767 if node in self.nodemap:
1763 1768 # this can happen if two branches make the same change
1764 1769 chain = node
1765 1770 continue
1766 1771
1767 1772 for p in (p1, p2):
1768 1773 if p not in self.nodemap:
1769 1774 raise LookupError(p, self.indexfile,
1770 1775 _('unknown parent'))
1771 1776
1772 1777 if deltabase not in self.nodemap:
1773 1778 raise LookupError(deltabase, self.indexfile,
1774 1779 _('unknown delta base'))
1775 1780
1776 1781 baserev = self.rev(deltabase)
1777 1782
1778 1783 if baserev != nullrev and self.iscensored(baserev):
1779 1784 # if base is censored, delta must be full replacement in a
1780 1785 # single patch operation
1781 1786 hlen = struct.calcsize(">lll")
1782 1787 oldlen = self.rawsize(baserev)
1783 1788 newlen = len(delta) - hlen
1784 1789 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
1785 1790 raise error.CensoredBaseError(self.indexfile,
1786 1791 self.node(baserev))
1787 1792
1788 1793 if not flags and self._peek_iscensored(baserev, delta, flush):
1789 1794 flags |= REVIDX_ISCENSORED
1790 1795
1791 1796 # We assume consumers of addrevisioncb will want to retrieve
1792 1797 # the added revision, which will require a call to
1793 1798 # revision(). revision() will fast path if there is a cache
1794 1799 # hit. So, we tell _addrevision() to always cache in this case.
1795 1800 # We're only using addgroup() in the context of changegroup
1796 1801 # generation so the revision data can always be handled as raw
1797 1802 # by the flagprocessor.
1798 1803 chain = self._addrevision(node, None, transaction, link,
1799 1804 p1, p2, flags, (baserev, delta),
1800 1805 ifh, dfh,
1801 1806 alwayscache=bool(addrevisioncb),
1802 1807 raw=True)
1803 1808
1804 1809 if addrevisioncb:
1805 1810 addrevisioncb(self, chain)
1806 1811
1807 1812 if not dfh and not self._inline:
1808 1813 # addrevision switched from inline to conventional
1809 1814 # reopen the index
1810 1815 ifh.close()
1811 1816 dfh = self.opener(self.datafile, "a+")
1812 1817 ifh = self.opener(self.indexfile, "a+",
1813 1818 checkambig=self._checkambig)
1814 1819 finally:
1815 1820 if dfh:
1816 1821 dfh.close()
1817 1822 ifh.close()
1818 1823
1819 1824 return content
1820 1825
1821 1826 def iscensored(self, rev):
1822 1827 """Check if a file revision is censored."""
1823 1828 return False
1824 1829
1825 1830 def _peek_iscensored(self, baserev, delta, flush):
1826 1831 """Quickly check if a delta produces a censored revision."""
1827 1832 return False
1828 1833
1829 1834 def getstrippoint(self, minlink):
1830 1835 """find the minimum rev that must be stripped to strip the linkrev
1831 1836
1832 1837 Returns a tuple containing the minimum rev and a set of all revs that
1833 1838 have linkrevs that will be broken by this strip.
1834 1839 """
1835 1840 brokenrevs = set()
1836 1841 strippoint = len(self)
1837 1842
1838 1843 heads = {}
1839 1844 futurelargelinkrevs = set()
1840 1845 for head in self.headrevs():
1841 1846 headlinkrev = self.linkrev(head)
1842 1847 heads[head] = headlinkrev
1843 1848 if headlinkrev >= minlink:
1844 1849 futurelargelinkrevs.add(headlinkrev)
1845 1850
1846 1851 # This algorithm involves walking down the rev graph, starting at the
1847 1852 # heads. Since the revs are topologically sorted according to linkrev,
1848 1853 # once all head linkrevs are below the minlink, we know there are
1849 1854 # no more revs that could have a linkrev greater than minlink.
1850 1855 # So we can stop walking.
1851 1856 while futurelargelinkrevs:
1852 1857 strippoint -= 1
1853 1858 linkrev = heads.pop(strippoint)
1854 1859
1855 1860 if linkrev < minlink:
1856 1861 brokenrevs.add(strippoint)
1857 1862 else:
1858 1863 futurelargelinkrevs.remove(linkrev)
1859 1864
1860 1865 for p in self.parentrevs(strippoint):
1861 1866 if p != nullrev:
1862 1867 plinkrev = self.linkrev(p)
1863 1868 heads[p] = plinkrev
1864 1869 if plinkrev >= minlink:
1865 1870 futurelargelinkrevs.add(plinkrev)
1866 1871
1867 1872 return strippoint, brokenrevs
1868 1873
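    # Editor's sketch: in a hypothetical linear revlog ``rl`` whose linkrevs
    # are 0, 1, 2, stripping from minlink=1 cuts at revision 1 and breaks no
    # other linkrevs:
    #
    #   >>> rl.getstrippoint(1)
    #   (1, set([]))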
1869 1874 def strip(self, minlink, transaction):
1870 1875 """truncate the revlog on the first revision with a linkrev >= minlink
1871 1876
1872 1877 This function is called when we're stripping revision minlink and
1873 1878 its descendants from the repository.
1874 1879
1875 1880 We have to remove all revisions with linkrev >= minlink, because
1876 1881 the equivalent changelog revisions will be renumbered after the
1877 1882 strip.
1878 1883
1879 1884 So we truncate the revlog on the first of these revisions, and
1880 1885 trust that the caller has saved the revisions that shouldn't be
1881 1886 removed and that it'll re-add them after this truncation.
1882 1887 """
1883 1888 if len(self) == 0:
1884 1889 return
1885 1890
1886 1891 rev, _ = self.getstrippoint(minlink)
1887 1892 if rev == len(self):
1888 1893 return
1889 1894
1890 1895 # first truncate the files on disk
1891 1896 end = self.start(rev)
1892 1897 if not self._inline:
1893 1898 transaction.add(self.datafile, end)
1894 1899 end = rev * self._io.size
1895 1900 else:
1896 1901 end += rev * self._io.size
1897 1902
1898 1903 transaction.add(self.indexfile, end)
1899 1904
1900 1905 # then reset internal state in memory to forget those revisions
1901 1906 self._cache = None
1902 1907 self._chaininfocache = {}
1903 1908 self._chunkclear()
1904 1909 for x in xrange(rev, len(self)):
1905 1910 del self.nodemap[self.node(x)]
1906 1911
1907 1912 del self.index[rev:-1]
1908 1913
1909 1914 def checksize(self):
1910 1915 expected = 0
1911 1916 if len(self):
1912 1917 expected = max(0, self.end(len(self) - 1))
1913 1918
1914 1919 try:
1915 1920 f = self.opener(self.datafile)
1916 1921 f.seek(0, 2)
1917 1922 actual = f.tell()
1918 1923 f.close()
1919 1924 dd = actual - expected
1920 1925 except IOError as inst:
1921 1926 if inst.errno != errno.ENOENT:
1922 1927 raise
1923 1928 dd = 0
1924 1929
1925 1930 try:
1926 1931 f = self.opener(self.indexfile)
1927 1932 f.seek(0, 2)
1928 1933 actual = f.tell()
1929 1934 f.close()
1930 1935 s = self._io.size
1931 1936 i = max(0, actual // s)
1932 1937 di = actual - (i * s)
1933 1938 if self._inline:
1934 1939 databytes = 0
1935 1940 for r in self:
1936 1941 databytes += max(0, self.length(r))
1937 1942 dd = 0
1938 1943 di = actual - len(self) * s - databytes
1939 1944 except IOError as inst:
1940 1945 if inst.errno != errno.ENOENT:
1941 1946 raise
1942 1947 di = 0
1943 1948
1944 1949 return (dd, di)
1945 1950
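A short usage sketch for the return value (``rl`` is again a placeholder name): ``dd`` is the difference between the actual and expected size of the data, ``di`` the stray bytes of a partial entry at the end of the index; a consistent revlog reports zero for both.

# Hedged sketch; ``rl`` is an assumed open revlog instance.
dd, di = rl.checksize()
if dd:
    print('data differs from expected size by %d bytes' % dd)
if di:
    print('index has %d trailing bytes of partial entry' % di)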
1946 1951 def files(self):
1947 1952 res = [self.indexfile]
1948 1953 if not self._inline:
1949 1954 res.append(self.datafile)
1950 1955 return res
1951 1956
1952 1957 DELTAREUSEALWAYS = 'always'
1953 1958 DELTAREUSESAMEREVS = 'samerevs'
1954 1959 DELTAREUSENEVER = 'never'
1955 1960
1956 1961 DELTAREUSEALL = set(['always', 'samerevs', 'never'])
1957 1962
1958 1963 def clone(self, tr, destrevlog, addrevisioncb=None,
1959 1964 deltareuse=DELTAREUSESAMEREVS, aggressivemergedeltas=None):
1960 1965 """Copy this revlog to another, possibly with format changes.
1961 1966
1962 1967 The destination revlog will contain the same revisions and nodes.
1963 1968 However, it may not be bit-for-bit identical due to e.g. delta encoding
1964 1969 differences.
1965 1970
1966 1971 The ``deltareuse`` argument controls how deltas from the existing revlog
1967 1972 are preserved in the destination revlog. The argument can have the
1968 1973 following values:
1969 1974
1970 1975 DELTAREUSEALWAYS
1971 1976 Deltas will always be reused (if possible), even if the destination
1972 1977 revlog would not select the same revisions for the delta. This is the
1973 1978 fastest mode of operation.
1974 1979 DELTAREUSESAMEREVS
1975 1980 Deltas will be reused if the destination revlog would pick the same
1976 1981 revisions for the delta. This mode strikes a balance between speed
1977 1982 and optimization.
1978 1983 DELTAREUSENEVER
1979 1984 Deltas will never be reused. This is the slowest mode of operation.
1980 1985 This mode can be used to recompute deltas (e.g. if the diff/delta
1981 1986 algorithm changes).
1982 1987
1983 1988 Delta computation can be slow, so the choice of delta reuse policy can
1984 1989 significantly affect run time.
1985 1990
1986 1991 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
1987 1992 two extremes. Deltas will be reused when appropriate. But if the
1988 1993 destination revlog would choose a better delta base, the delta is
1989 1994 recomputed. For example, when converting a non-generaldelta revlog
1990 1995 to a generaldelta revlog, deltas are recomputed whenever the stored
1991 1996 delta's base isn't a parent of the revision.
1992 1997
1993 1998 In addition to the delta policy, the ``aggressivemergedeltas`` argument
1994 1999 controls whether to compute deltas against both parents for merges.
1995 2000 If unspecified, the destination revlog's current setting is kept.
1996 2001 """
1997 2002 if deltareuse not in self.DELTAREUSEALL:
1998 2003 raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)
1999 2004
2000 2005 if len(destrevlog):
2001 2006 raise ValueError(_('destination revlog is not empty'))
2002 2007
2003 2008 if getattr(self, 'filteredrevs', None):
2004 2009 raise ValueError(_('source revlog has filtered revisions'))
2005 2010 if getattr(destrevlog, 'filteredrevs', None):
2006 2011 raise ValueError(_('destination revlog has filtered revisions'))
2007 2012
2008 2013 # lazydeltabase controls whether to reuse a cached delta, if possible.
2009 2014 oldlazydeltabase = destrevlog._lazydeltabase
2010 2015 oldamd = destrevlog._aggressivemergedeltas
2011 2016
2012 2017 try:
2013 2018 if deltareuse == self.DELTAREUSEALWAYS:
2014 2019 destrevlog._lazydeltabase = True
2015 2020 elif deltareuse == self.DELTAREUSESAMEREVS:
2016 2021 destrevlog._lazydeltabase = False
2017 2022
2018 2023 destrevlog._aggressivemergedeltas = aggressivemergedeltas or oldamd
2019 2024
2020 2025 populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS,
2021 2026 self.DELTAREUSESAMEREVS)
2022 2027
2023 2028 index = self.index
2024 2029 for rev in self:
2025 2030 entry = index[rev]
2026 2031
2027 2032 # Some classes override linkrev to take filtered revs into
2028 2033 # account. Use raw entry from index.
2029 2034 flags = entry[0] & 0xffff
2030 2035 linkrev = entry[4]
2031 2036 p1 = index[entry[5]][7]
2032 2037 p2 = index[entry[6]][7]
2033 2038 node = entry[7]
2034 2039
2035 2040 # (Possibly) reuse the delta from the revlog if allowed and
2036 2041 # the revlog chunk is a delta.
2037 2042 cachedelta = None
2038 2043 text = None
2039 2044 if populatecachedelta:
2040 2045 dp = self.deltaparent(rev)
2041 2046 if dp != nullrev:
2042 2047 cachedelta = (dp, str(self._chunk(rev)))
2043 2048
2044 2049 if not cachedelta:
2045 2050 text = self.revision(rev)
2046 2051
2047 2052 ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
2048 2053 checkambig=False)
2049 2054 dfh = None
2050 2055 if not destrevlog._inline:
2051 2056 dfh = destrevlog.opener(destrevlog.datafile, 'a+')
2052 2057 try:
2053 2058 destrevlog._addrevision(node, text, tr, linkrev, p1, p2,
2054 2059 flags, cachedelta, ifh, dfh)
2055 2060 finally:
2056 2061 if dfh:
2057 2062 dfh.close()
2058 2063 ifh.close()
2059 2064
2060 2065 if addrevisioncb:
2061 2066 addrevisioncb(self, rev, node)
2062 2067 finally:
2063 2068 destrevlog._lazydeltabase = oldlazydeltabase
2064 2069 destrevlog._aggressivemergedeltas = oldamd
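A hedged usage sketch for clone(); ``srcrevlog``, ``destrevlog`` and ``tr`` are placeholder names for an existing revlog, an empty destination revlog, and an open transaction:

copied = [0]
def onrevision(rl, rev, node):
    # called once per revision written to the destination
    copied[0] += 1

# Recompute every delta while copying, e.g. after the diff/delta
# algorithm has changed.
srcrevlog.clone(tr, destrevlog,
                addrevisioncb=onrevision,
                deltareuse=srcrevlog.DELTAREUSENEVER)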