revlog: return offset from _chunkraw()...
Gregory Szorc
r27649:6446e9b3 default
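This commit changes revlog._chunkraw() so that callers also receive the offset at which the returned data segment starts. In the perf.py hunk below, the only call-site update is in perfrevlogrevision, where the raw data is now taken from the second element of the return value. A minimal sketch of the apparent interface change, assuming the new return value is an (offset, data) pair as that subscript implies:

    # before this commit: _chunkraw() returned only the raw byte segment
    data = r._chunkraw(chain[0], chain[-1])

    # after: the segment's starting offset is returned as well, so callers
    # that only want the bytes take the second element ...
    data = r._chunkraw(chain[0], chain[-1])[1]
    # ... and callers that need both can unpack the pair directly
    offset, data = r._chunkraw(chain[0], chain[-1])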
@@ -1,784 +1,784 @@
1 1 # perf.py - performance test routines
2 2 '''helper extension to measure performance'''
3 3
4 4 from mercurial import cmdutil, scmutil, util, commands, obsolete
5 5 from mercurial import repoview, branchmap, merge, copies, error, revlog
6 6 from mercurial import mdiff
7 7 import time, os, sys
8 8 import random
9 9 import functools
10 10
11 11 formatteropts = commands.formatteropts
12 12 revlogopts = commands.debugrevlogopts
13 13
14 14 cmdtable = {}
15 15 command = cmdutil.command(cmdtable)
16 16
17 17 def getlen(ui):
18 18 if ui.configbool("perf", "stub"):
19 19 return lambda x: 1
20 20 return len
21 21
22 22 def gettimer(ui, opts=None):
23 23 """return a timer function and formatter: (timer, formatter)
24 24
25 25 This function exists to gather the creation of the formatter in a single
26 26 place instead of duplicating it in all performance commands."""
27 27
28 28 # enforce an idle period before execution to counteract power management
29 29 # experimental config: perf.presleep
30 30 time.sleep(ui.configint("perf", "presleep", 1))
31 31
32 32 if opts is None:
33 33 opts = {}
34 34 # redirect all to stderr
35 35 ui = ui.copy()
36 36 ui.fout = ui.ferr
37 37 # get a formatter
38 38 fm = ui.formatter('perf', opts)
39 39 # stub function, runs code only once instead of in a loop
40 40 # experimental config: perf.stub
41 41 if ui.configbool("perf", "stub"):
42 42 return functools.partial(stub_timer, fm), fm
43 43 return functools.partial(_timer, fm), fm
44 44
45 45 def stub_timer(fm, func, title=None):
46 46 func()
47 47
48 48 def _timer(fm, func, title=None):
49 49 results = []
50 50 begin = time.time()
51 51 count = 0
52 52 while True:
53 53 ostart = os.times()
54 54 cstart = time.time()
55 55 r = func()
56 56 cstop = time.time()
57 57 ostop = os.times()
58 58 count += 1
59 59 a, b = ostart, ostop
60 60 results.append((cstop - cstart, b[0] - a[0], b[1]-a[1]))
61 61 if cstop - begin > 3 and count >= 100:
62 62 break
63 63 if cstop - begin > 10 and count >= 3:
64 64 break
65 65
66 66 fm.startitem()
67 67
68 68 if title:
69 69 fm.write('title', '! %s\n', title)
70 70 if r:
71 71 fm.write('result', '! result: %s\n', r)
72 72 m = min(results)
73 73 fm.plain('!')
74 74 fm.write('wall', ' wall %f', m[0])
75 75 fm.write('comb', ' comb %f', m[1] + m[2])
76 76 fm.write('user', ' user %f', m[1])
77 77 fm.write('sys', ' sys %f', m[2])
78 78 fm.write('count', ' (best of %d)', count)
79 79 fm.plain('\n')
80 80
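The loop above keeps re-running func until at least 100 runs have taken more than 3 seconds of wall time, or at least 3 runs have taken more than 10 seconds, then reports the minimum sample. Given the format strings, each benchmark emits one line per timed function (preceded by a "! <title>" line when a title is passed); a representative line, with purely illustrative values, looks like:

    ! wall 0.000512 comb 0.010000 user 0.010000 sys 0.000000 (best of 4213)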
81 81 @command('perfwalk', formatteropts)
82 82 def perfwalk(ui, repo, *pats, **opts):
83 83 timer, fm = gettimer(ui, opts)
84 84 try:
85 85 m = scmutil.match(repo[None], pats, {})
86 86 timer(lambda: len(list(repo.dirstate.walk(m, [], True, False))))
87 87 except Exception:
88 88 try:
89 89 m = scmutil.match(repo[None], pats, {})
90 90 timer(lambda: len([b for a, b, c in repo.dirstate.statwalk([], m)]))
91 91 except Exception:
92 92 timer(lambda: len(list(cmdutil.walk(repo, pats, {}))))
93 93 fm.end()
94 94
95 95 @command('perfannotate', formatteropts)
96 96 def perfannotate(ui, repo, f, **opts):
97 97 timer, fm = gettimer(ui, opts)
98 98 fc = repo['.'][f]
99 99 timer(lambda: len(fc.annotate(True)))
100 100 fm.end()
101 101
102 102 @command('perfstatus',
103 103 [('u', 'unknown', False,
104 104 'ask status to look for unknown files')] + formatteropts)
105 105 def perfstatus(ui, repo, **opts):
106 106 #m = match.always(repo.root, repo.getcwd())
107 107 #timer(lambda: sum(map(len, repo.dirstate.status(m, [], False, False,
108 108 # False))))
109 109 timer, fm = gettimer(ui, opts)
110 110 timer(lambda: sum(map(len, repo.status(unknown=opts['unknown']))))
111 111 fm.end()
112 112
113 113 @command('perfaddremove', formatteropts)
114 114 def perfaddremove(ui, repo, **opts):
115 115 timer, fm = gettimer(ui, opts)
116 116 try:
117 117 oldquiet = repo.ui.quiet
118 118 repo.ui.quiet = True
119 119 matcher = scmutil.match(repo[None])
120 120 timer(lambda: scmutil.addremove(repo, matcher, "", dry_run=True))
121 121 finally:
122 122 repo.ui.quiet = oldquiet
123 123 fm.end()
124 124
125 125 def clearcaches(cl):
126 126 # behave somewhat consistently across internal API changes
127 127 if util.safehasattr(cl, 'clearcaches'):
128 128 cl.clearcaches()
129 129 elif util.safehasattr(cl, '_nodecache'):
130 130 from mercurial.node import nullid, nullrev
131 131 cl._nodecache = {nullid: nullrev}
132 132 cl._nodepos = None
133 133
134 134 @command('perfheads', formatteropts)
135 135 def perfheads(ui, repo, **opts):
136 136 timer, fm = gettimer(ui, opts)
137 137 cl = repo.changelog
138 138 def d():
139 139 len(cl.headrevs())
140 140 clearcaches(cl)
141 141 timer(d)
142 142 fm.end()
143 143
144 144 @command('perftags', formatteropts)
145 145 def perftags(ui, repo, **opts):
146 146 import mercurial.changelog
147 147 import mercurial.manifest
148 148 timer, fm = gettimer(ui, opts)
149 149 def t():
150 150 repo.changelog = mercurial.changelog.changelog(repo.svfs)
151 151 repo.manifest = mercurial.manifest.manifest(repo.svfs)
152 152 repo._tags = None
153 153 return len(repo.tags())
154 154 timer(t)
155 155 fm.end()
156 156
157 157 @command('perfancestors', formatteropts)
158 158 def perfancestors(ui, repo, **opts):
159 159 timer, fm = gettimer(ui, opts)
160 160 heads = repo.changelog.headrevs()
161 161 def d():
162 162 for a in repo.changelog.ancestors(heads):
163 163 pass
164 164 timer(d)
165 165 fm.end()
166 166
167 167 @command('perfancestorset', formatteropts)
168 168 def perfancestorset(ui, repo, revset, **opts):
169 169 timer, fm = gettimer(ui, opts)
170 170 revs = repo.revs(revset)
171 171 heads = repo.changelog.headrevs()
172 172 def d():
173 173 s = repo.changelog.ancestors(heads)
174 174 for rev in revs:
175 175 rev in s
176 176 timer(d)
177 177 fm.end()
178 178
179 179 @command('perfdirs', formatteropts)
180 180 def perfdirs(ui, repo, **opts):
181 181 timer, fm = gettimer(ui, opts)
182 182 dirstate = repo.dirstate
183 183 'a' in dirstate
184 184 def d():
185 185 dirstate.dirs()
186 186 del dirstate._dirs
187 187 timer(d)
188 188 fm.end()
189 189
190 190 @command('perfdirstate', formatteropts)
191 191 def perfdirstate(ui, repo, **opts):
192 192 timer, fm = gettimer(ui, opts)
193 193 "a" in repo.dirstate
194 194 def d():
195 195 repo.dirstate.invalidate()
196 196 "a" in repo.dirstate
197 197 timer(d)
198 198 fm.end()
199 199
200 200 @command('perfdirstatedirs', formatteropts)
201 201 def perfdirstatedirs(ui, repo, **opts):
202 202 timer, fm = gettimer(ui, opts)
203 203 "a" in repo.dirstate
204 204 def d():
205 205 "a" in repo.dirstate._dirs
206 206 del repo.dirstate._dirs
207 207 timer(d)
208 208 fm.end()
209 209
210 210 @command('perfdirstatefoldmap', formatteropts)
211 211 def perfdirstatefoldmap(ui, repo, **opts):
212 212 timer, fm = gettimer(ui, opts)
213 213 dirstate = repo.dirstate
214 214 'a' in dirstate
215 215 def d():
216 216 dirstate._filefoldmap.get('a')
217 217 del dirstate._filefoldmap
218 218 timer(d)
219 219 fm.end()
220 220
221 221 @command('perfdirfoldmap', formatteropts)
222 222 def perfdirfoldmap(ui, repo, **opts):
223 223 timer, fm = gettimer(ui, opts)
224 224 dirstate = repo.dirstate
225 225 'a' in dirstate
226 226 def d():
227 227 dirstate._dirfoldmap.get('a')
228 228 del dirstate._dirfoldmap
229 229 del dirstate._dirs
230 230 timer(d)
231 231 fm.end()
232 232
233 233 @command('perfdirstatewrite', formatteropts)
234 234 def perfdirstatewrite(ui, repo, **opts):
235 235 timer, fm = gettimer(ui, opts)
236 236 ds = repo.dirstate
237 237 "a" in ds
238 238 def d():
239 239 ds._dirty = True
240 240 ds.write(repo.currenttransaction())
241 241 timer(d)
242 242 fm.end()
243 243
244 244 @command('perfmergecalculate',
245 245 [('r', 'rev', '.', 'rev to merge against')] + formatteropts)
246 246 def perfmergecalculate(ui, repo, rev, **opts):
247 247 timer, fm = gettimer(ui, opts)
248 248 wctx = repo[None]
249 249 rctx = scmutil.revsingle(repo, rev, rev)
250 250 ancestor = wctx.ancestor(rctx)
251 251 # we don't want working dir files to be stat'd in the benchmark, so prime
252 252 # that cache
253 253 wctx.dirty()
254 254 def d():
255 255 # acceptremote is True because we don't want prompts in the middle of
256 256 # our benchmark
257 257 merge.calculateupdates(repo, wctx, rctx, [ancestor], False, False,
258 258 acceptremote=True, followcopies=True)
259 259 timer(d)
260 260 fm.end()
261 261
262 262 @command('perfpathcopies', [], "REV REV")
263 263 def perfpathcopies(ui, repo, rev1, rev2, **opts):
264 264 timer, fm = gettimer(ui, opts)
265 265 ctx1 = scmutil.revsingle(repo, rev1, rev1)
266 266 ctx2 = scmutil.revsingle(repo, rev2, rev2)
267 267 def d():
268 268 copies.pathcopies(ctx1, ctx2)
269 269 timer(d)
270 270 fm.end()
271 271
272 272 @command('perfmanifest', [], 'REV')
273 273 def perfmanifest(ui, repo, rev, **opts):
274 274 timer, fm = gettimer(ui, opts)
275 275 ctx = scmutil.revsingle(repo, rev, rev)
276 276 t = ctx.manifestnode()
277 277 def d():
278 278 repo.manifest.clearcaches()
279 279 repo.manifest.read(t)
280 280 timer(d)
281 281 fm.end()
282 282
283 283 @command('perfchangeset', formatteropts)
284 284 def perfchangeset(ui, repo, rev, **opts):
285 285 timer, fm = gettimer(ui, opts)
286 286 n = repo[rev].node()
287 287 def d():
288 288 repo.changelog.read(n)
289 289 #repo.changelog._cache = None
290 290 timer(d)
291 291 fm.end()
292 292
293 293 @command('perfindex', formatteropts)
294 294 def perfindex(ui, repo, **opts):
295 295 import mercurial.revlog
296 296 timer, fm = gettimer(ui, opts)
297 297 mercurial.revlog._prereadsize = 2**24 # disable lazy parser in old hg
298 298 n = repo["tip"].node()
299 299 def d():
300 300 cl = mercurial.revlog.revlog(repo.svfs, "00changelog.i")
301 301 cl.rev(n)
302 302 timer(d)
303 303 fm.end()
304 304
305 305 @command('perfstartup', formatteropts)
306 306 def perfstartup(ui, repo, **opts):
307 307 timer, fm = gettimer(ui, opts)
308 308 cmd = sys.argv[0]
309 309 def d():
310 310 if os.name != 'nt':
311 311 os.system("HGRCPATH= %s version -q > /dev/null" % cmd)
312 312 else:
313 313 os.environ['HGRCPATH'] = ''
314 314 os.system("%s version -q > NUL" % cmd)
315 315 timer(d)
316 316 fm.end()
317 317
318 318 @command('perfparents', formatteropts)
319 319 def perfparents(ui, repo, **opts):
320 320 timer, fm = gettimer(ui, opts)
321 321 # control the number of commits perfparents iterates over
322 322 # experimental config: perf.parentscount
323 323 count = ui.configint("perf", "parentscount", 1000)
324 324 if len(repo.changelog) < count:
325 325 raise error.Abort("repo needs %d commits for this test" % count)
326 326 repo = repo.unfiltered()
327 327 nl = [repo.changelog.node(i) for i in xrange(count)]
328 328 def d():
329 329 for n in nl:
330 330 repo.changelog.parents(n)
331 331 timer(d)
332 332 fm.end()
333 333
334 334 @command('perfctxfiles', formatteropts)
335 335 def perfctxfiles(ui, repo, x, **opts):
336 336 x = int(x)
337 337 timer, fm = gettimer(ui, opts)
338 338 def d():
339 339 len(repo[x].files())
340 340 timer(d)
341 341 fm.end()
342 342
343 343 @command('perfrawfiles', formatteropts)
344 344 def perfrawfiles(ui, repo, x, **opts):
345 345 x = int(x)
346 346 timer, fm = gettimer(ui, opts)
347 347 cl = repo.changelog
348 348 def d():
349 349 len(cl.read(x)[3])
350 350 timer(d)
351 351 fm.end()
352 352
353 353 @command('perflookup', formatteropts)
354 354 def perflookup(ui, repo, rev, **opts):
355 355 timer, fm = gettimer(ui, opts)
356 356 timer(lambda: len(repo.lookup(rev)))
357 357 fm.end()
358 358
359 359 @command('perfrevrange', formatteropts)
360 360 def perfrevrange(ui, repo, *specs, **opts):
361 361 timer, fm = gettimer(ui, opts)
362 362 revrange = scmutil.revrange
363 363 timer(lambda: len(revrange(repo, specs)))
364 364 fm.end()
365 365
366 366 @command('perfnodelookup', formatteropts)
367 367 def perfnodelookup(ui, repo, rev, **opts):
368 368 timer, fm = gettimer(ui, opts)
369 369 import mercurial.revlog
370 370 mercurial.revlog._prereadsize = 2**24 # disable lazy parser in old hg
371 371 n = repo[rev].node()
372 372 cl = mercurial.revlog.revlog(repo.svfs, "00changelog.i")
373 373 def d():
374 374 cl.rev(n)
375 375 clearcaches(cl)
376 376 timer(d)
377 377 fm.end()
378 378
379 379 @command('perflog',
380 380 [('', 'rename', False, 'ask log to follow renames')] + formatteropts)
381 381 def perflog(ui, repo, rev=None, **opts):
382 382 if rev is None:
383 383 rev=[]
384 384 timer, fm = gettimer(ui, opts)
385 385 ui.pushbuffer()
386 386 timer(lambda: commands.log(ui, repo, rev=rev, date='', user='',
387 387 copies=opts.get('rename')))
388 388 ui.popbuffer()
389 389 fm.end()
390 390
391 391 @command('perfmoonwalk', formatteropts)
392 392 def perfmoonwalk(ui, repo, **opts):
393 393 """benchmark walking the changelog backwards
394 394
395 395 This also loads the changelog data for each revision in the changelog.
396 396 """
397 397 timer, fm = gettimer(ui, opts)
398 398 def moonwalk():
399 399 for i in xrange(len(repo), -1, -1):
400 400 ctx = repo[i]
401 401 ctx.branch() # read changelog data (in addition to the index)
402 402 timer(moonwalk)
403 403 fm.end()
404 404
405 405 @command('perftemplating', formatteropts)
406 406 def perftemplating(ui, repo, rev=None, **opts):
407 407 if rev is None:
408 408 rev=[]
409 409 timer, fm = gettimer(ui, opts)
410 410 ui.pushbuffer()
411 411 timer(lambda: commands.log(ui, repo, rev=rev, date='', user='',
412 412 template='{date|shortdate} [{rev}:{node|short}]'
413 413 ' {author|person}: {desc|firstline}\n'))
414 414 ui.popbuffer()
415 415 fm.end()
416 416
417 417 @command('perfcca', formatteropts)
418 418 def perfcca(ui, repo, **opts):
419 419 timer, fm = gettimer(ui, opts)
420 420 timer(lambda: scmutil.casecollisionauditor(ui, False, repo.dirstate))
421 421 fm.end()
422 422
423 423 @command('perffncacheload', formatteropts)
424 424 def perffncacheload(ui, repo, **opts):
425 425 timer, fm = gettimer(ui, opts)
426 426 s = repo.store
427 427 def d():
428 428 s.fncache._load()
429 429 timer(d)
430 430 fm.end()
431 431
432 432 @command('perffncachewrite', formatteropts)
433 433 def perffncachewrite(ui, repo, **opts):
434 434 timer, fm = gettimer(ui, opts)
435 435 s = repo.store
436 436 s.fncache._load()
437 437 lock = repo.lock()
438 438 tr = repo.transaction('perffncachewrite')
439 439 def d():
440 440 s.fncache._dirty = True
441 441 s.fncache.write(tr)
442 442 timer(d)
443 443 lock.release()
444 444 tr.close()
445 445 fm.end()
446 446
447 447 @command('perffncacheencode', formatteropts)
448 448 def perffncacheencode(ui, repo, **opts):
449 449 timer, fm = gettimer(ui, opts)
450 450 s = repo.store
451 451 s.fncache._load()
452 452 def d():
453 453 for p in s.fncache.entries:
454 454 s.encode(p)
455 455 timer(d)
456 456 fm.end()
457 457
458 458 @command('perfdiffwd', formatteropts)
459 459 def perfdiffwd(ui, repo, **opts):
460 460 """Profile diff of working directory changes"""
461 461 timer, fm = gettimer(ui, opts)
462 462 options = {
463 463 'w': 'ignore_all_space',
464 464 'b': 'ignore_space_change',
465 465 'B': 'ignore_blank_lines',
466 466 }
467 467
468 468 for diffopt in ('', 'w', 'b', 'B', 'wB'):
469 469 opts = dict((options[c], '1') for c in diffopt)
470 470 def d():
471 471 ui.pushbuffer()
472 472 commands.diff(ui, repo, **opts)
473 473 ui.popbuffer()
474 474 title = 'diffopts: %s' % (diffopt and ('-' + diffopt) or 'none')
475 475 timer(d, title)
476 476 fm.end()
477 477
478 478 @command('perfrevlog', revlogopts + formatteropts +
479 479 [('d', 'dist', 100, 'distance between the revisions'),
480 480 ('s', 'startrev', 0, 'revision to start reading at')],
481 481 '-c|-m|FILE')
482 482 def perfrevlog(ui, repo, file_=None, startrev=0, **opts):
483 483 """Benchmark reading a series of revisions from a revlog.
484 484
485 485 By default, we read every ``-d/--dist``-th revision, from revision 0 to
486 486 the tip of the specified revlog.
487 487
488 488 The start revision can be defined via ``-s/--startrev``.
489 489 """
490 490 timer, fm = gettimer(ui, opts)
491 491 dist = opts['dist']
492 492 _len = getlen(ui)
493 493 def d():
494 494 r = cmdutil.openrevlog(repo, 'perfrevlog', file_, opts)
495 495 for x in xrange(startrev, _len(r), dist):
496 496 r.revision(r.node(x))
497 497
498 498 timer(d)
499 499 fm.end()
500 500
501 501 @command('perfrevlogrevision', revlogopts + formatteropts +
502 502 [('', 'cache', False, 'use caches instead of clearing')],
503 503 '-c|-m|FILE REV')
504 504 def perfrevlogrevision(ui, repo, file_, rev=None, cache=None, **opts):
505 505 """Benchmark obtaining a revlog revision.
506 506
507 507 Obtaining a revlog revision consists of roughly the following steps:
508 508
509 509 1. Compute the delta chain
510 510 2. Obtain the raw chunks for that delta chain
511 511 3. Decompress each raw chunk
512 512 4. Apply binary patches to obtain fulltext
513 513 5. Verify hash of fulltext
514 514
515 515 This command measures the time spent in each of these phases.
516 516 """
517 517 if opts.get('changelog') or opts.get('manifest'):
518 518 file_, rev = None, file_
519 519 elif rev is None:
520 520 raise error.CommandError('perfrevlogrevision', 'invalid arguments')
521 521
522 522 r = cmdutil.openrevlog(repo, 'perfrevlogrevision', file_, opts)
523 523 node = r.lookup(rev)
524 524 rev = r.rev(node)
525 525
526 526 def dodeltachain(rev):
527 527 if not cache:
528 528 r.clearcaches()
529 529 r._deltachain(rev)
530 530
531 531 def doread(chain):
532 532 if not cache:
533 533 r.clearcaches()
534 534 r._chunkraw(chain[0], chain[-1])
535 535
536 536 def dodecompress(data, chain):
537 537 if not cache:
538 538 r.clearcaches()
539 539
540 540 start = r.start
541 541 length = r.length
542 542 inline = r._inline
543 543 iosize = r._io.size
544 544 buffer = util.buffer
545 545 offset = start(chain[0])
546 546
547 547 for rev in chain:
548 548 chunkstart = start(rev)
549 549 if inline:
550 550 chunkstart += (rev + 1) * iosize
551 551 chunklength = length(rev)
552 552 b = buffer(data, chunkstart - offset, chunklength)
553 553 revlog.decompress(b)
554 554
555 555 def dopatch(text, bins):
556 556 if not cache:
557 557 r.clearcaches()
558 558 mdiff.patches(text, bins)
559 559
560 560 def dohash(text):
561 561 if not cache:
562 562 r.clearcaches()
563 563 r._checkhash(text, node, rev)
564 564
565 565 def dorevision():
566 566 if not cache:
567 567 r.clearcaches()
568 568 r.revision(node)
569 569
570 570 chain = r._deltachain(rev)[0]
571 data = r._chunkraw(chain[0], chain[-1])
571 data = r._chunkraw(chain[0], chain[-1])[1]
572 572 bins = r._chunks(chain)
573 573 text = str(bins[0])
574 574 bins = bins[1:]
575 575 text = mdiff.patches(text, bins)
576 576
577 577 benches = [
578 578 (lambda: dorevision(), 'full'),
579 579 (lambda: dodeltachain(rev), 'deltachain'),
580 580 (lambda: doread(chain), 'read'),
581 581 (lambda: dodecompress(data, chain), 'decompress'),
582 582 (lambda: dopatch(text, bins), 'patch'),
583 583 (lambda: dohash(text), 'hash'),
584 584 ]
585 585
586 586 for fn, title in benches:
587 587 timer, fm = gettimer(ui, opts)
588 588 timer(fn, title=title)
589 589 fm.end()
590 590
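The decompress phase above recomputes the chain's starting offset from the index (offset = start(chain[0])) in order to slice each chunk out of the raw segment. With this commit the segment's offset is returned by _chunkraw() itself, so a caller could unpack it instead of recomputing it; a sketch under the assumption that the returned offset is the absolute position of the first requested chunk (including any inline adjustment):

    # hypothetical dodecompress-style bookkeeping using the returned offset
    offset, data = r._chunkraw(chain[0], chain[-1])
    for rev in chain:
        chunkstart = r.start(rev)
        if r._inline:
            # inline revlogs interleave an index entry before each data chunk
            chunkstart += (rev + 1) * r._io.size
        revlog.decompress(util.buffer(data, chunkstart - offset, r.length(rev)))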
591 591 @command('perfrevset',
592 592 [('C', 'clear', False, 'clear volatile cache between each call.'),
593 593 ('', 'contexts', False, 'obtain changectx for each revision')]
594 594 + formatteropts, "REVSET")
595 595 def perfrevset(ui, repo, expr, clear=False, contexts=False, **opts):
596 596 """benchmark the execution time of a revset
597 597
598 598 Use the --clear option if you need to evaluate the impact of rebuilding the
599 599 volatile revision set caches on revset execution. The volatile caches hold
600 600 data related to filtered and obsolete revisions."""
601 601 timer, fm = gettimer(ui, opts)
602 602 def d():
603 603 if clear:
604 604 repo.invalidatevolatilesets()
605 605 if contexts:
606 606 for ctx in repo.set(expr): pass
607 607 else:
608 608 for r in repo.revs(expr): pass
609 609 timer(d)
610 610 fm.end()
611 611
612 612 @command('perfvolatilesets', formatteropts)
613 613 def perfvolatilesets(ui, repo, *names, **opts):
614 614 """benchmark the computation of various volatile sets
615 615
616 616 Volatile sets compute elements related to filtering and obsolescence."""
617 617 timer, fm = gettimer(ui, opts)
618 618 repo = repo.unfiltered()
619 619
620 620 def getobs(name):
621 621 def d():
622 622 repo.invalidatevolatilesets()
623 623 obsolete.getrevs(repo, name)
624 624 return d
625 625
626 626 allobs = sorted(obsolete.cachefuncs)
627 627 if names:
628 628 allobs = [n for n in allobs if n in names]
629 629
630 630 for name in allobs:
631 631 timer(getobs(name), title=name)
632 632
633 633 def getfiltered(name):
634 634 def d():
635 635 repo.invalidatevolatilesets()
636 636 repoview.filterrevs(repo, name)
637 637 return d
638 638
639 639 allfilter = sorted(repoview.filtertable)
640 640 if names:
641 641 allfilter = [n for n in allfilter if n in names]
642 642
643 643 for name in allfilter:
644 644 timer(getfiltered(name), title=name)
645 645 fm.end()
646 646
647 647 @command('perfbranchmap',
648 648 [('f', 'full', False,
649 649 'Includes build time of subset'),
650 650 ] + formatteropts)
651 651 def perfbranchmap(ui, repo, full=False, **opts):
652 652 """benchmark the update of a branchmap
653 653
654 654 This benchmarks the full repo.branchmap() call with read and write disabled
655 655 """
656 656 timer, fm = gettimer(ui, opts)
657 657 def getbranchmap(filtername):
658 658 """generate a benchmark function for the filtername"""
659 659 if filtername is None:
660 660 view = repo
661 661 else:
662 662 view = repo.filtered(filtername)
663 663 def d():
664 664 if full:
665 665 view._branchcaches.clear()
666 666 else:
667 667 view._branchcaches.pop(filtername, None)
668 668 view.branchmap()
669 669 return d
670 670 # add filter in smaller subset to bigger subset
671 671 possiblefilters = set(repoview.filtertable)
672 672 allfilters = []
673 673 while possiblefilters:
674 674 for name in possiblefilters:
675 675 subset = branchmap.subsettable.get(name)
676 676 if subset not in possiblefilters:
677 677 break
678 678 else:
679 679 assert False, 'subset cycle %s!' % possiblefilters
680 680 allfilters.append(name)
681 681 possiblefilters.remove(name)
682 682
683 683 # warm the cache
684 684 if not full:
685 685 for name in allfilters:
686 686 repo.filtered(name).branchmap()
687 687 # add unfiltered
688 688 allfilters.append(None)
689 689 oldread = branchmap.read
690 690 oldwrite = branchmap.branchcache.write
691 691 try:
692 692 branchmap.read = lambda repo: None
693 693 branchmap.write = lambda repo: None
694 694 for name in allfilters:
695 695 timer(getbranchmap(name), title=str(name))
696 696 finally:
697 697 branchmap.read = oldread
698 698 branchmap.branchcache.write = oldwrite
699 699 fm.end()
700 700
701 701 @command('perfloadmarkers')
702 702 def perfloadmarkers(ui, repo):
703 703 """benchmark the time to parse the on-disk markers for a repo
704 704
705 705 Result is the number of markers in the repo."""
706 706 timer, fm = gettimer(ui)
707 707 timer(lambda: len(obsolete.obsstore(repo.svfs)))
708 708 fm.end()
709 709
710 710 @command('perflrucachedict', formatteropts +
711 711 [('', 'size', 4, 'size of cache'),
712 712 ('', 'gets', 10000, 'number of key lookups'),
713 713 ('', 'sets', 10000, 'number of key sets'),
714 714 ('', 'mixed', 10000, 'number of mixed mode operations'),
715 715 ('', 'mixedgetfreq', 50, 'frequency of get vs set ops in mixed mode')],
716 716 norepo=True)
717 717 def perflrucache(ui, size=4, gets=10000, sets=10000, mixed=10000,
718 718 mixedgetfreq=50, **opts):
719 719 def doinit():
720 720 for i in xrange(10000):
721 721 util.lrucachedict(size)
722 722
723 723 values = []
724 724 for i in xrange(size):
725 725 values.append(random.randint(0, sys.maxint))
726 726
727 727 # Get mode fills the cache and tests raw lookup performance with no
728 728 # eviction.
729 729 getseq = []
730 730 for i in xrange(gets):
731 731 getseq.append(random.choice(values))
732 732
733 733 def dogets():
734 734 d = util.lrucachedict(size)
735 735 for v in values:
736 736 d[v] = v
737 737 for key in getseq:
738 738 value = d[key]
739 739 value # silence pyflakes warning
740 740
741 741 # Set mode tests insertion speed with cache eviction.
742 742 setseq = []
743 743 for i in xrange(sets):
744 744 setseq.append(random.randint(0, sys.maxint))
745 745
746 746 def dosets():
747 747 d = util.lrucachedict(size)
748 748 for v in setseq:
749 749 d[v] = v
750 750
751 751 # Mixed mode randomly performs gets and sets with eviction.
752 752 mixedops = []
753 753 for i in xrange(mixed):
754 754 r = random.randint(0, 100)
755 755 if r < mixedgetfreq:
756 756 op = 0
757 757 else:
758 758 op = 1
759 759
760 760 mixedops.append((op, random.randint(0, size * 2)))
761 761
762 762 def domixed():
763 763 d = util.lrucachedict(size)
764 764
765 765 for op, v in mixedops:
766 766 if op == 0:
767 767 try:
768 768 d[v]
769 769 except KeyError:
770 770 pass
771 771 else:
772 772 d[v] = v
773 773
774 774 benches = [
775 775 (doinit, 'init'),
776 776 (dogets, 'gets'),
777 777 (dosets, 'sets'),
778 778 (domixed, 'mixed')
779 779 ]
780 780
781 781 for fn, title in benches:
782 782 timer, fm = gettimer(ui, opts)
783 783 timer(fn, title=title)
784 784 fm.end()
@@ -1,1786 +1,1790 @@
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 from __future__ import absolute_import
15 15
16 16 import collections
17 17 import errno
18 18 import os
19 19 import struct
20 20 import zlib
21 21
22 22 # import stuff from node for others to import from revlog
23 23 from .node import (
24 24 bin,
25 25 hex,
26 26 nullid,
27 27 nullrev,
28 28 )
29 29 from .i18n import _
30 30 from . import (
31 31 ancestor,
32 32 error,
33 33 mdiff,
34 34 parsers,
35 35 templatefilters,
36 36 util,
37 37 )
38 38
39 39 _pack = struct.pack
40 40 _unpack = struct.unpack
41 41 _compress = zlib.compress
42 42 _decompress = zlib.decompress
43 43 _sha = util.sha1
44 44
45 45 # revlog header flags
46 46 REVLOGV0 = 0
47 47 REVLOGNG = 1
48 48 REVLOGNGINLINEDATA = (1 << 16)
49 49 REVLOGGENERALDELTA = (1 << 17)
50 50 REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
51 51 REVLOG_DEFAULT_FORMAT = REVLOGNG
52 52 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
53 53 REVLOGNG_FLAGS = REVLOGNGINLINEDATA | REVLOGGENERALDELTA
54 54
55 55 # revlog index flags
56 56 REVIDX_ISCENSORED = (1 << 15) # revision has censor metadata, must be verified
57 57 REVIDX_DEFAULT_FLAGS = 0
58 58 REVIDX_KNOWN_FLAGS = REVIDX_ISCENSORED
59 59
60 60 # max size of revlog with inline data
61 61 _maxinline = 131072
62 62 _chunksize = 1048576
63 63
64 64 RevlogError = error.RevlogError
65 65 LookupError = error.LookupError
66 66 CensoredNodeError = error.CensoredNodeError
67 67
68 68 def getoffset(q):
69 69 return int(q >> 16)
70 70
71 71 def gettype(q):
72 72 return int(q & 0xFFFF)
73 73
74 74 def offset_type(offset, type):
75 75 return long(long(offset) << 16 | type)
76 76
77 77 _nullhash = _sha(nullid)
78 78
79 79 def hash(text, p1, p2):
80 80 """generate a hash from the given text and its parent hashes
81 81
82 82 This hash combines both the current file contents and its history
83 83 in a manner that makes it easy to distinguish nodes with the same
84 84 content in the revision graph.
85 85 """
86 86 # As of now, if one of the parent nodes is null, it is p2 that is null
87 87 if p2 == nullid:
88 88 # deep copy of a hash is faster than creating one
89 89 s = _nullhash.copy()
90 90 s.update(p1)
91 91 else:
92 92 # none of the parent nodes are nullid
93 93 l = [p1, p2]
94 94 l.sort()
95 95 s = _sha(l[0])
96 96 s.update(l[1])
97 97 s.update(text)
98 98 return s.digest()
99 99
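For illustration, the branches above can be restated directly against hashlib (the module itself uses util.sha1). Because nullid is all zero bytes it always sorts first, so the fast path and the general path produce the same digest and a single expression covers both:

    import hashlib

    def hash_sketch(text, p1, p2):
        # p1/p2 are 20-byte binary node ids; sorting two of them is min/max
        s = hashlib.sha1(min(p1, p2))
        s.update(max(p1, p2))
        s.update(text)
        return s.digest()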
100 100 def decompress(bin):
101 101 """ decompress the given input """
102 102 if not bin:
103 103 return bin
104 104 t = bin[0]
105 105 if t == '\0':
106 106 return bin
107 107 if t == 'x':
108 108 try:
109 109 return _decompress(bin)
110 110 except zlib.error as e:
111 111 raise RevlogError(_("revlog decompress error: %s") % str(e))
112 112 if t == 'u':
113 113 return util.buffer(bin, 1)
114 114 raise RevlogError(_("unknown compression type %r") % t)
115 115
116 116 # index v0:
117 117 # 4 bytes: offset
118 118 # 4 bytes: compressed length
119 119 # 4 bytes: base rev
120 120 # 4 bytes: link rev
121 121 # 20 bytes: parent 1 nodeid
122 122 # 20 bytes: parent 2 nodeid
123 123 # 20 bytes: nodeid
124 124 indexformatv0 = ">4l20s20s20s"
125 125
126 126 class revlogoldio(object):
127 127 def __init__(self):
128 128 self.size = struct.calcsize(indexformatv0)
129 129
130 130 def parseindex(self, data, inline):
131 131 s = self.size
132 132 index = []
133 133 nodemap = {nullid: nullrev}
134 134 n = off = 0
135 135 l = len(data)
136 136 while off + s <= l:
137 137 cur = data[off:off + s]
138 138 off += s
139 139 e = _unpack(indexformatv0, cur)
140 140 # transform to revlogv1 format
141 141 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
142 142 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
143 143 index.append(e2)
144 144 nodemap[e[6]] = n
145 145 n += 1
146 146
147 147 # add the magic null revision at -1
148 148 index.append((0, 0, 0, -1, -1, -1, -1, nullid))
149 149
150 150 return index, nodemap, None
151 151
152 152 def packentry(self, entry, node, version, rev):
153 153 if gettype(entry[0]):
154 154 raise RevlogError(_("index entry flags need RevlogNG"))
155 155 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
156 156 node(entry[5]), node(entry[6]), entry[7])
157 157 return _pack(indexformatv0, *e2)
158 158
159 159 # index ng:
160 160 # 6 bytes: offset
161 161 # 2 bytes: flags
162 162 # 4 bytes: compressed length
163 163 # 4 bytes: uncompressed length
164 164 # 4 bytes: base rev
165 165 # 4 bytes: link rev
166 166 # 4 bytes: parent 1 rev
167 167 # 4 bytes: parent 2 rev
168 168 # 32 bytes: nodeid
169 169 indexformatng = ">Qiiiiii20s12x"
170 170 versionformat = ">I"
171 171
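For reference, one RevlogNG index entry described by this format string is 64 bytes: an 8-byte field holding the 6-byte offset and 2-byte flags (packed and unpacked by offset_type()/getoffset()/gettype() above), six 4-byte integers, the 20-byte nodeid, and 12 bytes of padding. A quick check:

    import struct
    assert struct.calcsize(">Qiiiiii20s12x") == 64  # 8 + 6*4 + 20 + 12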
172 172 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
173 173 # signed integer)
174 174 _maxentrysize = 0x7fffffff
175 175
176 176 class revlogio(object):
177 177 def __init__(self):
178 178 self.size = struct.calcsize(indexformatng)
179 179
180 180 def parseindex(self, data, inline):
181 181 # call the C implementation to parse the index data
182 182 index, cache = parsers.parse_index2(data, inline)
183 183 return index, getattr(index, 'nodemap', None), cache
184 184
185 185 def packentry(self, entry, node, version, rev):
186 186 p = _pack(indexformatng, *entry)
187 187 if rev == 0:
188 188 p = _pack(versionformat, version) + p[4:]
189 189 return p
190 190
191 191 class revlog(object):
192 192 """
193 193 the underlying revision storage object
194 194
195 195 A revlog consists of two parts, an index and the revision data.
196 196
197 197 The index is a file with a fixed record size containing
198 198 information on each revision, including its nodeid (hash), the
199 199 nodeids of its parents, the position and offset of its data within
200 200 the data file, and the revision it's based on. Finally, each entry
201 201 contains a linkrev entry that can serve as a pointer to external
202 202 data.
203 203
204 204 The revision data itself is a linear collection of data chunks.
205 205 Each chunk represents a revision and is usually represented as a
206 206 delta against the previous chunk. To bound lookup time, runs of
207 207 deltas are limited to about 2 times the length of the original
208 208 version data. This makes retrieval of a version proportional to
209 209 its size, or O(1) relative to the number of revisions.
210 210
211 211 Both pieces of the revlog are written to in an append-only
212 212 fashion, which means we never need to rewrite a file to insert or
213 213 remove data, and can use some simple techniques to avoid the need
214 214 for locking while reading.
215 215 """
216 216 def __init__(self, opener, indexfile):
217 217 """
218 218 create a revlog object
219 219
220 220 opener is a function that abstracts the file opening operation
221 221 and can be used to implement COW semantics or the like.
222 222 """
223 223 self.indexfile = indexfile
224 224 self.datafile = indexfile[:-2] + ".d"
225 225 self.opener = opener
226 226 # 3-tuple of (node, rev, text) for a raw revision.
227 227 self._cache = None
228 228 # 2-tuple of (rev, baserev) defining the base revision the delta chain
229 229 # begins at for a revision.
230 230 self._basecache = None
231 231 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
232 232 self._chunkcache = (0, '')
233 233 # How much data to read and cache into the raw revlog data cache.
234 234 self._chunkcachesize = 65536
235 235 self._maxchainlen = None
236 236 self._aggressivemergedeltas = False
237 237 self.index = []
238 238 # Mapping of partial identifiers to full nodes.
239 239 self._pcache = {}
240 240 # Mapping of revision integer to full node.
241 241 self._nodecache = {nullid: nullrev}
242 242 self._nodepos = None
243 243
244 244 v = REVLOG_DEFAULT_VERSION
245 245 opts = getattr(opener, 'options', None)
246 246 if opts is not None:
247 247 if 'revlogv1' in opts:
248 248 if 'generaldelta' in opts:
249 249 v |= REVLOGGENERALDELTA
250 250 else:
251 251 v = 0
252 252 if 'chunkcachesize' in opts:
253 253 self._chunkcachesize = opts['chunkcachesize']
254 254 if 'maxchainlen' in opts:
255 255 self._maxchainlen = opts['maxchainlen']
256 256 if 'aggressivemergedeltas' in opts:
257 257 self._aggressivemergedeltas = opts['aggressivemergedeltas']
258 258 self._lazydeltabase = bool(opts.get('lazydeltabase', False))
259 259
260 260 if self._chunkcachesize <= 0:
261 261 raise RevlogError(_('revlog chunk cache size %r is not greater '
262 262 'than 0') % self._chunkcachesize)
263 263 elif self._chunkcachesize & (self._chunkcachesize - 1):
264 264 raise RevlogError(_('revlog chunk cache size %r is not a power '
265 265 'of 2') % self._chunkcachesize)
266 266
267 267 indexdata = ''
268 268 self._initempty = True
269 269 try:
270 270 f = self.opener(self.indexfile)
271 271 indexdata = f.read()
272 272 f.close()
273 273 if len(indexdata) > 0:
274 274 v = struct.unpack(versionformat, indexdata[:4])[0]
275 275 self._initempty = False
276 276 except IOError as inst:
277 277 if inst.errno != errno.ENOENT:
278 278 raise
279 279
280 280 self.version = v
281 281 self._inline = v & REVLOGNGINLINEDATA
282 282 self._generaldelta = v & REVLOGGENERALDELTA
283 283 flags = v & ~0xFFFF
284 284 fmt = v & 0xFFFF
285 285 if fmt == REVLOGV0 and flags:
286 286 raise RevlogError(_("index %s unknown flags %#04x for format v0")
287 287 % (self.indexfile, flags >> 16))
288 288 elif fmt == REVLOGNG and flags & ~REVLOGNG_FLAGS:
289 289 raise RevlogError(_("index %s unknown flags %#04x for revlogng")
290 290 % (self.indexfile, flags >> 16))
291 291 elif fmt > REVLOGNG:
292 292 raise RevlogError(_("index %s unknown format %d")
293 293 % (self.indexfile, fmt))
294 294
295 295 self._io = revlogio()
296 296 if self.version == REVLOGV0:
297 297 self._io = revlogoldio()
298 298 try:
299 299 d = self._io.parseindex(indexdata, self._inline)
300 300 except (ValueError, IndexError):
301 301 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
302 302 self.index, nodemap, self._chunkcache = d
303 303 if nodemap is not None:
304 304 self.nodemap = self._nodecache = nodemap
305 305 if not self._chunkcache:
306 306 self._chunkclear()
307 307 # revnum -> (chain-length, sum-delta-length)
308 308 self._chaininfocache = {}
309 309
310 310 def tip(self):
311 311 return self.node(len(self.index) - 2)
312 312 def __contains__(self, rev):
313 313 return 0 <= rev < len(self)
314 314 def __len__(self):
315 315 return len(self.index) - 1
316 316 def __iter__(self):
317 317 return iter(xrange(len(self)))
318 318 def revs(self, start=0, stop=None):
319 319 """iterate over all rev in this revlog (from start to stop)"""
320 320 step = 1
321 321 if stop is not None:
322 322 if start > stop:
323 323 step = -1
324 324 stop += step
325 325 else:
326 326 stop = len(self)
327 327 return xrange(start, stop, step)
328 328
329 329 @util.propertycache
330 330 def nodemap(self):
331 331 self.rev(self.node(0))
332 332 return self._nodecache
333 333
334 334 def hasnode(self, node):
335 335 try:
336 336 self.rev(node)
337 337 return True
338 338 except KeyError:
339 339 return False
340 340
341 341 def clearcaches(self):
342 342 self._cache = None
343 343 self._basecache = None
344 344 self._chunkcache = (0, '')
345 345 self._pcache = {}
346 346
347 347 try:
348 348 self._nodecache.clearcaches()
349 349 except AttributeError:
350 350 self._nodecache = {nullid: nullrev}
351 351 self._nodepos = None
352 352
353 353 def rev(self, node):
354 354 try:
355 355 return self._nodecache[node]
356 356 except TypeError:
357 357 raise
358 358 except RevlogError:
359 359 # parsers.c radix tree lookup failed
360 360 raise LookupError(node, self.indexfile, _('no node'))
361 361 except KeyError:
362 362 # pure python cache lookup failed
363 363 n = self._nodecache
364 364 i = self.index
365 365 p = self._nodepos
366 366 if p is None:
367 367 p = len(i) - 2
368 368 for r in xrange(p, -1, -1):
369 369 v = i[r][7]
370 370 n[v] = r
371 371 if v == node:
372 372 self._nodepos = r - 1
373 373 return r
374 374 raise LookupError(node, self.indexfile, _('no node'))
375 375
376 376 def node(self, rev):
377 377 return self.index[rev][7]
378 378 def linkrev(self, rev):
379 379 return self.index[rev][4]
380 380 def parents(self, node):
381 381 i = self.index
382 382 d = i[self.rev(node)]
383 383 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
384 384 def parentrevs(self, rev):
385 385 return self.index[rev][5:7]
386 386 def start(self, rev):
387 387 return int(self.index[rev][0] >> 16)
388 388 def end(self, rev):
389 389 return self.start(rev) + self.length(rev)
390 390 def length(self, rev):
391 391 return self.index[rev][1]
392 392 def chainbase(self, rev):
393 393 index = self.index
394 394 base = index[rev][3]
395 395 while base != rev:
396 396 rev = base
397 397 base = index[rev][3]
398 398 return base
399 399 def chainlen(self, rev):
400 400 return self._chaininfo(rev)[0]
401 401
402 402 def _chaininfo(self, rev):
403 403 chaininfocache = self._chaininfocache
404 404 if rev in chaininfocache:
405 405 return chaininfocache[rev]
406 406 index = self.index
407 407 generaldelta = self._generaldelta
408 408 iterrev = rev
409 409 e = index[iterrev]
410 410 clen = 0
411 411 compresseddeltalen = 0
412 412 while iterrev != e[3]:
413 413 clen += 1
414 414 compresseddeltalen += e[1]
415 415 if generaldelta:
416 416 iterrev = e[3]
417 417 else:
418 418 iterrev -= 1
419 419 if iterrev in chaininfocache:
420 420 t = chaininfocache[iterrev]
421 421 clen += t[0]
422 422 compresseddeltalen += t[1]
423 423 break
424 424 e = index[iterrev]
425 425 else:
426 426 # Add text length of base since decompressing that also takes
427 427 # work. For cache hits the length is already included.
428 428 compresseddeltalen += e[1]
429 429 r = (clen, compresseddeltalen)
430 430 chaininfocache[rev] = r
431 431 return r
432 432
433 433 def _deltachain(self, rev, stoprev=None):
434 434 """Obtain the delta chain for a revision.
435 435
436 436 ``stoprev`` specifies a revision to stop at. If not specified, we
437 437 stop at the base of the chain.
438 438
439 439 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
440 440 revs in ascending order and ``stopped`` is a bool indicating whether
441 441 ``stoprev`` was hit.
442 442 """
443 443 chain = []
444 444
445 445 # Alias to prevent attribute lookup in tight loop.
446 446 index = self.index
447 447 generaldelta = self._generaldelta
448 448
449 449 iterrev = rev
450 450 e = index[iterrev]
451 451 while iterrev != e[3] and iterrev != stoprev:
452 452 chain.append(iterrev)
453 453 if generaldelta:
454 454 iterrev = e[3]
455 455 else:
456 456 iterrev -= 1
457 457 e = index[iterrev]
458 458
459 459 if iterrev == stoprev:
460 460 stopped = True
461 461 else:
462 462 chain.append(iterrev)
463 463 stopped = False
464 464
465 465 chain.reverse()
466 466 return chain, stopped
467 467
468 468 def flags(self, rev):
469 469 return self.index[rev][0] & 0xFFFF
470 470 def rawsize(self, rev):
471 471 """return the length of the uncompressed text for a given revision"""
472 472 l = self.index[rev][2]
473 473 if l >= 0:
474 474 return l
475 475
476 476 t = self.revision(self.node(rev))
477 477 return len(t)
478 478 size = rawsize
479 479
480 480 def ancestors(self, revs, stoprev=0, inclusive=False):
481 481 """Generate the ancestors of 'revs' in reverse topological order.
482 482 Does not generate revs lower than stoprev.
483 483
484 484 See the documentation for ancestor.lazyancestors for more details."""
485 485
486 486 return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
487 487 inclusive=inclusive)
488 488
489 489 def descendants(self, revs):
490 490 """Generate the descendants of 'revs' in revision order.
491 491
492 492 Yield a sequence of revision numbers starting with a child of
493 493 some rev in revs, i.e., each revision is *not* considered a
494 494 descendant of itself. Results are ordered by revision number (a
495 495 topological sort)."""
496 496 first = min(revs)
497 497 if first == nullrev:
498 498 for i in self:
499 499 yield i
500 500 return
501 501
502 502 seen = set(revs)
503 503 for i in self.revs(start=first + 1):
504 504 for x in self.parentrevs(i):
505 505 if x != nullrev and x in seen:
506 506 seen.add(i)
507 507 yield i
508 508 break
509 509
510 510 def findcommonmissing(self, common=None, heads=None):
511 511 """Return a tuple of the ancestors of common and the ancestors of heads
512 512 that are not ancestors of common. In revset terminology, we return the
513 513 tuple:
514 514
515 515 ::common, (::heads) - (::common)
516 516
517 517 The list is sorted by revision number, meaning it is
518 518 topologically sorted.
519 519
520 520 'heads' and 'common' are both lists of node IDs. If heads is
521 521 not supplied, uses all of the revlog's heads. If common is not
522 522 supplied, uses nullid."""
523 523 if common is None:
524 524 common = [nullid]
525 525 if heads is None:
526 526 heads = self.heads()
527 527
528 528 common = [self.rev(n) for n in common]
529 529 heads = [self.rev(n) for n in heads]
530 530
531 531 # we want the ancestors, but inclusive
532 532 class lazyset(object):
533 533 def __init__(self, lazyvalues):
534 534 self.addedvalues = set()
535 535 self.lazyvalues = lazyvalues
536 536
537 537 def __contains__(self, value):
538 538 return value in self.addedvalues or value in self.lazyvalues
539 539
540 540 def __iter__(self):
541 541 added = self.addedvalues
542 542 for r in added:
543 543 yield r
544 544 for r in self.lazyvalues:
545 545 if not r in added:
546 546 yield r
547 547
548 548 def add(self, value):
549 549 self.addedvalues.add(value)
550 550
551 551 def update(self, values):
552 552 self.addedvalues.update(values)
553 553
554 554 has = lazyset(self.ancestors(common))
555 555 has.add(nullrev)
556 556 has.update(common)
557 557
558 558 # take all ancestors from heads that aren't in has
559 559 missing = set()
560 560 visit = collections.deque(r for r in heads if r not in has)
561 561 while visit:
562 562 r = visit.popleft()
563 563 if r in missing:
564 564 continue
565 565 else:
566 566 missing.add(r)
567 567 for p in self.parentrevs(r):
568 568 if p not in has:
569 569 visit.append(p)
570 570 missing = list(missing)
571 571 missing.sort()
572 572 return has, [self.node(r) for r in missing]
573 573
574 574 def incrementalmissingrevs(self, common=None):
575 575 """Return an object that can be used to incrementally compute the
576 576 revision numbers of the ancestors of arbitrary sets that are not
577 577 ancestors of common. This is an ancestor.incrementalmissingancestors
578 578 object.
579 579
580 580 'common' is a list of revision numbers. If common is not supplied, uses
581 581 nullrev.
582 582 """
583 583 if common is None:
584 584 common = [nullrev]
585 585
586 586 return ancestor.incrementalmissingancestors(self.parentrevs, common)
587 587
588 588 def findmissingrevs(self, common=None, heads=None):
589 589 """Return the revision numbers of the ancestors of heads that
590 590 are not ancestors of common.
591 591
592 592 More specifically, return a list of revision numbers corresponding to
593 593 nodes N such that every N satisfies the following constraints:
594 594
595 595 1. N is an ancestor of some node in 'heads'
596 596 2. N is not an ancestor of any node in 'common'
597 597
598 598 The list is sorted by revision number, meaning it is
599 599 topologically sorted.
600 600
601 601 'heads' and 'common' are both lists of revision numbers. If heads is
602 602 not supplied, uses all of the revlog's heads. If common is not
603 603 supplied, uses nullid."""
604 604 if common is None:
605 605 common = [nullrev]
606 606 if heads is None:
607 607 heads = self.headrevs()
608 608
609 609 inc = self.incrementalmissingrevs(common=common)
610 610 return inc.missingancestors(heads)
611 611
612 612 def findmissing(self, common=None, heads=None):
613 613 """Return the ancestors of heads that are not ancestors of common.
614 614
615 615 More specifically, return a list of nodes N such that every N
616 616 satisfies the following constraints:
617 617
618 618 1. N is an ancestor of some node in 'heads'
619 619 2. N is not an ancestor of any node in 'common'
620 620
621 621 The list is sorted by revision number, meaning it is
622 622 topologically sorted.
623 623
624 624 'heads' and 'common' are both lists of node IDs. If heads is
625 625 not supplied, uses all of the revlog's heads. If common is not
626 626 supplied, uses nullid."""
627 627 if common is None:
628 628 common = [nullid]
629 629 if heads is None:
630 630 heads = self.heads()
631 631
632 632 common = [self.rev(n) for n in common]
633 633 heads = [self.rev(n) for n in heads]
634 634
635 635 inc = self.incrementalmissingrevs(common=common)
636 636 return [self.node(r) for r in inc.missingancestors(heads)]
637 637
638 638 def nodesbetween(self, roots=None, heads=None):
639 639 """Return a topological path from 'roots' to 'heads'.
640 640
641 641 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
642 642 topologically sorted list of all nodes N that satisfy both of
643 643 these constraints:
644 644
645 645 1. N is a descendant of some node in 'roots'
646 646 2. N is an ancestor of some node in 'heads'
647 647
648 648 Every node is considered to be both a descendant and an ancestor
649 649 of itself, so every reachable node in 'roots' and 'heads' will be
650 650 included in 'nodes'.
651 651
652 652 'outroots' is the list of reachable nodes in 'roots', i.e., the
653 653 subset of 'roots' that is returned in 'nodes'. Likewise,
654 654 'outheads' is the subset of 'heads' that is also in 'nodes'.
655 655
656 656 'roots' and 'heads' are both lists of node IDs. If 'roots' is
657 657 unspecified, uses nullid as the only root. If 'heads' is
658 658 unspecified, uses list of all of the revlog's heads."""
659 659 nonodes = ([], [], [])
660 660 if roots is not None:
661 661 roots = list(roots)
662 662 if not roots:
663 663 return nonodes
664 664 lowestrev = min([self.rev(n) for n in roots])
665 665 else:
666 666 roots = [nullid] # Everybody's a descendant of nullid
667 667 lowestrev = nullrev
668 668 if (lowestrev == nullrev) and (heads is None):
669 669 # We want _all_ the nodes!
670 670 return ([self.node(r) for r in self], [nullid], list(self.heads()))
671 671 if heads is None:
672 672 # All nodes are ancestors, so the latest ancestor is the last
673 673 # node.
674 674 highestrev = len(self) - 1
675 675 # Set ancestors to None to signal that every node is an ancestor.
676 676 ancestors = None
677 677 # Set heads to an empty dictionary for later discovery of heads
678 678 heads = {}
679 679 else:
680 680 heads = list(heads)
681 681 if not heads:
682 682 return nonodes
683 683 ancestors = set()
684 684 # Turn heads into a dictionary so we can remove 'fake' heads.
685 685 # Also, later we will be using it to filter out the heads we can't
686 686 # find from roots.
687 687 heads = dict.fromkeys(heads, False)
688 688 # Start at the top and keep marking parents until we're done.
689 689 nodestotag = set(heads)
690 690 # Remember where the top was so we can use it as a limit later.
691 691 highestrev = max([self.rev(n) for n in nodestotag])
692 692 while nodestotag:
693 693 # grab a node to tag
694 694 n = nodestotag.pop()
695 695 # Never tag nullid
696 696 if n == nullid:
697 697 continue
698 698 # A node's revision number represents its place in a
699 699 # topologically sorted list of nodes.
700 700 r = self.rev(n)
701 701 if r >= lowestrev:
702 702 if n not in ancestors:
703 703 # If we are possibly a descendant of one of the roots
704 704 # and we haven't already been marked as an ancestor
705 705 ancestors.add(n) # Mark as ancestor
706 706 # Add non-nullid parents to list of nodes to tag.
707 707 nodestotag.update([p for p in self.parents(n) if
708 708 p != nullid])
709 709 elif n in heads: # We've seen it before, is it a fake head?
710 710 # So it is, real heads should not be the ancestors of
711 711 # any other heads.
712 712 heads.pop(n)
713 713 if not ancestors:
714 714 return nonodes
715 715 # Now that we have our set of ancestors, we want to remove any
716 716 # roots that are not ancestors.
717 717
718 718 # If one of the roots was nullid, everything is included anyway.
719 719 if lowestrev > nullrev:
720 720 # But, since we weren't, let's recompute the lowest rev to not
721 721 # include roots that aren't ancestors.
722 722
723 723 # Filter out roots that aren't ancestors of heads
724 724 roots = [n for n in roots if n in ancestors]
725 725 # Recompute the lowest revision
726 726 if roots:
727 727 lowestrev = min([self.rev(n) for n in roots])
728 728 else:
729 729 # No more roots? Return empty list
730 730 return nonodes
731 731 else:
732 732 # We are descending from nullid, and don't need to care about
733 733 # any other roots.
734 734 lowestrev = nullrev
735 735 roots = [nullid]
736 736 # Transform our roots list into a set.
737 737 descendants = set(roots)
738 738 # Also, keep the original roots so we can filter out roots that aren't
739 739 # 'real' roots (i.e. are descended from other roots).
740 740 roots = descendants.copy()
741 741 # Our topologically sorted list of output nodes.
742 742 orderedout = []
743 743 # Don't start at nullid since we don't want nullid in our output list,
744 744 # and if nullid shows up in descendants, empty parents will look like
745 745 # they're descendants.
746 746 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
747 747 n = self.node(r)
748 748 isdescendant = False
749 749 if lowestrev == nullrev: # Everybody is a descendant of nullid
750 750 isdescendant = True
751 751 elif n in descendants:
752 752 # n is already a descendant
753 753 isdescendant = True
754 754 # This check only needs to be done here because all the roots
755 755 # will start being marked as descendants before the loop.
756 756 if n in roots:
757 757 # If n was a root, check if it's a 'real' root.
758 758 p = tuple(self.parents(n))
759 759 # If any of its parents are descendants, it's not a root.
760 760 if (p[0] in descendants) or (p[1] in descendants):
761 761 roots.remove(n)
762 762 else:
763 763 p = tuple(self.parents(n))
764 764 # A node is a descendant if either of its parents are
765 765 # descendants. (We seeded the descendants set with the roots
766 766 # up there, remember?)
767 767 if (p[0] in descendants) or (p[1] in descendants):
768 768 descendants.add(n)
769 769 isdescendant = True
770 770 if isdescendant and ((ancestors is None) or (n in ancestors)):
771 771 # Only include nodes that are both descendants and ancestors.
772 772 orderedout.append(n)
773 773 if (ancestors is not None) and (n in heads):
774 774 # We're trying to figure out which heads are reachable
775 775 # from roots.
776 776 # Mark this head as having been reached
777 777 heads[n] = True
778 778 elif ancestors is None:
779 779 # Otherwise, we're trying to discover the heads.
780 780 # Assume this is a head because if it isn't, the next step
781 781 # will eventually remove it.
782 782 heads[n] = True
783 783 # But, obviously its parents aren't.
784 784 for p in self.parents(n):
785 785 heads.pop(p, None)
786 786 heads = [n for n, flag in heads.iteritems() if flag]
787 787 roots = list(roots)
788 788 assert orderedout
789 789 assert roots
790 790 assert heads
791 791 return (orderedout, roots, heads)
792 792
793 793 def headrevs(self):
794 794 try:
795 795 return self.index.headrevs()
796 796 except AttributeError:
797 797 return self._headrevs()
798 798
799 799 def computephases(self, roots):
800 800 return self.index.computephasesmapsets(roots)
801 801
802 802 def _headrevs(self):
803 803 count = len(self)
804 804 if not count:
805 805 return [nullrev]
806 806 # we won't iter over filtered rev so nobody is a head at start
807 807 ishead = [0] * (count + 1)
808 808 index = self.index
809 809 for r in self:
810 810 ishead[r] = 1 # I may be a head
811 811 e = index[r]
812 812 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
813 813 return [r for r, val in enumerate(ishead) if val]
814 814
815 815 def heads(self, start=None, stop=None):
816 816 """return the list of all nodes that have no children
817 817
818 818 if start is specified, only heads that are descendants of
819 819 start will be returned
820 820 if stop is specified, it will consider all the revs from stop
821 821 as if they had no children
822 822 """
823 823 if start is None and stop is None:
824 824 if not len(self):
825 825 return [nullid]
826 826 return [self.node(r) for r in self.headrevs()]
827 827
828 828 if start is None:
829 829 start = nullid
830 830 if stop is None:
831 831 stop = []
832 832 stoprevs = set([self.rev(n) for n in stop])
833 833 startrev = self.rev(start)
834 834 reachable = set((startrev,))
835 835 heads = set((startrev,))
836 836
837 837 parentrevs = self.parentrevs
838 838 for r in self.revs(start=startrev + 1):
839 839 for p in parentrevs(r):
840 840 if p in reachable:
841 841 if r not in stoprevs:
842 842 reachable.add(r)
843 843 heads.add(r)
844 844 if p in heads and p not in stoprevs:
845 845 heads.remove(p)
846 846
847 847 return [self.node(r) for r in heads]
848 848
849 849 def children(self, node):
850 850 """find the children of a given node"""
851 851 c = []
852 852 p = self.rev(node)
853 853 for r in self.revs(start=p + 1):
854 854 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
855 855 if prevs:
856 856 for pr in prevs:
857 857 if pr == p:
858 858 c.append(self.node(r))
859 859 elif p == nullrev:
860 860 c.append(self.node(r))
861 861 return c
862 862
863 863 def descendant(self, start, end):
864 864 if start == nullrev:
865 865 return True
866 866 for i in self.descendants([start]):
867 867 if i == end:
868 868 return True
869 869 elif i > end:
870 870 break
871 871 return False
872 872
873 873 def commonancestorsheads(self, a, b):
874 874 """calculate all the heads of the common ancestors of nodes a and b"""
875 875 a, b = self.rev(a), self.rev(b)
876 876 try:
877 877 ancs = self.index.commonancestorsheads(a, b)
878 878 except (AttributeError, OverflowError): # C implementation failed
879 879 ancs = ancestor.commonancestorsheads(self.parentrevs, a, b)
880 880 return map(self.node, ancs)
881 881
882 882 def isancestor(self, a, b):
883 883 """return True if node a is an ancestor of node b
884 884
885 885 The implementation of this is trivial but the use of
886 886 commonancestorsheads is not."""
887 887 return a in self.commonancestorsheads(a, b)
888 888
889 889 def ancestor(self, a, b):
890 890 """calculate the "best" common ancestor of nodes a and b"""
891 891
892 892 a, b = self.rev(a), self.rev(b)
893 893 try:
894 894 ancs = self.index.ancestors(a, b)
895 895 except (AttributeError, OverflowError):
896 896 ancs = ancestor.ancestors(self.parentrevs, a, b)
897 897 if ancs:
898 898 # choose a consistent winner when there's a tie
899 899 return min(map(self.node, ancs))
900 900 return nullid
901 901
902 902 def _match(self, id):
903 903 if isinstance(id, int):
904 904 # rev
905 905 return self.node(id)
906 906 if len(id) == 20:
907 907 # possibly a binary node
908 908 # odds of a binary node being all hex in ASCII are 1 in 10**25
909 909 try:
910 910 node = id
911 911 self.rev(node) # quick search the index
912 912 return node
913 913 except LookupError:
914 914 pass # may be partial hex id
915 915 try:
916 916 # str(rev)
917 917 rev = int(id)
918 918 if str(rev) != id:
919 919 raise ValueError
920 920 if rev < 0:
921 921 rev = len(self) + rev
922 922 if rev < 0 or rev >= len(self):
923 923 raise ValueError
924 924 return self.node(rev)
925 925 except (ValueError, OverflowError):
926 926 pass
927 927 if len(id) == 40:
928 928 try:
929 929 # a full hex nodeid?
930 930 node = bin(id)
931 931 self.rev(node)
932 932 return node
933 933 except (TypeError, LookupError):
934 934 pass
935 935
936 936 def _partialmatch(self, id):
937 937 try:
938 938 n = self.index.partialmatch(id)
939 939 if n and self.hasnode(n):
940 940 return n
941 941 return None
942 942 except RevlogError:
943 943 # parsers.c radix tree lookup gave multiple matches
944 944 # fall through to slow path that filters hidden revisions
945 945 pass
946 946 except (AttributeError, ValueError):
947 947 # we are pure python, or key was too short to search radix tree
948 948 pass
949 949
950 950 if id in self._pcache:
951 951 return self._pcache[id]
952 952
953 953 if len(id) < 40:
954 954 try:
955 955 # hex(node)[:...]
956 956 l = len(id) // 2 # grab an even number of digits
957 957 prefix = bin(id[:l * 2])
958 958 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
959 959 nl = [n for n in nl if hex(n).startswith(id) and
960 960 self.hasnode(n)]
961 961 if len(nl) > 0:
962 962 if len(nl) == 1:
963 963 self._pcache[id] = nl[0]
964 964 return nl[0]
965 965 raise LookupError(id, self.indexfile,
966 966 _('ambiguous identifier'))
967 967 return None
968 968 except TypeError:
969 969 pass
970 970
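The pure-Python fallback above amounts to scanning node ids for a unique hex prefix. A hedged sketch of that idea on a plain list of 20-byte ids (the helper name and inputs are illustrative, not revlog API):

from binascii import hexlify

def resolve_prefix(nodes, hexprefix):
    """Resolve ``hexprefix`` against ``nodes`` (a list of 20-byte ids).

    Returns the unique match, None if nothing matches, and raises
    ValueError on ambiguity (the revlog raises LookupError instead).
    """
    matches = [n for n in nodes
               if hexlify(n).decode('ascii').startswith(hexprefix)]
    if not matches:
        return None
    if len(matches) > 1:
        raise ValueError('ambiguous identifier: %s' % hexprefix)
    return matches[0]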
971 971 def lookup(self, id):
972 972 """locate a node based on:
973 973 - revision number or str(revision number)
974 974 - nodeid or subset of hex nodeid
975 975 """
976 976 n = self._match(id)
977 977 if n is not None:
978 978 return n
979 979 n = self._partialmatch(id)
980 980 if n:
981 981 return n
982 982
983 983 raise LookupError(id, self.indexfile, _('no match found'))
984 984
985 985 def cmp(self, node, text):
986 986 """compare text with a given file revision
987 987
988 988 returns True if text is different than what is stored.
989 989 """
990 990 p1, p2 = self.parents(node)
991 991 return hash(text, p1, p2) != node
992 992
993 993 def _addchunk(self, offset, data):
994 994 """Add a segment to the revlog cache.
995 995
996 996 Accepts an absolute offset and the data that is at that location.
997 997 """
998 998 o, d = self._chunkcache
999 999 # try to add to existing cache
1000 1000 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1001 1001 self._chunkcache = o, d + data
1002 1002 else:
1003 1003 self._chunkcache = offset, data
1004 1004
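A toy model of the (offset, data) chunk cache handled by _addchunk(), assuming a 64 KiB cap in place of the module-level _chunksize constant: contiguous additions extend the cached segment, anything else replaces it.

_CAP = 65536  # stand-in for the module-level _chunksize constant

class ChunkCache(object):
    """Toy (offset, data) cache with the same extend-or-replace rule."""
    def __init__(self):
        self.cache = (0, b'')

    def add(self, offset, data):
        o, d = self.cache
        if o + len(d) == offset and len(d) + len(data) < _CAP:
            self.cache = (o, d + data)   # contiguous: grow the segment
        else:
            self.cache = (offset, data)  # gap or too big: start over

c = ChunkCache()
c.add(0, b'abc')
c.add(3, b'def')    # cache is now (0, b'abcdef')
c.add(100, b'xyz')  # cache is replaced with (100, b'xyz')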
1005 1005 def _loadchunk(self, offset, length, df=None):
1006 1006 """Load a segment of raw data from the revlog.
1007 1007
1008 1008 Accepts an absolute offset, length to read, and an optional existing
1009 1009 file handle to read from.
1010 1010
1011 1011 If an existing file handle is passed, it will be seeked and the
1012 1012 original seek position will NOT be restored.
1013 1013
1014 1014 Returns a str or buffer of raw byte data.
1015 1015 """
1016 1016 if df is not None:
1017 1017 closehandle = False
1018 1018 else:
1019 1019 if self._inline:
1020 1020 df = self.opener(self.indexfile)
1021 1021 else:
1022 1022 df = self.opener(self.datafile)
1023 1023 closehandle = True
1024 1024
1025 1025 # Cache data both forward and backward around the requested
1026 1026 # data, in a fixed size window. This helps speed up operations
1027 1027 # involving reading the revlog backwards.
1028 1028 cachesize = self._chunkcachesize
1029 1029 realoffset = offset & ~(cachesize - 1)
1030 1030 reallength = (((offset + length + cachesize) & ~(cachesize - 1))
1031 1031 - realoffset)
1032 1032 df.seek(realoffset)
1033 1033 d = df.read(reallength)
1034 1034 if closehandle:
1035 1035 df.close()
1036 1036 self._addchunk(realoffset, d)
1037 1037 if offset != realoffset or reallength != length:
1038 1038 return util.buffer(d, offset - realoffset, length)
1039 1039 return d
1040 1040
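The window arithmetic in _loadchunk() rounds the request out to cache-size-aligned boundaries. A small worked sketch, assuming the cache size is a power of two (which the revlog enforces elsewhere):

def readwindow(offset, length, cachesize=65536):
    """(realoffset, reallength) of the aligned read performed above."""
    realoffset = offset & ~(cachesize - 1)
    reallength = (((offset + length + cachesize) & ~(cachesize - 1))
                  - realoffset)
    return realoffset, reallength

# a 100-byte request at offset 70000 is served from one full 64 KiB window
print(readwindow(70000, 100))  # (65536, 65536)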
1041 1041 def _getchunk(self, offset, length, df=None):
1042 1042 """Obtain a segment of raw data from the revlog.
1043 1043
1044 1044 Accepts an absolute offset, length of bytes to obtain, and an
1045 1045 optional file handle to the already-opened revlog. If the file
1046 1046 handle is used, its original seek position will not be preserved.
1047 1047
1048 1048 Requests for data may be returned from a cache.
1049 1049
1050 1050 Returns a str or a buffer instance of raw byte data.
1051 1051 """
1052 1052 o, d = self._chunkcache
1053 1053 l = len(d)
1054 1054
1055 1055 # is it in the cache?
1056 1056 cachestart = offset - o
1057 1057 cacheend = cachestart + length
1058 1058 if cachestart >= 0 and cacheend <= l:
1059 1059 if cachestart == 0 and cacheend == l:
1060 1060 return d # avoid a copy
1061 1061 return util.buffer(d, cachestart, cacheend - cachestart)
1062 1062
1063 1063 return self._loadchunk(offset, length, df=df)
1064 1064
1065 1065 def _chunkraw(self, startrev, endrev, df=None):
1066 1066 """Obtain a segment of raw data corresponding to a range of revisions.
1067 1067
1068 1068 Accepts the start and end revisions and an optional already-open
1069 1069 file handle to be used for reading. If the file handle is used, its
1070 1070 seek position will not be preserved.
1071 1071
1072 1072 Requests for data may be satisfied by a cache.
1073 1073
1074 Returns a str or a buffer instance of raw byte data. Callers will
1075 need to call ``self.start(rev)`` and ``self.length()`` to determine
1076 where each revision's data begins and ends.
1074 Returns a 2-tuple of (offset, data) for the requested range of
1075 revisions. Offset is the integer offset from the beginning of the
1076 revlog and data is a str or buffer of the raw byte data.
1077
1078 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1079 to determine where each revision's data begins and ends.
1077 1080 """
1078 1081 start = self.start(startrev)
1079 1082 end = self.end(endrev)
1080 1083 if self._inline:
1081 1084 start += (startrev + 1) * self._io.size
1082 1085 end += (endrev + 1) * self._io.size
1083 1086 length = end - start
1084 return self._getchunk(start, length, df=df)
1087
1088 return start, self._getchunk(start, length, df=df)
1085 1089
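The point of this change is that _chunkraw() now reports where the returned buffer starts, so callers can locate each revision inside it instead of assuming the buffer begins at start(startrev). A hedged sketch of the slicing pattern, mirroring what _chunks() does below (rlog and revs are hypothetical placeholders):

def rawchunks(rlog, revs):
    """Slice per-revision compressed chunks out of one _chunkraw() read.

    ``rlog`` is assumed to be a revlog instance and ``revs`` an ascending
    list of revision numbers. Illustration only; real callers go through
    _chunk()/_chunks(), which also decompress.
    """
    offset, data = rlog._chunkraw(revs[0], revs[-1])
    chunks = []
    for rev in revs:
        chunkstart = rlog.start(rev)
        if rlog._inline:
            # inline revlogs interleave index entries with the data
            chunkstart += (rev + 1) * rlog._io.size
        begin = chunkstart - offset
        chunks.append(data[begin:begin + rlog.length(rev)])
    return chunks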
1086 1090 def _chunk(self, rev, df=None):
1087 1091 """Obtain a single decompressed chunk for a revision.
1088 1092
1089 1093 Accepts an integer revision and an optional already-open file handle
1090 1094 to be used for reading. If used, the seek position of the file will not
1091 1095 be preserved.
1092 1096
1093 1097 Returns a str holding uncompressed data for the requested revision.
1094 1098 """
1095 return decompress(self._chunkraw(rev, rev, df=df))
1099 return decompress(self._chunkraw(rev, rev, df=df)[1])
1096 1100
1097 1101 def _chunks(self, revs, df=None):
1098 1102 """Obtain decompressed chunks for the specified revisions.
1099 1103
1100 1104 Accepts an iterable of numeric revisions that are assumed to be in
1101 1105 ascending order. Also accepts an optional already-open file handle
1102 1106 to be used for reading. If used, the seek position of the file will
1103 1107 not be preserved.
1104 1108
1105 1109 This function is similar to calling ``self._chunk()`` multiple times,
1106 1110 but is faster.
1107 1111
1108 1112 Returns a list with decompressed data for each requested revision.
1109 1113 """
1110 1114 if not revs:
1111 1115 return []
1112 1116 start = self.start
1113 1117 length = self.length
1114 1118 inline = self._inline
1115 1119 iosize = self._io.size
1116 1120 buffer = util.buffer
1117 1121
1118 1122 l = []
1119 1123 ladd = l.append
1120 1124
1121 1125 # preload the cache
1122 1126 try:
1123 1127 while True:
1124 1128 # ensure that the cache doesn't change out from under us
1125 1129 _cache = self._chunkcache
1126 self._chunkraw(revs[0], revs[-1], df=df)
1130 self._chunkraw(revs[0], revs[-1], df=df)[1]
1127 1131 if _cache == self._chunkcache:
1128 1132 break
1129 1133 offset, data = _cache
1130 1134 except OverflowError:
1131 1135 # issue4215 - we can't cache a run of chunks greater than
1132 1136 # 2G on Windows
1133 1137 return [self._chunk(rev, df=df) for rev in revs]
1134 1138
1135 1139 for rev in revs:
1136 1140 chunkstart = start(rev)
1137 1141 if inline:
1138 1142 chunkstart += (rev + 1) * iosize
1139 1143 chunklength = length(rev)
1140 1144 ladd(decompress(buffer(data, chunkstart - offset, chunklength)))
1141 1145
1142 1146 return l
1143 1147
1144 1148 def _chunkclear(self):
1145 1149 """Clear the raw chunk cache."""
1146 1150 self._chunkcache = (0, '')
1147 1151
1148 1152 def deltaparent(self, rev):
1149 1153 """return deltaparent of the given revision"""
1150 1154 base = self.index[rev][3]
1151 1155 if base == rev:
1152 1156 return nullrev
1153 1157 elif self._generaldelta:
1154 1158 return base
1155 1159 else:
1156 1160 return rev - 1
1157 1161
1158 1162 def revdiff(self, rev1, rev2):
1159 1163 """return or calculate a delta between two revisions"""
1160 1164 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1161 1165 return str(self._chunk(rev2))
1162 1166
1163 1167 return mdiff.textdiff(self.revision(rev1),
1164 1168 self.revision(rev2))
1165 1169
1166 1170 def revision(self, nodeorrev, _df=None):
1167 1171 """return an uncompressed revision of a given node or revision
1168 1172 number.
1169 1173
1170 1174 _df is an existing file handle to read from. It is meant to only be
1171 1175 used internally.
1172 1176 """
1173 1177 if isinstance(nodeorrev, int):
1174 1178 rev = nodeorrev
1175 1179 node = self.node(rev)
1176 1180 else:
1177 1181 node = nodeorrev
1178 1182 rev = None
1179 1183
1180 1184 cachedrev = None
1181 1185 if node == nullid:
1182 1186 return ""
1183 1187 if self._cache:
1184 1188 if self._cache[0] == node:
1185 1189 return self._cache[2]
1186 1190 cachedrev = self._cache[1]
1187 1191
1188 1192 # look up what we need to read
1189 1193 text = None
1190 1194 if rev is None:
1191 1195 rev = self.rev(node)
1192 1196
1193 1197 # check rev flags
1194 1198 if self.flags(rev) & ~REVIDX_KNOWN_FLAGS:
1195 1199 raise RevlogError(_('incompatible revision flag %x') %
1196 1200 (self.flags(rev) & ~REVIDX_KNOWN_FLAGS))
1197 1201
1198 1202 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1199 1203 if stopped:
1200 1204 text = self._cache[2]
1201 1205
1202 1206 # drop cache to save memory
1203 1207 self._cache = None
1204 1208
1205 1209 bins = self._chunks(chain, df=_df)
1206 1210 if text is None:
1207 1211 text = str(bins[0])
1208 1212 bins = bins[1:]
1209 1213
1210 1214 text = mdiff.patches(text, bins)
1211 1215
1212 1216 text = self._checkhash(text, node, rev)
1213 1217
1214 1218 self._cache = (node, rev, text)
1215 1219 return text
1216 1220
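Stripped of flag checking and caching, revision() is "decompress the delta chain, then apply the deltas on top of the base fulltext". A hedged sketch of that core path (it leans on the _deltachain() helper defined earlier in this file and skips the hash check):

from mercurial import mdiff

def reconstruct(rlog, rev):
    """Rebuild the fulltext of ``rev`` the way revision() does, minus
    flag validation, caching, and the integrity check."""
    chain, stopped = rlog._deltachain(rev)  # stopped is False with no cache
    bins = rlog._chunks(chain)              # decompressed chain members
    text = str(bins[0])                     # chain base is a fulltext
    return mdiff.patches(text, bins[1:])    # apply the deltas in order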
1217 1221 def hash(self, text, p1, p2):
1218 1222 """Compute a node hash.
1219 1223
1220 1224 Available as a function so that subclasses can replace the hash
1221 1225 as needed.
1222 1226 """
1223 1227 return hash(text, p1, p2)
1224 1228
1225 1229 def _checkhash(self, text, node, rev):
1226 1230 p1, p2 = self.parents(node)
1227 1231 self.checkhash(text, p1, p2, node, rev)
1228 1232 return text
1229 1233
1230 1234 def checkhash(self, text, p1, p2, node, rev=None):
1231 1235 if node != self.hash(text, p1, p2):
1232 1236 revornode = rev
1233 1237 if revornode is None:
1234 1238 revornode = templatefilters.short(hex(node))
1235 1239 raise RevlogError(_("integrity check failed on %s:%s")
1236 1240 % (self.indexfile, revornode))
1237 1241
1238 1242 def checkinlinesize(self, tr, fp=None):
1239 1243 """Check if the revlog is too big for inline and convert if so.
1240 1244
1241 1245 This should be called after revisions are added to the revlog. If the
1242 1246 revlog has grown too large to be an inline revlog, it will convert it
1243 1247 to use multiple index and data files.
1244 1248 """
1245 1249 if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
1246 1250 return
1247 1251
1248 1252 trinfo = tr.find(self.indexfile)
1249 1253 if trinfo is None:
1250 1254 raise RevlogError(_("%s not found in the transaction")
1251 1255 % self.indexfile)
1252 1256
1253 1257 trindex = trinfo[2]
1254 1258 if trindex is not None:
1255 1259 dataoff = self.start(trindex)
1256 1260 else:
1257 1261 # revlog was stripped at start of transaction, use all leftover data
1258 1262 trindex = len(self) - 1
1259 1263 dataoff = self.end(-2)
1260 1264
1261 1265 tr.add(self.datafile, dataoff)
1262 1266
1263 1267 if fp:
1264 1268 fp.flush()
1265 1269 fp.close()
1266 1270
1267 1271 df = self.opener(self.datafile, 'w')
1268 1272 try:
1269 1273 for r in self:
1270 df.write(self._chunkraw(r, r))
1274 df.write(self._chunkraw(r, r)[1])
1271 1275 finally:
1272 1276 df.close()
1273 1277
1274 1278 fp = self.opener(self.indexfile, 'w', atomictemp=True)
1275 1279 self.version &= ~(REVLOGNGINLINEDATA)
1276 1280 self._inline = False
1277 1281 for i in self:
1278 1282 e = self._io.packentry(self.index[i], self.node, self.version, i)
1279 1283 fp.write(e)
1280 1284
1281 1285 # if we don't call close, the temp file will never replace the
1282 1286 # real index
1283 1287 fp.close()
1284 1288
1285 1289 tr.replace(self.indexfile, trindex * self._io.size)
1286 1290 self._chunkclear()
1287 1291
1288 1292 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
1289 1293 node=None):
1290 1294 """add a revision to the log
1291 1295
1292 1296 text - the revision data to add
1293 1297 transaction - the transaction object used for rollback
1294 1298 link - the linkrev data to add
1295 1299 p1, p2 - the parent nodeids of the revision
1296 1300 cachedelta - an optional precomputed delta
1297 1301 node - nodeid of revision; typically node is not specified, and it is
1298 1302 computed by default as hash(text, p1, p2), however subclasses might
1299 1303 use a different hashing method (and override checkhash() in such a case)
1300 1304 """
1301 1305 if link == nullrev:
1302 1306 raise RevlogError(_("attempted to add linkrev -1 to %s")
1303 1307 % self.indexfile)
1304 1308
1305 1309 if len(text) > _maxentrysize:
1306 1310 raise RevlogError(
1307 1311 _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
1308 1312 % (self.indexfile, len(text)))
1309 1313
1310 1314 node = node or self.hash(text, p1, p2)
1311 1315 if node in self.nodemap:
1312 1316 return node
1313 1317
1314 1318 dfh = None
1315 1319 if not self._inline:
1316 1320 dfh = self.opener(self.datafile, "a+")
1317 1321 ifh = self.opener(self.indexfile, "a+")
1318 1322 try:
1319 1323 return self._addrevision(node, text, transaction, link, p1, p2,
1320 1324 REVIDX_DEFAULT_FLAGS, cachedelta, ifh, dfh)
1321 1325 finally:
1322 1326 if dfh:
1323 1327 dfh.close()
1324 1328 ifh.close()
1325 1329
1326 1330 def compress(self, text):
1327 1331 """ generate a possibly-compressed representation of text """
1328 1332 if not text:
1329 1333 return ("", text)
1330 1334 l = len(text)
1331 1335 bin = None
1332 1336 if l < 44:
1333 1337 pass
1334 1338 elif l > 1000000:
1335 1339 # zlib makes an internal copy, thus doubling memory usage for
1336 1340 # large files, so let's do this in pieces
1337 1341 z = zlib.compressobj()
1338 1342 p = []
1339 1343 pos = 0
1340 1344 while pos < l:
1341 1345 pos2 = pos + 2**20
1342 1346 p.append(z.compress(text[pos:pos2]))
1343 1347 pos = pos2
1344 1348 p.append(z.flush())
1345 1349 if sum(map(len, p)) < l:
1346 1350 bin = "".join(p)
1347 1351 else:
1348 1352 bin = _compress(text)
1349 1353 if bin is None or len(bin) > l:
1350 1354 if text[0] == '\0':
1351 1355 return ("", text)
1352 1356 return ('u', text)
1353 1357 return ("", bin)
1354 1358
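compress() produces a (header, data) pair: an empty header means zlib output (or literal text that happens to start with NUL), while 'u' marks text stored verbatim because compression did not pay off. A minimal decoder sketch under that reading, written against the Python 2 str handling used throughout this file (the real counterpart is the module-level decompress()):

import zlib

def decode(header, data):
    """Invert the (header, data) pairs returned by compress() above."""
    if header == 'u':
        return data                 # stored uncompressed
    if not data or data[0] == '\0':
        return data                 # empty text, or literal NUL-prefixed text
    return zlib.decompress(data)    # ordinary zlib-compressed payload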
1355 1359 def _isgooddelta(self, d, textlen):
1356 1360 """Returns True if the given delta is good. Good means that it is within
1357 1361 the disk span, disk size, and chain length bounds that we know to be
1358 1362 performant."""
1359 1363 if d is None:
1360 1364 return False
1361 1365
1362 1366 # - 'dist' is the distance from the base revision -- bounding it limits
1363 1367 # the amount of I/O we need to do.
1364 1368 # - 'compresseddeltalen' is the sum of the total size of deltas we need
1365 1369 # to apply -- bounding it limits the amount of CPU we consume.
1366 1370 dist, l, data, base, chainbase, chainlen, compresseddeltalen = d
1367 1371 if (dist > textlen * 4 or l > textlen or
1368 1372 compresseddeltalen > textlen * 2 or
1369 1373 (self._maxchainlen and chainlen > self._maxchainlen)):
1370 1374 return False
1371 1375
1372 1376 return True
1373 1377
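The bounds in _isgooddelta() can be read as three independent inequalities plus an optional chain-length cap; spelled out on a plain tuple (same layout as the tuples built by builddelta() below):

def isgooddelta(d, textlen, maxchainlen=None):
    """Same checks as _isgooddelta(), one condition per line."""
    if d is None:
        return False
    dist, l, data, base, chainbase, chainlen, compresseddeltalen = d
    if dist > textlen * 4:                # too far from the base: too much I/O
        return False
    if l > textlen:                       # delta bigger than the fulltext
        return False
    if compresseddeltalen > textlen * 2:  # whole chain too expensive to apply
        return False
    if maxchainlen and chainlen > maxchainlen:
        return False
    return True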
1374 1378 def _addrevision(self, node, text, transaction, link, p1, p2, flags,
1375 1379 cachedelta, ifh, dfh, alwayscache=False):
1376 1380 """internal function to add revisions to the log
1377 1381
1378 1382 see addrevision for argument descriptions.
1379 1383 invariants:
1380 1384 - text is optional (can be None); if not set, cachedelta must be set.
1381 1385 if both are set, they must correspond to each other.
1382 1386 """
1383 1387 btext = [text]
1384 1388 def buildtext():
1385 1389 if btext[0] is not None:
1386 1390 return btext[0]
1387 1391 baserev = cachedelta[0]
1388 1392 delta = cachedelta[1]
1389 1393 # special case deltas which replace entire base; no need to decode
1390 1394 # base revision. this neatly avoids censored bases, which throw when
1391 1395 # they're decoded.
1392 1396 hlen = struct.calcsize(">lll")
1393 1397 if delta[:hlen] == mdiff.replacediffheader(self.rawsize(baserev),
1394 1398 len(delta) - hlen):
1395 1399 btext[0] = delta[hlen:]
1396 1400 else:
1397 1401 if self._inline:
1398 1402 fh = ifh
1399 1403 else:
1400 1404 fh = dfh
1401 1405 basetext = self.revision(self.node(baserev), _df=fh)
1402 1406 btext[0] = mdiff.patch(basetext, delta)
1403 1407 try:
1404 1408 self.checkhash(btext[0], p1, p2, node)
1405 1409 if flags & REVIDX_ISCENSORED:
1406 1410 raise RevlogError(_('node %s is not censored') % node)
1407 1411 except CensoredNodeError:
1408 1412 # must pass the censored index flag to add censored revisions
1409 1413 if not flags & REVIDX_ISCENSORED:
1410 1414 raise
1411 1415 return btext[0]
1412 1416
1413 1417 def builddelta(rev):
1414 1418 # can we use the cached delta?
1415 1419 if cachedelta and cachedelta[0] == rev:
1416 1420 delta = cachedelta[1]
1417 1421 else:
1418 1422 t = buildtext()
1419 1423 if self.iscensored(rev):
1420 1424 # deltas based on a censored revision must replace the
1421 1425 # full content in one patch, so delta works everywhere
1422 1426 header = mdiff.replacediffheader(self.rawsize(rev), len(t))
1423 1427 delta = header + t
1424 1428 else:
1425 1429 if self._inline:
1426 1430 fh = ifh
1427 1431 else:
1428 1432 fh = dfh
1429 1433 ptext = self.revision(self.node(rev), _df=fh)
1430 1434 delta = mdiff.textdiff(ptext, t)
1431 1435 data = self.compress(delta)
1432 1436 l = len(data[1]) + len(data[0])
1433 1437 if basecache[0] == rev:
1434 1438 chainbase = basecache[1]
1435 1439 else:
1436 1440 chainbase = self.chainbase(rev)
1437 1441 dist = l + offset - self.start(chainbase)
1438 1442 if self._generaldelta:
1439 1443 base = rev
1440 1444 else:
1441 1445 base = chainbase
1442 1446 chainlen, compresseddeltalen = self._chaininfo(rev)
1443 1447 chainlen += 1
1444 1448 compresseddeltalen += l
1445 1449 return dist, l, data, base, chainbase, chainlen, compresseddeltalen
1446 1450
1447 1451 curr = len(self)
1448 1452 prev = curr - 1
1449 1453 base = chainbase = curr
1450 1454 offset = self.end(prev)
1451 1455 delta = None
1452 1456 if self._basecache is None:
1453 1457 self._basecache = (prev, self.chainbase(prev))
1454 1458 basecache = self._basecache
1455 1459 p1r, p2r = self.rev(p1), self.rev(p2)
1456 1460
1457 1461 # full versions are inserted when the needed deltas
1458 1462 # become comparable to the uncompressed text
1459 1463 if text is None:
1460 1464 textlen = mdiff.patchedsize(self.rawsize(cachedelta[0]),
1461 1465 cachedelta[1])
1462 1466 else:
1463 1467 textlen = len(text)
1464 1468
1465 1469 # should we try to build a delta?
1466 1470 if prev != nullrev:
1467 1471 tested = set()
1468 1472 if cachedelta and self._generaldelta and self._lazydeltabase:
1469 1473 # Assume what we received from the server is a good choice
1470 1474 # build delta will reuse the cache
1471 1475 candidatedelta = builddelta(cachedelta[0])
1472 1476 tested.add(cachedelta[0])
1473 1477 if self._isgooddelta(candidatedelta, textlen):
1474 1478 delta = candidatedelta
1475 1479 if delta is None and self._generaldelta:
1476 1480 # exclude already lazy tested base if any
1477 1481 parents = [p for p in (p1r, p2r)
1478 1482 if p != nullrev and p not in tested]
1479 1483 if parents and not self._aggressivemergedeltas:
1480 1484 # Pick whichever parent is closer to us (to minimize the
1481 1485 # chance of having to build a fulltext).
1482 1486 parents = [max(parents)]
1483 1487 tested.update(parents)
1484 1488 pdeltas = []
1485 1489 for p in parents:
1486 1490 pd = builddelta(p)
1487 1491 if self._isgooddelta(pd, textlen):
1488 1492 pdeltas.append(pd)
1489 1493 if pdeltas:
1490 1494 delta = min(pdeltas, key=lambda x: x[1])
1491 1495 if delta is None and prev not in tested:
1492 1496 # other approach failed, try against prev to hopefully save us a
1493 1497 # fulltext.
1494 1498 candidatedelta = builddelta(prev)
1495 1499 if self._isgooddelta(candidatedelta, textlen):
1496 1500 delta = candidatedelta
1497 1501 if delta is not None:
1498 1502 dist, l, data, base, chainbase, chainlen, compresseddeltalen = delta
1499 1503 else:
1500 1504 text = buildtext()
1501 1505 data = self.compress(text)
1502 1506 l = len(data[1]) + len(data[0])
1503 1507 base = chainbase = curr
1504 1508
1505 1509 e = (offset_type(offset, flags), l, textlen,
1506 1510 base, link, p1r, p2r, node)
1507 1511 self.index.insert(-1, e)
1508 1512 self.nodemap[node] = curr
1509 1513
1510 1514 entry = self._io.packentry(e, self.node, self.version, curr)
1511 1515 self._writeentry(transaction, ifh, dfh, entry, data, link, offset)
1512 1516
1513 1517 if alwayscache and text is None:
1514 1518 text = buildtext()
1515 1519
1516 1520 if type(text) == str: # only accept immutable objects
1517 1521 self._cache = (node, curr, text)
1518 1522 self._basecache = (curr, chainbase)
1519 1523 return node
1520 1524
1521 1525 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
1522 1526 # Files opened in a+ mode have inconsistent behavior on various
1523 1527 # platforms. Windows requires that a file positioning call be made
1524 1528 # when the file handle transitions between reads and writes. See
1525 1529 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
1526 1530 # platforms, Python or the platform itself can be buggy. Some versions
1527 1531 # of Solaris have been observed to not append at the end of the file
1528 1532 # if the file was seeked to before the end. See issue4943 for more.
1529 1533 #
1530 1534 # We work around this issue by inserting a seek() before writing.
1531 1535 # Note: This is likely not necessary on Python 3.
1532 1536 ifh.seek(0, os.SEEK_END)
1533 1537 if dfh:
1534 1538 dfh.seek(0, os.SEEK_END)
1535 1539
1536 1540 curr = len(self) - 1
1537 1541 if not self._inline:
1538 1542 transaction.add(self.datafile, offset)
1539 1543 transaction.add(self.indexfile, curr * len(entry))
1540 1544 if data[0]:
1541 1545 dfh.write(data[0])
1542 1546 dfh.write(data[1])
1543 1547 ifh.write(entry)
1544 1548 else:
1545 1549 offset += curr * self._io.size
1546 1550 transaction.add(self.indexfile, offset, curr)
1547 1551 ifh.write(entry)
1548 1552 ifh.write(data[0])
1549 1553 ifh.write(data[1])
1550 1554 self.checkinlinesize(transaction, ifh)
1551 1555
1552 1556 def addgroup(self, cg, linkmapper, transaction, addrevisioncb=None):
1553 1557 """
1554 1558 add a delta group
1555 1559
1556 1560 given a set of deltas, add them to the revision log. the
1557 1561 first delta is against its parent, which should be in our
1558 1562 log, the rest are against the previous delta.
1559 1563
1560 1564 If ``addrevisioncb`` is defined, it will be called with arguments of
1561 1565 this revlog and the node that was added.
1562 1566 """
1563 1567
1564 1568 # track the base of the current delta log
1565 1569 content = []
1566 1570 node = None
1567 1571
1568 1572 r = len(self)
1569 1573 end = 0
1570 1574 if r:
1571 1575 end = self.end(r - 1)
1572 1576 ifh = self.opener(self.indexfile, "a+")
1573 1577 isize = r * self._io.size
1574 1578 if self._inline:
1575 1579 transaction.add(self.indexfile, end + isize, r)
1576 1580 dfh = None
1577 1581 else:
1578 1582 transaction.add(self.indexfile, isize, r)
1579 1583 transaction.add(self.datafile, end)
1580 1584 dfh = self.opener(self.datafile, "a+")
1581 1585 def flush():
1582 1586 if dfh:
1583 1587 dfh.flush()
1584 1588 ifh.flush()
1585 1589 try:
1586 1590 # loop through our set of deltas
1587 1591 chain = None
1588 1592 while True:
1589 1593 chunkdata = cg.deltachunk(chain)
1590 1594 if not chunkdata:
1591 1595 break
1592 1596 node = chunkdata['node']
1593 1597 p1 = chunkdata['p1']
1594 1598 p2 = chunkdata['p2']
1595 1599 cs = chunkdata['cs']
1596 1600 deltabase = chunkdata['deltabase']
1597 1601 delta = chunkdata['delta']
1598 1602 flags = chunkdata['flags'] or REVIDX_DEFAULT_FLAGS
1599 1603
1600 1604 content.append(node)
1601 1605
1602 1606 link = linkmapper(cs)
1603 1607 if node in self.nodemap:
1604 1608 # this can happen if two branches make the same change
1605 1609 chain = node
1606 1610 continue
1607 1611
1608 1612 for p in (p1, p2):
1609 1613 if p not in self.nodemap:
1610 1614 raise LookupError(p, self.indexfile,
1611 1615 _('unknown parent'))
1612 1616
1613 1617 if deltabase not in self.nodemap:
1614 1618 raise LookupError(deltabase, self.indexfile,
1615 1619 _('unknown delta base'))
1616 1620
1617 1621 baserev = self.rev(deltabase)
1618 1622
1619 1623 if baserev != nullrev and self.iscensored(baserev):
1620 1624 # if base is censored, delta must be full replacement in a
1621 1625 # single patch operation
1622 1626 hlen = struct.calcsize(">lll")
1623 1627 oldlen = self.rawsize(baserev)
1624 1628 newlen = len(delta) - hlen
1625 1629 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
1626 1630 raise error.CensoredBaseError(self.indexfile,
1627 1631 self.node(baserev))
1628 1632
1629 1633 if not flags and self._peek_iscensored(baserev, delta, flush):
1630 1634 flags |= REVIDX_ISCENSORED
1631 1635
1632 1636 # We assume consumers of addrevisioncb will want to retrieve
1633 1637 # the added revision, which will require a call to
1634 1638 # revision(). revision() will fast path if there is a cache
1635 1639 # hit. So, we tell _addrevision() to always cache in this case.
1636 1640 chain = self._addrevision(node, None, transaction, link,
1637 1641 p1, p2, flags, (baserev, delta),
1638 1642 ifh, dfh,
1639 1643 alwayscache=bool(addrevisioncb))
1640 1644
1641 1645 if addrevisioncb:
1642 1646 addrevisioncb(self, chain)
1643 1647
1644 1648 if not dfh and not self._inline:
1645 1649 # addrevision switched from inline to conventional
1646 1650 # reopen the index
1647 1651 ifh.close()
1648 1652 dfh = self.opener(self.datafile, "a+")
1649 1653 ifh = self.opener(self.indexfile, "a+")
1650 1654 finally:
1651 1655 if dfh:
1652 1656 dfh.close()
1653 1657 ifh.close()
1654 1658
1655 1659 return content
1656 1660
1657 1661 def iscensored(self, rev):
1658 1662 """Check if a file revision is censored."""
1659 1663 return False
1660 1664
1661 1665 def _peek_iscensored(self, baserev, delta, flush):
1662 1666 """Quickly check if a delta produces a censored revision."""
1663 1667 return False
1664 1668
1665 1669 def getstrippoint(self, minlink):
1666 1670 """find the minimum rev that must be stripped to strip the linkrev
1667 1671
1668 1672 Returns a tuple containing the minimum rev and a set of all revs that
1669 1673 have linkrevs that will be broken by this strip.
1670 1674 """
1671 1675 brokenrevs = set()
1672 1676 strippoint = len(self)
1673 1677
1674 1678 heads = {}
1675 1679 futurelargelinkrevs = set()
1676 1680 for head in self.headrevs():
1677 1681 headlinkrev = self.linkrev(head)
1678 1682 heads[head] = headlinkrev
1679 1683 if headlinkrev >= minlink:
1680 1684 futurelargelinkrevs.add(headlinkrev)
1681 1685
1682 1686 # This algorithm involves walking down the rev graph, starting at the
1683 1687 # heads. Since the revs are topologically sorted according to linkrev,
1684 1688 # once all head linkrevs are below the minlink, we know there are
1685 1689 # no more revs that could have a linkrev greater than minlink.
1686 1690 # So we can stop walking.
1687 1691 while futurelargelinkrevs:
1688 1692 strippoint -= 1
1689 1693 linkrev = heads.pop(strippoint)
1690 1694
1691 1695 if linkrev < minlink:
1692 1696 brokenrevs.add(strippoint)
1693 1697 else:
1694 1698 futurelargelinkrevs.remove(linkrev)
1695 1699
1696 1700 for p in self.parentrevs(strippoint):
1697 1701 if p != nullrev:
1698 1702 plinkrev = self.linkrev(p)
1699 1703 heads[p] = plinkrev
1700 1704 if plinkrev >= minlink:
1701 1705 futurelargelinkrevs.add(plinkrev)
1702 1706
1703 1707 return strippoint, brokenrevs
1704 1708
1705 1709 def strip(self, minlink, transaction):
1706 1710 """truncate the revlog on the first revision with a linkrev >= minlink
1707 1711
1708 1712 This function is called when we're stripping revision minlink and
1709 1713 its descendants from the repository.
1710 1714
1711 1715 We have to remove all revisions with linkrev >= minlink, because
1712 1716 the equivalent changelog revisions will be renumbered after the
1713 1717 strip.
1714 1718
1715 1719 So we truncate the revlog on the first of these revisions, and
1716 1720 trust that the caller has saved the revisions that shouldn't be
1717 1721 removed and that it'll re-add them after this truncation.
1718 1722 """
1719 1723 if len(self) == 0:
1720 1724 return
1721 1725
1722 1726 rev, _ = self.getstrippoint(minlink)
1723 1727 if rev == len(self):
1724 1728 return
1725 1729
1726 1730 # first truncate the files on disk
1727 1731 end = self.start(rev)
1728 1732 if not self._inline:
1729 1733 transaction.add(self.datafile, end)
1730 1734 end = rev * self._io.size
1731 1735 else:
1732 1736 end += rev * self._io.size
1733 1737
1734 1738 transaction.add(self.indexfile, end)
1735 1739
1736 1740 # then reset internal state in memory to forget those revisions
1737 1741 self._cache = None
1738 1742 self._chaininfocache = {}
1739 1743 self._chunkclear()
1740 1744 for x in xrange(rev, len(self)):
1741 1745 del self.nodemap[self.node(x)]
1742 1746
1743 1747 del self.index[rev:-1]
1744 1748
1745 1749 def checksize(self):
1746 1750 expected = 0
1747 1751 if len(self):
1748 1752 expected = max(0, self.end(len(self) - 1))
1749 1753
1750 1754 try:
1751 1755 f = self.opener(self.datafile)
1752 1756 f.seek(0, 2)
1753 1757 actual = f.tell()
1754 1758 f.close()
1755 1759 dd = actual - expected
1756 1760 except IOError as inst:
1757 1761 if inst.errno != errno.ENOENT:
1758 1762 raise
1759 1763 dd = 0
1760 1764
1761 1765 try:
1762 1766 f = self.opener(self.indexfile)
1763 1767 f.seek(0, 2)
1764 1768 actual = f.tell()
1765 1769 f.close()
1766 1770 s = self._io.size
1767 1771 i = max(0, actual // s)
1768 1772 di = actual - (i * s)
1769 1773 if self._inline:
1770 1774 databytes = 0
1771 1775 for r in self:
1772 1776 databytes += max(0, self.length(r))
1773 1777 dd = 0
1774 1778 di = actual - len(self) * s - databytes
1775 1779 except IOError as inst:
1776 1780 if inst.errno != errno.ENOENT:
1777 1781 raise
1778 1782 di = 0
1779 1783
1780 1784 return (dd, di)
1781 1785
1782 1786 def files(self):
1783 1787 res = [self.indexfile]
1784 1788 if not self._inline:
1785 1789 res.append(self.datafile)
1786 1790 return res