##// END OF EJS Templates
revbranchcache: populate cache incrementally...
revbranchcache: populate cache incrementally Previously the cache would populate completely the first time it was accessed. This could take over a minute on larger repos. This patch changes it to update incrementally. Only values that are read will be written, and it will only rewrite as much of the file as strictly necessary. This adds a magic value of '\0\0\0\0' to represent an empty cache entry. The probability of this matching an actual commit hash prefix is tiny, so it's ok if that's always considered a cache miss. This is also BC safe since any existing entries with '\0\0\0\0' will just be considered misses. Perf numbers: Mozilla-central: hg --time log -r 'branch(mobile)' -T. Cold Cache: 14.7s -> 15.1s (3% worse) Warm Cache: 1.6s -> 2.1s (30% worse) Mozilla-cental: hg perfbranchmap 2s -> 2.4s (20% worse) hg: hg log -r 'branch(stable) & branch(default)' Cold Cache: 3.1s -> 1.9s (40% better - because the old code missed the cache on both branch() revset iterations, so it did twice the work) Warm Cache: 0.2 -> 0.26 (30% worse) internal huge repo: hg --time log -r 'tip & branch(default)' Cold Cache: 65.4s -> 0.2s (327x better) While this change introduces minor regressions when iterating over every commit in a branch, it massively improves the cold cache time for operations which touch a single commit. I feel the better O() is worth it in this case.

File last commit:

r15530:eeac5e17 default
r24376:203a078d default
Show More
bdiff.py
87 lines | 2.3 KiB | text/x-python | PythonLexer
Martin Geisler
pure Python implementation of bdiff.c
r7703 # bdiff.py - Python implementation of bdiff.c
#
# Copyright 2009 Matt Mackall <mpm@selenic.com> and others
#
Martin Geisler
updated license to be explicit about GPL version 2
r8225 # This software may be used and distributed according to the terms of the
Matt Mackall
Update license to GPLv2+
r10263 # GNU General Public License version 2 or any later version.
Martin Geisler
pure Python implementation of bdiff.c
r7703
Patrick Mezard
mdiff: replace wscleanup() regexps with C loops...
r15530 import struct, difflib, re
Matt Mackall
pure/bdiff: fix circular import
r7944
def splitnewlines(text):
'''like str.splitlines, but only split on newlines.'''
lines = [l + '\n' for l in text.split('\n')]
if lines:
if lines[-1] == '\n':
lines.pop()
else:
lines[-1] = lines[-1][:-1]
return lines
Martin Geisler
pure Python implementation of bdiff.c
r7703
def _normalizeblocks(a, b, blocks):
prev = None
Dan Villiom Podlaski Christiansen
pure bdiff: don't use a generator...
r14066 r = []
Martin Geisler
pure Python implementation of bdiff.c
r7703 for curr in blocks:
if prev is None:
prev = curr
continue
shift = 0
a1, b1, l1 = prev
a1end = a1 + l1
b1end = b1 + l1
a2, b2, l2 = curr
a2end = a2 + l2
b2end = b2 + l2
if a1end == a2:
Matt Mackall
many, many trivial check-code fixups
r10282 while (a1end + shift < a2end and
a[a1end + shift] == b[b1end + shift]):
Martin Geisler
pure Python implementation of bdiff.c
r7703 shift += 1
elif b1end == b2:
Matt Mackall
many, many trivial check-code fixups
r10282 while (b1end + shift < b2end and
a[a1end + shift] == b[b1end + shift]):
Martin Geisler
pure Python implementation of bdiff.c
r7703 shift += 1
Dan Villiom Podlaski Christiansen
pure bdiff: don't use a generator...
r14066 r.append((a1, b1, l1 + shift))
Matt Mackall
many, many trivial check-code fixups
r10282 prev = a2 + shift, b2 + shift, l2 - shift
Dan Villiom Podlaski Christiansen
pure bdiff: don't use a generator...
r14066 r.append(prev)
return r
Martin Geisler
pure Python implementation of bdiff.c
r7703
def bdiff(a, b):
a = str(a).splitlines(True)
b = str(b).splitlines(True)
if not a:
s = "".join(b)
return s and (struct.pack(">lll", 0, 0, len(s)) + s)
bin = []
p = [0]
for i in a: p.append(p[-1] + len(i))
d = difflib.SequenceMatcher(None, a, b).get_matching_blocks()
d = _normalizeblocks(a, b, d)
la = 0
lb = 0
for am, bm, size in d:
s = "".join(b[lb:bm])
if am > la or s:
bin.append(struct.pack(">lll", p[la], p[am], len(s)) + s)
la = am + size
lb = bm + size
return "".join(bin)
def blocks(a, b):
Matt Mackall
pure/bdiff: fix circular import
r7944 an = splitnewlines(a)
bn = splitnewlines(b)
Martin Geisler
pure Python implementation of bdiff.c
r7703 d = difflib.SequenceMatcher(None, an, bn).get_matching_blocks()
d = _normalizeblocks(an, bn, d)
return [(i, i + n, j, j + n) for (i, j, n) in d]
Patrick Mezard
mdiff: replace wscleanup() regexps with C loops...
r15530 def fixws(text, allws):
if allws:
text = re.sub('[ \t\r]+', '', text)
else:
text = re.sub('[ \t\r]+', ' ', text)
text = text.replace(' \n', '\n')
return text