##// END OF EJS Templates
repoview: fix corrupted hiddencache crash Mercurial (issue5042)...
repoview: fix corrupted hiddencache crash Mercurial (issue5042) Before this patch if the hiddencache existed but was empty, it would crash mercurial. This patch adds exception handling when reading the hiddencache to avoid the issue. When encountering a corrupted cache file we print a devel warning. There would be no point in issuing a normal warning as the user wouldn't be able to do anything about the situation. The warning looks like: devel-warn: corrupted hidden cache, removing it at: /path/to/repoview.py

File last commit:

r26849:a6eb4e48 default
r27917:97e0dc6d stable
Show More
test-pathencode.py
194 lines | 6.2 KiB | text/x-python | PythonLexer
# This is a randomized test that generates different pathnames every
# time it is invoked, and tests the encoding of those pathnames.
#
# It uses a simple probabilistic model to generate valid pathnames
# that have proven likely to expose bugs and divergent behavior in
# different encoding implementations.
from mercurial import store
import binascii, itertools, math, os, random, sys, time
import collections
validchars = set(map(chr, range(0, 256)))
alphanum = range(ord('A'), ord('Z'))
for c in '\0/':
validchars.remove(c)
winreserved = ('aux con prn nul'.split() +
['com%d' % i for i in xrange(1, 10)] +
['lpt%d' % i for i in xrange(1, 10)])
def casecombinations(names):
'''Build all case-diddled combinations of names.'''
combos = set()
for r in names:
for i in xrange(len(r) + 1):
for c in itertools.combinations(xrange(len(r)), i):
d = r
for j in c:
d = ''.join((d[:j], d[j].upper(), d[j + 1:]))
combos.add(d)
return sorted(combos)
def buildprobtable(fp, cmd='hg manifest tip'):
'''Construct and print a table of probabilities for path name
components. The numbers are percentages.'''
counts = collections.defaultdict(lambda: 0)
for line in os.popen(cmd).read().splitlines():
if line[-2:] in ('.i', '.d'):
line = line[:-2]
if line.startswith('data/'):
line = line[5:]
for c in line:
counts[c] += 1
for c in '\r/\n':
counts.pop(c, None)
t = sum(counts.itervalues()) / 100.0
fp.write('probtable = (')
for i, (k, v) in enumerate(sorted(counts.iteritems(), key=lambda x: x[1],
reverse=True)):
if (i % 5) == 0:
fp.write('\n ')
vt = v / t
if vt < 0.0005:
break
fp.write('(%r, %.03f), ' % (k, vt))
fp.write('\n )\n')
# A table of character frequencies (as percentages), gleaned by
# looking at filelog names from a real-world, very large repo.
probtable = (
('t', 9.828), ('e', 9.042), ('s', 8.011), ('a', 6.801), ('i', 6.618),
('g', 5.053), ('r', 5.030), ('o', 4.887), ('p', 4.363), ('n', 4.258),
('l', 3.830), ('h', 3.693), ('_', 3.659), ('.', 3.377), ('m', 3.194),
('u', 2.364), ('d', 2.296), ('c', 2.163), ('b', 1.739), ('f', 1.625),
('6', 0.666), ('j', 0.610), ('y', 0.554), ('x', 0.487), ('w', 0.477),
('k', 0.476), ('v', 0.473), ('3', 0.336), ('1', 0.335), ('2', 0.326),
('4', 0.310), ('5', 0.305), ('9', 0.302), ('8', 0.300), ('7', 0.299),
('q', 0.298), ('0', 0.250), ('z', 0.223), ('-', 0.118), ('C', 0.095),
('T', 0.087), ('F', 0.085), ('B', 0.077), ('S', 0.076), ('P', 0.076),
('L', 0.059), ('A', 0.058), ('N', 0.051), ('D', 0.049), ('M', 0.046),
('E', 0.039), ('I', 0.035), ('R', 0.035), ('G', 0.028), ('U', 0.026),
('W', 0.025), ('O', 0.017), ('V', 0.015), ('H', 0.013), ('Q', 0.011),
('J', 0.007), ('K', 0.005), ('+', 0.004), ('X', 0.003), ('Y', 0.001),
)
for c, _ in probtable:
validchars.remove(c)
validchars = list(validchars)
def pickfrom(rng, table):
c = 0
r = rng.random() * sum(i[1] for i in table)
for i, p in table:
c += p
if c >= r:
return i
reservedcombos = casecombinations(winreserved)
# The first component of a name following a slash.
firsttable = (
(lambda rng: pickfrom(rng, probtable), 90),
(lambda rng: rng.choice(validchars), 5),
(lambda rng: rng.choice(reservedcombos), 5),
)
# Components of a name following the first.
resttable = firsttable[:-1]
# Special suffixes.
internalsuffixcombos = casecombinations('.hg .i .d'.split())
# The last component of a path, before a slash or at the end of a name.
lasttable = resttable + (
(lambda rng: '', 95),
(lambda rng: rng.choice(internalsuffixcombos), 5),
)
def makepart(rng, k):
'''Construct a part of a pathname, without slashes.'''
p = pickfrom(rng, firsttable)(rng)
l = len(p)
ps = [p]
maxl = rng.randint(1, k)
while l < maxl:
p = pickfrom(rng, resttable)(rng)
l += len(p)
ps.append(p)
ps.append(pickfrom(rng, lasttable)(rng))
return ''.join(ps)
def makepath(rng, j, k):
'''Construct a complete pathname.'''
return ('data/' + '/'.join(makepart(rng, k) for _ in xrange(j)) +
rng.choice(['.d', '.i']))
def genpath(rng, count):
'''Generate random pathnames with gradually increasing lengths.'''
mink, maxk = 1, 4096
def steps():
for i in xrange(count):
yield mink + int(round(math.sqrt((maxk - mink) * float(i) / count)))
for k in steps():
x = rng.randint(1, k)
y = rng.randint(1, k)
yield makepath(rng, x, y)
def runtests(rng, seed, count):
nerrs = 0
for p in genpath(rng, count):
h = store._pathencode(p) # uses C implementation, if available
r = store._hybridencode(p, True) # reference implementation in Python
if h != r:
if nerrs == 0:
print >> sys.stderr, 'seed:', hex(seed)[:-1]
print >> sys.stderr, "\np: '%s'" % p.encode("string_escape")
print >> sys.stderr, "h: '%s'" % h.encode("string_escape")
print >> sys.stderr, "r: '%s'" % r.encode("string_escape")
nerrs += 1
return nerrs
def main():
import getopt
# Empirically observed to take about a second to run
count = 100
seed = None
opts, args = getopt.getopt(sys.argv[1:], 'c:s:',
['build', 'count=', 'seed='])
for o, a in opts:
if o in ('-c', '--count'):
count = int(a)
elif o in ('-s', '--seed'):
seed = long(a, base=0) # accepts base 10 or 16 strings
elif o == '--build':
buildprobtable(sys.stdout,
'find .hg/store/data -type f && '
'cat .hg/store/fncache 2>/dev/null')
sys.exit(0)
if seed is None:
try:
seed = long(binascii.hexlify(os.urandom(16)), 16)
except AttributeError:
seed = long(time.time() * 1000)
rng = random.Random(seed)
if runtests(rng, seed, count):
sys.exit(1)
if __name__ == '__main__':
main()