test-pathencode.py
278 lines
| 6.5 KiB
| text/x-python
|
PythonLexer
/ tests / test-pathencode.py
Bryan O'Sullivan
|
r17934 | # This is a randomized test that generates different pathnames every | ||
# time it is invoked, and tests the encoding of those pathnames. | ||||
# | ||||
# It uses a simple probabilistic model to generate valid pathnames | ||||
timeless@mozdev.org
|
r26098 | # that have proven likely to expose bugs and divergent behavior in | ||
Bryan O'Sullivan
|
r17934 | # different encoding implementations. | ||
Pulkit Goyal
|
r28918 | |||
Pulkit Goyal
|
r28928 | import binascii | ||
Bryan O'Sullivan
|
r17935 | import collections | ||
Pulkit Goyal
|
r28928 | import itertools | ||
import math | ||||
import os | ||||
import random | ||||
import sys | ||||
import time | ||||
from mercurial import ( | ||||
Augie Fackler
|
r37897 | pycompat, | ||
Pulkit Goyal
|
r28928 | store, | ||
) | ||||
Bryan O'Sullivan
|
r17934 | |||
Augie Fackler
|
r37897 | validchars = set(map(pycompat.bytechr, range(0, 256))) | ||
Bryan O'Sullivan
|
r17934 | alphanum = range(ord('A'), ord('Z')) | ||
Augie Fackler
|
r37897 | for c in (b'\0', b'/'): | ||
Bryan O'Sullivan
|
r17934 | validchars.remove(c) | ||
Augie Fackler
|
r43346 | winreserved = ( | ||
b'aux con prn nul'.split() | ||||
Manuel Jacob
|
r50180 | + [b'com%d' % i for i in range(1, 10)] | ||
+ [b'lpt%d' % i for i in range(1, 10)] | ||||
Augie Fackler
|
r43346 | ) | ||
Bryan O'Sullivan
|
r17934 | |||
def casecombinations(names): | ||||
'''Build all case-diddled combinations of names.''' | ||||
combos = set() | ||||
for r in names: | ||||
Manuel Jacob
|
r50180 | for i in range(len(r) + 1): | ||
for c in itertools.combinations(range(len(r)), i): | ||||
Bryan O'Sullivan
|
r17934 | d = r | ||
for j in c: | ||||
Augie Fackler
|
r43346 | d = b''.join((d[:j], d[j : j + 1].upper(), d[j + 1 :])) | ||
Bryan O'Sullivan
|
r17934 | combos.add(d) | ||
return sorted(combos) | ||||
Augie Fackler
|
r43346 | |||
Bryan O'Sullivan
|
r17934 | def buildprobtable(fp, cmd='hg manifest tip'): | ||
Augie Fackler
|
r46554 | """Construct and print a table of probabilities for path name | ||
components. The numbers are percentages.""" | ||||
Bryan O'Sullivan
|
r17934 | |||
Bryan O'Sullivan
|
r17935 | counts = collections.defaultdict(lambda: 0) | ||
Bryan O'Sullivan
|
r17934 | for line in os.popen(cmd).read().splitlines(): | ||
if line[-2:] in ('.i', '.d'): | ||||
line = line[:-2] | ||||
if line.startswith('data/'): | ||||
line = line[5:] | ||||
for c in line: | ||||
counts[c] += 1 | ||||
for c in '\r/\n': | ||||
counts.pop(c, None) | ||||
Gregory Szorc
|
r49790 | t = sum(counts.values()) / 100.0 | ||
Bryan O'Sullivan
|
r17934 | fp.write('probtable = (') | ||
Augie Fackler
|
r43346 | for i, (k, v) in enumerate( | ||
sorted(counts.items(), key=lambda x: x[1], reverse=True) | ||||
): | ||||
Bryan O'Sullivan
|
r17934 | if (i % 5) == 0: | ||
fp.write('\n ') | ||||
vt = v / t | ||||
if vt < 0.0005: | ||||
break | ||||
fp.write('(%r, %.03f), ' % (k, vt)) | ||||
fp.write('\n )\n') | ||||
Augie Fackler
|
r43346 | |||
Bryan O'Sullivan
|
r17934 | # A table of character frequencies (as percentages), gleaned by | ||
# looking at filelog names from a real-world, very large repo. | ||||
probtable = ( | ||||
Augie Fackler
|
r43346 | (b't', 9.828), | ||
(b'e', 9.042), | ||||
(b's', 8.011), | ||||
(b'a', 6.801), | ||||
(b'i', 6.618), | ||||
(b'g', 5.053), | ||||
(b'r', 5.030), | ||||
(b'o', 4.887), | ||||
(b'p', 4.363), | ||||
(b'n', 4.258), | ||||
(b'l', 3.830), | ||||
(b'h', 3.693), | ||||
(b'_', 3.659), | ||||
(b'.', 3.377), | ||||
(b'm', 3.194), | ||||
(b'u', 2.364), | ||||
(b'd', 2.296), | ||||
(b'c', 2.163), | ||||
(b'b', 1.739), | ||||
(b'f', 1.625), | ||||
(b'6', 0.666), | ||||
(b'j', 0.610), | ||||
(b'y', 0.554), | ||||
(b'x', 0.487), | ||||
(b'w', 0.477), | ||||
(b'k', 0.476), | ||||
(b'v', 0.473), | ||||
(b'3', 0.336), | ||||
(b'1', 0.335), | ||||
(b'2', 0.326), | ||||
(b'4', 0.310), | ||||
(b'5', 0.305), | ||||
(b'9', 0.302), | ||||
(b'8', 0.300), | ||||
(b'7', 0.299), | ||||
(b'q', 0.298), | ||||
(b'0', 0.250), | ||||
(b'z', 0.223), | ||||
(b'-', 0.118), | ||||
(b'C', 0.095), | ||||
(b'T', 0.087), | ||||
(b'F', 0.085), | ||||
(b'B', 0.077), | ||||
(b'S', 0.076), | ||||
(b'P', 0.076), | ||||
(b'L', 0.059), | ||||
(b'A', 0.058), | ||||
(b'N', 0.051), | ||||
(b'D', 0.049), | ||||
(b'M', 0.046), | ||||
(b'E', 0.039), | ||||
(b'I', 0.035), | ||||
(b'R', 0.035), | ||||
(b'G', 0.028), | ||||
(b'U', 0.026), | ||||
(b'W', 0.025), | ||||
(b'O', 0.017), | ||||
(b'V', 0.015), | ||||
(b'H', 0.013), | ||||
(b'Q', 0.011), | ||||
(b'J', 0.007), | ||||
(b'K', 0.005), | ||||
(b'+', 0.004), | ||||
(b'X', 0.003), | ||||
(b'Y', 0.001), | ||||
) | ||||
Bryan O'Sullivan
|
r17934 | |||
for c, _ in probtable: | ||||
validchars.remove(c) | ||||
validchars = list(validchars) | ||||
Augie Fackler
|
r43346 | |||
Bryan O'Sullivan
|
r17934 | def pickfrom(rng, table): | ||
c = 0 | ||||
r = rng.random() * sum(i[1] for i in table) | ||||
for i, p in table: | ||||
c += p | ||||
if c >= r: | ||||
return i | ||||
Augie Fackler
|
r43346 | |||
Bryan O'Sullivan
|
r17934 | reservedcombos = casecombinations(winreserved) | ||
# The first component of a name following a slash. | ||||
firsttable = ( | ||||
(lambda rng: pickfrom(rng, probtable), 90), | ||||
(lambda rng: rng.choice(validchars), 5), | ||||
(lambda rng: rng.choice(reservedcombos), 5), | ||||
Augie Fackler
|
r43346 | ) | ||
Bryan O'Sullivan
|
r17934 | |||
# Components of a name following the first. | ||||
resttable = firsttable[:-1] | ||||
# Special suffixes. | ||||
Augie Fackler
|
r37897 | internalsuffixcombos = casecombinations(b'.hg .i .d'.split()) | ||
Bryan O'Sullivan
|
r17934 | |||
# The last component of a path, before a slash or at the end of a name. | ||||
lasttable = resttable + ( | ||||
Augie Fackler
|
r37897 | (lambda rng: b'', 95), | ||
Bryan O'Sullivan
|
r17934 | (lambda rng: rng.choice(internalsuffixcombos), 5), | ||
Augie Fackler
|
r43346 | ) | ||
Bryan O'Sullivan
|
r17934 | |||
def makepart(rng, k): | ||||
'''Construct a part of a pathname, without slashes.''' | ||||
p = pickfrom(rng, firsttable)(rng) | ||||
l = len(p) | ||||
ps = [p] | ||||
Siddharth Agarwal
|
r19319 | maxl = rng.randint(1, k) | ||
while l < maxl: | ||||
Bryan O'Sullivan
|
r17934 | p = pickfrom(rng, resttable)(rng) | ||
l += len(p) | ||||
ps.append(p) | ||||
ps.append(pickfrom(rng, lasttable)(rng)) | ||||
Augie Fackler
|
r37897 | return b''.join(ps) | ||
Bryan O'Sullivan
|
r17934 | |||
Augie Fackler
|
r43346 | |||
Bryan O'Sullivan
|
r17934 | def makepath(rng, j, k): | ||
'''Construct a complete pathname.''' | ||||
Augie Fackler
|
r43346 | return ( | ||
b'data/' | ||||
Manuel Jacob
|
r50180 | + b'/'.join(makepart(rng, k) for _ in range(j)) | ||
Augie Fackler
|
r43346 | + rng.choice([b'.d', b'.i']) | ||
) | ||||
Bryan O'Sullivan
|
r17934 | |||
def genpath(rng, count): | ||||
'''Generate random pathnames with gradually increasing lengths.''' | ||||
mink, maxk = 1, 4096 | ||||
Augie Fackler
|
r43346 | |||
Bryan O'Sullivan
|
r17934 | def steps(): | ||
Manuel Jacob
|
r50180 | for i in range(count): | ||
Bryan O'Sullivan
|
r17934 | yield mink + int(round(math.sqrt((maxk - mink) * float(i) / count))) | ||
Augie Fackler
|
r43346 | |||
Bryan O'Sullivan
|
r17934 | for k in steps(): | ||
x = rng.randint(1, k) | ||||
y = rng.randint(1, k) | ||||
yield makepath(rng, x, y) | ||||
Augie Fackler
|
r43346 | |||
Bryan O'Sullivan
|
r17934 | def runtests(rng, seed, count): | ||
nerrs = 0 | ||||
for p in genpath(rng, count): | ||||
Augie Fackler
|
r43346 | h = store._pathencode(p) # uses C implementation, if available | ||
r = store._hybridencode(p, True) # reference implementation in Python | ||||
Adrian Buehlmann
|
r18094 | if h != r: | ||
if nerrs == 0: | ||||
Pulkit Goyal
|
r28918 | print('seed:', hex(seed)[:-1], file=sys.stderr) | ||
print("\np: '%s'" % p.encode("string_escape"), file=sys.stderr) | ||||
print("h: '%s'" % h.encode("string_escape"), file=sys.stderr) | ||||
print("r: '%s'" % r.encode("string_escape"), file=sys.stderr) | ||||
Adrian Buehlmann
|
r18094 | nerrs += 1 | ||
Bryan O'Sullivan
|
r17934 | return nerrs | ||
Augie Fackler
|
r43346 | |||
Bryan O'Sullivan
|
r17934 | def main(): | ||
import getopt | ||||
# Empirically observed to take about a second to run | ||||
count = 100 | ||||
seed = None | ||||
Augie Fackler
|
r43346 | opts, args = getopt.getopt( | ||
sys.argv[1:], 'c:s:', ['build', 'count=', 'seed='] | ||||
) | ||||
Bryan O'Sullivan
|
r17934 | for o, a in opts: | ||
if o in ('-c', '--count'): | ||||
count = int(a) | ||||
elif o in ('-s', '--seed'): | ||||
Augie Fackler
|
r43346 | seed = int(a, base=0) # accepts base 10 or 16 strings | ||
Bryan O'Sullivan
|
r17934 | elif o == '--build': | ||
Augie Fackler
|
r43346 | buildprobtable( | ||
sys.stdout, | ||||
'find .hg/store/data -type f && ' | ||||
'cat .hg/store/fncache 2>/dev/null', | ||||
) | ||||
Bryan O'Sullivan
|
r17934 | sys.exit(0) | ||
if seed is None: | ||||
try: | ||||
Augie Fackler
|
r34222 | seed = int(binascii.hexlify(os.urandom(16)), 16) | ||
Bryan O'Sullivan
|
r17934 | except AttributeError: | ||
Augie Fackler
|
r34222 | seed = int(time.time() * 1000) | ||
Bryan O'Sullivan
|
r17934 | |||
rng = random.Random(seed) | ||||
if runtests(rng, seed, count): | ||||
sys.exit(1) | ||||
Augie Fackler
|
r43346 | |||
Bryan O'Sullivan
|
r17947 | if __name__ == '__main__': | ||
Bryan O'Sullivan
|
r17934 | main() | ||