test-pathencode.py
210 lines
| 6.5 KiB
| text/x-python
|
PythonLexer
/ tests / test-pathencode.py
Bryan O'Sullivan
|
r17934 | # This is a randomized test that generates different pathnames every | ||
# time it is invoked, and tests the encoding of those pathnames. | ||||
# | ||||
# It uses a simple probabilistic model to generate valid pathnames | ||||
timeless@mozdev.org
|
r26098 | # that have proven likely to expose bugs and divergent behavior in | ||
Bryan O'Sullivan
|
r17934 | # different encoding implementations. | ||
Pulkit Goyal
|
r28928 | from __future__ import absolute_import, print_function | ||
Pulkit Goyal
|
r28918 | |||
Pulkit Goyal
|
r28928 | import binascii | ||
Bryan O'Sullivan
|
r17935 | import collections | ||
Pulkit Goyal
|
r28928 | import itertools | ||
import math | ||||
import os | ||||
import random | ||||
import sys | ||||
import time | ||||
from mercurial import ( | ||||
Augie Fackler
|
r37897 | pycompat, | ||
Pulkit Goyal
|
r28928 | store, | ||
) | ||||
Bryan O'Sullivan
|
r17934 | |||
Augie Fackler
|
r34221 | try: | ||
xrange | ||||
except NameError: | ||||
xrange = range | ||||
Augie Fackler
|
r37897 | validchars = set(map(pycompat.bytechr, range(0, 256))) | ||
Bryan O'Sullivan
|
r17934 | alphanum = range(ord('A'), ord('Z')) | ||
Augie Fackler
|
r37897 | for c in (b'\0', b'/'): | ||
Bryan O'Sullivan
|
r17934 | validchars.remove(c) | ||
Augie Fackler
|
r37897 | winreserved = (b'aux con prn nul'.split() + | ||
[b'com%d' % i for i in xrange(1, 10)] + | ||||
[b'lpt%d' % i for i in xrange(1, 10)]) | ||||
Bryan O'Sullivan
|
r17934 | |||
def casecombinations(names): | ||||
'''Build all case-diddled combinations of names.''' | ||||
combos = set() | ||||
for r in names: | ||||
for i in xrange(len(r) + 1): | ||||
for c in itertools.combinations(xrange(len(r)), i): | ||||
d = r | ||||
for j in c: | ||||
Augie Fackler
|
r37897 | d = b''.join((d[:j], d[j:j + 1].upper(), d[j + 1:])) | ||
Bryan O'Sullivan
|
r17934 | combos.add(d) | ||
return sorted(combos) | ||||
def buildprobtable(fp, cmd='hg manifest tip'): | ||||
'''Construct and print a table of probabilities for path name | ||||
components. The numbers are percentages.''' | ||||
Bryan O'Sullivan
|
r17935 | counts = collections.defaultdict(lambda: 0) | ||
Bryan O'Sullivan
|
r17934 | for line in os.popen(cmd).read().splitlines(): | ||
if line[-2:] in ('.i', '.d'): | ||||
line = line[:-2] | ||||
if line.startswith('data/'): | ||||
line = line[5:] | ||||
for c in line: | ||||
counts[c] += 1 | ||||
for c in '\r/\n': | ||||
counts.pop(c, None) | ||||
t = sum(counts.itervalues()) / 100.0 | ||||
fp.write('probtable = (') | ||||
Pulkit Goyal
|
r36345 | for i, (k, v) in enumerate(sorted(counts.items(), key=lambda x: x[1], | ||
Bryan O'Sullivan
|
r17934 | reverse=True)): | ||
if (i % 5) == 0: | ||||
fp.write('\n ') | ||||
vt = v / t | ||||
if vt < 0.0005: | ||||
break | ||||
fp.write('(%r, %.03f), ' % (k, vt)) | ||||
fp.write('\n )\n') | ||||
# A table of character frequencies (as percentages), gleaned by | ||||
# looking at filelog names from a real-world, very large repo. | ||||
probtable = ( | ||||
Augie Fackler
|
r37897 | (b't', 9.828), (b'e', 9.042), (b's', 8.011), (b'a', 6.801), (b'i', 6.618), | ||
(b'g', 5.053), (b'r', 5.030), (b'o', 4.887), (b'p', 4.363), (b'n', 4.258), | ||||
(b'l', 3.830), (b'h', 3.693), (b'_', 3.659), (b'.', 3.377), (b'm', 3.194), | ||||
(b'u', 2.364), (b'd', 2.296), (b'c', 2.163), (b'b', 1.739), (b'f', 1.625), | ||||
(b'6', 0.666), (b'j', 0.610), (b'y', 0.554), (b'x', 0.487), (b'w', 0.477), | ||||
(b'k', 0.476), (b'v', 0.473), (b'3', 0.336), (b'1', 0.335), (b'2', 0.326), | ||||
(b'4', 0.310), (b'5', 0.305), (b'9', 0.302), (b'8', 0.300), (b'7', 0.299), | ||||
(b'q', 0.298), (b'0', 0.250), (b'z', 0.223), (b'-', 0.118), (b'C', 0.095), | ||||
(b'T', 0.087), (b'F', 0.085), (b'B', 0.077), (b'S', 0.076), (b'P', 0.076), | ||||
(b'L', 0.059), (b'A', 0.058), (b'N', 0.051), (b'D', 0.049), (b'M', 0.046), | ||||
(b'E', 0.039), (b'I', 0.035), (b'R', 0.035), (b'G', 0.028), (b'U', 0.026), | ||||
(b'W', 0.025), (b'O', 0.017), (b'V', 0.015), (b'H', 0.013), (b'Q', 0.011), | ||||
(b'J', 0.007), (b'K', 0.005), (b'+', 0.004), (b'X', 0.003), (b'Y', 0.001), | ||||
Bryan O'Sullivan
|
r17934 | ) | ||
for c, _ in probtable: | ||||
validchars.remove(c) | ||||
validchars = list(validchars) | ||||
def pickfrom(rng, table): | ||||
c = 0 | ||||
r = rng.random() * sum(i[1] for i in table) | ||||
for i, p in table: | ||||
c += p | ||||
if c >= r: | ||||
return i | ||||
reservedcombos = casecombinations(winreserved) | ||||
# The first component of a name following a slash. | ||||
firsttable = ( | ||||
(lambda rng: pickfrom(rng, probtable), 90), | ||||
(lambda rng: rng.choice(validchars), 5), | ||||
(lambda rng: rng.choice(reservedcombos), 5), | ||||
) | ||||
# Components of a name following the first. | ||||
resttable = firsttable[:-1] | ||||
# Special suffixes. | ||||
Augie Fackler
|
r37897 | internalsuffixcombos = casecombinations(b'.hg .i .d'.split()) | ||
Bryan O'Sullivan
|
r17934 | |||
# The last component of a path, before a slash or at the end of a name. | ||||
lasttable = resttable + ( | ||||
Augie Fackler
|
r37897 | (lambda rng: b'', 95), | ||
Bryan O'Sullivan
|
r17934 | (lambda rng: rng.choice(internalsuffixcombos), 5), | ||
) | ||||
def makepart(rng, k): | ||||
'''Construct a part of a pathname, without slashes.''' | ||||
p = pickfrom(rng, firsttable)(rng) | ||||
l = len(p) | ||||
ps = [p] | ||||
Siddharth Agarwal
|
r19319 | maxl = rng.randint(1, k) | ||
while l < maxl: | ||||
Bryan O'Sullivan
|
r17934 | p = pickfrom(rng, resttable)(rng) | ||
l += len(p) | ||||
ps.append(p) | ||||
ps.append(pickfrom(rng, lasttable)(rng)) | ||||
Augie Fackler
|
r37897 | return b''.join(ps) | ||
Bryan O'Sullivan
|
r17934 | |||
def makepath(rng, j, k): | ||||
'''Construct a complete pathname.''' | ||||
Augie Fackler
|
r37897 | return (b'data/' + b'/'.join(makepart(rng, k) for _ in xrange(j)) + | ||
rng.choice([b'.d', b'.i'])) | ||||
Bryan O'Sullivan
|
r17934 | |||
def genpath(rng, count): | ||||
'''Generate random pathnames with gradually increasing lengths.''' | ||||
mink, maxk = 1, 4096 | ||||
def steps(): | ||||
for i in xrange(count): | ||||
yield mink + int(round(math.sqrt((maxk - mink) * float(i) / count))) | ||||
for k in steps(): | ||||
x = rng.randint(1, k) | ||||
y = rng.randint(1, k) | ||||
yield makepath(rng, x, y) | ||||
def runtests(rng, seed, count): | ||||
nerrs = 0 | ||||
for p in genpath(rng, count): | ||||
Bryan O'Sullivan
|
r18435 | h = store._pathencode(p) # uses C implementation, if available | ||
Adrian Buehlmann
|
r18094 | r = store._hybridencode(p, True) # reference implementation in Python | ||
if h != r: | ||||
if nerrs == 0: | ||||
Pulkit Goyal
|
r28918 | print('seed:', hex(seed)[:-1], file=sys.stderr) | ||
print("\np: '%s'" % p.encode("string_escape"), file=sys.stderr) | ||||
print("h: '%s'" % h.encode("string_escape"), file=sys.stderr) | ||||
print("r: '%s'" % r.encode("string_escape"), file=sys.stderr) | ||||
Adrian Buehlmann
|
r18094 | nerrs += 1 | ||
Bryan O'Sullivan
|
r17934 | return nerrs | ||
def main(): | ||||
import getopt | ||||
# Empirically observed to take about a second to run | ||||
count = 100 | ||||
seed = None | ||||
opts, args = getopt.getopt(sys.argv[1:], 'c:s:', | ||||
['build', 'count=', 'seed=']) | ||||
for o, a in opts: | ||||
if o in ('-c', '--count'): | ||||
count = int(a) | ||||
elif o in ('-s', '--seed'): | ||||
Augie Fackler
|
r34222 | seed = int(a, base=0) # accepts base 10 or 16 strings | ||
Bryan O'Sullivan
|
r17934 | elif o == '--build': | ||
buildprobtable(sys.stdout, | ||||
'find .hg/store/data -type f && ' | ||||
'cat .hg/store/fncache 2>/dev/null') | ||||
sys.exit(0) | ||||
if seed is None: | ||||
try: | ||||
Augie Fackler
|
r34222 | seed = int(binascii.hexlify(os.urandom(16)), 16) | ||
Bryan O'Sullivan
|
r17934 | except AttributeError: | ||
Augie Fackler
|
r34222 | seed = int(time.time() * 1000) | ||
Bryan O'Sullivan
|
r17934 | |||
rng = random.Random(seed) | ||||
if runtests(rng, seed, count): | ||||
sys.exit(1) | ||||
Bryan O'Sullivan
|
r17947 | if __name__ == '__main__': | ||
Bryan O'Sullivan
|
r17934 | main() | ||