##// END OF EJS Templates
flagutil: move addflagprocessor to the new module (API)
flagutil: move addflagprocessor to the new module (API)

File last commit:

r37897:1b230e19 default
r42958:6d61be15 default
Show More
test-pathencode.py
210 lines | 6.5 KiB | text/x-python | PythonLexer
/ tests / test-pathencode.py
Bryan O'Sullivan
tests: add a randomized test for pathencode...
r17934 # This is a randomized test that generates different pathnames every
# time it is invoked, and tests the encoding of those pathnames.
#
# It uses a simple probabilistic model to generate valid pathnames
timeless@mozdev.org
spelling: behaviour -> behavior
r26098 # that have proven likely to expose bugs and divergent behavior in
Bryan O'Sullivan
tests: add a randomized test for pathencode...
r17934 # different encoding implementations.
Pulkit Goyal
tests: make test-pathencode use absolute_import
r28928 from __future__ import absolute_import, print_function
Pulkit Goyal
tests: make test-pathencode use print_function
r28918
Pulkit Goyal
tests: make test-pathencode use absolute_import
r28928 import binascii
Bryan O'Sullivan
test-pathencode: make a 2.4-safe import of collections
r17935 import collections
Pulkit Goyal
tests: make test-pathencode use absolute_import
r28928 import itertools
import math
import os
import random
import sys
import time
from mercurial import (
Augie Fackler
tests: port test-pathencode.py to Python 3...
r37897 pycompat,
Pulkit Goyal
tests: make test-pathencode use absolute_import
r28928 store,
)
Bryan O'Sullivan
tests: add a randomized test for pathencode...
r17934
Augie Fackler
tests: add xrange alias for test-pathencode.py
r34221 try:
xrange
except NameError:
xrange = range
Augie Fackler
tests: port test-pathencode.py to Python 3...
r37897 validchars = set(map(pycompat.bytechr, range(0, 256)))
Bryan O'Sullivan
tests: add a randomized test for pathencode...
r17934 alphanum = range(ord('A'), ord('Z'))
Augie Fackler
tests: port test-pathencode.py to Python 3...
r37897 for c in (b'\0', b'/'):
Bryan O'Sullivan
tests: add a randomized test for pathencode...
r17934 validchars.remove(c)
Augie Fackler
tests: port test-pathencode.py to Python 3...
r37897 winreserved = (b'aux con prn nul'.split() +
[b'com%d' % i for i in xrange(1, 10)] +
[b'lpt%d' % i for i in xrange(1, 10)])
Bryan O'Sullivan
tests: add a randomized test for pathencode...
r17934
def casecombinations(names):
'''Build all case-diddled combinations of names.'''
combos = set()
for r in names:
for i in xrange(len(r) + 1):
for c in itertools.combinations(xrange(len(r)), i):
d = r
for j in c:
Augie Fackler
tests: port test-pathencode.py to Python 3...
r37897 d = b''.join((d[:j], d[j:j + 1].upper(), d[j + 1:]))
Bryan O'Sullivan
tests: add a randomized test for pathencode...
r17934 combos.add(d)
return sorted(combos)
def buildprobtable(fp, cmd='hg manifest tip'):
'''Construct and print a table of probabilities for path name
components. The numbers are percentages.'''
Bryan O'Sullivan
test-pathencode: make a 2.4-safe import of collections
r17935 counts = collections.defaultdict(lambda: 0)
Bryan O'Sullivan
tests: add a randomized test for pathencode...
r17934 for line in os.popen(cmd).read().splitlines():
if line[-2:] in ('.i', '.d'):
line = line[:-2]
if line.startswith('data/'):
line = line[5:]
for c in line:
counts[c] += 1
for c in '\r/\n':
counts.pop(c, None)
t = sum(counts.itervalues()) / 100.0
fp.write('probtable = (')
Pulkit Goyal
py3: use dict.items() instead of dict.iteritems() in tests...
r36345 for i, (k, v) in enumerate(sorted(counts.items(), key=lambda x: x[1],
Bryan O'Sullivan
tests: add a randomized test for pathencode...
r17934 reverse=True)):
if (i % 5) == 0:
fp.write('\n ')
vt = v / t
if vt < 0.0005:
break
fp.write('(%r, %.03f), ' % (k, vt))
fp.write('\n )\n')
# A table of character frequencies (as percentages), gleaned by
# looking at filelog names from a real-world, very large repo.
probtable = (
Augie Fackler
tests: port test-pathencode.py to Python 3...
r37897 (b't', 9.828), (b'e', 9.042), (b's', 8.011), (b'a', 6.801), (b'i', 6.618),
(b'g', 5.053), (b'r', 5.030), (b'o', 4.887), (b'p', 4.363), (b'n', 4.258),
(b'l', 3.830), (b'h', 3.693), (b'_', 3.659), (b'.', 3.377), (b'm', 3.194),
(b'u', 2.364), (b'd', 2.296), (b'c', 2.163), (b'b', 1.739), (b'f', 1.625),
(b'6', 0.666), (b'j', 0.610), (b'y', 0.554), (b'x', 0.487), (b'w', 0.477),
(b'k', 0.476), (b'v', 0.473), (b'3', 0.336), (b'1', 0.335), (b'2', 0.326),
(b'4', 0.310), (b'5', 0.305), (b'9', 0.302), (b'8', 0.300), (b'7', 0.299),
(b'q', 0.298), (b'0', 0.250), (b'z', 0.223), (b'-', 0.118), (b'C', 0.095),
(b'T', 0.087), (b'F', 0.085), (b'B', 0.077), (b'S', 0.076), (b'P', 0.076),
(b'L', 0.059), (b'A', 0.058), (b'N', 0.051), (b'D', 0.049), (b'M', 0.046),
(b'E', 0.039), (b'I', 0.035), (b'R', 0.035), (b'G', 0.028), (b'U', 0.026),
(b'W', 0.025), (b'O', 0.017), (b'V', 0.015), (b'H', 0.013), (b'Q', 0.011),
(b'J', 0.007), (b'K', 0.005), (b'+', 0.004), (b'X', 0.003), (b'Y', 0.001),
Bryan O'Sullivan
tests: add a randomized test for pathencode...
r17934 )
for c, _ in probtable:
validchars.remove(c)
validchars = list(validchars)
def pickfrom(rng, table):
c = 0
r = rng.random() * sum(i[1] for i in table)
for i, p in table:
c += p
if c >= r:
return i
reservedcombos = casecombinations(winreserved)
# The first component of a name following a slash.
firsttable = (
(lambda rng: pickfrom(rng, probtable), 90),
(lambda rng: rng.choice(validchars), 5),
(lambda rng: rng.choice(reservedcombos), 5),
)
# Components of a name following the first.
resttable = firsttable[:-1]
# Special suffixes.
Augie Fackler
tests: port test-pathencode.py to Python 3...
r37897 internalsuffixcombos = casecombinations(b'.hg .i .d'.split())
Bryan O'Sullivan
tests: add a randomized test for pathencode...
r17934
# The last component of a path, before a slash or at the end of a name.
lasttable = resttable + (
Augie Fackler
tests: port test-pathencode.py to Python 3...
r37897 (lambda rng: b'', 95),
Bryan O'Sullivan
tests: add a randomized test for pathencode...
r17934 (lambda rng: rng.choice(internalsuffixcombos), 5),
)
def makepart(rng, k):
'''Construct a part of a pathname, without slashes.'''
p = pickfrom(rng, firsttable)(rng)
l = len(p)
ps = [p]
Siddharth Agarwal
test-pathencode: randomize length of each path component...
r19319 maxl = rng.randint(1, k)
while l < maxl:
Bryan O'Sullivan
tests: add a randomized test for pathencode...
r17934 p = pickfrom(rng, resttable)(rng)
l += len(p)
ps.append(p)
ps.append(pickfrom(rng, lasttable)(rng))
Augie Fackler
tests: port test-pathencode.py to Python 3...
r37897 return b''.join(ps)
Bryan O'Sullivan
tests: add a randomized test for pathencode...
r17934
def makepath(rng, j, k):
'''Construct a complete pathname.'''
Augie Fackler
tests: port test-pathencode.py to Python 3...
r37897 return (b'data/' + b'/'.join(makepart(rng, k) for _ in xrange(j)) +
rng.choice([b'.d', b'.i']))
Bryan O'Sullivan
tests: add a randomized test for pathencode...
r17934
def genpath(rng, count):
'''Generate random pathnames with gradually increasing lengths.'''
mink, maxk = 1, 4096
def steps():
for i in xrange(count):
yield mink + int(round(math.sqrt((maxk - mink) * float(i) / count)))
for k in steps():
x = rng.randint(1, k)
y = rng.randint(1, k)
yield makepath(rng, x, y)
def runtests(rng, seed, count):
nerrs = 0
for p in genpath(rng, count):
Bryan O'Sullivan
store: switch to C-based hashed path encoding
r18435 h = store._pathencode(p) # uses C implementation, if available
Adrian Buehlmann
test-pathencode: compare current pathencoding implementations...
r18094 r = store._hybridencode(p, True) # reference implementation in Python
if h != r:
if nerrs == 0:
Pulkit Goyal
tests: make test-pathencode use print_function
r28918 print('seed:', hex(seed)[:-1], file=sys.stderr)
print("\np: '%s'" % p.encode("string_escape"), file=sys.stderr)
print("h: '%s'" % h.encode("string_escape"), file=sys.stderr)
print("r: '%s'" % r.encode("string_escape"), file=sys.stderr)
Adrian Buehlmann
test-pathencode: compare current pathencoding implementations...
r18094 nerrs += 1
Bryan O'Sullivan
tests: add a randomized test for pathencode...
r17934 return nerrs
def main():
import getopt
# Empirically observed to take about a second to run
count = 100
seed = None
opts, args = getopt.getopt(sys.argv[1:], 'c:s:',
['build', 'count=', 'seed='])
for o, a in opts:
if o in ('-c', '--count'):
count = int(a)
elif o in ('-s', '--seed'):
Augie Fackler
tests: use int() instead of long() in test-pathencode.py
r34222 seed = int(a, base=0) # accepts base 10 or 16 strings
Bryan O'Sullivan
tests: add a randomized test for pathencode...
r17934 elif o == '--build':
buildprobtable(sys.stdout,
'find .hg/store/data -type f && '
'cat .hg/store/fncache 2>/dev/null')
sys.exit(0)
if seed is None:
try:
Augie Fackler
tests: use int() instead of long() in test-pathencode.py
r34222 seed = int(binascii.hexlify(os.urandom(16)), 16)
Bryan O'Sullivan
tests: add a randomized test for pathencode...
r17934 except AttributeError:
Augie Fackler
tests: use int() instead of long() in test-pathencode.py
r34222 seed = int(time.time() * 1000)
Bryan O'Sullivan
tests: add a randomized test for pathencode...
r17934
rng = random.Random(seed)
if runtests(rng, seed, count):
sys.exit(1)
Bryan O'Sullivan
test-pathencode: more aggressively check for python < 2.6
r17947 if __name__ == '__main__':
Bryan O'Sullivan
tests: add a randomized test for pathencode...
r17934 main()