##// END OF EJS Templates
util: provide a helper function to estimate RAM size...
util: provide a helper function to estimate RAM size For POSIX systems, it uses sysconf. For Windows, it uses the win32 API directly. Differential Revision: https://phab.mercurial-scm.org/D8644

File last commit:

r43374:74802979 default
r45607:cfe0f491 default
Show More
test-pathencode.py
284 lines | 6.7 KiB | text/x-python | PythonLexer
/ tests / test-pathencode.py
Bryan O'Sullivan
tests: add a randomized test for pathencode...
r17934 # This is a randomized test that generates different pathnames every
# time it is invoked, and tests the encoding of those pathnames.
#
# It uses a simple probabilistic model to generate valid pathnames
timeless@mozdev.org
spelling: behaviour -> behavior
r26098 # that have proven likely to expose bugs and divergent behavior in
Bryan O'Sullivan
tests: add a randomized test for pathencode...
r17934 # different encoding implementations.
Pulkit Goyal
tests: make test-pathencode use absolute_import
r28928 from __future__ import absolute_import, print_function
Pulkit Goyal
tests: make test-pathencode use print_function
r28918
Pulkit Goyal
tests: make test-pathencode use absolute_import
r28928 import binascii
Bryan O'Sullivan
test-pathencode: make a 2.4-safe import of collections
r17935 import collections
Pulkit Goyal
tests: make test-pathencode use absolute_import
r28928 import itertools
import math
import os
import random
import sys
import time
from mercurial import (
Augie Fackler
tests: port test-pathencode.py to Python 3...
r37897 pycompat,
Pulkit Goyal
tests: make test-pathencode use absolute_import
r28928 store,
)
Bryan O'Sullivan
tests: add a randomized test for pathencode...
r17934
Augie Fackler
tests: add xrange alias for test-pathencode.py
r34221 try:
xrange
except NameError:
xrange = range
Augie Fackler
tests: port test-pathencode.py to Python 3...
r37897 validchars = set(map(pycompat.bytechr, range(0, 256)))
Bryan O'Sullivan
tests: add a randomized test for pathencode...
r17934 alphanum = range(ord('A'), ord('Z'))
Augie Fackler
tests: port test-pathencode.py to Python 3...
r37897 for c in (b'\0', b'/'):
Bryan O'Sullivan
tests: add a randomized test for pathencode...
r17934 validchars.remove(c)
Augie Fackler
formatting: blacken the codebase...
r43346 winreserved = (
b'aux con prn nul'.split()
+ [b'com%d' % i for i in xrange(1, 10)]
+ [b'lpt%d' % i for i in xrange(1, 10)]
)
Bryan O'Sullivan
tests: add a randomized test for pathencode...
r17934
def casecombinations(names):
'''Build all case-diddled combinations of names.'''
combos = set()
for r in names:
for i in xrange(len(r) + 1):
for c in itertools.combinations(xrange(len(r)), i):
d = r
for j in c:
Augie Fackler
formatting: blacken the codebase...
r43346 d = b''.join((d[:j], d[j : j + 1].upper(), d[j + 1 :]))
Bryan O'Sullivan
tests: add a randomized test for pathencode...
r17934 combos.add(d)
return sorted(combos)
Augie Fackler
formatting: blacken the codebase...
r43346
Bryan O'Sullivan
tests: add a randomized test for pathencode...
r17934 def buildprobtable(fp, cmd='hg manifest tip'):
'''Construct and print a table of probabilities for path name
components. The numbers are percentages.'''
Bryan O'Sullivan
test-pathencode: make a 2.4-safe import of collections
r17935 counts = collections.defaultdict(lambda: 0)
Bryan O'Sullivan
tests: add a randomized test for pathencode...
r17934 for line in os.popen(cmd).read().splitlines():
if line[-2:] in ('.i', '.d'):
line = line[:-2]
if line.startswith('data/'):
line = line[5:]
for c in line:
counts[c] += 1
for c in '\r/\n':
counts.pop(c, None)
Gregory Szorc
py3: define and use pycompat.itervalues()...
r43374 t = sum(pycompat.itervalues(counts)) / 100.0
Bryan O'Sullivan
tests: add a randomized test for pathencode...
r17934 fp.write('probtable = (')
Augie Fackler
formatting: blacken the codebase...
r43346 for i, (k, v) in enumerate(
sorted(counts.items(), key=lambda x: x[1], reverse=True)
):
Bryan O'Sullivan
tests: add a randomized test for pathencode...
r17934 if (i % 5) == 0:
fp.write('\n ')
vt = v / t
if vt < 0.0005:
break
fp.write('(%r, %.03f), ' % (k, vt))
fp.write('\n )\n')
Augie Fackler
formatting: blacken the codebase...
r43346
Bryan O'Sullivan
tests: add a randomized test for pathencode...
r17934 # A table of character frequencies (as percentages), gleaned by
# looking at filelog names from a real-world, very large repo.
probtable = (
Augie Fackler
formatting: blacken the codebase...
r43346 (b't', 9.828),
(b'e', 9.042),
(b's', 8.011),
(b'a', 6.801),
(b'i', 6.618),
(b'g', 5.053),
(b'r', 5.030),
(b'o', 4.887),
(b'p', 4.363),
(b'n', 4.258),
(b'l', 3.830),
(b'h', 3.693),
(b'_', 3.659),
(b'.', 3.377),
(b'm', 3.194),
(b'u', 2.364),
(b'd', 2.296),
(b'c', 2.163),
(b'b', 1.739),
(b'f', 1.625),
(b'6', 0.666),
(b'j', 0.610),
(b'y', 0.554),
(b'x', 0.487),
(b'w', 0.477),
(b'k', 0.476),
(b'v', 0.473),
(b'3', 0.336),
(b'1', 0.335),
(b'2', 0.326),
(b'4', 0.310),
(b'5', 0.305),
(b'9', 0.302),
(b'8', 0.300),
(b'7', 0.299),
(b'q', 0.298),
(b'0', 0.250),
(b'z', 0.223),
(b'-', 0.118),
(b'C', 0.095),
(b'T', 0.087),
(b'F', 0.085),
(b'B', 0.077),
(b'S', 0.076),
(b'P', 0.076),
(b'L', 0.059),
(b'A', 0.058),
(b'N', 0.051),
(b'D', 0.049),
(b'M', 0.046),
(b'E', 0.039),
(b'I', 0.035),
(b'R', 0.035),
(b'G', 0.028),
(b'U', 0.026),
(b'W', 0.025),
(b'O', 0.017),
(b'V', 0.015),
(b'H', 0.013),
(b'Q', 0.011),
(b'J', 0.007),
(b'K', 0.005),
(b'+', 0.004),
(b'X', 0.003),
(b'Y', 0.001),
)
Bryan O'Sullivan
tests: add a randomized test for pathencode...
r17934
for c, _ in probtable:
validchars.remove(c)
validchars = list(validchars)
Augie Fackler
formatting: blacken the codebase...
r43346
Bryan O'Sullivan
tests: add a randomized test for pathencode...
r17934 def pickfrom(rng, table):
c = 0
r = rng.random() * sum(i[1] for i in table)
for i, p in table:
c += p
if c >= r:
return i
Augie Fackler
formatting: blacken the codebase...
r43346
Bryan O'Sullivan
tests: add a randomized test for pathencode...
r17934 reservedcombos = casecombinations(winreserved)
# The first component of a name following a slash.
firsttable = (
(lambda rng: pickfrom(rng, probtable), 90),
(lambda rng: rng.choice(validchars), 5),
(lambda rng: rng.choice(reservedcombos), 5),
Augie Fackler
formatting: blacken the codebase...
r43346 )
Bryan O'Sullivan
tests: add a randomized test for pathencode...
r17934
# Components of a name following the first.
resttable = firsttable[:-1]
# Special suffixes.
Augie Fackler
tests: port test-pathencode.py to Python 3...
r37897 internalsuffixcombos = casecombinations(b'.hg .i .d'.split())
Bryan O'Sullivan
tests: add a randomized test for pathencode...
r17934
# The last component of a path, before a slash or at the end of a name.
lasttable = resttable + (
Augie Fackler
tests: port test-pathencode.py to Python 3...
r37897 (lambda rng: b'', 95),
Bryan O'Sullivan
tests: add a randomized test for pathencode...
r17934 (lambda rng: rng.choice(internalsuffixcombos), 5),
Augie Fackler
formatting: blacken the codebase...
r43346 )
Bryan O'Sullivan
tests: add a randomized test for pathencode...
r17934
def makepart(rng, k):
'''Construct a part of a pathname, without slashes.'''
p = pickfrom(rng, firsttable)(rng)
l = len(p)
ps = [p]
Siddharth Agarwal
test-pathencode: randomize length of each path component...
r19319 maxl = rng.randint(1, k)
while l < maxl:
Bryan O'Sullivan
tests: add a randomized test for pathencode...
r17934 p = pickfrom(rng, resttable)(rng)
l += len(p)
ps.append(p)
ps.append(pickfrom(rng, lasttable)(rng))
Augie Fackler
tests: port test-pathencode.py to Python 3...
r37897 return b''.join(ps)
Bryan O'Sullivan
tests: add a randomized test for pathencode...
r17934
Augie Fackler
formatting: blacken the codebase...
r43346
Bryan O'Sullivan
tests: add a randomized test for pathencode...
r17934 def makepath(rng, j, k):
'''Construct a complete pathname.'''
Augie Fackler
formatting: blacken the codebase...
r43346 return (
b'data/'
+ b'/'.join(makepart(rng, k) for _ in xrange(j))
+ rng.choice([b'.d', b'.i'])
)
Bryan O'Sullivan
tests: add a randomized test for pathencode...
r17934
def genpath(rng, count):
'''Generate random pathnames with gradually increasing lengths.'''
mink, maxk = 1, 4096
Augie Fackler
formatting: blacken the codebase...
r43346
Bryan O'Sullivan
tests: add a randomized test for pathencode...
r17934 def steps():
for i in xrange(count):
yield mink + int(round(math.sqrt((maxk - mink) * float(i) / count)))
Augie Fackler
formatting: blacken the codebase...
r43346
Bryan O'Sullivan
tests: add a randomized test for pathencode...
r17934 for k in steps():
x = rng.randint(1, k)
y = rng.randint(1, k)
yield makepath(rng, x, y)
Augie Fackler
formatting: blacken the codebase...
r43346
Bryan O'Sullivan
tests: add a randomized test for pathencode...
r17934 def runtests(rng, seed, count):
nerrs = 0
for p in genpath(rng, count):
Augie Fackler
formatting: blacken the codebase...
r43346 h = store._pathencode(p) # uses C implementation, if available
r = store._hybridencode(p, True) # reference implementation in Python
Adrian Buehlmann
test-pathencode: compare current pathencoding implementations...
r18094 if h != r:
if nerrs == 0:
Pulkit Goyal
tests: make test-pathencode use print_function
r28918 print('seed:', hex(seed)[:-1], file=sys.stderr)
print("\np: '%s'" % p.encode("string_escape"), file=sys.stderr)
print("h: '%s'" % h.encode("string_escape"), file=sys.stderr)
print("r: '%s'" % r.encode("string_escape"), file=sys.stderr)
Adrian Buehlmann
test-pathencode: compare current pathencoding implementations...
r18094 nerrs += 1
Bryan O'Sullivan
tests: add a randomized test for pathencode...
r17934 return nerrs
Augie Fackler
formatting: blacken the codebase...
r43346
Bryan O'Sullivan
tests: add a randomized test for pathencode...
r17934 def main():
import getopt
# Empirically observed to take about a second to run
count = 100
seed = None
Augie Fackler
formatting: blacken the codebase...
r43346 opts, args = getopt.getopt(
sys.argv[1:], 'c:s:', ['build', 'count=', 'seed=']
)
Bryan O'Sullivan
tests: add a randomized test for pathencode...
r17934 for o, a in opts:
if o in ('-c', '--count'):
count = int(a)
elif o in ('-s', '--seed'):
Augie Fackler
formatting: blacken the codebase...
r43346 seed = int(a, base=0) # accepts base 10 or 16 strings
Bryan O'Sullivan
tests: add a randomized test for pathencode...
r17934 elif o == '--build':
Augie Fackler
formatting: blacken the codebase...
r43346 buildprobtable(
sys.stdout,
'find .hg/store/data -type f && '
'cat .hg/store/fncache 2>/dev/null',
)
Bryan O'Sullivan
tests: add a randomized test for pathencode...
r17934 sys.exit(0)
if seed is None:
try:
Augie Fackler
tests: use int() instead of long() in test-pathencode.py
r34222 seed = int(binascii.hexlify(os.urandom(16)), 16)
Bryan O'Sullivan
tests: add a randomized test for pathencode...
r17934 except AttributeError:
Augie Fackler
tests: use int() instead of long() in test-pathencode.py
r34222 seed = int(time.time() * 1000)
Bryan O'Sullivan
tests: add a randomized test for pathencode...
r17934
rng = random.Random(seed)
if runtests(rng, seed, count):
sys.exit(1)
Augie Fackler
formatting: blacken the codebase...
r43346
Bryan O'Sullivan
test-pathencode: more aggressively check for python < 2.6
r17947 if __name__ == '__main__':
Bryan O'Sullivan
tests: add a randomized test for pathencode...
r17934 main()