##// END OF EJS Templates
tests: add xrange alias for test-pathencode.py
Augie Fackler -
r34221:0f200e23 default
parent child Browse files
Show More
@@ -1,204 +1,209 b''
1 # This is a randomized test that generates different pathnames every
1 # This is a randomized test that generates different pathnames every
2 # time it is invoked, and tests the encoding of those pathnames.
2 # time it is invoked, and tests the encoding of those pathnames.
3 #
3 #
4 # It uses a simple probabilistic model to generate valid pathnames
4 # It uses a simple probabilistic model to generate valid pathnames
5 # that have proven likely to expose bugs and divergent behavior in
5 # that have proven likely to expose bugs and divergent behavior in
6 # different encoding implementations.
6 # different encoding implementations.
7
7
8 from __future__ import absolute_import, print_function
8 from __future__ import absolute_import, print_function
9
9
10 import binascii
10 import binascii
11 import collections
11 import collections
12 import itertools
12 import itertools
13 import math
13 import math
14 import os
14 import os
15 import random
15 import random
16 import sys
16 import sys
17 import time
17 import time
18 from mercurial import (
18 from mercurial import (
19 store,
19 store,
20 )
20 )
21
21
22 try:
23 xrange
24 except NameError:
25 xrange = range
26
22 validchars = set(map(chr, range(0, 256)))
27 validchars = set(map(chr, range(0, 256)))
23 alphanum = range(ord('A'), ord('Z'))
28 alphanum = range(ord('A'), ord('Z'))
24
29
25 for c in '\0/':
30 for c in '\0/':
26 validchars.remove(c)
31 validchars.remove(c)
27
32
28 winreserved = ('aux con prn nul'.split() +
33 winreserved = ('aux con prn nul'.split() +
29 ['com%d' % i for i in xrange(1, 10)] +
34 ['com%d' % i for i in xrange(1, 10)] +
30 ['lpt%d' % i for i in xrange(1, 10)])
35 ['lpt%d' % i for i in xrange(1, 10)])
31
36
32 def casecombinations(names):
37 def casecombinations(names):
33 '''Build all case-diddled combinations of names.'''
38 '''Build all case-diddled combinations of names.'''
34
39
35 combos = set()
40 combos = set()
36
41
37 for r in names:
42 for r in names:
38 for i in xrange(len(r) + 1):
43 for i in xrange(len(r) + 1):
39 for c in itertools.combinations(xrange(len(r)), i):
44 for c in itertools.combinations(xrange(len(r)), i):
40 d = r
45 d = r
41 for j in c:
46 for j in c:
42 d = ''.join((d[:j], d[j].upper(), d[j + 1:]))
47 d = ''.join((d[:j], d[j].upper(), d[j + 1:]))
43 combos.add(d)
48 combos.add(d)
44 return sorted(combos)
49 return sorted(combos)
45
50
46 def buildprobtable(fp, cmd='hg manifest tip'):
51 def buildprobtable(fp, cmd='hg manifest tip'):
47 '''Construct and print a table of probabilities for path name
52 '''Construct and print a table of probabilities for path name
48 components. The numbers are percentages.'''
53 components. The numbers are percentages.'''
49
54
50 counts = collections.defaultdict(lambda: 0)
55 counts = collections.defaultdict(lambda: 0)
51 for line in os.popen(cmd).read().splitlines():
56 for line in os.popen(cmd).read().splitlines():
52 if line[-2:] in ('.i', '.d'):
57 if line[-2:] in ('.i', '.d'):
53 line = line[:-2]
58 line = line[:-2]
54 if line.startswith('data/'):
59 if line.startswith('data/'):
55 line = line[5:]
60 line = line[5:]
56 for c in line:
61 for c in line:
57 counts[c] += 1
62 counts[c] += 1
58 for c in '\r/\n':
63 for c in '\r/\n':
59 counts.pop(c, None)
64 counts.pop(c, None)
60 t = sum(counts.itervalues()) / 100.0
65 t = sum(counts.itervalues()) / 100.0
61 fp.write('probtable = (')
66 fp.write('probtable = (')
62 for i, (k, v) in enumerate(sorted(counts.iteritems(), key=lambda x: x[1],
67 for i, (k, v) in enumerate(sorted(counts.iteritems(), key=lambda x: x[1],
63 reverse=True)):
68 reverse=True)):
64 if (i % 5) == 0:
69 if (i % 5) == 0:
65 fp.write('\n ')
70 fp.write('\n ')
66 vt = v / t
71 vt = v / t
67 if vt < 0.0005:
72 if vt < 0.0005:
68 break
73 break
69 fp.write('(%r, %.03f), ' % (k, vt))
74 fp.write('(%r, %.03f), ' % (k, vt))
70 fp.write('\n )\n')
75 fp.write('\n )\n')
71
76
72 # A table of character frequencies (as percentages), gleaned by
77 # A table of character frequencies (as percentages), gleaned by
73 # looking at filelog names from a real-world, very large repo.
78 # looking at filelog names from a real-world, very large repo.
74
79
75 probtable = (
80 probtable = (
76 ('t', 9.828), ('e', 9.042), ('s', 8.011), ('a', 6.801), ('i', 6.618),
81 ('t', 9.828), ('e', 9.042), ('s', 8.011), ('a', 6.801), ('i', 6.618),
77 ('g', 5.053), ('r', 5.030), ('o', 4.887), ('p', 4.363), ('n', 4.258),
82 ('g', 5.053), ('r', 5.030), ('o', 4.887), ('p', 4.363), ('n', 4.258),
78 ('l', 3.830), ('h', 3.693), ('_', 3.659), ('.', 3.377), ('m', 3.194),
83 ('l', 3.830), ('h', 3.693), ('_', 3.659), ('.', 3.377), ('m', 3.194),
79 ('u', 2.364), ('d', 2.296), ('c', 2.163), ('b', 1.739), ('f', 1.625),
84 ('u', 2.364), ('d', 2.296), ('c', 2.163), ('b', 1.739), ('f', 1.625),
80 ('6', 0.666), ('j', 0.610), ('y', 0.554), ('x', 0.487), ('w', 0.477),
85 ('6', 0.666), ('j', 0.610), ('y', 0.554), ('x', 0.487), ('w', 0.477),
81 ('k', 0.476), ('v', 0.473), ('3', 0.336), ('1', 0.335), ('2', 0.326),
86 ('k', 0.476), ('v', 0.473), ('3', 0.336), ('1', 0.335), ('2', 0.326),
82 ('4', 0.310), ('5', 0.305), ('9', 0.302), ('8', 0.300), ('7', 0.299),
87 ('4', 0.310), ('5', 0.305), ('9', 0.302), ('8', 0.300), ('7', 0.299),
83 ('q', 0.298), ('0', 0.250), ('z', 0.223), ('-', 0.118), ('C', 0.095),
88 ('q', 0.298), ('0', 0.250), ('z', 0.223), ('-', 0.118), ('C', 0.095),
84 ('T', 0.087), ('F', 0.085), ('B', 0.077), ('S', 0.076), ('P', 0.076),
89 ('T', 0.087), ('F', 0.085), ('B', 0.077), ('S', 0.076), ('P', 0.076),
85 ('L', 0.059), ('A', 0.058), ('N', 0.051), ('D', 0.049), ('M', 0.046),
90 ('L', 0.059), ('A', 0.058), ('N', 0.051), ('D', 0.049), ('M', 0.046),
86 ('E', 0.039), ('I', 0.035), ('R', 0.035), ('G', 0.028), ('U', 0.026),
91 ('E', 0.039), ('I', 0.035), ('R', 0.035), ('G', 0.028), ('U', 0.026),
87 ('W', 0.025), ('O', 0.017), ('V', 0.015), ('H', 0.013), ('Q', 0.011),
92 ('W', 0.025), ('O', 0.017), ('V', 0.015), ('H', 0.013), ('Q', 0.011),
88 ('J', 0.007), ('K', 0.005), ('+', 0.004), ('X', 0.003), ('Y', 0.001),
93 ('J', 0.007), ('K', 0.005), ('+', 0.004), ('X', 0.003), ('Y', 0.001),
89 )
94 )
90
95
91 for c, _ in probtable:
96 for c, _ in probtable:
92 validchars.remove(c)
97 validchars.remove(c)
93 validchars = list(validchars)
98 validchars = list(validchars)
94
99
95 def pickfrom(rng, table):
100 def pickfrom(rng, table):
96 c = 0
101 c = 0
97 r = rng.random() * sum(i[1] for i in table)
102 r = rng.random() * sum(i[1] for i in table)
98 for i, p in table:
103 for i, p in table:
99 c += p
104 c += p
100 if c >= r:
105 if c >= r:
101 return i
106 return i
102
107
103 reservedcombos = casecombinations(winreserved)
108 reservedcombos = casecombinations(winreserved)
104
109
105 # The first component of a name following a slash.
110 # The first component of a name following a slash.
106
111
107 firsttable = (
112 firsttable = (
108 (lambda rng: pickfrom(rng, probtable), 90),
113 (lambda rng: pickfrom(rng, probtable), 90),
109 (lambda rng: rng.choice(validchars), 5),
114 (lambda rng: rng.choice(validchars), 5),
110 (lambda rng: rng.choice(reservedcombos), 5),
115 (lambda rng: rng.choice(reservedcombos), 5),
111 )
116 )
112
117
113 # Components of a name following the first.
118 # Components of a name following the first.
114
119
115 resttable = firsttable[:-1]
120 resttable = firsttable[:-1]
116
121
117 # Special suffixes.
122 # Special suffixes.
118
123
119 internalsuffixcombos = casecombinations('.hg .i .d'.split())
124 internalsuffixcombos = casecombinations('.hg .i .d'.split())
120
125
121 # The last component of a path, before a slash or at the end of a name.
126 # The last component of a path, before a slash or at the end of a name.
122
127
123 lasttable = resttable + (
128 lasttable = resttable + (
124 (lambda rng: '', 95),
129 (lambda rng: '', 95),
125 (lambda rng: rng.choice(internalsuffixcombos), 5),
130 (lambda rng: rng.choice(internalsuffixcombos), 5),
126 )
131 )
127
132
128 def makepart(rng, k):
133 def makepart(rng, k):
129 '''Construct a part of a pathname, without slashes.'''
134 '''Construct a part of a pathname, without slashes.'''
130
135
131 p = pickfrom(rng, firsttable)(rng)
136 p = pickfrom(rng, firsttable)(rng)
132 l = len(p)
137 l = len(p)
133 ps = [p]
138 ps = [p]
134 maxl = rng.randint(1, k)
139 maxl = rng.randint(1, k)
135 while l < maxl:
140 while l < maxl:
136 p = pickfrom(rng, resttable)(rng)
141 p = pickfrom(rng, resttable)(rng)
137 l += len(p)
142 l += len(p)
138 ps.append(p)
143 ps.append(p)
139 ps.append(pickfrom(rng, lasttable)(rng))
144 ps.append(pickfrom(rng, lasttable)(rng))
140 return ''.join(ps)
145 return ''.join(ps)
141
146
142 def makepath(rng, j, k):
147 def makepath(rng, j, k):
143 '''Construct a complete pathname.'''
148 '''Construct a complete pathname.'''
144
149
145 return ('data/' + '/'.join(makepart(rng, k) for _ in xrange(j)) +
150 return ('data/' + '/'.join(makepart(rng, k) for _ in xrange(j)) +
146 rng.choice(['.d', '.i']))
151 rng.choice(['.d', '.i']))
147
152
148 def genpath(rng, count):
153 def genpath(rng, count):
149 '''Generate random pathnames with gradually increasing lengths.'''
154 '''Generate random pathnames with gradually increasing lengths.'''
150
155
151 mink, maxk = 1, 4096
156 mink, maxk = 1, 4096
152 def steps():
157 def steps():
153 for i in xrange(count):
158 for i in xrange(count):
154 yield mink + int(round(math.sqrt((maxk - mink) * float(i) / count)))
159 yield mink + int(round(math.sqrt((maxk - mink) * float(i) / count)))
155 for k in steps():
160 for k in steps():
156 x = rng.randint(1, k)
161 x = rng.randint(1, k)
157 y = rng.randint(1, k)
162 y = rng.randint(1, k)
158 yield makepath(rng, x, y)
163 yield makepath(rng, x, y)
159
164
160 def runtests(rng, seed, count):
165 def runtests(rng, seed, count):
161 nerrs = 0
166 nerrs = 0
162 for p in genpath(rng, count):
167 for p in genpath(rng, count):
163 h = store._pathencode(p) # uses C implementation, if available
168 h = store._pathencode(p) # uses C implementation, if available
164 r = store._hybridencode(p, True) # reference implementation in Python
169 r = store._hybridencode(p, True) # reference implementation in Python
165 if h != r:
170 if h != r:
166 if nerrs == 0:
171 if nerrs == 0:
167 print('seed:', hex(seed)[:-1], file=sys.stderr)
172 print('seed:', hex(seed)[:-1], file=sys.stderr)
168 print("\np: '%s'" % p.encode("string_escape"), file=sys.stderr)
173 print("\np: '%s'" % p.encode("string_escape"), file=sys.stderr)
169 print("h: '%s'" % h.encode("string_escape"), file=sys.stderr)
174 print("h: '%s'" % h.encode("string_escape"), file=sys.stderr)
170 print("r: '%s'" % r.encode("string_escape"), file=sys.stderr)
175 print("r: '%s'" % r.encode("string_escape"), file=sys.stderr)
171 nerrs += 1
176 nerrs += 1
172 return nerrs
177 return nerrs
173
178
174 def main():
179 def main():
175 import getopt
180 import getopt
176
181
177 # Empirically observed to take about a second to run
182 # Empirically observed to take about a second to run
178 count = 100
183 count = 100
179 seed = None
184 seed = None
180 opts, args = getopt.getopt(sys.argv[1:], 'c:s:',
185 opts, args = getopt.getopt(sys.argv[1:], 'c:s:',
181 ['build', 'count=', 'seed='])
186 ['build', 'count=', 'seed='])
182 for o, a in opts:
187 for o, a in opts:
183 if o in ('-c', '--count'):
188 if o in ('-c', '--count'):
184 count = int(a)
189 count = int(a)
185 elif o in ('-s', '--seed'):
190 elif o in ('-s', '--seed'):
186 seed = long(a, base=0) # accepts base 10 or 16 strings
191 seed = long(a, base=0) # accepts base 10 or 16 strings
187 elif o == '--build':
192 elif o == '--build':
188 buildprobtable(sys.stdout,
193 buildprobtable(sys.stdout,
189 'find .hg/store/data -type f && '
194 'find .hg/store/data -type f && '
190 'cat .hg/store/fncache 2>/dev/null')
195 'cat .hg/store/fncache 2>/dev/null')
191 sys.exit(0)
196 sys.exit(0)
192
197
193 if seed is None:
198 if seed is None:
194 try:
199 try:
195 seed = long(binascii.hexlify(os.urandom(16)), 16)
200 seed = long(binascii.hexlify(os.urandom(16)), 16)
196 except AttributeError:
201 except AttributeError:
197 seed = long(time.time() * 1000)
202 seed = long(time.time() * 1000)
198
203
199 rng = random.Random(seed)
204 rng = random.Random(seed)
200 if runtests(rng, seed, count):
205 if runtests(rng, seed, count):
201 sys.exit(1)
206 sys.exit(1)
202
207
203 if __name__ == '__main__':
208 if __name__ == '__main__':
204 main()
209 main()
General Comments 0
You need to be logged in to leave comments. Login now