Show More
@@ -1,204 +1,209 b'' | |||||
1 | # This is a randomized test that generates different pathnames every |
|
1 | # This is a randomized test that generates different pathnames every | |
2 | # time it is invoked, and tests the encoding of those pathnames. |
|
2 | # time it is invoked, and tests the encoding of those pathnames. | |
3 | # |
|
3 | # | |
4 | # It uses a simple probabilistic model to generate valid pathnames |
|
4 | # It uses a simple probabilistic model to generate valid pathnames | |
5 | # that have proven likely to expose bugs and divergent behavior in |
|
5 | # that have proven likely to expose bugs and divergent behavior in | |
6 | # different encoding implementations. |
|
6 | # different encoding implementations. | |
7 |
|
7 | |||
8 | from __future__ import absolute_import, print_function |
|
8 | from __future__ import absolute_import, print_function | |
9 |
|
9 | |||
10 | import binascii |
|
10 | import binascii | |
11 | import collections |
|
11 | import collections | |
12 | import itertools |
|
12 | import itertools | |
13 | import math |
|
13 | import math | |
14 | import os |
|
14 | import os | |
15 | import random |
|
15 | import random | |
16 | import sys |
|
16 | import sys | |
17 | import time |
|
17 | import time | |
18 | from mercurial import ( |
|
18 | from mercurial import ( | |
19 | store, |
|
19 | store, | |
20 | ) |
|
20 | ) | |
21 |
|
21 | |||
|
22 | try: | |||
|
23 | xrange | |||
|
24 | except NameError: | |||
|
25 | xrange = range | |||
|
26 | ||||
22 | validchars = set(map(chr, range(0, 256))) |
|
27 | validchars = set(map(chr, range(0, 256))) | |
23 | alphanum = range(ord('A'), ord('Z')) |
|
28 | alphanum = range(ord('A'), ord('Z')) | |
24 |
|
29 | |||
25 | for c in '\0/': |
|
30 | for c in '\0/': | |
26 | validchars.remove(c) |
|
31 | validchars.remove(c) | |
27 |
|
32 | |||
28 | winreserved = ('aux con prn nul'.split() + |
|
33 | winreserved = ('aux con prn nul'.split() + | |
29 | ['com%d' % i for i in xrange(1, 10)] + |
|
34 | ['com%d' % i for i in xrange(1, 10)] + | |
30 | ['lpt%d' % i for i in xrange(1, 10)]) |
|
35 | ['lpt%d' % i for i in xrange(1, 10)]) | |
31 |
|
36 | |||
32 | def casecombinations(names): |
|
37 | def casecombinations(names): | |
33 | '''Build all case-diddled combinations of names.''' |
|
38 | '''Build all case-diddled combinations of names.''' | |
34 |
|
39 | |||
35 | combos = set() |
|
40 | combos = set() | |
36 |
|
41 | |||
37 | for r in names: |
|
42 | for r in names: | |
38 | for i in xrange(len(r) + 1): |
|
43 | for i in xrange(len(r) + 1): | |
39 | for c in itertools.combinations(xrange(len(r)), i): |
|
44 | for c in itertools.combinations(xrange(len(r)), i): | |
40 | d = r |
|
45 | d = r | |
41 | for j in c: |
|
46 | for j in c: | |
42 | d = ''.join((d[:j], d[j].upper(), d[j + 1:])) |
|
47 | d = ''.join((d[:j], d[j].upper(), d[j + 1:])) | |
43 | combos.add(d) |
|
48 | combos.add(d) | |
44 | return sorted(combos) |
|
49 | return sorted(combos) | |
45 |
|
50 | |||
46 | def buildprobtable(fp, cmd='hg manifest tip'): |
|
51 | def buildprobtable(fp, cmd='hg manifest tip'): | |
47 | '''Construct and print a table of probabilities for path name |
|
52 | '''Construct and print a table of probabilities for path name | |
48 | components. The numbers are percentages.''' |
|
53 | components. The numbers are percentages.''' | |
49 |
|
54 | |||
50 | counts = collections.defaultdict(lambda: 0) |
|
55 | counts = collections.defaultdict(lambda: 0) | |
51 | for line in os.popen(cmd).read().splitlines(): |
|
56 | for line in os.popen(cmd).read().splitlines(): | |
52 | if line[-2:] in ('.i', '.d'): |
|
57 | if line[-2:] in ('.i', '.d'): | |
53 | line = line[:-2] |
|
58 | line = line[:-2] | |
54 | if line.startswith('data/'): |
|
59 | if line.startswith('data/'): | |
55 | line = line[5:] |
|
60 | line = line[5:] | |
56 | for c in line: |
|
61 | for c in line: | |
57 | counts[c] += 1 |
|
62 | counts[c] += 1 | |
58 | for c in '\r/\n': |
|
63 | for c in '\r/\n': | |
59 | counts.pop(c, None) |
|
64 | counts.pop(c, None) | |
60 | t = sum(counts.itervalues()) / 100.0 |
|
65 | t = sum(counts.itervalues()) / 100.0 | |
61 | fp.write('probtable = (') |
|
66 | fp.write('probtable = (') | |
62 | for i, (k, v) in enumerate(sorted(counts.iteritems(), key=lambda x: x[1], |
|
67 | for i, (k, v) in enumerate(sorted(counts.iteritems(), key=lambda x: x[1], | |
63 | reverse=True)): |
|
68 | reverse=True)): | |
64 | if (i % 5) == 0: |
|
69 | if (i % 5) == 0: | |
65 | fp.write('\n ') |
|
70 | fp.write('\n ') | |
66 | vt = v / t |
|
71 | vt = v / t | |
67 | if vt < 0.0005: |
|
72 | if vt < 0.0005: | |
68 | break |
|
73 | break | |
69 | fp.write('(%r, %.03f), ' % (k, vt)) |
|
74 | fp.write('(%r, %.03f), ' % (k, vt)) | |
70 | fp.write('\n )\n') |
|
75 | fp.write('\n )\n') | |
71 |
|
76 | |||
72 | # A table of character frequencies (as percentages), gleaned by |
|
77 | # A table of character frequencies (as percentages), gleaned by | |
73 | # looking at filelog names from a real-world, very large repo. |
|
78 | # looking at filelog names from a real-world, very large repo. | |
74 |
|
79 | |||
75 | probtable = ( |
|
80 | probtable = ( | |
76 | ('t', 9.828), ('e', 9.042), ('s', 8.011), ('a', 6.801), ('i', 6.618), |
|
81 | ('t', 9.828), ('e', 9.042), ('s', 8.011), ('a', 6.801), ('i', 6.618), | |
77 | ('g', 5.053), ('r', 5.030), ('o', 4.887), ('p', 4.363), ('n', 4.258), |
|
82 | ('g', 5.053), ('r', 5.030), ('o', 4.887), ('p', 4.363), ('n', 4.258), | |
78 | ('l', 3.830), ('h', 3.693), ('_', 3.659), ('.', 3.377), ('m', 3.194), |
|
83 | ('l', 3.830), ('h', 3.693), ('_', 3.659), ('.', 3.377), ('m', 3.194), | |
79 | ('u', 2.364), ('d', 2.296), ('c', 2.163), ('b', 1.739), ('f', 1.625), |
|
84 | ('u', 2.364), ('d', 2.296), ('c', 2.163), ('b', 1.739), ('f', 1.625), | |
80 | ('6', 0.666), ('j', 0.610), ('y', 0.554), ('x', 0.487), ('w', 0.477), |
|
85 | ('6', 0.666), ('j', 0.610), ('y', 0.554), ('x', 0.487), ('w', 0.477), | |
81 | ('k', 0.476), ('v', 0.473), ('3', 0.336), ('1', 0.335), ('2', 0.326), |
|
86 | ('k', 0.476), ('v', 0.473), ('3', 0.336), ('1', 0.335), ('2', 0.326), | |
82 | ('4', 0.310), ('5', 0.305), ('9', 0.302), ('8', 0.300), ('7', 0.299), |
|
87 | ('4', 0.310), ('5', 0.305), ('9', 0.302), ('8', 0.300), ('7', 0.299), | |
83 | ('q', 0.298), ('0', 0.250), ('z', 0.223), ('-', 0.118), ('C', 0.095), |
|
88 | ('q', 0.298), ('0', 0.250), ('z', 0.223), ('-', 0.118), ('C', 0.095), | |
84 | ('T', 0.087), ('F', 0.085), ('B', 0.077), ('S', 0.076), ('P', 0.076), |
|
89 | ('T', 0.087), ('F', 0.085), ('B', 0.077), ('S', 0.076), ('P', 0.076), | |
85 | ('L', 0.059), ('A', 0.058), ('N', 0.051), ('D', 0.049), ('M', 0.046), |
|
90 | ('L', 0.059), ('A', 0.058), ('N', 0.051), ('D', 0.049), ('M', 0.046), | |
86 | ('E', 0.039), ('I', 0.035), ('R', 0.035), ('G', 0.028), ('U', 0.026), |
|
91 | ('E', 0.039), ('I', 0.035), ('R', 0.035), ('G', 0.028), ('U', 0.026), | |
87 | ('W', 0.025), ('O', 0.017), ('V', 0.015), ('H', 0.013), ('Q', 0.011), |
|
92 | ('W', 0.025), ('O', 0.017), ('V', 0.015), ('H', 0.013), ('Q', 0.011), | |
88 | ('J', 0.007), ('K', 0.005), ('+', 0.004), ('X', 0.003), ('Y', 0.001), |
|
93 | ('J', 0.007), ('K', 0.005), ('+', 0.004), ('X', 0.003), ('Y', 0.001), | |
89 | ) |
|
94 | ) | |
90 |
|
95 | |||
91 | for c, _ in probtable: |
|
96 | for c, _ in probtable: | |
92 | validchars.remove(c) |
|
97 | validchars.remove(c) | |
93 | validchars = list(validchars) |
|
98 | validchars = list(validchars) | |
94 |
|
99 | |||
95 | def pickfrom(rng, table): |
|
100 | def pickfrom(rng, table): | |
96 | c = 0 |
|
101 | c = 0 | |
97 | r = rng.random() * sum(i[1] for i in table) |
|
102 | r = rng.random() * sum(i[1] for i in table) | |
98 | for i, p in table: |
|
103 | for i, p in table: | |
99 | c += p |
|
104 | c += p | |
100 | if c >= r: |
|
105 | if c >= r: | |
101 | return i |
|
106 | return i | |
102 |
|
107 | |||
103 | reservedcombos = casecombinations(winreserved) |
|
108 | reservedcombos = casecombinations(winreserved) | |
104 |
|
109 | |||
105 | # The first component of a name following a slash. |
|
110 | # The first component of a name following a slash. | |
106 |
|
111 | |||
107 | firsttable = ( |
|
112 | firsttable = ( | |
108 | (lambda rng: pickfrom(rng, probtable), 90), |
|
113 | (lambda rng: pickfrom(rng, probtable), 90), | |
109 | (lambda rng: rng.choice(validchars), 5), |
|
114 | (lambda rng: rng.choice(validchars), 5), | |
110 | (lambda rng: rng.choice(reservedcombos), 5), |
|
115 | (lambda rng: rng.choice(reservedcombos), 5), | |
111 | ) |
|
116 | ) | |
112 |
|
117 | |||
113 | # Components of a name following the first. |
|
118 | # Components of a name following the first. | |
114 |
|
119 | |||
115 | resttable = firsttable[:-1] |
|
120 | resttable = firsttable[:-1] | |
116 |
|
121 | |||
117 | # Special suffixes. |
|
122 | # Special suffixes. | |
118 |
|
123 | |||
119 | internalsuffixcombos = casecombinations('.hg .i .d'.split()) |
|
124 | internalsuffixcombos = casecombinations('.hg .i .d'.split()) | |
120 |
|
125 | |||
121 | # The last component of a path, before a slash or at the end of a name. |
|
126 | # The last component of a path, before a slash or at the end of a name. | |
122 |
|
127 | |||
123 | lasttable = resttable + ( |
|
128 | lasttable = resttable + ( | |
124 | (lambda rng: '', 95), |
|
129 | (lambda rng: '', 95), | |
125 | (lambda rng: rng.choice(internalsuffixcombos), 5), |
|
130 | (lambda rng: rng.choice(internalsuffixcombos), 5), | |
126 | ) |
|
131 | ) | |
127 |
|
132 | |||
128 | def makepart(rng, k): |
|
133 | def makepart(rng, k): | |
129 | '''Construct a part of a pathname, without slashes.''' |
|
134 | '''Construct a part of a pathname, without slashes.''' | |
130 |
|
135 | |||
131 | p = pickfrom(rng, firsttable)(rng) |
|
136 | p = pickfrom(rng, firsttable)(rng) | |
132 | l = len(p) |
|
137 | l = len(p) | |
133 | ps = [p] |
|
138 | ps = [p] | |
134 | maxl = rng.randint(1, k) |
|
139 | maxl = rng.randint(1, k) | |
135 | while l < maxl: |
|
140 | while l < maxl: | |
136 | p = pickfrom(rng, resttable)(rng) |
|
141 | p = pickfrom(rng, resttable)(rng) | |
137 | l += len(p) |
|
142 | l += len(p) | |
138 | ps.append(p) |
|
143 | ps.append(p) | |
139 | ps.append(pickfrom(rng, lasttable)(rng)) |
|
144 | ps.append(pickfrom(rng, lasttable)(rng)) | |
140 | return ''.join(ps) |
|
145 | return ''.join(ps) | |
141 |
|
146 | |||
142 | def makepath(rng, j, k): |
|
147 | def makepath(rng, j, k): | |
143 | '''Construct a complete pathname.''' |
|
148 | '''Construct a complete pathname.''' | |
144 |
|
149 | |||
145 | return ('data/' + '/'.join(makepart(rng, k) for _ in xrange(j)) + |
|
150 | return ('data/' + '/'.join(makepart(rng, k) for _ in xrange(j)) + | |
146 | rng.choice(['.d', '.i'])) |
|
151 | rng.choice(['.d', '.i'])) | |
147 |
|
152 | |||
148 | def genpath(rng, count): |
|
153 | def genpath(rng, count): | |
149 | '''Generate random pathnames with gradually increasing lengths.''' |
|
154 | '''Generate random pathnames with gradually increasing lengths.''' | |
150 |
|
155 | |||
151 | mink, maxk = 1, 4096 |
|
156 | mink, maxk = 1, 4096 | |
152 | def steps(): |
|
157 | def steps(): | |
153 | for i in xrange(count): |
|
158 | for i in xrange(count): | |
154 | yield mink + int(round(math.sqrt((maxk - mink) * float(i) / count))) |
|
159 | yield mink + int(round(math.sqrt((maxk - mink) * float(i) / count))) | |
155 | for k in steps(): |
|
160 | for k in steps(): | |
156 | x = rng.randint(1, k) |
|
161 | x = rng.randint(1, k) | |
157 | y = rng.randint(1, k) |
|
162 | y = rng.randint(1, k) | |
158 | yield makepath(rng, x, y) |
|
163 | yield makepath(rng, x, y) | |
159 |
|
164 | |||
160 | def runtests(rng, seed, count): |
|
165 | def runtests(rng, seed, count): | |
161 | nerrs = 0 |
|
166 | nerrs = 0 | |
162 | for p in genpath(rng, count): |
|
167 | for p in genpath(rng, count): | |
163 | h = store._pathencode(p) # uses C implementation, if available |
|
168 | h = store._pathencode(p) # uses C implementation, if available | |
164 | r = store._hybridencode(p, True) # reference implementation in Python |
|
169 | r = store._hybridencode(p, True) # reference implementation in Python | |
165 | if h != r: |
|
170 | if h != r: | |
166 | if nerrs == 0: |
|
171 | if nerrs == 0: | |
167 | print('seed:', hex(seed)[:-1], file=sys.stderr) |
|
172 | print('seed:', hex(seed)[:-1], file=sys.stderr) | |
168 | print("\np: '%s'" % p.encode("string_escape"), file=sys.stderr) |
|
173 | print("\np: '%s'" % p.encode("string_escape"), file=sys.stderr) | |
169 | print("h: '%s'" % h.encode("string_escape"), file=sys.stderr) |
|
174 | print("h: '%s'" % h.encode("string_escape"), file=sys.stderr) | |
170 | print("r: '%s'" % r.encode("string_escape"), file=sys.stderr) |
|
175 | print("r: '%s'" % r.encode("string_escape"), file=sys.stderr) | |
171 | nerrs += 1 |
|
176 | nerrs += 1 | |
172 | return nerrs |
|
177 | return nerrs | |
173 |
|
178 | |||
174 | def main(): |
|
179 | def main(): | |
175 | import getopt |
|
180 | import getopt | |
176 |
|
181 | |||
177 | # Empirically observed to take about a second to run |
|
182 | # Empirically observed to take about a second to run | |
178 | count = 100 |
|
183 | count = 100 | |
179 | seed = None |
|
184 | seed = None | |
180 | opts, args = getopt.getopt(sys.argv[1:], 'c:s:', |
|
185 | opts, args = getopt.getopt(sys.argv[1:], 'c:s:', | |
181 | ['build', 'count=', 'seed=']) |
|
186 | ['build', 'count=', 'seed=']) | |
182 | for o, a in opts: |
|
187 | for o, a in opts: | |
183 | if o in ('-c', '--count'): |
|
188 | if o in ('-c', '--count'): | |
184 | count = int(a) |
|
189 | count = int(a) | |
185 | elif o in ('-s', '--seed'): |
|
190 | elif o in ('-s', '--seed'): | |
186 | seed = long(a, base=0) # accepts base 10 or 16 strings |
|
191 | seed = long(a, base=0) # accepts base 10 or 16 strings | |
187 | elif o == '--build': |
|
192 | elif o == '--build': | |
188 | buildprobtable(sys.stdout, |
|
193 | buildprobtable(sys.stdout, | |
189 | 'find .hg/store/data -type f && ' |
|
194 | 'find .hg/store/data -type f && ' | |
190 | 'cat .hg/store/fncache 2>/dev/null') |
|
195 | 'cat .hg/store/fncache 2>/dev/null') | |
191 | sys.exit(0) |
|
196 | sys.exit(0) | |
192 |
|
197 | |||
193 | if seed is None: |
|
198 | if seed is None: | |
194 | try: |
|
199 | try: | |
195 | seed = long(binascii.hexlify(os.urandom(16)), 16) |
|
200 | seed = long(binascii.hexlify(os.urandom(16)), 16) | |
196 | except AttributeError: |
|
201 | except AttributeError: | |
197 | seed = long(time.time() * 1000) |
|
202 | seed = long(time.time() * 1000) | |
198 |
|
203 | |||
199 | rng = random.Random(seed) |
|
204 | rng = random.Random(seed) | |
200 | if runtests(rng, seed, count): |
|
205 | if runtests(rng, seed, count): | |
201 | sys.exit(1) |
|
206 | sys.exit(1) | |
202 |
|
207 | |||
203 | if __name__ == '__main__': |
|
208 | if __name__ == '__main__': | |
204 | main() |
|
209 | main() |
General Comments 0
You need to be logged in to leave comments.
Login now