Show More
@@ -1,198 +1,199 | |||||
1 | # This is a randomized test that generates different pathnames every |
|
1 | # This is a randomized test that generates different pathnames every | |
2 | # time it is invoked, and tests the encoding of those pathnames. |
|
2 | # time it is invoked, and tests the encoding of those pathnames. | |
3 | # |
|
3 | # | |
4 | # It uses a simple probabilistic model to generate valid pathnames |
|
4 | # It uses a simple probabilistic model to generate valid pathnames | |
5 | # that have proven likely to expose bugs and divergent behaviour in |
|
5 | # that have proven likely to expose bugs and divergent behaviour in | |
6 | # different encoding implementations. |
|
6 | # different encoding implementations. | |
7 |
|
7 | |||
8 | from mercurial import parsers |
|
8 | from mercurial import parsers | |
9 | from mercurial import store |
|
9 | from mercurial import store | |
10 | import binascii, itertools, math, os, random, sys, time |
|
10 | import binascii, itertools, math, os, random, sys, time | |
11 | import collections |
|
11 | import collections | |
12 |
|
12 | |||
13 | if sys.version_info[:2] < (2, 6): |
|
13 | if sys.version_info[:2] < (2, 6): | |
14 | sys.exit(0) |
|
14 | sys.exit(0) | |
15 |
|
15 | |||
16 | validchars = set(map(chr, range(0, 256))) |
|
16 | validchars = set(map(chr, range(0, 256))) | |
17 | alphanum = range(ord('A'), ord('Z')) |
|
17 | alphanum = range(ord('A'), ord('Z')) | |
18 |
|
18 | |||
19 | for c in '\0/': |
|
19 | for c in '\0/': | |
20 | validchars.remove(c) |
|
20 | validchars.remove(c) | |
21 |
|
21 | |||
22 | winreserved = ('aux con prn nul'.split() + |
|
22 | winreserved = ('aux con prn nul'.split() + | |
23 | ['com%d' % i for i in xrange(1, 10)] + |
|
23 | ['com%d' % i for i in xrange(1, 10)] + | |
24 | ['lpt%d' % i for i in xrange(1, 10)]) |
|
24 | ['lpt%d' % i for i in xrange(1, 10)]) | |
25 |
|
25 | |||
26 | def casecombinations(names): |
|
26 | def casecombinations(names): | |
27 | '''Build all case-diddled combinations of names.''' |
|
27 | '''Build all case-diddled combinations of names.''' | |
28 |
|
28 | |||
29 | combos = set() |
|
29 | combos = set() | |
30 |
|
30 | |||
31 | for r in names: |
|
31 | for r in names: | |
32 | for i in xrange(len(r) + 1): |
|
32 | for i in xrange(len(r) + 1): | |
33 | for c in itertools.combinations(xrange(len(r)), i): |
|
33 | for c in itertools.combinations(xrange(len(r)), i): | |
34 | d = r |
|
34 | d = r | |
35 | for j in c: |
|
35 | for j in c: | |
36 | d = ''.join((d[:j], d[j].upper(), d[j + 1:])) |
|
36 | d = ''.join((d[:j], d[j].upper(), d[j + 1:])) | |
37 | combos.add(d) |
|
37 | combos.add(d) | |
38 | return sorted(combos) |
|
38 | return sorted(combos) | |
39 |
|
39 | |||
40 | def buildprobtable(fp, cmd='hg manifest tip'): |
|
40 | def buildprobtable(fp, cmd='hg manifest tip'): | |
41 | '''Construct and print a table of probabilities for path name |
|
41 | '''Construct and print a table of probabilities for path name | |
42 | components. The numbers are percentages.''' |
|
42 | components. The numbers are percentages.''' | |
43 |
|
43 | |||
44 | counts = collections.defaultdict(lambda: 0) |
|
44 | counts = collections.defaultdict(lambda: 0) | |
45 | for line in os.popen(cmd).read().splitlines(): |
|
45 | for line in os.popen(cmd).read().splitlines(): | |
46 | if line[-2:] in ('.i', '.d'): |
|
46 | if line[-2:] in ('.i', '.d'): | |
47 | line = line[:-2] |
|
47 | line = line[:-2] | |
48 | if line.startswith('data/'): |
|
48 | if line.startswith('data/'): | |
49 | line = line[5:] |
|
49 | line = line[5:] | |
50 | for c in line: |
|
50 | for c in line: | |
51 | counts[c] += 1 |
|
51 | counts[c] += 1 | |
52 | for c in '\r/\n': |
|
52 | for c in '\r/\n': | |
53 | counts.pop(c, None) |
|
53 | counts.pop(c, None) | |
54 | t = sum(counts.itervalues()) / 100.0 |
|
54 | t = sum(counts.itervalues()) / 100.0 | |
55 | fp.write('probtable = (') |
|
55 | fp.write('probtable = (') | |
56 | for i, (k, v) in enumerate(sorted(counts.iteritems(), key=lambda x: x[1], |
|
56 | for i, (k, v) in enumerate(sorted(counts.iteritems(), key=lambda x: x[1], | |
57 | reverse=True)): |
|
57 | reverse=True)): | |
58 | if (i % 5) == 0: |
|
58 | if (i % 5) == 0: | |
59 | fp.write('\n ') |
|
59 | fp.write('\n ') | |
60 | vt = v / t |
|
60 | vt = v / t | |
61 | if vt < 0.0005: |
|
61 | if vt < 0.0005: | |
62 | break |
|
62 | break | |
63 | fp.write('(%r, %.03f), ' % (k, vt)) |
|
63 | fp.write('(%r, %.03f), ' % (k, vt)) | |
64 | fp.write('\n )\n') |
|
64 | fp.write('\n )\n') | |
65 |
|
65 | |||
66 | # A table of character frequencies (as percentages), gleaned by |
|
66 | # A table of character frequencies (as percentages), gleaned by | |
67 | # looking at filelog names from a real-world, very large repo. |
|
67 | # looking at filelog names from a real-world, very large repo. | |
68 |
|
68 | |||
69 | probtable = ( |
|
69 | probtable = ( | |
70 | ('t', 9.828), ('e', 9.042), ('s', 8.011), ('a', 6.801), ('i', 6.618), |
|
70 | ('t', 9.828), ('e', 9.042), ('s', 8.011), ('a', 6.801), ('i', 6.618), | |
71 | ('g', 5.053), ('r', 5.030), ('o', 4.887), ('p', 4.363), ('n', 4.258), |
|
71 | ('g', 5.053), ('r', 5.030), ('o', 4.887), ('p', 4.363), ('n', 4.258), | |
72 | ('l', 3.830), ('h', 3.693), ('_', 3.659), ('.', 3.377), ('m', 3.194), |
|
72 | ('l', 3.830), ('h', 3.693), ('_', 3.659), ('.', 3.377), ('m', 3.194), | |
73 | ('u', 2.364), ('d', 2.296), ('c', 2.163), ('b', 1.739), ('f', 1.625), |
|
73 | ('u', 2.364), ('d', 2.296), ('c', 2.163), ('b', 1.739), ('f', 1.625), | |
74 | ('6', 0.666), ('j', 0.610), ('y', 0.554), ('x', 0.487), ('w', 0.477), |
|
74 | ('6', 0.666), ('j', 0.610), ('y', 0.554), ('x', 0.487), ('w', 0.477), | |
75 | ('k', 0.476), ('v', 0.473), ('3', 0.336), ('1', 0.335), ('2', 0.326), |
|
75 | ('k', 0.476), ('v', 0.473), ('3', 0.336), ('1', 0.335), ('2', 0.326), | |
76 | ('4', 0.310), ('5', 0.305), ('9', 0.302), ('8', 0.300), ('7', 0.299), |
|
76 | ('4', 0.310), ('5', 0.305), ('9', 0.302), ('8', 0.300), ('7', 0.299), | |
77 | ('q', 0.298), ('0', 0.250), ('z', 0.223), ('-', 0.118), ('C', 0.095), |
|
77 | ('q', 0.298), ('0', 0.250), ('z', 0.223), ('-', 0.118), ('C', 0.095), | |
78 | ('T', 0.087), ('F', 0.085), ('B', 0.077), ('S', 0.076), ('P', 0.076), |
|
78 | ('T', 0.087), ('F', 0.085), ('B', 0.077), ('S', 0.076), ('P', 0.076), | |
79 | ('L', 0.059), ('A', 0.058), ('N', 0.051), ('D', 0.049), ('M', 0.046), |
|
79 | ('L', 0.059), ('A', 0.058), ('N', 0.051), ('D', 0.049), ('M', 0.046), | |
80 | ('E', 0.039), ('I', 0.035), ('R', 0.035), ('G', 0.028), ('U', 0.026), |
|
80 | ('E', 0.039), ('I', 0.035), ('R', 0.035), ('G', 0.028), ('U', 0.026), | |
81 | ('W', 0.025), ('O', 0.017), ('V', 0.015), ('H', 0.013), ('Q', 0.011), |
|
81 | ('W', 0.025), ('O', 0.017), ('V', 0.015), ('H', 0.013), ('Q', 0.011), | |
82 | ('J', 0.007), ('K', 0.005), ('+', 0.004), ('X', 0.003), ('Y', 0.001), |
|
82 | ('J', 0.007), ('K', 0.005), ('+', 0.004), ('X', 0.003), ('Y', 0.001), | |
83 | ) |
|
83 | ) | |
84 |
|
84 | |||
85 | for c, _ in probtable: |
|
85 | for c, _ in probtable: | |
86 | validchars.remove(c) |
|
86 | validchars.remove(c) | |
87 | validchars = list(validchars) |
|
87 | validchars = list(validchars) | |
88 |
|
88 | |||
89 | def pickfrom(rng, table): |
|
89 | def pickfrom(rng, table): | |
90 | c = 0 |
|
90 | c = 0 | |
91 | r = rng.random() * sum(i[1] for i in table) |
|
91 | r = rng.random() * sum(i[1] for i in table) | |
92 | for i, p in table: |
|
92 | for i, p in table: | |
93 | c += p |
|
93 | c += p | |
94 | if c >= r: |
|
94 | if c >= r: | |
95 | return i |
|
95 | return i | |
96 |
|
96 | |||
97 | reservedcombos = casecombinations(winreserved) |
|
97 | reservedcombos = casecombinations(winreserved) | |
98 |
|
98 | |||
99 | # The first component of a name following a slash. |
|
99 | # The first component of a name following a slash. | |
100 |
|
100 | |||
101 | firsttable = ( |
|
101 | firsttable = ( | |
102 | (lambda rng: pickfrom(rng, probtable), 90), |
|
102 | (lambda rng: pickfrom(rng, probtable), 90), | |
103 | (lambda rng: rng.choice(validchars), 5), |
|
103 | (lambda rng: rng.choice(validchars), 5), | |
104 | (lambda rng: rng.choice(reservedcombos), 5), |
|
104 | (lambda rng: rng.choice(reservedcombos), 5), | |
105 | ) |
|
105 | ) | |
106 |
|
106 | |||
107 | # Components of a name following the first. |
|
107 | # Components of a name following the first. | |
108 |
|
108 | |||
109 | resttable = firsttable[:-1] |
|
109 | resttable = firsttable[:-1] | |
110 |
|
110 | |||
111 | # Special suffixes. |
|
111 | # Special suffixes. | |
112 |
|
112 | |||
113 | internalsuffixcombos = casecombinations('.hg .i .d'.split()) |
|
113 | internalsuffixcombos = casecombinations('.hg .i .d'.split()) | |
114 |
|
114 | |||
115 | # The last component of a path, before a slash or at the end of a name. |
|
115 | # The last component of a path, before a slash or at the end of a name. | |
116 |
|
116 | |||
117 | lasttable = resttable + ( |
|
117 | lasttable = resttable + ( | |
118 | (lambda rng: '', 95), |
|
118 | (lambda rng: '', 95), | |
119 | (lambda rng: rng.choice(internalsuffixcombos), 5), |
|
119 | (lambda rng: rng.choice(internalsuffixcombos), 5), | |
120 | ) |
|
120 | ) | |
121 |
|
121 | |||
122 | def makepart(rng, k): |
|
122 | def makepart(rng, k): | |
123 | '''Construct a part of a pathname, without slashes.''' |
|
123 | '''Construct a part of a pathname, without slashes.''' | |
124 |
|
124 | |||
125 | p = pickfrom(rng, firsttable)(rng) |
|
125 | p = pickfrom(rng, firsttable)(rng) | |
126 | l = len(p) |
|
126 | l = len(p) | |
127 | ps = [p] |
|
127 | ps = [p] | |
128 | while l < k: |
|
128 | maxl = rng.randint(1, k) | |
|
129 | while l < maxl: | |||
129 | p = pickfrom(rng, resttable)(rng) |
|
130 | p = pickfrom(rng, resttable)(rng) | |
130 | l += len(p) |
|
131 | l += len(p) | |
131 | ps.append(p) |
|
132 | ps.append(p) | |
132 | ps.append(pickfrom(rng, lasttable)(rng)) |
|
133 | ps.append(pickfrom(rng, lasttable)(rng)) | |
133 | return ''.join(ps) |
|
134 | return ''.join(ps) | |
134 |
|
135 | |||
135 | def makepath(rng, j, k): |
|
136 | def makepath(rng, j, k): | |
136 | '''Construct a complete pathname.''' |
|
137 | '''Construct a complete pathname.''' | |
137 |
|
138 | |||
138 | return ('data/' + '/'.join(makepart(rng, k) for _ in xrange(j)) + |
|
139 | return ('data/' + '/'.join(makepart(rng, k) for _ in xrange(j)) + | |
139 | rng.choice(['.d', '.i'])) |
|
140 | rng.choice(['.d', '.i'])) | |
140 |
|
141 | |||
141 | def genpath(rng, count): |
|
142 | def genpath(rng, count): | |
142 | '''Generate random pathnames with gradually increasing lengths.''' |
|
143 | '''Generate random pathnames with gradually increasing lengths.''' | |
143 |
|
144 | |||
144 | mink, maxk = 1, 4096 |
|
145 | mink, maxk = 1, 4096 | |
145 | def steps(): |
|
146 | def steps(): | |
146 | x, k = 0, mink |
|
147 | x, k = 0, mink | |
147 | for i in xrange(count): |
|
148 | for i in xrange(count): | |
148 | yield mink + int(round(math.sqrt((maxk - mink) * float(i) / count))) |
|
149 | yield mink + int(round(math.sqrt((maxk - mink) * float(i) / count))) | |
149 | for k in steps(): |
|
150 | for k in steps(): | |
150 | x = rng.randint(1, k) |
|
151 | x = rng.randint(1, k) | |
151 | y = rng.randint(1, k) |
|
152 | y = rng.randint(1, k) | |
152 | yield makepath(rng, x, y) |
|
153 | yield makepath(rng, x, y) | |
153 |
|
154 | |||
154 | def runtests(rng, seed, count): |
|
155 | def runtests(rng, seed, count): | |
155 | nerrs = 0 |
|
156 | nerrs = 0 | |
156 | for p in genpath(rng, count): |
|
157 | for p in genpath(rng, count): | |
157 | h = store._pathencode(p) # uses C implementation, if available |
|
158 | h = store._pathencode(p) # uses C implementation, if available | |
158 | r = store._hybridencode(p, True) # reference implementation in Python |
|
159 | r = store._hybridencode(p, True) # reference implementation in Python | |
159 | if h != r: |
|
160 | if h != r: | |
160 | if nerrs == 0: |
|
161 | if nerrs == 0: | |
161 | print >> sys.stderr, 'seed:', hex(seed)[:-1] |
|
162 | print >> sys.stderr, 'seed:', hex(seed)[:-1] | |
162 | print >> sys.stderr, "\np: '%s'" % p.encode("string_escape") |
|
163 | print >> sys.stderr, "\np: '%s'" % p.encode("string_escape") | |
163 | print >> sys.stderr, "h: '%s'" % h.encode("string_escape") |
|
164 | print >> sys.stderr, "h: '%s'" % h.encode("string_escape") | |
164 | print >> sys.stderr, "r: '%s'" % r.encode("string_escape") |
|
165 | print >> sys.stderr, "r: '%s'" % r.encode("string_escape") | |
165 | nerrs += 1 |
|
166 | nerrs += 1 | |
166 | return nerrs |
|
167 | return nerrs | |
167 |
|
168 | |||
168 | def main(): |
|
169 | def main(): | |
169 | import getopt |
|
170 | import getopt | |
170 |
|
171 | |||
171 | # Empirically observed to take about a second to run |
|
172 | # Empirically observed to take about a second to run | |
172 | count = 100 |
|
173 | count = 100 | |
173 | seed = None |
|
174 | seed = None | |
174 | opts, args = getopt.getopt(sys.argv[1:], 'c:s:', |
|
175 | opts, args = getopt.getopt(sys.argv[1:], 'c:s:', | |
175 | ['build', 'count=', 'seed=']) |
|
176 | ['build', 'count=', 'seed=']) | |
176 | for o, a in opts: |
|
177 | for o, a in opts: | |
177 | if o in ('-c', '--count'): |
|
178 | if o in ('-c', '--count'): | |
178 | count = int(a) |
|
179 | count = int(a) | |
179 | elif o in ('-s', '--seed'): |
|
180 | elif o in ('-s', '--seed'): | |
180 | seed = long(a, base=0) # accepts base 10 or 16 strings |
|
181 | seed = long(a, base=0) # accepts base 10 or 16 strings | |
181 | elif o == '--build': |
|
182 | elif o == '--build': | |
182 | buildprobtable(sys.stdout, |
|
183 | buildprobtable(sys.stdout, | |
183 | 'find .hg/store/data -type f && ' |
|
184 | 'find .hg/store/data -type f && ' | |
184 | 'cat .hg/store/fncache 2>/dev/null') |
|
185 | 'cat .hg/store/fncache 2>/dev/null') | |
185 | sys.exit(0) |
|
186 | sys.exit(0) | |
186 |
|
187 | |||
187 | if seed is None: |
|
188 | if seed is None: | |
188 | try: |
|
189 | try: | |
189 | seed = long(binascii.hexlify(os.urandom(16)), 16) |
|
190 | seed = long(binascii.hexlify(os.urandom(16)), 16) | |
190 | except AttributeError: |
|
191 | except AttributeError: | |
191 | seed = long(time.time() * 1000) |
|
192 | seed = long(time.time() * 1000) | |
192 |
|
193 | |||
193 | rng = random.Random(seed) |
|
194 | rng = random.Random(seed) | |
194 | if runtests(rng, seed, count): |
|
195 | if runtests(rng, seed, count): | |
195 | sys.exit(1) |
|
196 | sys.exit(1) | |
196 |
|
197 | |||
197 | if __name__ == '__main__': |
|
198 | if __name__ == '__main__': | |
198 | main() |
|
199 | main() |
General Comments 0
You need to be logged in to leave comments.
Login now