Show More
@@ -1,190 +1,193 b'' | |||
|
1 | 1 | # This is a randomized test that generates different pathnames every |
|
2 | 2 | # time it is invoked, and tests the encoding of those pathnames. |
|
3 | 3 | # |
|
4 | 4 | # It uses a simple probabilistic model to generate valid pathnames |
|
5 | 5 | # that have proven likely to expose bugs and divergent behaviour in |
|
6 | 6 | # different encoding implementations. |
|
7 | 7 | |
|
8 | 8 | from mercurial import parsers |
|
9 | 9 | from mercurial import store |
|
10 | 10 | import binascii, itertools, math, os, random, sys, time |
|
11 | 11 | import collections |
|
12 | 12 | |
|
13 | if sys.version_info[:2] < (2, 6): | |
|
14 | sys.exit(0) | |
|
15 | ||
|
13 | 16 | def hybridencode(path): |
|
14 | 17 | return store._hybridencode(path, True) |
|
15 | 18 | |
|
16 | 19 | validchars = set(map(chr, range(0, 256))) |
|
17 | 20 | alphanum = range(ord('A'), ord('Z')) |
|
18 | 21 | |
|
19 | 22 | for c in '\0/': |
|
20 | 23 | validchars.remove(c) |
|
21 | 24 | |
|
22 | 25 | winreserved = ('aux con prn nul'.split() + |
|
23 | 26 | ['com%d' % i for i in xrange(1, 10)] + |
|
24 | 27 | ['lpt%d' % i for i in xrange(1, 10)]) |
|
25 | 28 | |
|
26 | 29 | def casecombinations(names): |
|
27 | 30 | '''Build all case-diddled combinations of names.''' |
|
28 | 31 | |
|
29 | 32 | combos = set() |
|
30 | 33 | |
|
31 | 34 | for r in names: |
|
32 | 35 | for i in xrange(len(r) + 1): |
|
33 | 36 | for c in itertools.combinations(xrange(len(r)), i): |
|
34 | 37 | d = r |
|
35 | 38 | for j in c: |
|
36 | 39 | d = ''.join((d[:j], d[j].upper(), d[j + 1:])) |
|
37 | 40 | combos.add(d) |
|
38 | 41 | return sorted(combos) |
|
39 | 42 | |
|
40 | 43 | def buildprobtable(fp, cmd='hg manifest tip'): |
|
41 | 44 | '''Construct and print a table of probabilities for path name |
|
42 | 45 | components. The numbers are percentages.''' |
|
43 | 46 | |
|
44 | 47 | counts = collections.defaultdict(lambda: 0) |
|
45 | 48 | for line in os.popen(cmd).read().splitlines(): |
|
46 | 49 | if line[-2:] in ('.i', '.d'): |
|
47 | 50 | line = line[:-2] |
|
48 | 51 | if line.startswith('data/'): |
|
49 | 52 | line = line[5:] |
|
50 | 53 | for c in line: |
|
51 | 54 | counts[c] += 1 |
|
52 | 55 | for c in '\r/\n': |
|
53 | 56 | counts.pop(c, None) |
|
54 | 57 | t = sum(counts.itervalues()) / 100.0 |
|
55 | 58 | fp.write('probtable = (') |
|
56 | 59 | for i, (k, v) in enumerate(sorted(counts.iteritems(), key=lambda x: x[1], |
|
57 | 60 | reverse=True)): |
|
58 | 61 | if (i % 5) == 0: |
|
59 | 62 | fp.write('\n ') |
|
60 | 63 | vt = v / t |
|
61 | 64 | if vt < 0.0005: |
|
62 | 65 | break |
|
63 | 66 | fp.write('(%r, %.03f), ' % (k, vt)) |
|
64 | 67 | fp.write('\n )\n') |
|
65 | 68 | |
|
66 | 69 | # A table of character frequencies (as percentages), gleaned by |
|
67 | 70 | # looking at filelog names from a real-world, very large repo. |
|
68 | 71 | |
|
69 | 72 | probtable = ( |
|
70 | 73 | ('t', 9.828), ('e', 9.042), ('s', 8.011), ('a', 6.801), ('i', 6.618), |
|
71 | 74 | ('g', 5.053), ('r', 5.030), ('o', 4.887), ('p', 4.363), ('n', 4.258), |
|
72 | 75 | ('l', 3.830), ('h', 3.693), ('_', 3.659), ('.', 3.377), ('m', 3.194), |
|
73 | 76 | ('u', 2.364), ('d', 2.296), ('c', 2.163), ('b', 1.739), ('f', 1.625), |
|
74 | 77 | ('6', 0.666), ('j', 0.610), ('y', 0.554), ('x', 0.487), ('w', 0.477), |
|
75 | 78 | ('k', 0.476), ('v', 0.473), ('3', 0.336), ('1', 0.335), ('2', 0.326), |
|
76 | 79 | ('4', 0.310), ('5', 0.305), ('9', 0.302), ('8', 0.300), ('7', 0.299), |
|
77 | 80 | ('q', 0.298), ('0', 0.250), ('z', 0.223), ('-', 0.118), ('C', 0.095), |
|
78 | 81 | ('T', 0.087), ('F', 0.085), ('B', 0.077), ('S', 0.076), ('P', 0.076), |
|
79 | 82 | ('L', 0.059), ('A', 0.058), ('N', 0.051), ('D', 0.049), ('M', 0.046), |
|
80 | 83 | ('E', 0.039), ('I', 0.035), ('R', 0.035), ('G', 0.028), ('U', 0.026), |
|
81 | 84 | ('W', 0.025), ('O', 0.017), ('V', 0.015), ('H', 0.013), ('Q', 0.011), |
|
82 | 85 | ('J', 0.007), ('K', 0.005), ('+', 0.004), ('X', 0.003), ('Y', 0.001), |
|
83 | 86 | ) |
|
84 | 87 | |
|
85 | 88 | for c, _ in probtable: |
|
86 | 89 | validchars.remove(c) |
|
87 | 90 | validchars = list(validchars) |
|
88 | 91 | |
|
89 | 92 | def pickfrom(rng, table): |
|
90 | 93 | c = 0 |
|
91 | 94 | r = rng.random() * sum(i[1] for i in table) |
|
92 | 95 | for i, p in table: |
|
93 | 96 | c += p |
|
94 | 97 | if c >= r: |
|
95 | 98 | return i |
|
96 | 99 | |
|
97 | 100 | reservedcombos = casecombinations(winreserved) |
|
98 | 101 | |
|
99 | 102 | # The first component of a name following a slash. |
|
100 | 103 | |
|
101 | 104 | firsttable = ( |
|
102 | 105 | (lambda rng: pickfrom(rng, probtable), 90), |
|
103 | 106 | (lambda rng: rng.choice(validchars), 5), |
|
104 | 107 | (lambda rng: rng.choice(reservedcombos), 5), |
|
105 | 108 | ) |
|
106 | 109 | |
|
107 | 110 | # Components of a name following the first. |
|
108 | 111 | |
|
109 | 112 | resttable = firsttable[:-1] |
|
110 | 113 | |
|
111 | 114 | # Special suffixes. |
|
112 | 115 | |
|
113 | 116 | internalsuffixcombos = casecombinations('.hg .i .d'.split()) |
|
114 | 117 | |
|
115 | 118 | # The last component of a path, before a slash or at the end of a name. |
|
116 | 119 | |
|
117 | 120 | lasttable = resttable + ( |
|
118 | 121 | (lambda rng: '', 95), |
|
119 | 122 | (lambda rng: rng.choice(internalsuffixcombos), 5), |
|
120 | 123 | ) |
|
121 | 124 | |
|
122 | 125 | def makepart(rng, k): |
|
123 | 126 | '''Construct a part of a pathname, without slashes.''' |
|
124 | 127 | |
|
125 | 128 | p = pickfrom(rng, firsttable)(rng) |
|
126 | 129 | l = len(p) |
|
127 | 130 | ps = [p] |
|
128 | 131 | while l <= k: |
|
129 | 132 | p = pickfrom(rng, resttable)(rng) |
|
130 | 133 | l += len(p) |
|
131 | 134 | ps.append(p) |
|
132 | 135 | ps.append(pickfrom(rng, lasttable)(rng)) |
|
133 | 136 | return ''.join(ps) |
|
134 | 137 | |
|
135 | 138 | def makepath(rng, j, k): |
|
136 | 139 | '''Construct a complete pathname.''' |
|
137 | 140 | |
|
138 | 141 | return ('data/' + '/'.join(makepart(rng, k) for _ in xrange(j)) + |
|
139 | 142 | rng.choice(['.d', '.i'])) |
|
140 | 143 | |
|
141 | 144 | def genpath(rng, count): |
|
142 | 145 | '''Generate random pathnames with gradually increasing lengths.''' |
|
143 | 146 | |
|
144 | 147 | mink, maxk = 1, 4096 |
|
145 | 148 | def steps(): |
|
146 | 149 | x, k = 0, mink |
|
147 | 150 | for i in xrange(count): |
|
148 | 151 | yield mink + int(round(math.sqrt((maxk - mink) * float(i) / count))) |
|
149 | 152 | for k in steps(): |
|
150 | 153 | x = rng.randint(1, k) |
|
151 | 154 | y = rng.randint(1, k) |
|
152 | 155 | yield makepath(rng, x, y) |
|
153 | 156 | |
|
154 | 157 | def runtests(rng, seed, count): |
|
155 | 158 | nerrs = 0 |
|
156 | 159 | for p in genpath(rng, count): |
|
157 | 160 | hybridencode(p) |
|
158 | 161 | return nerrs |
|
159 | 162 | |
|
160 | 163 | def main(): |
|
161 | 164 | import getopt |
|
162 | 165 | |
|
163 | 166 | # Empirically observed to take about a second to run |
|
164 | 167 | count = 100 |
|
165 | 168 | seed = None |
|
166 | 169 | opts, args = getopt.getopt(sys.argv[1:], 'c:s:', |
|
167 | 170 | ['build', 'count=', 'seed=']) |
|
168 | 171 | for o, a in opts: |
|
169 | 172 | if o in ('-c', '--count'): |
|
170 | 173 | count = int(a) |
|
171 | 174 | elif o in ('-s', '--seed'): |
|
172 | 175 | seed = long(a) |
|
173 | 176 | elif o == '--build': |
|
174 | 177 | buildprobtable(sys.stdout, |
|
175 | 178 | 'find .hg/store/data -type f && ' |
|
176 | 179 | 'cat .hg/store/fncache 2>/dev/null') |
|
177 | 180 | sys.exit(0) |
|
178 | 181 | |
|
179 | 182 | if seed is None: |
|
180 | 183 | try: |
|
181 | 184 | seed = long(binascii.hexlify(os.urandom(16)), 16) |
|
182 | 185 | except AttributeError: |
|
183 | 186 | seed = long(time.time() * 1000) |
|
184 | 187 | |
|
185 | 188 | rng = random.Random(seed) |
|
186 | 189 | if runtests(rng, seed, count): |
|
187 | 190 | sys.exit(1) |
|
188 | 191 | |
|
189 | if __name__ == '__main__' and sys.version_info[:2] >= (2, 6): | |
|
192 | if __name__ == '__main__': | |
|
190 | 193 | main() |
General Comments 0
You need to be logged in to leave comments.
Login now