##// END OF EJS Templates
test-pathencode: more aggressively check for python < 2.6
Bryan O'Sullivan -
r17947:f945caa5 default
parent child Browse files
Show More
@@ -1,190 +1,193 b''
1 1 # This is a randomized test that generates different pathnames every
2 2 # time it is invoked, and tests the encoding of those pathnames.
3 3 #
4 4 # It uses a simple probabilistic model to generate valid pathnames
5 5 # that have proven likely to expose bugs and divergent behaviour in
6 6 # different encoding implementations.
7 7
8 8 from mercurial import parsers
9 9 from mercurial import store
10 10 import binascii, itertools, math, os, random, sys, time
11 11 import collections
12 12
13 if sys.version_info[:2] < (2, 6):
14 sys.exit(0)
15
13 16 def hybridencode(path):
14 17 return store._hybridencode(path, True)
15 18
16 19 validchars = set(map(chr, range(0, 256)))
17 20 alphanum = range(ord('A'), ord('Z'))
18 21
19 22 for c in '\0/':
20 23 validchars.remove(c)
21 24
22 25 winreserved = ('aux con prn nul'.split() +
23 26 ['com%d' % i for i in xrange(1, 10)] +
24 27 ['lpt%d' % i for i in xrange(1, 10)])
25 28
26 29 def casecombinations(names):
27 30 '''Build all case-diddled combinations of names.'''
28 31
29 32 combos = set()
30 33
31 34 for r in names:
32 35 for i in xrange(len(r) + 1):
33 36 for c in itertools.combinations(xrange(len(r)), i):
34 37 d = r
35 38 for j in c:
36 39 d = ''.join((d[:j], d[j].upper(), d[j + 1:]))
37 40 combos.add(d)
38 41 return sorted(combos)
39 42
40 43 def buildprobtable(fp, cmd='hg manifest tip'):
41 44 '''Construct and print a table of probabilities for path name
42 45 components. The numbers are percentages.'''
43 46
44 47 counts = collections.defaultdict(lambda: 0)
45 48 for line in os.popen(cmd).read().splitlines():
46 49 if line[-2:] in ('.i', '.d'):
47 50 line = line[:-2]
48 51 if line.startswith('data/'):
49 52 line = line[5:]
50 53 for c in line:
51 54 counts[c] += 1
52 55 for c in '\r/\n':
53 56 counts.pop(c, None)
54 57 t = sum(counts.itervalues()) / 100.0
55 58 fp.write('probtable = (')
56 59 for i, (k, v) in enumerate(sorted(counts.iteritems(), key=lambda x: x[1],
57 60 reverse=True)):
58 61 if (i % 5) == 0:
59 62 fp.write('\n ')
60 63 vt = v / t
61 64 if vt < 0.0005:
62 65 break
63 66 fp.write('(%r, %.03f), ' % (k, vt))
64 67 fp.write('\n )\n')
65 68
66 69 # A table of character frequencies (as percentages), gleaned by
67 70 # looking at filelog names from a real-world, very large repo.
68 71
69 72 probtable = (
70 73 ('t', 9.828), ('e', 9.042), ('s', 8.011), ('a', 6.801), ('i', 6.618),
71 74 ('g', 5.053), ('r', 5.030), ('o', 4.887), ('p', 4.363), ('n', 4.258),
72 75 ('l', 3.830), ('h', 3.693), ('_', 3.659), ('.', 3.377), ('m', 3.194),
73 76 ('u', 2.364), ('d', 2.296), ('c', 2.163), ('b', 1.739), ('f', 1.625),
74 77 ('6', 0.666), ('j', 0.610), ('y', 0.554), ('x', 0.487), ('w', 0.477),
75 78 ('k', 0.476), ('v', 0.473), ('3', 0.336), ('1', 0.335), ('2', 0.326),
76 79 ('4', 0.310), ('5', 0.305), ('9', 0.302), ('8', 0.300), ('7', 0.299),
77 80 ('q', 0.298), ('0', 0.250), ('z', 0.223), ('-', 0.118), ('C', 0.095),
78 81 ('T', 0.087), ('F', 0.085), ('B', 0.077), ('S', 0.076), ('P', 0.076),
79 82 ('L', 0.059), ('A', 0.058), ('N', 0.051), ('D', 0.049), ('M', 0.046),
80 83 ('E', 0.039), ('I', 0.035), ('R', 0.035), ('G', 0.028), ('U', 0.026),
81 84 ('W', 0.025), ('O', 0.017), ('V', 0.015), ('H', 0.013), ('Q', 0.011),
82 85 ('J', 0.007), ('K', 0.005), ('+', 0.004), ('X', 0.003), ('Y', 0.001),
83 86 )
84 87
85 88 for c, _ in probtable:
86 89 validchars.remove(c)
87 90 validchars = list(validchars)
88 91
89 92 def pickfrom(rng, table):
90 93 c = 0
91 94 r = rng.random() * sum(i[1] for i in table)
92 95 for i, p in table:
93 96 c += p
94 97 if c >= r:
95 98 return i
96 99
97 100 reservedcombos = casecombinations(winreserved)
98 101
99 102 # The first component of a name following a slash.
100 103
101 104 firsttable = (
102 105 (lambda rng: pickfrom(rng, probtable), 90),
103 106 (lambda rng: rng.choice(validchars), 5),
104 107 (lambda rng: rng.choice(reservedcombos), 5),
105 108 )
106 109
107 110 # Components of a name following the first.
108 111
109 112 resttable = firsttable[:-1]
110 113
111 114 # Special suffixes.
112 115
113 116 internalsuffixcombos = casecombinations('.hg .i .d'.split())
114 117
115 118 # The last component of a path, before a slash or at the end of a name.
116 119
117 120 lasttable = resttable + (
118 121 (lambda rng: '', 95),
119 122 (lambda rng: rng.choice(internalsuffixcombos), 5),
120 123 )
121 124
122 125 def makepart(rng, k):
123 126 '''Construct a part of a pathname, without slashes.'''
124 127
125 128 p = pickfrom(rng, firsttable)(rng)
126 129 l = len(p)
127 130 ps = [p]
128 131 while l <= k:
129 132 p = pickfrom(rng, resttable)(rng)
130 133 l += len(p)
131 134 ps.append(p)
132 135 ps.append(pickfrom(rng, lasttable)(rng))
133 136 return ''.join(ps)
134 137
135 138 def makepath(rng, j, k):
136 139 '''Construct a complete pathname.'''
137 140
138 141 return ('data/' + '/'.join(makepart(rng, k) for _ in xrange(j)) +
139 142 rng.choice(['.d', '.i']))
140 143
141 144 def genpath(rng, count):
142 145 '''Generate random pathnames with gradually increasing lengths.'''
143 146
144 147 mink, maxk = 1, 4096
145 148 def steps():
146 149 x, k = 0, mink
147 150 for i in xrange(count):
148 151 yield mink + int(round(math.sqrt((maxk - mink) * float(i) / count)))
149 152 for k in steps():
150 153 x = rng.randint(1, k)
151 154 y = rng.randint(1, k)
152 155 yield makepath(rng, x, y)
153 156
154 157 def runtests(rng, seed, count):
155 158 nerrs = 0
156 159 for p in genpath(rng, count):
157 160 hybridencode(p)
158 161 return nerrs
159 162
160 163 def main():
161 164 import getopt
162 165
163 166 # Empirically observed to take about a second to run
164 167 count = 100
165 168 seed = None
166 169 opts, args = getopt.getopt(sys.argv[1:], 'c:s:',
167 170 ['build', 'count=', 'seed='])
168 171 for o, a in opts:
169 172 if o in ('-c', '--count'):
170 173 count = int(a)
171 174 elif o in ('-s', '--seed'):
172 175 seed = long(a)
173 176 elif o == '--build':
174 177 buildprobtable(sys.stdout,
175 178 'find .hg/store/data -type f && '
176 179 'cat .hg/store/fncache 2>/dev/null')
177 180 sys.exit(0)
178 181
179 182 if seed is None:
180 183 try:
181 184 seed = long(binascii.hexlify(os.urandom(16)), 16)
182 185 except AttributeError:
183 186 seed = long(time.time() * 1000)
184 187
185 188 rng = random.Random(seed)
186 189 if runtests(rng, seed, count):
187 190 sys.exit(1)
188 191
189 if __name__ == '__main__' and sys.version_info[:2] >= (2, 6):
192 if __name__ == '__main__':
190 193 main()
General Comments 0
You need to be logged in to leave comments. Login now