##// END OF EJS Templates
test-pathencode: more aggressively check for python < 2.6
Bryan O'Sullivan -
r17947:f945caa5 default
parent child Browse files
Show More
@@ -1,190 +1,193 b''
# This is a randomized test that generates different pathnames every
# time it is invoked, and tests the encoding of those pathnames.
#
# It uses a simple probabilistic model to generate valid pathnames
# that have proven likely to expose bugs and divergent behaviour in
# different encoding implementations.
7
7
8 from mercurial import parsers
8 from mercurial import parsers
9 from mercurial import store
9 from mercurial import store
10 import binascii, itertools, math, os, random, sys, time
10 import binascii, itertools, math, os, random, sys, time
11 import collections
11 import collections
12
12
# Bail out successfully on Python older than 2.6.  This has to happen
# before the rest of the module body runs, because module-level code
# further down calls casecombinations(), which relies on
# itertools.combinations (presumably the reason for the 2.6 floor).
if sys.version_info < (2, 6):
    sys.exit(0)
def hybridencode(path):
    '''Encode path with store._hybridencode, always passing True as
    its second argument.'''

    encoded = store._hybridencode(path, True)
    return encoded
15
18
# Every one-character string for codes 0..255, except NUL and the
# path separator.
validchars = set(map(chr, range(256)))
validchars.remove('\0')
validchars.remove('/')

# NOTE(review): the stop value is exclusive, so this spans 'A'..'Y'
# only, and nothing visible in this file reads it -- confirm intent.
alphanum = range(ord('A'), ord('Z'))

# Base names plus numbered com1..com9 and lpt1..lpt9 device names.
winreserved = ['aux', 'con', 'prn', 'nul']
for prefix in ('com', 'lpt'):
    winreserved.extend(['%s%d' % (prefix, n) for n in range(1, 10)])
25
28
def casecombinations(names):
    '''Return a sorted list of every variant of each name obtained by
    upper-casing any subset of its character positions.

    The unmodified name is included (the empty subset), and duplicates
    across names collapse because results are gathered in a set.'''

    variants = set()

    for name in names:
        positions = range(len(name))
        for size in range(len(name) + 1):
            for chosen in itertools.combinations(positions, size):
                variant = name
                for pos in chosen:
                    upper = variant[pos].upper()
                    variant = variant[:pos] + upper + variant[pos + 1:]
                variants.add(variant)
    return sorted(variants)
39
42
def buildprobtable(fp, cmd='hg manifest tip'):
    '''Construct and print a table of probabilities for path name
    components. The numbers are percentages.

    fp is a file-like object that receives the generated
    "probtable = (...)" source text.  cmd is a shell command whose
    output lists one pathname per line; a trailing '.i' or '.d' and a
    leading 'data/' are stripped from each line before its characters
    are counted.'''

    counts = collections.defaultdict(int)
    # Close the pipe explicitly rather than leaving it to garbage
    # collection.
    pipe = os.popen(cmd)
    try:
        lines = pipe.read().splitlines()
    finally:
        pipe.close()
    for line in lines:
        if line[-2:] in ('.i', '.d'):
            line = line[:-2]
        if line.startswith('data/'):
            line = line[5:]
        for c in line:
            counts[c] += 1
    # Separators and line endings are not part of name components.
    for c in '\r/\n':
        counts.pop(c, None)
    # Dividing a count by t turns it into a percentage of the total.
    t = sum(counts.values()) / 100.0
    fp.write('probtable = (')
    ranked = sorted(counts.items(), key=lambda x: x[1], reverse=True)
    for i, (k, v) in enumerate(ranked):
        # Start a new output row every five entries.
        if (i % 5) == 0:
            fp.write('\n ')
        vt = v / t
        # Entries are sorted by descending count, so everything after
        # the first one that would print as 0.000 can be dropped too.
        if vt < 0.0005:
            break
        fp.write('(%r, %.03f), ' % (k, vt))
    fp.write('\n )\n')
65
68
# Character frequencies, expressed as percentages, gathered from the
# filelog names of a real-world, very large repo.  Entries are
# (character, percentage) pairs in descending order of frequency.

probtable = (
    ('t', 9.828), ('e', 9.042), ('s', 8.011), ('a', 6.801), ('i', 6.618),
    ('g', 5.053), ('r', 5.030), ('o', 4.887), ('p', 4.363), ('n', 4.258),
    ('l', 3.830), ('h', 3.693), ('_', 3.659), ('.', 3.377), ('m', 3.194),
    ('u', 2.364), ('d', 2.296), ('c', 2.163), ('b', 1.739), ('f', 1.625),
    ('6', 0.666), ('j', 0.610), ('y', 0.554), ('x', 0.487), ('w', 0.477),
    ('k', 0.476), ('v', 0.473), ('3', 0.336), ('1', 0.335), ('2', 0.326),
    ('4', 0.310), ('5', 0.305), ('9', 0.302), ('8', 0.300), ('7', 0.299),
    ('q', 0.298), ('0', 0.250), ('z', 0.223), ('-', 0.118), ('C', 0.095),
    ('T', 0.087), ('F', 0.085), ('B', 0.077), ('S', 0.076), ('P', 0.076),
    ('L', 0.059), ('A', 0.058), ('N', 0.051), ('D', 0.049), ('M', 0.046),
    ('E', 0.039), ('I', 0.035), ('R', 0.035), ('G', 0.028), ('U', 0.026),
    ('W', 0.025), ('O', 0.017), ('V', 0.015), ('H', 0.013), ('Q', 0.011),
    ('J', 0.007), ('K', 0.005), ('+', 0.004), ('X', 0.003), ('Y', 0.001),
    )

# Characters that have an entry in probtable are drawn through that
# table, so take them out of validchars; what is left is every other
# permitted character.  It becomes a list because rng.choice() is
# later used to pick from it.
for entry in probtable:
    validchars.remove(entry[0])
validchars = list(validchars)
88
91
def pickfrom(rng, table):
    '''Make a weighted random choice from table.

    table is a sequence of (item, weight) pairs.  A threshold is drawn
    uniformly between 0 and the sum of the weights, and the first item
    whose cumulative weight reaches it is returned.  An empty table
    falls through the loop and yields None.'''

    threshold = rng.random() * sum(weight for _, weight in table)
    running = 0
    for item, weight in table:
        running += weight
        if running >= threshold:
            return item
96
99
# Each table below is a tuple of (generator, weight) pairs meant for
# pickfrom(); a generator takes the rng and returns a string fragment.

reservedcombos = casecombinations(winreserved)
internalsuffixcombos = casecombinations('.hg .i .d'.split())

def _frequentchar(rng):
    '''Pick a character according to the probtable frequencies.'''
    return pickfrom(rng, probtable)

def _otherchar(rng):
    '''Pick uniformly among the characters left in validchars.'''
    return rng.choice(validchars)

def _reservedname(rng):
    '''Pick a reserved name in some mix of upper and lower case.'''
    return rng.choice(reservedcombos)

def _nosuffix(rng):
    '''Contribute nothing.'''
    return ''

def _internalsuffix(rng):
    '''Pick one of '.hg', '.i' or '.d' in some mix of case.'''
    return rng.choice(internalsuffixcombos)

_charentries = (
    (_frequentchar, 90),
    (_otherchar, 5),
    )

# The first component of a name following a slash.
firsttable = _charentries + (
    (_reservedname, 5),
    )

# Components of a name following the first: the same choices minus
# the reserved names.
resttable = _charentries

# The last component of a path, before a slash or at the end of a
# name: either another character, nothing, or a special suffix.
lasttable = resttable + (
    (_nosuffix, 95),
    (_internalsuffix, 5),
    )
121
124
def makepart(rng, k):
    '''Build one slash-free piece of a pathname.

    Starts with a fragment chosen via firsttable, keeps appending
    fragments chosen via resttable until the accumulated length
    exceeds k, then finishes with one fragment chosen via lasttable
    (which may be empty).'''

    pieces = [pickfrom(rng, firsttable)(rng)]
    length = len(pieces[0])
    while length <= k:
        piece = pickfrom(rng, resttable)(rng)
        pieces.append(piece)
        length += len(piece)
    ending = pickfrom(rng, lasttable)(rng)
    return ''.join(pieces) + ending
134
137
def makepath(rng, j, k):
    '''Build a full pathname: 'data/', then j parts from makepart(rng, k)
    joined by slashes, then a randomly chosen '.d' or '.i' suffix.'''

    # The parts are generated before the suffix is drawn, so the rng is
    # consumed in a fixed order.
    parts = [makepart(rng, k) for _ in xrange(j)]
    suffix = rng.choice(['.d', '.i'])
    return 'data/' + '/'.join(parts) + suffix
140
143
def genpath(rng, count):
    '''Yield count random pathnames whose size limits grow gradually.

    For the i-th path a bound k is computed that rises with the square
    root of i / count, starting at mink.  Both the number of parts and
    the length parameter handed to makepath are then drawn
    independently from 1..k.'''

    mink, maxk = 1, 4096
    span = maxk - mink
    for i in xrange(count):
        k = mink + int(round(math.sqrt(span * float(i) / count)))
        x = rng.randint(1, k)
        y = rng.randint(1, k)
        yield makepath(rng, x, y)
153
156
def runtests(rng, seed, count):
    '''Run hybridencode over count generated pathnames.

    Nothing is compared or counted here, so the returned error count
    is always 0; a problem can only show up as an exception raised by
    hybridencode.  seed is accepted but not used.'''

    for path in genpath(rng, count):
        hybridencode(path)
    return 0
159
162
def main():
    '''Parse the command line and run the randomized encoding test.

    Options:
      -c N, --count=N   number of pathnames to generate (default 100)
      -s N, --seed=N    seed for the random number generator
      --build           print a freshly computed probtable built from
                        the store of the repository in the current
                        directory, then exit with status 0

    Exits with status 1 if runtests reports any errors.'''

    import getopt

    # Empirically observed to take about a second to run
    count = 100
    seed = None
    opts, args = getopt.getopt(sys.argv[1:], 'c:s:',
                               ['build', 'count=', 'seed='])
    for o, a in opts:
        if o in ('-c', '--count'):
            count = int(a)
        elif o in ('-s', '--seed'):
            seed = long(a)
        elif o == '--build':
            buildprobtable(sys.stdout,
                           'find .hg/store/data -type f && '
                           'cat .hg/store/fncache 2>/dev/null')
            sys.exit(0)

    if seed is None:
        try:
            seed = long(binascii.hexlify(os.urandom(16)), 16)
        except (AttributeError, NotImplementedError):
            # os.urandom may be missing altogether (AttributeError) or
            # present but without a randomness source to draw from
            # (NotImplementedError); fall back to the clock either way.
            seed = long(time.time() * 1000)

    rng = random.Random(seed)
    if runtests(rng, seed, count):
        sys.exit(1)
188
191
# No Python version test is needed here: interpreters older than 2.6
# have already exited through the check near the top of this script.
if __name__ == '__main__':
    main()
General Comments 0
You need to be logged in to leave comments. Login now