##// END OF EJS Templates
test-pathencode: randomize length of each path component...
Siddharth Agarwal -
r19319:ec17ddec stable
parent child Browse files
Show More
@@ -1,198 +1,199
1 # This is a randomized test that generates different pathnames every
1 # This is a randomized test that generates different pathnames every
2 # time it is invoked, and tests the encoding of those pathnames.
2 # time it is invoked, and tests the encoding of those pathnames.
3 #
3 #
4 # It uses a simple probabilistic model to generate valid pathnames
4 # It uses a simple probabilistic model to generate valid pathnames
5 # that have proven likely to expose bugs and divergent behaviour in
5 # that have proven likely to expose bugs and divergent behaviour in
6 # different encoding implementations.
6 # different encoding implementations.
7
7
8 from mercurial import parsers
8 from mercurial import parsers
9 from mercurial import store
9 from mercurial import store
10 import binascii, itertools, math, os, random, sys, time
10 import binascii, itertools, math, os, random, sys, time
11 import collections
11 import collections
12
12
13 if sys.version_info[:2] < (2, 6):
13 if sys.version_info[:2] < (2, 6):
14 sys.exit(0)
14 sys.exit(0)
15
15
16 validchars = set(map(chr, range(0, 256)))
16 validchars = set(map(chr, range(0, 256)))
17 alphanum = range(ord('A'), ord('Z'))
17 alphanum = range(ord('A'), ord('Z'))
18
18
19 for c in '\0/':
19 for c in '\0/':
20 validchars.remove(c)
20 validchars.remove(c)
21
21
22 winreserved = ('aux con prn nul'.split() +
22 winreserved = ('aux con prn nul'.split() +
23 ['com%d' % i for i in xrange(1, 10)] +
23 ['com%d' % i for i in xrange(1, 10)] +
24 ['lpt%d' % i for i in xrange(1, 10)])
24 ['lpt%d' % i for i in xrange(1, 10)])
25
25
26 def casecombinations(names):
26 def casecombinations(names):
27 '''Build all case-diddled combinations of names.'''
27 '''Build all case-diddled combinations of names.'''
28
28
29 combos = set()
29 combos = set()
30
30
31 for r in names:
31 for r in names:
32 for i in xrange(len(r) + 1):
32 for i in xrange(len(r) + 1):
33 for c in itertools.combinations(xrange(len(r)), i):
33 for c in itertools.combinations(xrange(len(r)), i):
34 d = r
34 d = r
35 for j in c:
35 for j in c:
36 d = ''.join((d[:j], d[j].upper(), d[j + 1:]))
36 d = ''.join((d[:j], d[j].upper(), d[j + 1:]))
37 combos.add(d)
37 combos.add(d)
38 return sorted(combos)
38 return sorted(combos)
39
39
40 def buildprobtable(fp, cmd='hg manifest tip'):
40 def buildprobtable(fp, cmd='hg manifest tip'):
41 '''Construct and print a table of probabilities for path name
41 '''Construct and print a table of probabilities for path name
42 components. The numbers are percentages.'''
42 components. The numbers are percentages.'''
43
43
44 counts = collections.defaultdict(lambda: 0)
44 counts = collections.defaultdict(lambda: 0)
45 for line in os.popen(cmd).read().splitlines():
45 for line in os.popen(cmd).read().splitlines():
46 if line[-2:] in ('.i', '.d'):
46 if line[-2:] in ('.i', '.d'):
47 line = line[:-2]
47 line = line[:-2]
48 if line.startswith('data/'):
48 if line.startswith('data/'):
49 line = line[5:]
49 line = line[5:]
50 for c in line:
50 for c in line:
51 counts[c] += 1
51 counts[c] += 1
52 for c in '\r/\n':
52 for c in '\r/\n':
53 counts.pop(c, None)
53 counts.pop(c, None)
54 t = sum(counts.itervalues()) / 100.0
54 t = sum(counts.itervalues()) / 100.0
55 fp.write('probtable = (')
55 fp.write('probtable = (')
56 for i, (k, v) in enumerate(sorted(counts.iteritems(), key=lambda x: x[1],
56 for i, (k, v) in enumerate(sorted(counts.iteritems(), key=lambda x: x[1],
57 reverse=True)):
57 reverse=True)):
58 if (i % 5) == 0:
58 if (i % 5) == 0:
59 fp.write('\n ')
59 fp.write('\n ')
60 vt = v / t
60 vt = v / t
61 if vt < 0.0005:
61 if vt < 0.0005:
62 break
62 break
63 fp.write('(%r, %.03f), ' % (k, vt))
63 fp.write('(%r, %.03f), ' % (k, vt))
64 fp.write('\n )\n')
64 fp.write('\n )\n')
65
65
66 # A table of character frequencies (as percentages), gleaned by
66 # A table of character frequencies (as percentages), gleaned by
67 # looking at filelog names from a real-world, very large repo.
67 # looking at filelog names from a real-world, very large repo.
68
68
69 probtable = (
69 probtable = (
70 ('t', 9.828), ('e', 9.042), ('s', 8.011), ('a', 6.801), ('i', 6.618),
70 ('t', 9.828), ('e', 9.042), ('s', 8.011), ('a', 6.801), ('i', 6.618),
71 ('g', 5.053), ('r', 5.030), ('o', 4.887), ('p', 4.363), ('n', 4.258),
71 ('g', 5.053), ('r', 5.030), ('o', 4.887), ('p', 4.363), ('n', 4.258),
72 ('l', 3.830), ('h', 3.693), ('_', 3.659), ('.', 3.377), ('m', 3.194),
72 ('l', 3.830), ('h', 3.693), ('_', 3.659), ('.', 3.377), ('m', 3.194),
73 ('u', 2.364), ('d', 2.296), ('c', 2.163), ('b', 1.739), ('f', 1.625),
73 ('u', 2.364), ('d', 2.296), ('c', 2.163), ('b', 1.739), ('f', 1.625),
74 ('6', 0.666), ('j', 0.610), ('y', 0.554), ('x', 0.487), ('w', 0.477),
74 ('6', 0.666), ('j', 0.610), ('y', 0.554), ('x', 0.487), ('w', 0.477),
75 ('k', 0.476), ('v', 0.473), ('3', 0.336), ('1', 0.335), ('2', 0.326),
75 ('k', 0.476), ('v', 0.473), ('3', 0.336), ('1', 0.335), ('2', 0.326),
76 ('4', 0.310), ('5', 0.305), ('9', 0.302), ('8', 0.300), ('7', 0.299),
76 ('4', 0.310), ('5', 0.305), ('9', 0.302), ('8', 0.300), ('7', 0.299),
77 ('q', 0.298), ('0', 0.250), ('z', 0.223), ('-', 0.118), ('C', 0.095),
77 ('q', 0.298), ('0', 0.250), ('z', 0.223), ('-', 0.118), ('C', 0.095),
78 ('T', 0.087), ('F', 0.085), ('B', 0.077), ('S', 0.076), ('P', 0.076),
78 ('T', 0.087), ('F', 0.085), ('B', 0.077), ('S', 0.076), ('P', 0.076),
79 ('L', 0.059), ('A', 0.058), ('N', 0.051), ('D', 0.049), ('M', 0.046),
79 ('L', 0.059), ('A', 0.058), ('N', 0.051), ('D', 0.049), ('M', 0.046),
80 ('E', 0.039), ('I', 0.035), ('R', 0.035), ('G', 0.028), ('U', 0.026),
80 ('E', 0.039), ('I', 0.035), ('R', 0.035), ('G', 0.028), ('U', 0.026),
81 ('W', 0.025), ('O', 0.017), ('V', 0.015), ('H', 0.013), ('Q', 0.011),
81 ('W', 0.025), ('O', 0.017), ('V', 0.015), ('H', 0.013), ('Q', 0.011),
82 ('J', 0.007), ('K', 0.005), ('+', 0.004), ('X', 0.003), ('Y', 0.001),
82 ('J', 0.007), ('K', 0.005), ('+', 0.004), ('X', 0.003), ('Y', 0.001),
83 )
83 )
84
84
85 for c, _ in probtable:
85 for c, _ in probtable:
86 validchars.remove(c)
86 validchars.remove(c)
87 validchars = list(validchars)
87 validchars = list(validchars)
88
88
89 def pickfrom(rng, table):
89 def pickfrom(rng, table):
90 c = 0
90 c = 0
91 r = rng.random() * sum(i[1] for i in table)
91 r = rng.random() * sum(i[1] for i in table)
92 for i, p in table:
92 for i, p in table:
93 c += p
93 c += p
94 if c >= r:
94 if c >= r:
95 return i
95 return i
96
96
97 reservedcombos = casecombinations(winreserved)
97 reservedcombos = casecombinations(winreserved)
98
98
99 # The first component of a name following a slash.
99 # The first component of a name following a slash.
100
100
101 firsttable = (
101 firsttable = (
102 (lambda rng: pickfrom(rng, probtable), 90),
102 (lambda rng: pickfrom(rng, probtable), 90),
103 (lambda rng: rng.choice(validchars), 5),
103 (lambda rng: rng.choice(validchars), 5),
104 (lambda rng: rng.choice(reservedcombos), 5),
104 (lambda rng: rng.choice(reservedcombos), 5),
105 )
105 )
106
106
107 # Components of a name following the first.
107 # Components of a name following the first.
108
108
109 resttable = firsttable[:-1]
109 resttable = firsttable[:-1]
110
110
111 # Special suffixes.
111 # Special suffixes.
112
112
113 internalsuffixcombos = casecombinations('.hg .i .d'.split())
113 internalsuffixcombos = casecombinations('.hg .i .d'.split())
114
114
115 # The last component of a path, before a slash or at the end of a name.
115 # The last component of a path, before a slash or at the end of a name.
116
116
117 lasttable = resttable + (
117 lasttable = resttable + (
118 (lambda rng: '', 95),
118 (lambda rng: '', 95),
119 (lambda rng: rng.choice(internalsuffixcombos), 5),
119 (lambda rng: rng.choice(internalsuffixcombos), 5),
120 )
120 )
121
121
122 def makepart(rng, k):
122 def makepart(rng, k):
123 '''Construct a part of a pathname, without slashes.'''
123 '''Construct a part of a pathname, without slashes.'''
124
124
125 p = pickfrom(rng, firsttable)(rng)
125 p = pickfrom(rng, firsttable)(rng)
126 l = len(p)
126 l = len(p)
127 ps = [p]
127 ps = [p]
128 while l < k:
128 maxl = rng.randint(1, k)
129 while l < maxl:
129 p = pickfrom(rng, resttable)(rng)
130 p = pickfrom(rng, resttable)(rng)
130 l += len(p)
131 l += len(p)
131 ps.append(p)
132 ps.append(p)
132 ps.append(pickfrom(rng, lasttable)(rng))
133 ps.append(pickfrom(rng, lasttable)(rng))
133 return ''.join(ps)
134 return ''.join(ps)
134
135
135 def makepath(rng, j, k):
136 def makepath(rng, j, k):
136 '''Construct a complete pathname.'''
137 '''Construct a complete pathname.'''
137
138
138 return ('data/' + '/'.join(makepart(rng, k) for _ in xrange(j)) +
139 return ('data/' + '/'.join(makepart(rng, k) for _ in xrange(j)) +
139 rng.choice(['.d', '.i']))
140 rng.choice(['.d', '.i']))
140
141
141 def genpath(rng, count):
142 def genpath(rng, count):
142 '''Generate random pathnames with gradually increasing lengths.'''
143 '''Generate random pathnames with gradually increasing lengths.'''
143
144
144 mink, maxk = 1, 4096
145 mink, maxk = 1, 4096
145 def steps():
146 def steps():
146 x, k = 0, mink
147 x, k = 0, mink
147 for i in xrange(count):
148 for i in xrange(count):
148 yield mink + int(round(math.sqrt((maxk - mink) * float(i) / count)))
149 yield mink + int(round(math.sqrt((maxk - mink) * float(i) / count)))
149 for k in steps():
150 for k in steps():
150 x = rng.randint(1, k)
151 x = rng.randint(1, k)
151 y = rng.randint(1, k)
152 y = rng.randint(1, k)
152 yield makepath(rng, x, y)
153 yield makepath(rng, x, y)
153
154
154 def runtests(rng, seed, count):
155 def runtests(rng, seed, count):
155 nerrs = 0
156 nerrs = 0
156 for p in genpath(rng, count):
157 for p in genpath(rng, count):
157 h = store._pathencode(p) # uses C implementation, if available
158 h = store._pathencode(p) # uses C implementation, if available
158 r = store._hybridencode(p, True) # reference implementation in Python
159 r = store._hybridencode(p, True) # reference implementation in Python
159 if h != r:
160 if h != r:
160 if nerrs == 0:
161 if nerrs == 0:
161 print >> sys.stderr, 'seed:', hex(seed)[:-1]
162 print >> sys.stderr, 'seed:', hex(seed)[:-1]
162 print >> sys.stderr, "\np: '%s'" % p.encode("string_escape")
163 print >> sys.stderr, "\np: '%s'" % p.encode("string_escape")
163 print >> sys.stderr, "h: '%s'" % h.encode("string_escape")
164 print >> sys.stderr, "h: '%s'" % h.encode("string_escape")
164 print >> sys.stderr, "r: '%s'" % r.encode("string_escape")
165 print >> sys.stderr, "r: '%s'" % r.encode("string_escape")
165 nerrs += 1
166 nerrs += 1
166 return nerrs
167 return nerrs
167
168
168 def main():
169 def main():
169 import getopt
170 import getopt
170
171
171 # Empirically observed to take about a second to run
172 # Empirically observed to take about a second to run
172 count = 100
173 count = 100
173 seed = None
174 seed = None
174 opts, args = getopt.getopt(sys.argv[1:], 'c:s:',
175 opts, args = getopt.getopt(sys.argv[1:], 'c:s:',
175 ['build', 'count=', 'seed='])
176 ['build', 'count=', 'seed='])
176 for o, a in opts:
177 for o, a in opts:
177 if o in ('-c', '--count'):
178 if o in ('-c', '--count'):
178 count = int(a)
179 count = int(a)
179 elif o in ('-s', '--seed'):
180 elif o in ('-s', '--seed'):
180 seed = long(a, base=0) # accepts base 10 or 16 strings
181 seed = long(a, base=0) # accepts base 10 or 16 strings
181 elif o == '--build':
182 elif o == '--build':
182 buildprobtable(sys.stdout,
183 buildprobtable(sys.stdout,
183 'find .hg/store/data -type f && '
184 'find .hg/store/data -type f && '
184 'cat .hg/store/fncache 2>/dev/null')
185 'cat .hg/store/fncache 2>/dev/null')
185 sys.exit(0)
186 sys.exit(0)
186
187
187 if seed is None:
188 if seed is None:
188 try:
189 try:
189 seed = long(binascii.hexlify(os.urandom(16)), 16)
190 seed = long(binascii.hexlify(os.urandom(16)), 16)
190 except AttributeError:
191 except AttributeError:
191 seed = long(time.time() * 1000)
192 seed = long(time.time() * 1000)
192
193
193 rng = random.Random(seed)
194 rng = random.Random(seed)
194 if runtests(rng, seed, count):
195 if runtests(rng, seed, count):
195 sys.exit(1)
196 sys.exit(1)
196
197
197 if __name__ == '__main__':
198 if __name__ == '__main__':
198 main()
199 main()
General Comments 0
You need to be logged in to leave comments. Login now