##// END OF EJS Templates
merge with stable
Matt Mackall -
r19327:cf1b0a58 merge default
parent child Browse files
Show More
@@ -1,41 +1,41 b''
1 ======
1 ======
2 hgrc
2 hgrc
3 ======
3 ======
4
4
5 ---------------------------------
5 ---------------------------------
6 configuration files for Mercurial
6 configuration files for Mercurial
7 ---------------------------------
7 ---------------------------------
8
8
9 :Author: Bryan O'Sullivan <bos@serpentine.com>
9 :Author: Bryan O'Sullivan <bos@serpentine.com>
10 :Organization: Mercurial
10 :Organization: Mercurial
11 :Manual section: 5
11 :Manual section: 5
12 :Manual group: Mercurial Manual
12 :Manual group: Mercurial Manual
13
13
14 .. contents::
14 .. contents::
15 :backlinks: top
15 :backlinks: top
16 :class: htmlonly
16 :class: htmlonly
17
17
18
18
19 Synopsis
19 Description
20 ========
20 ===========
21
21
22 .. include:: ../mercurial/help/config.txt
22 .. include:: ../mercurial/help/config.txt
23
23
24 Author
24 Author
25 ======
25 ======
26 Bryan O'Sullivan <bos@serpentine.com>.
26 Bryan O'Sullivan <bos@serpentine.com>.
27
27
28 Mercurial was written by Matt Mackall <mpm@selenic.com>.
28 Mercurial was written by Matt Mackall <mpm@selenic.com>.
29
29
30 See Also
30 See Also
31 ========
31 ========
32 |hg(1)|_, |hgignore(5)|_
32 |hg(1)|_, |hgignore(5)|_
33
33
34 Copying
34 Copying
35 =======
35 =======
36 This manual page is copyright 2005 Bryan O'Sullivan.
36 This manual page is copyright 2005 Bryan O'Sullivan.
37 Mercurial is copyright 2005-2012 Matt Mackall.
37 Mercurial is copyright 2005-2012 Matt Mackall.
38 Free use of this software is granted under the terms of the GNU General
38 Free use of this software is granted under the terms of the GNU General
39 Public License version 2 or any later version.
39 Public License version 2 or any later version.
40
40
41 .. include:: common.txt
41 .. include:: common.txt
@@ -1,760 +1,761 b''
1 /*
1 /*
2 pathencode.c - efficient path name encoding
2 pathencode.c - efficient path name encoding
3
3
4 Copyright 2012 Facebook
4 Copyright 2012 Facebook
5
5
6 This software may be used and distributed according to the terms of
6 This software may be used and distributed according to the terms of
7 the GNU General Public License, incorporated herein by reference.
7 the GNU General Public License, incorporated herein by reference.
8 */
8 */
9
9
10 /*
10 /*
11 * An implementation of the name encoding scheme used by the fncache
11 * An implementation of the name encoding scheme used by the fncache
12 * store. The common case is of a path < 120 bytes long, which is
12 * store. The common case is of a path < 120 bytes long, which is
13 * handled either in a single pass with no allocations or two passes
13 * handled either in a single pass with no allocations or two passes
14 * with a single allocation. For longer paths, multiple passes are
14 * with a single allocation. For longer paths, multiple passes are
15 * required.
15 * required.
16 */
16 */
17
17
18 #define PY_SSIZE_T_CLEAN
18 #define PY_SSIZE_T_CLEAN
19 #include <Python.h>
19 #include <Python.h>
20 #include <assert.h>
20 #include <assert.h>
21 #include <ctype.h>
21 #include <ctype.h>
22 #include <stdlib.h>
22 #include <stdlib.h>
23 #include <string.h>
23 #include <string.h>
24
24
25 #include "util.h"
25 #include "util.h"
26
26
/*
 * States of the single-pass encoder implemented by _encode().
 *
 * Most states track progress through a reserved name at the start of
 * a path component ("aux", "con", "com1".."com9", "lpt1".."lpt9",
 * "nul", "prn") or through a ".hg", ".d" or ".i" suffix.
 */
enum path_state {
	START,   /* first byte of a path component */
	A,       /* "AUX" */
	AU,
	THIRD,   /* third of a 3-byte sequence, e.g. "AUX", "NUL" */
	C,       /* "CON" or "COMn" */
	CO,
	COMLPT,  /* "COM" or "LPT" */
	COMLPTn,
	L,
	LP,
	N,
	NU,
	P,       /* "PRN" */
	PR,
	LDOT,    /* leading '.' */
	DOT,     /* '.' in a non-leading position */
	H,       /* ".h" */
	HGDI,    /* ".hg", ".d", or ".i" */
	SPACE,
	DEFAULT  /* byte of a path component after the first */
};
50
50
/*
 * States of the directory encoder implemented by _encodedir().
 */
enum dir_state {
	DDOT,     /* just saw a '.' */
	DH,       /* just saw ".h" */
	DHGDI,    /* just saw ".hg", ".d", or ".i" */
	DDEFAULT  /* any other position */
};
58
58
/*
 * Test whether byte c is a member of a 256-entry bit set.
 *
 * bitset holds eight 32-bit words; byte value v is represented by bit
 * (v & 31) of word (v >> 5).  Returns 1 if the bit is set, 0 otherwise.
 *
 * The word is shifted right as an unsigned value instead of building
 * the mask with "1 << n": shifting a signed 1 into bit 31 is undefined
 * behaviour in C.
 */
static inline int inset(const uint32_t bitset[], char c)
{
	const uint8_t byte = (uint8_t)c;

	return (int)((bitset[byte >> 5] >> (byte & 31)) & 1U);
}
63
63
64 static inline void charcopy(char *dest, Py_ssize_t *destlen, size_t destsize,
64 static inline void charcopy(char *dest, Py_ssize_t *destlen, size_t destsize,
65 char c)
65 char c)
66 {
66 {
67 if (dest) {
67 if (dest) {
68 assert(*destlen < destsize);
68 assert(*destlen < destsize);
69 dest[*destlen] = c;
69 dest[*destlen] = c;
70 }
70 }
71 (*destlen)++;
71 (*destlen)++;
72 }
72 }
73
73
74 static inline void memcopy(char *dest, Py_ssize_t *destlen, size_t destsize,
74 static inline void memcopy(char *dest, Py_ssize_t *destlen, size_t destsize,
75 const void *src, Py_ssize_t len)
75 const void *src, Py_ssize_t len)
76 {
76 {
77 if (dest) {
77 if (dest) {
78 assert(*destlen + len < destsize);
78 assert(*destlen + len < destsize);
79 memcpy((void *)&dest[*destlen], src, len);
79 memcpy((void *)&dest[*destlen], src, len);
80 }
80 }
81 *destlen += len;
81 *destlen += len;
82 }
82 }
83
83
84 static inline void hexencode(char *dest, Py_ssize_t *destlen, size_t destsize,
84 static inline void hexencode(char *dest, Py_ssize_t *destlen, size_t destsize,
85 uint8_t c)
85 uint8_t c)
86 {
86 {
87 static const char hexdigit[] = "0123456789abcdef";
87 static const char hexdigit[] = "0123456789abcdef";
88
88
89 charcopy(dest, destlen, destsize, hexdigit[c >> 4]);
89 charcopy(dest, destlen, destsize, hexdigit[c >> 4]);
90 charcopy(dest, destlen, destsize, hexdigit[c & 15]);
90 charcopy(dest, destlen, destsize, hexdigit[c & 15]);
91 }
91 }
92
92
93 /* 3-byte escape: tilde followed by two hex digits */
93 /* 3-byte escape: tilde followed by two hex digits */
94 static inline void escape3(char *dest, Py_ssize_t *destlen, size_t destsize,
94 static inline void escape3(char *dest, Py_ssize_t *destlen, size_t destsize,
95 char c)
95 char c)
96 {
96 {
97 charcopy(dest, destlen, destsize, '~');
97 charcopy(dest, destlen, destsize, '~');
98 hexencode(dest, destlen, destsize, c);
98 hexencode(dest, destlen, destsize, c);
99 }
99 }
100
100
101 static Py_ssize_t _encodedir(char *dest, size_t destsize,
101 static Py_ssize_t _encodedir(char *dest, size_t destsize,
102 const char *src, Py_ssize_t len)
102 const char *src, Py_ssize_t len)
103 {
103 {
104 enum dir_state state = DDEFAULT;
104 enum dir_state state = DDEFAULT;
105 Py_ssize_t i = 0, destlen = 0;
105 Py_ssize_t i = 0, destlen = 0;
106
106
107 while (i < len) {
107 while (i < len) {
108 switch (state) {
108 switch (state) {
109 case DDOT:
109 case DDOT:
110 switch (src[i]) {
110 switch (src[i]) {
111 case 'd':
111 case 'd':
112 case 'i':
112 case 'i':
113 state = DHGDI;
113 state = DHGDI;
114 charcopy(dest, &destlen, destsize, src[i++]);
114 charcopy(dest, &destlen, destsize, src[i++]);
115 break;
115 break;
116 case 'h':
116 case 'h':
117 state = DH;
117 state = DH;
118 charcopy(dest, &destlen, destsize, src[i++]);
118 charcopy(dest, &destlen, destsize, src[i++]);
119 break;
119 break;
120 default:
120 default:
121 state = DDEFAULT;
121 state = DDEFAULT;
122 break;
122 break;
123 }
123 }
124 break;
124 break;
125 case DH:
125 case DH:
126 if (src[i] == 'g') {
126 if (src[i] == 'g') {
127 state = DHGDI;
127 state = DHGDI;
128 charcopy(dest, &destlen, destsize, src[i++]);
128 charcopy(dest, &destlen, destsize, src[i++]);
129 }
129 }
130 else state = DDEFAULT;
130 else state = DDEFAULT;
131 break;
131 break;
132 case DHGDI:
132 case DHGDI:
133 if (src[i] == '/') {
133 if (src[i] == '/') {
134 memcopy(dest, &destlen, destsize, ".hg", 3);
134 memcopy(dest, &destlen, destsize, ".hg", 3);
135 charcopy(dest, &destlen, destsize, src[i++]);
135 charcopy(dest, &destlen, destsize, src[i++]);
136 }
136 }
137 state = DDEFAULT;
137 state = DDEFAULT;
138 break;
138 break;
139 case DDEFAULT:
139 case DDEFAULT:
140 if (src[i] == '.')
140 if (src[i] == '.')
141 state = DDOT;
141 state = DDOT;
142 charcopy(dest, &destlen, destsize, src[i++]);
142 charcopy(dest, &destlen, destsize, src[i++]);
143 break;
143 break;
144 }
144 }
145 }
145 }
146
146
147 return destlen;
147 return destlen;
148 }
148 }
149
149
150 PyObject *encodedir(PyObject *self, PyObject *args)
150 PyObject *encodedir(PyObject *self, PyObject *args)
151 {
151 {
152 Py_ssize_t len, newlen;
152 Py_ssize_t len, newlen;
153 PyObject *pathobj, *newobj;
153 PyObject *pathobj, *newobj;
154 char *path;
154 char *path;
155
155
156 if (!PyArg_ParseTuple(args, "O:encodedir", &pathobj))
156 if (!PyArg_ParseTuple(args, "O:encodedir", &pathobj))
157 return NULL;
157 return NULL;
158
158
159 if (PyString_AsStringAndSize(pathobj, &path, &len) == -1) {
159 if (PyString_AsStringAndSize(pathobj, &path, &len) == -1) {
160 PyErr_SetString(PyExc_TypeError, "expected a string");
160 PyErr_SetString(PyExc_TypeError, "expected a string");
161 return NULL;
161 return NULL;
162 }
162 }
163
163
164 newlen = len ? _encodedir(NULL, 0, path, len + 1) : 1;
164 newlen = len ? _encodedir(NULL, 0, path, len + 1) : 1;
165
165
166 if (newlen == len + 1) {
166 if (newlen == len + 1) {
167 Py_INCREF(pathobj);
167 Py_INCREF(pathobj);
168 return pathobj;
168 return pathobj;
169 }
169 }
170
170
171 newobj = PyString_FromStringAndSize(NULL, newlen);
171 newobj = PyString_FromStringAndSize(NULL, newlen);
172
172
173 if (newobj) {
173 if (newobj) {
174 PyString_GET_SIZE(newobj)--;
174 PyString_GET_SIZE(newobj)--;
175 _encodedir(PyString_AS_STRING(newobj), newlen, path,
175 _encodedir(PyString_AS_STRING(newobj), newlen, path,
176 len + 1);
176 len + 1);
177 }
177 }
178
178
179 return newobj;
179 return newobj;
180 }
180 }
181
181
182 static Py_ssize_t _encode(const uint32_t twobytes[8], const uint32_t onebyte[8],
182 static Py_ssize_t _encode(const uint32_t twobytes[8], const uint32_t onebyte[8],
183 char *dest, Py_ssize_t destlen, size_t destsize,
183 char *dest, Py_ssize_t destlen, size_t destsize,
184 const char *src, Py_ssize_t len,
184 const char *src, Py_ssize_t len,
185 int encodedir)
185 int encodedir)
186 {
186 {
187 enum path_state state = START;
187 enum path_state state = START;
188 Py_ssize_t i = 0;
188 Py_ssize_t i = 0;
189
189
190 /*
190 /*
191 * Python strings end with a zero byte, which we use as a
191 * Python strings end with a zero byte, which we use as a
192 * terminal token as they are not valid inside path names.
192 * terminal token as they are not valid inside path names.
193 */
193 */
194
194
195 while (i < len) {
195 while (i < len) {
196 switch (state) {
196 switch (state) {
197 case START:
197 case START:
198 switch (src[i]) {
198 switch (src[i]) {
199 case '/':
199 case '/':
200 charcopy(dest, &destlen, destsize, src[i++]);
200 charcopy(dest, &destlen, destsize, src[i++]);
201 break;
201 break;
202 case '.':
202 case '.':
203 state = LDOT;
203 state = LDOT;
204 escape3(dest, &destlen, destsize, src[i++]);
204 escape3(dest, &destlen, destsize, src[i++]);
205 break;
205 break;
206 case ' ':
206 case ' ':
207 state = DEFAULT;
207 state = DEFAULT;
208 escape3(dest, &destlen, destsize, src[i++]);
208 escape3(dest, &destlen, destsize, src[i++]);
209 break;
209 break;
210 case 'a':
210 case 'a':
211 state = A;
211 state = A;
212 charcopy(dest, &destlen, destsize, src[i++]);
212 charcopy(dest, &destlen, destsize, src[i++]);
213 break;
213 break;
214 case 'c':
214 case 'c':
215 state = C;
215 state = C;
216 charcopy(dest, &destlen, destsize, src[i++]);
216 charcopy(dest, &destlen, destsize, src[i++]);
217 break;
217 break;
218 case 'l':
218 case 'l':
219 state = L;
219 state = L;
220 charcopy(dest, &destlen, destsize, src[i++]);
220 charcopy(dest, &destlen, destsize, src[i++]);
221 break;
221 break;
222 case 'n':
222 case 'n':
223 state = N;
223 state = N;
224 charcopy(dest, &destlen, destsize, src[i++]);
224 charcopy(dest, &destlen, destsize, src[i++]);
225 break;
225 break;
226 case 'p':
226 case 'p':
227 state = P;
227 state = P;
228 charcopy(dest, &destlen, destsize, src[i++]);
228 charcopy(dest, &destlen, destsize, src[i++]);
229 break;
229 break;
230 default:
230 default:
231 state = DEFAULT;
231 state = DEFAULT;
232 break;
232 break;
233 }
233 }
234 break;
234 break;
235 case A:
235 case A:
236 if (src[i] == 'u') {
236 if (src[i] == 'u') {
237 state = AU;
237 state = AU;
238 charcopy(dest, &destlen, destsize, src[i++]);
238 charcopy(dest, &destlen, destsize, src[i++]);
239 }
239 }
240 else state = DEFAULT;
240 else state = DEFAULT;
241 break;
241 break;
242 case AU:
242 case AU:
243 if (src[i] == 'x') {
243 if (src[i] == 'x') {
244 state = THIRD;
244 state = THIRD;
245 i++;
245 i++;
246 }
246 }
247 else state = DEFAULT;
247 else state = DEFAULT;
248 break;
248 break;
249 case THIRD:
249 case THIRD:
250 state = DEFAULT;
250 state = DEFAULT;
251 switch (src[i]) {
251 switch (src[i]) {
252 case '.':
252 case '.':
253 case '/':
253 case '/':
254 case '\0':
254 case '\0':
255 escape3(dest, &destlen, destsize, src[i - 1]);
255 escape3(dest, &destlen, destsize, src[i - 1]);
256 break;
256 break;
257 default:
257 default:
258 i--;
258 i--;
259 break;
259 break;
260 }
260 }
261 break;
261 break;
262 case C:
262 case C:
263 if (src[i] == 'o') {
263 if (src[i] == 'o') {
264 state = CO;
264 state = CO;
265 charcopy(dest, &destlen, destsize, src[i++]);
265 charcopy(dest, &destlen, destsize, src[i++]);
266 }
266 }
267 else state = DEFAULT;
267 else state = DEFAULT;
268 break;
268 break;
269 case CO:
269 case CO:
270 if (src[i] == 'm') {
270 if (src[i] == 'm') {
271 state = COMLPT;
271 state = COMLPT;
272 i++;
272 i++;
273 }
273 }
274 else if (src[i] == 'n') {
274 else if (src[i] == 'n') {
275 state = THIRD;
275 state = THIRD;
276 i++;
276 i++;
277 }
277 }
278 else state = DEFAULT;
278 else state = DEFAULT;
279 break;
279 break;
280 case COMLPT:
280 case COMLPT:
281 switch (src[i]) {
281 switch (src[i]) {
282 case '1': case '2': case '3': case '4': case '5':
282 case '1': case '2': case '3': case '4': case '5':
283 case '6': case '7': case '8': case '9':
283 case '6': case '7': case '8': case '9':
284 state = COMLPTn;
284 state = COMLPTn;
285 i++;
285 i++;
286 break;
286 break;
287 default:
287 default:
288 state = DEFAULT;
288 state = DEFAULT;
289 charcopy(dest, &destlen, destsize, src[i - 1]);
289 charcopy(dest, &destlen, destsize, src[i - 1]);
290 break;
290 break;
291 }
291 }
292 break;
292 break;
293 case COMLPTn:
293 case COMLPTn:
294 state = DEFAULT;
294 state = DEFAULT;
295 switch (src[i]) {
295 switch (src[i]) {
296 case '.':
296 case '.':
297 case '/':
297 case '/':
298 case '\0':
298 case '\0':
299 escape3(dest, &destlen, destsize, src[i - 2]);
299 escape3(dest, &destlen, destsize, src[i - 2]);
300 charcopy(dest, &destlen, destsize, src[i - 1]);
300 charcopy(dest, &destlen, destsize, src[i - 1]);
301 break;
301 break;
302 default:
302 default:
303 memcopy(dest, &destlen, destsize,
303 memcopy(dest, &destlen, destsize,
304 &src[i - 2], 2);
304 &src[i - 2], 2);
305 break;
305 break;
306 }
306 }
307 break;
307 break;
308 case L:
308 case L:
309 if (src[i] == 'p') {
309 if (src[i] == 'p') {
310 state = LP;
310 state = LP;
311 charcopy(dest, &destlen, destsize, src[i++]);
311 charcopy(dest, &destlen, destsize, src[i++]);
312 }
312 }
313 else state = DEFAULT;
313 else state = DEFAULT;
314 break;
314 break;
315 case LP:
315 case LP:
316 if (src[i] == 't') {
316 if (src[i] == 't') {
317 state = COMLPT;
317 state = COMLPT;
318 i++;
318 i++;
319 }
319 }
320 else state = DEFAULT;
320 else state = DEFAULT;
321 break;
321 break;
322 case N:
322 case N:
323 if (src[i] == 'u') {
323 if (src[i] == 'u') {
324 state = NU;
324 state = NU;
325 charcopy(dest, &destlen, destsize, src[i++]);
325 charcopy(dest, &destlen, destsize, src[i++]);
326 }
326 }
327 else state = DEFAULT;
327 else state = DEFAULT;
328 break;
328 break;
329 case NU:
329 case NU:
330 if (src[i] == 'l') {
330 if (src[i] == 'l') {
331 state = THIRD;
331 state = THIRD;
332 i++;
332 i++;
333 }
333 }
334 else state = DEFAULT;
334 else state = DEFAULT;
335 break;
335 break;
336 case P:
336 case P:
337 if (src[i] == 'r') {
337 if (src[i] == 'r') {
338 state = PR;
338 state = PR;
339 charcopy(dest, &destlen, destsize, src[i++]);
339 charcopy(dest, &destlen, destsize, src[i++]);
340 }
340 }
341 else state = DEFAULT;
341 else state = DEFAULT;
342 break;
342 break;
343 case PR:
343 case PR:
344 if (src[i] == 'n') {
344 if (src[i] == 'n') {
345 state = THIRD;
345 state = THIRD;
346 i++;
346 i++;
347 }
347 }
348 else state = DEFAULT;
348 else state = DEFAULT;
349 break;
349 break;
350 case LDOT:
350 case LDOT:
351 switch (src[i]) {
351 switch (src[i]) {
352 case 'd':
352 case 'd':
353 case 'i':
353 case 'i':
354 state = HGDI;
354 state = HGDI;
355 charcopy(dest, &destlen, destsize, src[i++]);
355 charcopy(dest, &destlen, destsize, src[i++]);
356 break;
356 break;
357 case 'h':
357 case 'h':
358 state = H;
358 state = H;
359 charcopy(dest, &destlen, destsize, src[i++]);
359 charcopy(dest, &destlen, destsize, src[i++]);
360 break;
360 break;
361 default:
361 default:
362 state = DEFAULT;
362 state = DEFAULT;
363 break;
363 break;
364 }
364 }
365 break;
365 break;
366 case DOT:
366 case DOT:
367 switch (src[i]) {
367 switch (src[i]) {
368 case '/':
368 case '/':
369 case '\0':
369 case '\0':
370 state = START;
370 state = START;
371 memcopy(dest, &destlen, destsize, "~2e", 3);
371 memcopy(dest, &destlen, destsize, "~2e", 3);
372 charcopy(dest, &destlen, destsize, src[i++]);
372 charcopy(dest, &destlen, destsize, src[i++]);
373 break;
373 break;
374 case 'd':
374 case 'd':
375 case 'i':
375 case 'i':
376 state = HGDI;
376 state = HGDI;
377 charcopy(dest, &destlen, destsize, '.');
377 charcopy(dest, &destlen, destsize, '.');
378 charcopy(dest, &destlen, destsize, src[i++]);
378 charcopy(dest, &destlen, destsize, src[i++]);
379 break;
379 break;
380 case 'h':
380 case 'h':
381 state = H;
381 state = H;
382 memcopy(dest, &destlen, destsize, ".h", 2);
382 memcopy(dest, &destlen, destsize, ".h", 2);
383 i++;
383 i++;
384 break;
384 break;
385 default:
385 default:
386 state = DEFAULT;
386 state = DEFAULT;
387 charcopy(dest, &destlen, destsize, '.');
387 charcopy(dest, &destlen, destsize, '.');
388 break;
388 break;
389 }
389 }
390 break;
390 break;
391 case H:
391 case H:
392 if (src[i] == 'g') {
392 if (src[i] == 'g') {
393 state = HGDI;
393 state = HGDI;
394 charcopy(dest, &destlen, destsize, src[i++]);
394 charcopy(dest, &destlen, destsize, src[i++]);
395 }
395 }
396 else state = DEFAULT;
396 else state = DEFAULT;
397 break;
397 break;
398 case HGDI:
398 case HGDI:
399 if (src[i] == '/') {
399 if (src[i] == '/') {
400 state = START;
400 state = START;
401 if (encodedir)
401 if (encodedir)
402 memcopy(dest, &destlen, destsize, ".hg",
402 memcopy(dest, &destlen, destsize, ".hg",
403 3);
403 3);
404 charcopy(dest, &destlen, destsize, src[i++]);
404 charcopy(dest, &destlen, destsize, src[i++]);
405 }
405 }
406 else state = DEFAULT;
406 else state = DEFAULT;
407 break;
407 break;
408 case SPACE:
408 case SPACE:
409 switch (src[i]) {
409 switch (src[i]) {
410 case '/':
410 case '/':
411 case '\0':
411 case '\0':
412 state = START;
412 state = START;
413 memcopy(dest, &destlen, destsize, "~20", 3);
413 memcopy(dest, &destlen, destsize, "~20", 3);
414 charcopy(dest, &destlen, destsize, src[i++]);
414 charcopy(dest, &destlen, destsize, src[i++]);
415 break;
415 break;
416 default:
416 default:
417 state = DEFAULT;
417 state = DEFAULT;
418 charcopy(dest, &destlen, destsize, ' ');
418 charcopy(dest, &destlen, destsize, ' ');
419 break;
419 break;
420 }
420 }
421 break;
421 break;
422 case DEFAULT:
422 case DEFAULT:
423 while (inset(onebyte, src[i])) {
423 while (inset(onebyte, src[i])) {
424 charcopy(dest, &destlen, destsize, src[i++]);
424 charcopy(dest, &destlen, destsize, src[i++]);
425 if (i == len)
425 if (i == len)
426 goto done;
426 goto done;
427 }
427 }
428 switch (src[i]) {
428 switch (src[i]) {
429 case '.':
429 case '.':
430 state = DOT;
430 state = DOT;
431 i++;
431 i++;
432 break;
432 break;
433 case ' ':
433 case ' ':
434 state = SPACE;
434 state = SPACE;
435 i++;
435 i++;
436 break;
436 break;
437 case '/':
437 case '/':
438 state = START;
438 state = START;
439 charcopy(dest, &destlen, destsize, '/');
439 charcopy(dest, &destlen, destsize, '/');
440 i++;
440 i++;
441 break;
441 break;
442 default:
442 default:
443 if (inset(onebyte, src[i])) {
443 if (inset(onebyte, src[i])) {
444 do {
444 do {
445 charcopy(dest, &destlen,
445 charcopy(dest, &destlen,
446 destsize, src[i++]);
446 destsize, src[i++]);
447 } while (i < len &&
447 } while (i < len &&
448 inset(onebyte, src[i]));
448 inset(onebyte, src[i]));
449 }
449 }
450 else if (inset(twobytes, src[i])) {
450 else if (inset(twobytes, src[i])) {
451 char c = src[i++];
451 char c = src[i++];
452 charcopy(dest, &destlen, destsize, '_');
452 charcopy(dest, &destlen, destsize, '_');
453 charcopy(dest, &destlen, destsize,
453 charcopy(dest, &destlen, destsize,
454 c == '_' ? '_' : c + 32);
454 c == '_' ? '_' : c + 32);
455 }
455 }
456 else
456 else
457 escape3(dest, &destlen, destsize,
457 escape3(dest, &destlen, destsize,
458 src[i++]);
458 src[i++]);
459 break;
459 break;
460 }
460 }
461 break;
461 break;
462 }
462 }
463 }
463 }
464 done:
464 done:
465 return destlen;
465 return destlen;
466 }
466 }
467
467
468 static Py_ssize_t basicencode(char *dest, size_t destsize,
468 static Py_ssize_t basicencode(char *dest, size_t destsize,
469 const char *src, Py_ssize_t len)
469 const char *src, Py_ssize_t len)
470 {
470 {
471 static const uint32_t twobytes[8] = { 0, 0, 0x87fffffe };
471 static const uint32_t twobytes[8] = { 0, 0, 0x87fffffe };
472
472
473 static const uint32_t onebyte[8] = {
473 static const uint32_t onebyte[8] = {
474 1, 0x2bff3bfa, 0x68000001, 0x2fffffff,
474 1, 0x2bff3bfa, 0x68000001, 0x2fffffff,
475 };
475 };
476
476
477 Py_ssize_t destlen = 0;
477 Py_ssize_t destlen = 0;
478
478
479 return _encode(twobytes, onebyte, dest, destlen, destsize,
479 return _encode(twobytes, onebyte, dest, destlen, destsize,
480 src, len, 1);
480 src, len, 1);
481 }
481 }
482
482
483 static const Py_ssize_t maxstorepathlen = 120;
483 static const Py_ssize_t maxstorepathlen = 120;
484
484
485 static Py_ssize_t _lowerencode(char *dest, size_t destsize,
485 static Py_ssize_t _lowerencode(char *dest, size_t destsize,
486 const char *src, Py_ssize_t len)
486 const char *src, Py_ssize_t len)
487 {
487 {
488 static const uint32_t onebyte[8] = {
488 static const uint32_t onebyte[8] = {
489 1, 0x2bfffbfb, 0xe8000001, 0x2fffffff
489 1, 0x2bfffbfb, 0xe8000001, 0x2fffffff
490 };
490 };
491
491
492 static const uint32_t lower[8] = { 0, 0, 0x7fffffe };
492 static const uint32_t lower[8] = { 0, 0, 0x7fffffe };
493
493
494 Py_ssize_t i, destlen = 0;
494 Py_ssize_t i, destlen = 0;
495
495
496 for (i = 0; i < len; i++) {
496 for (i = 0; i < len; i++) {
497 if (inset(onebyte, src[i]))
497 if (inset(onebyte, src[i]))
498 charcopy(dest, &destlen, destsize, src[i]);
498 charcopy(dest, &destlen, destsize, src[i]);
499 else if (inset(lower, src[i]))
499 else if (inset(lower, src[i]))
500 charcopy(dest, &destlen, destsize, src[i] + 32);
500 charcopy(dest, &destlen, destsize, src[i] + 32);
501 else
501 else
502 escape3(dest, &destlen, destsize, src[i]);
502 escape3(dest, &destlen, destsize, src[i]);
503 }
503 }
504
504
505 return destlen;
505 return destlen;
506 }
506 }
507
507
508 PyObject *lowerencode(PyObject *self, PyObject *args)
508 PyObject *lowerencode(PyObject *self, PyObject *args)
509 {
509 {
510 char *path;
510 char *path;
511 Py_ssize_t len, newlen;
511 Py_ssize_t len, newlen;
512 PyObject *ret;
512 PyObject *ret;
513
513
514 if (!PyArg_ParseTuple(args, "s#:lowerencode", &path, &len))
514 if (!PyArg_ParseTuple(args, "s#:lowerencode", &path, &len))
515 return NULL;
515 return NULL;
516
516
517 newlen = _lowerencode(NULL, 0, path, len);
517 newlen = _lowerencode(NULL, 0, path, len);
518 ret = PyString_FromStringAndSize(NULL, newlen);
518 ret = PyString_FromStringAndSize(NULL, newlen);
519 if (ret)
519 if (ret)
520 newlen = _lowerencode(PyString_AS_STRING(ret), newlen,
520 newlen = _lowerencode(PyString_AS_STRING(ret), newlen,
521 path, len);
521 path, len);
522
522
523 return ret;
523 return ret;
524 }
524 }
525
525
526 /* See store.py:_auxencode for a description. */
526 /* See store.py:_auxencode for a description. */
527 static Py_ssize_t auxencode(char *dest, size_t destsize,
527 static Py_ssize_t auxencode(char *dest, size_t destsize,
528 const char *src, Py_ssize_t len)
528 const char *src, Py_ssize_t len)
529 {
529 {
530 static const uint32_t twobytes[8];
530 static const uint32_t twobytes[8];
531
531
532 static const uint32_t onebyte[8] = {
532 static const uint32_t onebyte[8] = {
533 ~0, 0xffff3ffe, ~0, ~0, ~0, ~0, ~0, ~0,
533 ~0, 0xffff3ffe, ~0, ~0, ~0, ~0, ~0, ~0,
534 };
534 };
535
535
536 return _encode(twobytes, onebyte, dest, 0, destsize, src, len, 0);
536 return _encode(twobytes, onebyte, dest, 0, destsize, src, len, 0);
537 }
537 }
538
538
539 static PyObject *hashmangle(const char *src, Py_ssize_t len, const char sha[20])
539 static PyObject *hashmangle(const char *src, Py_ssize_t len, const char sha[20])
540 {
540 {
541 static const Py_ssize_t dirprefixlen = 8;
541 static const Py_ssize_t dirprefixlen = 8;
542 static const Py_ssize_t maxshortdirslen = 68;
542 static const Py_ssize_t maxshortdirslen = 68;
543 char *dest;
543 char *dest;
544 PyObject *ret;
544 PyObject *ret;
545
545
546 Py_ssize_t i, d, p, lastslash = len - 1, lastdot = -1;
546 Py_ssize_t i, d, p, lastslash = len - 1, lastdot = -1;
547 Py_ssize_t destsize, destlen = 0, slop, used;
547 Py_ssize_t destsize, destlen = 0, slop, used;
548
548
549 while (lastslash >= 0 && src[lastslash] != '/') {
549 while (lastslash >= 0 && src[lastslash] != '/') {
550 if (src[lastslash] == '.' && lastdot == -1)
550 if (src[lastslash] == '.' && lastdot == -1)
551 lastdot = lastslash;
551 lastdot = lastslash;
552 lastslash--;
552 lastslash--;
553 }
553 }
554
554
555 #if 0
555 #if 0
556 /* All paths should end in a suffix of ".i" or ".d".
556 /* All paths should end in a suffix of ".i" or ".d".
557 Unfortunately, the file names in test-hybridencode.py
557 Unfortunately, the file names in test-hybridencode.py
558 violate this rule. */
558 violate this rule. */
559 if (lastdot != len - 3) {
559 if (lastdot != len - 3) {
560 PyErr_SetString(PyExc_ValueError,
560 PyErr_SetString(PyExc_ValueError,
561 "suffix missing or wrong length");
561 "suffix missing or wrong length");
562 return NULL;
562 return NULL;
563 }
563 }
564 #endif
564 #endif
565
565
566 /* If src contains a suffix, we will append it to the end of
566 /* If src contains a suffix, we will append it to the end of
567 the new string, so make room. */
567 the new string, so make room. */
568 destsize = 120;
568 destsize = 120;
569 if (lastdot >= 0)
569 if (lastdot >= 0)
570 destsize += len - lastdot - 1;
570 destsize += len - lastdot - 1;
571
571
572 ret = PyString_FromStringAndSize(NULL, destsize);
572 ret = PyString_FromStringAndSize(NULL, destsize);
573 if (ret == NULL)
573 if (ret == NULL)
574 return NULL;
574 return NULL;
575
575
576 dest = PyString_AS_STRING(ret);
576 dest = PyString_AS_STRING(ret);
577 memcopy(dest, &destlen, destsize, "dh/", 3);
577 memcopy(dest, &destlen, destsize, "dh/", 3);
578
578
579 /* Copy up to dirprefixlen bytes of each path component, up to
579 /* Copy up to dirprefixlen bytes of each path component, up to
580 a limit of maxshortdirslen bytes. */
580 a limit of maxshortdirslen bytes. */
581 for (i = d = p = 0; i < lastslash; i++, p++) {
581 for (i = d = p = 0; i < lastslash; i++, p++) {
582 if (src[i] == '/') {
582 if (src[i] == '/') {
583 char d = dest[destlen - 1];
583 char d = dest[destlen - 1];
584 /* After truncation, a directory name may end
584 /* After truncation, a directory name may end
585 in a space or dot, which are unportable. */
585 in a space or dot, which are unportable. */
586 if (d == '.' || d == ' ')
586 if (d == '.' || d == ' ')
587 dest[destlen - 1] = '_';
587 dest[destlen - 1] = '_';
588 if (destlen > maxshortdirslen)
588 /* The + 3 is to account for "dh/" in the beginning */
589 if (destlen > maxshortdirslen + 3)
589 break;
590 break;
590 charcopy(dest, &destlen, destsize, src[i]);
591 charcopy(dest, &destlen, destsize, src[i]);
591 p = -1;
592 p = -1;
592 }
593 }
593 else if (p < dirprefixlen)
594 else if (p < dirprefixlen)
594 charcopy(dest, &destlen, destsize, src[i]);
595 charcopy(dest, &destlen, destsize, src[i]);
595 }
596 }
596
597
597 /* Rewind to just before the last slash copied. */
598 /* Rewind to just before the last slash copied. */
598 if (destlen > maxshortdirslen + 3)
599 if (destlen > maxshortdirslen + 3)
599 do {
600 do {
600 destlen--;
601 destlen--;
601 } while (destlen > 0 && dest[destlen] != '/');
602 } while (destlen > 0 && dest[destlen] != '/');
602
603
603 if (destlen > 3) {
604 if (destlen > 3) {
604 if (lastslash > 0) {
605 if (lastslash > 0) {
605 char d = dest[destlen - 1];
606 char d = dest[destlen - 1];
606 /* The last directory component may be
607 /* The last directory component may be
607 truncated, so make it safe. */
608 truncated, so make it safe. */
608 if (d == '.' || d == ' ')
609 if (d == '.' || d == ' ')
609 dest[destlen - 1] = '_';
610 dest[destlen - 1] = '_';
610 }
611 }
611
612
612 charcopy(dest, &destlen, destsize, '/');
613 charcopy(dest, &destlen, destsize, '/');
613 }
614 }
614
615
615 /* Add a prefix of the original file's name. Its length
616 /* Add a prefix of the original file's name. Its length
616 depends on the number of bytes left after accounting for
617 depends on the number of bytes left after accounting for
617 hash and suffix. */
618 hash and suffix. */
618 used = destlen + 40;
619 used = destlen + 40;
619 if (lastdot >= 0)
620 if (lastdot >= 0)
620 used += len - lastdot - 1;
621 used += len - lastdot - 1;
621 slop = maxstorepathlen - used;
622 slop = maxstorepathlen - used;
622 if (slop > 0) {
623 if (slop > 0) {
623 Py_ssize_t basenamelen =
624 Py_ssize_t basenamelen =
624 lastslash >= 0 ? len - lastslash - 2 : len - 1;
625 lastslash >= 0 ? len - lastslash - 2 : len - 1;
625
626
626 if (basenamelen > slop)
627 if (basenamelen > slop)
627 basenamelen = slop;
628 basenamelen = slop;
628 if (basenamelen > 0)
629 if (basenamelen > 0)
629 memcopy(dest, &destlen, destsize, &src[lastslash + 1],
630 memcopy(dest, &destlen, destsize, &src[lastslash + 1],
630 basenamelen);
631 basenamelen);
631 }
632 }
632
633
633 /* Add hash and suffix. */
634 /* Add hash and suffix. */
634 for (i = 0; i < 20; i++)
635 for (i = 0; i < 20; i++)
635 hexencode(dest, &destlen, destsize, sha[i]);
636 hexencode(dest, &destlen, destsize, sha[i]);
636
637
637 if (lastdot >= 0)
638 if (lastdot >= 0)
638 memcopy(dest, &destlen, destsize, &src[lastdot],
639 memcopy(dest, &destlen, destsize, &src[lastdot],
639 len - lastdot - 1);
640 len - lastdot - 1);
640
641
641 PyString_GET_SIZE(ret) = destlen;
642 PyString_GET_SIZE(ret) = destlen;
642
643
643 return ret;
644 return ret;
644 }
645 }
645
646
646 /*
647 /*
647 * Avoiding a trip through Python would improve performance by 50%,
648 * Avoiding a trip through Python would improve performance by 50%,
648 * but we don't encounter enough long names to be worth the code.
649 * but we don't encounter enough long names to be worth the code.
649 */
650 */
650 static int sha1hash(char hash[20], const char *str, Py_ssize_t len)
651 static int sha1hash(char hash[20], const char *str, Py_ssize_t len)
651 {
652 {
652 static PyObject *shafunc;
653 static PyObject *shafunc;
653 PyObject *shaobj, *hashobj;
654 PyObject *shaobj, *hashobj;
654
655
655 if (shafunc == NULL) {
656 if (shafunc == NULL) {
656 PyObject *util, *name = PyString_FromString("mercurial.util");
657 PyObject *util, *name = PyString_FromString("mercurial.util");
657
658
658 if (name == NULL)
659 if (name == NULL)
659 return -1;
660 return -1;
660
661
661 util = PyImport_Import(name);
662 util = PyImport_Import(name);
662 Py_DECREF(name);
663 Py_DECREF(name);
663
664
664 if (util == NULL) {
665 if (util == NULL) {
665 PyErr_SetString(PyExc_ImportError, "mercurial.util");
666 PyErr_SetString(PyExc_ImportError, "mercurial.util");
666 return -1;
667 return -1;
667 }
668 }
668 shafunc = PyObject_GetAttrString(util, "sha1");
669 shafunc = PyObject_GetAttrString(util, "sha1");
669 Py_DECREF(util);
670 Py_DECREF(util);
670
671
671 if (shafunc == NULL) {
672 if (shafunc == NULL) {
672 PyErr_SetString(PyExc_AttributeError,
673 PyErr_SetString(PyExc_AttributeError,
673 "module 'mercurial.util' has no "
674 "module 'mercurial.util' has no "
674 "attribute 'sha1'");
675 "attribute 'sha1'");
675 return -1;
676 return -1;
676 }
677 }
677 }
678 }
678
679
679 shaobj = PyObject_CallFunction(shafunc, "s#", str, len);
680 shaobj = PyObject_CallFunction(shafunc, "s#", str, len);
680
681
681 if (shaobj == NULL)
682 if (shaobj == NULL)
682 return -1;
683 return -1;
683
684
684 hashobj = PyObject_CallMethod(shaobj, "digest", "");
685 hashobj = PyObject_CallMethod(shaobj, "digest", "");
685 Py_DECREF(shaobj);
686 Py_DECREF(shaobj);
686
687
687 if (!PyString_Check(hashobj) || PyString_GET_SIZE(hashobj) != 20) {
688 if (!PyString_Check(hashobj) || PyString_GET_SIZE(hashobj) != 20) {
688 PyErr_SetString(PyExc_TypeError,
689 PyErr_SetString(PyExc_TypeError,
689 "result of digest is not a 20-byte hash");
690 "result of digest is not a 20-byte hash");
690 Py_DECREF(hashobj);
691 Py_DECREF(hashobj);
691 return -1;
692 return -1;
692 }
693 }
693
694
694 memcpy(hash, PyString_AS_STRING(hashobj), 20);
695 memcpy(hash, PyString_AS_STRING(hashobj), 20);
695 Py_DECREF(hashobj);
696 Py_DECREF(hashobj);
696 return 0;
697 return 0;
697 }
698 }
698
699
699 #define MAXENCODE 4096 * 4
700 #define MAXENCODE 4096 * 4
700
701
701 static PyObject *hashencode(const char *src, Py_ssize_t len)
702 static PyObject *hashencode(const char *src, Py_ssize_t len)
702 {
703 {
703 char dired[MAXENCODE];
704 char dired[MAXENCODE];
704 char lowered[MAXENCODE];
705 char lowered[MAXENCODE];
705 char auxed[MAXENCODE];
706 char auxed[MAXENCODE];
706 Py_ssize_t dirlen, lowerlen, auxlen, baselen;
707 Py_ssize_t dirlen, lowerlen, auxlen, baselen;
707 char sha[20];
708 char sha[20];
708
709
709 baselen = (len - 5) * 3;
710 baselen = (len - 5) * 3;
710 if (baselen >= MAXENCODE) {
711 if (baselen >= MAXENCODE) {
711 PyErr_SetString(PyExc_ValueError, "string too long");
712 PyErr_SetString(PyExc_ValueError, "string too long");
712 return NULL;
713 return NULL;
713 }
714 }
714
715
715 dirlen = _encodedir(dired, baselen, src, len);
716 dirlen = _encodedir(dired, baselen, src, len);
716 if (sha1hash(sha, dired, dirlen - 1) == -1)
717 if (sha1hash(sha, dired, dirlen - 1) == -1)
717 return NULL;
718 return NULL;
718 lowerlen = _lowerencode(lowered, baselen, dired + 5, dirlen - 5);
719 lowerlen = _lowerencode(lowered, baselen, dired + 5, dirlen - 5);
719 auxlen = auxencode(auxed, baselen, lowered, lowerlen);
720 auxlen = auxencode(auxed, baselen, lowered, lowerlen);
720 return hashmangle(auxed, auxlen, sha);
721 return hashmangle(auxed, auxlen, sha);
721 }
722 }
722
723
723 PyObject *pathencode(PyObject *self, PyObject *args)
724 PyObject *pathencode(PyObject *self, PyObject *args)
724 {
725 {
725 Py_ssize_t len, newlen;
726 Py_ssize_t len, newlen;
726 PyObject *pathobj, *newobj;
727 PyObject *pathobj, *newobj;
727 char *path;
728 char *path;
728
729
729 if (!PyArg_ParseTuple(args, "O:pathencode", &pathobj))
730 if (!PyArg_ParseTuple(args, "O:pathencode", &pathobj))
730 return NULL;
731 return NULL;
731
732
732 if (PyString_AsStringAndSize(pathobj, &path, &len) == -1) {
733 if (PyString_AsStringAndSize(pathobj, &path, &len) == -1) {
733 PyErr_SetString(PyExc_TypeError, "expected a string");
734 PyErr_SetString(PyExc_TypeError, "expected a string");
734 return NULL;
735 return NULL;
735 }
736 }
736
737
737 if (len > maxstorepathlen)
738 if (len > maxstorepathlen)
738 newlen = maxstorepathlen + 2;
739 newlen = maxstorepathlen + 2;
739 else
740 else
740 newlen = len ? basicencode(NULL, 0, path, len + 1) : 1;
741 newlen = len ? basicencode(NULL, 0, path, len + 1) : 1;
741
742
742 if (newlen <= maxstorepathlen + 1) {
743 if (newlen <= maxstorepathlen + 1) {
743 if (newlen == len + 1) {
744 if (newlen == len + 1) {
744 Py_INCREF(pathobj);
745 Py_INCREF(pathobj);
745 return pathobj;
746 return pathobj;
746 }
747 }
747
748
748 newobj = PyString_FromStringAndSize(NULL, newlen);
749 newobj = PyString_FromStringAndSize(NULL, newlen);
749
750
750 if (newobj) {
751 if (newobj) {
751 PyString_GET_SIZE(newobj)--;
752 PyString_GET_SIZE(newobj)--;
752 basicencode(PyString_AS_STRING(newobj), newlen, path,
753 basicencode(PyString_AS_STRING(newobj), newlen, path,
753 len + 1);
754 len + 1);
754 }
755 }
755 }
756 }
756 else
757 else
757 newobj = hashencode(path, len + 1);
758 newobj = hashencode(path, len + 1);
758
759
759 return newobj;
760 return newobj;
760 }
761 }
@@ -1,197 +1,198 b''
1 # This is a randomized test that generates different pathnames every
1 # This is a randomized test that generates different pathnames every
2 # time it is invoked, and tests the encoding of those pathnames.
2 # time it is invoked, and tests the encoding of those pathnames.
3 #
3 #
4 # It uses a simple probabilistic model to generate valid pathnames
4 # It uses a simple probabilistic model to generate valid pathnames
5 # that have proven likely to expose bugs and divergent behaviour in
5 # that have proven likely to expose bugs and divergent behaviour in
6 # different encoding implementations.
6 # different encoding implementations.
7
7
8 from mercurial import store
8 from mercurial import store
9 import binascii, itertools, math, os, random, sys, time
9 import binascii, itertools, math, os, random, sys, time
10 import collections
10 import collections
11
11
12 if sys.version_info[:2] < (2, 6):
12 if sys.version_info[:2] < (2, 6):
13 sys.exit(0)
13 sys.exit(0)
14
14
15 validchars = set(map(chr, range(0, 256)))
15 validchars = set(map(chr, range(0, 256)))
16 alphanum = range(ord('A'), ord('Z'))
16 alphanum = range(ord('A'), ord('Z'))
17
17
18 for c in '\0/':
18 for c in '\0/':
19 validchars.remove(c)
19 validchars.remove(c)
20
20
21 winreserved = ('aux con prn nul'.split() +
21 winreserved = ('aux con prn nul'.split() +
22 ['com%d' % i for i in xrange(1, 10)] +
22 ['com%d' % i for i in xrange(1, 10)] +
23 ['lpt%d' % i for i in xrange(1, 10)])
23 ['lpt%d' % i for i in xrange(1, 10)])
24
24
25 def casecombinations(names):
25 def casecombinations(names):
26 '''Build all case-diddled combinations of names.'''
26 '''Build all case-diddled combinations of names.'''
27
27
28 combos = set()
28 combos = set()
29
29
30 for r in names:
30 for r in names:
31 for i in xrange(len(r) + 1):
31 for i in xrange(len(r) + 1):
32 for c in itertools.combinations(xrange(len(r)), i):
32 for c in itertools.combinations(xrange(len(r)), i):
33 d = r
33 d = r
34 for j in c:
34 for j in c:
35 d = ''.join((d[:j], d[j].upper(), d[j + 1:]))
35 d = ''.join((d[:j], d[j].upper(), d[j + 1:]))
36 combos.add(d)
36 combos.add(d)
37 return sorted(combos)
37 return sorted(combos)
38
38
39 def buildprobtable(fp, cmd='hg manifest tip'):
39 def buildprobtable(fp, cmd='hg manifest tip'):
40 '''Construct and print a table of probabilities for path name
40 '''Construct and print a table of probabilities for path name
41 components. The numbers are percentages.'''
41 components. The numbers are percentages.'''
42
42
43 counts = collections.defaultdict(lambda: 0)
43 counts = collections.defaultdict(lambda: 0)
44 for line in os.popen(cmd).read().splitlines():
44 for line in os.popen(cmd).read().splitlines():
45 if line[-2:] in ('.i', '.d'):
45 if line[-2:] in ('.i', '.d'):
46 line = line[:-2]
46 line = line[:-2]
47 if line.startswith('data/'):
47 if line.startswith('data/'):
48 line = line[5:]
48 line = line[5:]
49 for c in line:
49 for c in line:
50 counts[c] += 1
50 counts[c] += 1
51 for c in '\r/\n':
51 for c in '\r/\n':
52 counts.pop(c, None)
52 counts.pop(c, None)
53 t = sum(counts.itervalues()) / 100.0
53 t = sum(counts.itervalues()) / 100.0
54 fp.write('probtable = (')
54 fp.write('probtable = (')
55 for i, (k, v) in enumerate(sorted(counts.iteritems(), key=lambda x: x[1],
55 for i, (k, v) in enumerate(sorted(counts.iteritems(), key=lambda x: x[1],
56 reverse=True)):
56 reverse=True)):
57 if (i % 5) == 0:
57 if (i % 5) == 0:
58 fp.write('\n ')
58 fp.write('\n ')
59 vt = v / t
59 vt = v / t
60 if vt < 0.0005:
60 if vt < 0.0005:
61 break
61 break
62 fp.write('(%r, %.03f), ' % (k, vt))
62 fp.write('(%r, %.03f), ' % (k, vt))
63 fp.write('\n )\n')
63 fp.write('\n )\n')
64
64
65 # A table of character frequencies (as percentages), gleaned by
65 # A table of character frequencies (as percentages), gleaned by
66 # looking at filelog names from a real-world, very large repo.
66 # looking at filelog names from a real-world, very large repo.
67
67
68 probtable = (
68 probtable = (
69 ('t', 9.828), ('e', 9.042), ('s', 8.011), ('a', 6.801), ('i', 6.618),
69 ('t', 9.828), ('e', 9.042), ('s', 8.011), ('a', 6.801), ('i', 6.618),
70 ('g', 5.053), ('r', 5.030), ('o', 4.887), ('p', 4.363), ('n', 4.258),
70 ('g', 5.053), ('r', 5.030), ('o', 4.887), ('p', 4.363), ('n', 4.258),
71 ('l', 3.830), ('h', 3.693), ('_', 3.659), ('.', 3.377), ('m', 3.194),
71 ('l', 3.830), ('h', 3.693), ('_', 3.659), ('.', 3.377), ('m', 3.194),
72 ('u', 2.364), ('d', 2.296), ('c', 2.163), ('b', 1.739), ('f', 1.625),
72 ('u', 2.364), ('d', 2.296), ('c', 2.163), ('b', 1.739), ('f', 1.625),
73 ('6', 0.666), ('j', 0.610), ('y', 0.554), ('x', 0.487), ('w', 0.477),
73 ('6', 0.666), ('j', 0.610), ('y', 0.554), ('x', 0.487), ('w', 0.477),
74 ('k', 0.476), ('v', 0.473), ('3', 0.336), ('1', 0.335), ('2', 0.326),
74 ('k', 0.476), ('v', 0.473), ('3', 0.336), ('1', 0.335), ('2', 0.326),
75 ('4', 0.310), ('5', 0.305), ('9', 0.302), ('8', 0.300), ('7', 0.299),
75 ('4', 0.310), ('5', 0.305), ('9', 0.302), ('8', 0.300), ('7', 0.299),
76 ('q', 0.298), ('0', 0.250), ('z', 0.223), ('-', 0.118), ('C', 0.095),
76 ('q', 0.298), ('0', 0.250), ('z', 0.223), ('-', 0.118), ('C', 0.095),
77 ('T', 0.087), ('F', 0.085), ('B', 0.077), ('S', 0.076), ('P', 0.076),
77 ('T', 0.087), ('F', 0.085), ('B', 0.077), ('S', 0.076), ('P', 0.076),
78 ('L', 0.059), ('A', 0.058), ('N', 0.051), ('D', 0.049), ('M', 0.046),
78 ('L', 0.059), ('A', 0.058), ('N', 0.051), ('D', 0.049), ('M', 0.046),
79 ('E', 0.039), ('I', 0.035), ('R', 0.035), ('G', 0.028), ('U', 0.026),
79 ('E', 0.039), ('I', 0.035), ('R', 0.035), ('G', 0.028), ('U', 0.026),
80 ('W', 0.025), ('O', 0.017), ('V', 0.015), ('H', 0.013), ('Q', 0.011),
80 ('W', 0.025), ('O', 0.017), ('V', 0.015), ('H', 0.013), ('Q', 0.011),
81 ('J', 0.007), ('K', 0.005), ('+', 0.004), ('X', 0.003), ('Y', 0.001),
81 ('J', 0.007), ('K', 0.005), ('+', 0.004), ('X', 0.003), ('Y', 0.001),
82 )
82 )
83
83
84 for c, _ in probtable:
84 for c, _ in probtable:
85 validchars.remove(c)
85 validchars.remove(c)
86 validchars = list(validchars)
86 validchars = list(validchars)
87
87
88 def pickfrom(rng, table):
88 def pickfrom(rng, table):
89 c = 0
89 c = 0
90 r = rng.random() * sum(i[1] for i in table)
90 r = rng.random() * sum(i[1] for i in table)
91 for i, p in table:
91 for i, p in table:
92 c += p
92 c += p
93 if c >= r:
93 if c >= r:
94 return i
94 return i
95
95
96 reservedcombos = casecombinations(winreserved)
96 reservedcombos = casecombinations(winreserved)
97
97
98 # The first component of a name following a slash.
98 # The first component of a name following a slash.
99
99
100 firsttable = (
100 firsttable = (
101 (lambda rng: pickfrom(rng, probtable), 90),
101 (lambda rng: pickfrom(rng, probtable), 90),
102 (lambda rng: rng.choice(validchars), 5),
102 (lambda rng: rng.choice(validchars), 5),
103 (lambda rng: rng.choice(reservedcombos), 5),
103 (lambda rng: rng.choice(reservedcombos), 5),
104 )
104 )
105
105
106 # Components of a name following the first.
106 # Components of a name following the first.
107
107
108 resttable = firsttable[:-1]
108 resttable = firsttable[:-1]
109
109
110 # Special suffixes.
110 # Special suffixes.
111
111
112 internalsuffixcombos = casecombinations('.hg .i .d'.split())
112 internalsuffixcombos = casecombinations('.hg .i .d'.split())
113
113
114 # The last component of a path, before a slash or at the end of a name.
114 # The last component of a path, before a slash or at the end of a name.
115
115
116 lasttable = resttable + (
116 lasttable = resttable + (
117 (lambda rng: '', 95),
117 (lambda rng: '', 95),
118 (lambda rng: rng.choice(internalsuffixcombos), 5),
118 (lambda rng: rng.choice(internalsuffixcombos), 5),
119 )
119 )
120
120
121 def makepart(rng, k):
121 def makepart(rng, k):
122 '''Construct a part of a pathname, without slashes.'''
122 '''Construct a part of a pathname, without slashes.'''
123
123
124 p = pickfrom(rng, firsttable)(rng)
124 p = pickfrom(rng, firsttable)(rng)
125 l = len(p)
125 l = len(p)
126 ps = [p]
126 ps = [p]
127 while l <= k:
127 maxl = rng.randint(1, k)
128 while l < maxl:
128 p = pickfrom(rng, resttable)(rng)
129 p = pickfrom(rng, resttable)(rng)
129 l += len(p)
130 l += len(p)
130 ps.append(p)
131 ps.append(p)
131 ps.append(pickfrom(rng, lasttable)(rng))
132 ps.append(pickfrom(rng, lasttable)(rng))
132 return ''.join(ps)
133 return ''.join(ps)
133
134
134 def makepath(rng, j, k):
135 def makepath(rng, j, k):
135 '''Construct a complete pathname.'''
136 '''Construct a complete pathname.'''
136
137
137 return ('data/' + '/'.join(makepart(rng, k) for _ in xrange(j)) +
138 return ('data/' + '/'.join(makepart(rng, k) for _ in xrange(j)) +
138 rng.choice(['.d', '.i']))
139 rng.choice(['.d', '.i']))
139
140
140 def genpath(rng, count):
141 def genpath(rng, count):
141 '''Generate random pathnames with gradually increasing lengths.'''
142 '''Generate random pathnames with gradually increasing lengths.'''
142
143
143 mink, maxk = 1, 4096
144 mink, maxk = 1, 4096
144 def steps():
145 def steps():
145 x, k = 0, mink
146 x, k = 0, mink
146 for i in xrange(count):
147 for i in xrange(count):
147 yield mink + int(round(math.sqrt((maxk - mink) * float(i) / count)))
148 yield mink + int(round(math.sqrt((maxk - mink) * float(i) / count)))
148 for k in steps():
149 for k in steps():
149 x = rng.randint(1, k)
150 x = rng.randint(1, k)
150 y = rng.randint(1, k)
151 y = rng.randint(1, k)
151 yield makepath(rng, x, y)
152 yield makepath(rng, x, y)
152
153
153 def runtests(rng, seed, count):
154 def runtests(rng, seed, count):
154 nerrs = 0
155 nerrs = 0
155 for p in genpath(rng, count):
156 for p in genpath(rng, count):
156 h = store._pathencode(p) # uses C implementation, if available
157 h = store._pathencode(p) # uses C implementation, if available
157 r = store._hybridencode(p, True) # reference implementation in Python
158 r = store._hybridencode(p, True) # reference implementation in Python
158 if h != r:
159 if h != r:
159 if nerrs == 0:
160 if nerrs == 0:
160 print >> sys.stderr, 'seed:', hex(seed)[:-1]
161 print >> sys.stderr, 'seed:', hex(seed)[:-1]
161 print >> sys.stderr, "\np: '%s'" % p.encode("string_escape")
162 print >> sys.stderr, "\np: '%s'" % p.encode("string_escape")
162 print >> sys.stderr, "h: '%s'" % h.encode("string_escape")
163 print >> sys.stderr, "h: '%s'" % h.encode("string_escape")
163 print >> sys.stderr, "r: '%s'" % r.encode("string_escape")
164 print >> sys.stderr, "r: '%s'" % r.encode("string_escape")
164 nerrs += 1
165 nerrs += 1
165 return nerrs
166 return nerrs
166
167
167 def main():
168 def main():
168 import getopt
169 import getopt
169
170
170 # Empirically observed to take about a second to run
171 # Empirically observed to take about a second to run
171 count = 100
172 count = 100
172 seed = None
173 seed = None
173 opts, args = getopt.getopt(sys.argv[1:], 'c:s:',
174 opts, args = getopt.getopt(sys.argv[1:], 'c:s:',
174 ['build', 'count=', 'seed='])
175 ['build', 'count=', 'seed='])
175 for o, a in opts:
176 for o, a in opts:
176 if o in ('-c', '--count'):
177 if o in ('-c', '--count'):
177 count = int(a)
178 count = int(a)
178 elif o in ('-s', '--seed'):
179 elif o in ('-s', '--seed'):
179 seed = long(a, base=0) # accepts base 10 or 16 strings
180 seed = long(a, base=0) # accepts base 10 or 16 strings
180 elif o == '--build':
181 elif o == '--build':
181 buildprobtable(sys.stdout,
182 buildprobtable(sys.stdout,
182 'find .hg/store/data -type f && '
183 'find .hg/store/data -type f && '
183 'cat .hg/store/fncache 2>/dev/null')
184 'cat .hg/store/fncache 2>/dev/null')
184 sys.exit(0)
185 sys.exit(0)
185
186
186 if seed is None:
187 if seed is None:
187 try:
188 try:
188 seed = long(binascii.hexlify(os.urandom(16)), 16)
189 seed = long(binascii.hexlify(os.urandom(16)), 16)
189 except AttributeError:
190 except AttributeError:
190 seed = long(time.time() * 1000)
191 seed = long(time.time() * 1000)
191
192
192 rng = random.Random(seed)
193 rng = random.Random(seed)
193 if runtests(rng, seed, count):
194 if runtests(rng, seed, count):
194 sys.exit(1)
195 sys.exit(1)
195
196
196 if __name__ == '__main__':
197 if __name__ == '__main__':
197 main()
198 main()
General Comments 0
You need to be logged in to leave comments. Login now