##// END OF EJS Templates
merge with stable
Matt Mackall -
r19327:cf1b0a58 merge default
parent child Browse files
Show More
@@ -1,41 +1,41
1 1 ======
2 2 hgrc
3 3 ======
4 4
5 5 ---------------------------------
6 6 configuration files for Mercurial
7 7 ---------------------------------
8 8
9 9 :Author: Bryan O'Sullivan <bos@serpentine.com>
10 10 :Organization: Mercurial
11 11 :Manual section: 5
12 12 :Manual group: Mercurial Manual
13 13
14 14 .. contents::
15 15 :backlinks: top
16 16 :class: htmlonly
17 17
18 18
19 Synopsis
20 ========
19 Description
20 ===========
21 21
22 22 .. include:: ../mercurial/help/config.txt
23 23
24 24 Author
25 25 ======
26 26 Bryan O'Sullivan <bos@serpentine.com>.
27 27
28 28 Mercurial was written by Matt Mackall <mpm@selenic.com>.
29 29
30 30 See Also
31 31 ========
32 32 |hg(1)|_, |hgignore(5)|_
33 33
34 34 Copying
35 35 =======
36 36 This manual page is copyright 2005 Bryan O'Sullivan.
37 37 Mercurial is copyright 2005-2012 Matt Mackall.
38 38 Free use of this software is granted under the terms of the GNU General
39 39 Public License version 2 or any later version.
40 40
41 41 .. include:: common.txt
@@ -1,760 +1,761
1 1 /*
2 2 pathencode.c - efficient path name encoding
3 3
4 4 Copyright 2012 Facebook
5 5
6 6 This software may be used and distributed according to the terms of
7 7 the GNU General Public License, incorporated herein by reference.
8 8 */
9 9
10 10 /*
11 11 * An implementation of the name encoding scheme used by the fncache
12 12 * store. The common case is of a path < 120 bytes long, which is
13 13 * handled either in a single pass with no allocations or two passes
14 14 * with a single allocation. For longer paths, multiple passes are
15 15 * required.
16 16 */
17 17
18 18 #define PY_SSIZE_T_CLEAN
19 19 #include <Python.h>
20 20 #include <assert.h>
21 21 #include <ctype.h>
22 22 #include <stdlib.h>
23 23 #include <string.h>
24 24
25 25 #include "util.h"
26 26
/*
 * States of the single-pass encoder in _encode().  Most of them record
 * how much of a reserved name ("aux", "con", "prn", "nul", "comN",
 * "lptN") has been matched at the start of the current path component.
 */
enum path_state {
	START = 0, /* first byte of a path component */
	A,         /* "a" seen, may become "aux" */
	AU,        /* "au" seen */
	THIRD,     /* third byte of "aux", "con", "nul" or "prn" is pending */
	C,         /* "c" seen, may become "con" or "comN" */
	CO,        /* "co" seen */
	COMLPT,    /* "com" or "lpt" seen, its last byte is pending */
	COMLPTn,   /* "comN" or "lptN" seen, its last two bytes are pending */
	L,         /* "l" seen, may become "lptN" */
	LP,        /* "lp" seen */
	N,         /* "n" seen, may become "nul" */
	NU,        /* "nu" seen */
	P,         /* "p" seen, may become "prn" */
	PR,        /* "pr" seen */
	LDOT,      /* leading '.' */
	DOT,       /* '.' in a non-leading position, not yet emitted */
	H,         /* ".h" */
	HGDI,      /* ".hg", ".d", or ".i" */
	SPACE,     /* ' ' in a non-leading position, not yet emitted */
	DEFAULT    /* byte of a path component after the first */
};
50 50
/* States of the directory-suffix encoder in _encodedir(). */
enum dir_state {
	DDOT = 0, /* a '.' was just copied */
	DH,       /* ".h" was just copied */
	DHGDI,    /* ".hg", ".d", or ".i" was just copied */
	DDEFAULT  /* anything else */
};
58 58
/*
 * Test whether byte c belongs to a 256-bit set stored as eight 32-bit
 * words: bit (c & 31) of word (c >> 5).
 *
 * Returns 1 if the byte is in the set and 0 otherwise.
 */
static inline int inset(const uint32_t bitset[], char c)
{
	const uint8_t byte = (uint8_t)c;
	/* Shift an unsigned one: "1 << 31" on a signed int is undefined. */
	const uint32_t mask = (uint32_t)1 << (byte & 31);

	return (bitset[byte >> 5] & mask) != 0;
}
63 63
64 64 static inline void charcopy(char *dest, Py_ssize_t *destlen, size_t destsize,
65 65 char c)
66 66 {
67 67 if (dest) {
68 68 assert(*destlen < destsize);
69 69 dest[*destlen] = c;
70 70 }
71 71 (*destlen)++;
72 72 }
73 73
74 74 static inline void memcopy(char *dest, Py_ssize_t *destlen, size_t destsize,
75 75 const void *src, Py_ssize_t len)
76 76 {
77 77 if (dest) {
78 78 assert(*destlen + len < destsize);
79 79 memcpy((void *)&dest[*destlen], src, len);
80 80 }
81 81 *destlen += len;
82 82 }
83 83
84 84 static inline void hexencode(char *dest, Py_ssize_t *destlen, size_t destsize,
85 85 uint8_t c)
86 86 {
87 87 static const char hexdigit[] = "0123456789abcdef";
88 88
89 89 charcopy(dest, destlen, destsize, hexdigit[c >> 4]);
90 90 charcopy(dest, destlen, destsize, hexdigit[c & 15]);
91 91 }
92 92
93 93 /* 3-byte escape: tilde followed by two hex digits */
94 94 static inline void escape3(char *dest, Py_ssize_t *destlen, size_t destsize,
95 95 char c)
96 96 {
97 97 charcopy(dest, destlen, destsize, '~');
98 98 hexencode(dest, destlen, destsize, c);
99 99 }
100 100
101 101 static Py_ssize_t _encodedir(char *dest, size_t destsize,
102 102 const char *src, Py_ssize_t len)
103 103 {
104 104 enum dir_state state = DDEFAULT;
105 105 Py_ssize_t i = 0, destlen = 0;
106 106
107 107 while (i < len) {
108 108 switch (state) {
109 109 case DDOT:
110 110 switch (src[i]) {
111 111 case 'd':
112 112 case 'i':
113 113 state = DHGDI;
114 114 charcopy(dest, &destlen, destsize, src[i++]);
115 115 break;
116 116 case 'h':
117 117 state = DH;
118 118 charcopy(dest, &destlen, destsize, src[i++]);
119 119 break;
120 120 default:
121 121 state = DDEFAULT;
122 122 break;
123 123 }
124 124 break;
125 125 case DH:
126 126 if (src[i] == 'g') {
127 127 state = DHGDI;
128 128 charcopy(dest, &destlen, destsize, src[i++]);
129 129 }
130 130 else state = DDEFAULT;
131 131 break;
132 132 case DHGDI:
133 133 if (src[i] == '/') {
134 134 memcopy(dest, &destlen, destsize, ".hg", 3);
135 135 charcopy(dest, &destlen, destsize, src[i++]);
136 136 }
137 137 state = DDEFAULT;
138 138 break;
139 139 case DDEFAULT:
140 140 if (src[i] == '.')
141 141 state = DDOT;
142 142 charcopy(dest, &destlen, destsize, src[i++]);
143 143 break;
144 144 }
145 145 }
146 146
147 147 return destlen;
148 148 }
149 149
150 150 PyObject *encodedir(PyObject *self, PyObject *args)
151 151 {
152 152 Py_ssize_t len, newlen;
153 153 PyObject *pathobj, *newobj;
154 154 char *path;
155 155
156 156 if (!PyArg_ParseTuple(args, "O:encodedir", &pathobj))
157 157 return NULL;
158 158
159 159 if (PyString_AsStringAndSize(pathobj, &path, &len) == -1) {
160 160 PyErr_SetString(PyExc_TypeError, "expected a string");
161 161 return NULL;
162 162 }
163 163
164 164 newlen = len ? _encodedir(NULL, 0, path, len + 1) : 1;
165 165
166 166 if (newlen == len + 1) {
167 167 Py_INCREF(pathobj);
168 168 return pathobj;
169 169 }
170 170
171 171 newobj = PyString_FromStringAndSize(NULL, newlen);
172 172
173 173 if (newobj) {
174 174 PyString_GET_SIZE(newobj)--;
175 175 _encodedir(PyString_AS_STRING(newobj), newlen, path,
176 176 len + 1);
177 177 }
178 178
179 179 return newobj;
180 180 }
181 181
182 182 static Py_ssize_t _encode(const uint32_t twobytes[8], const uint32_t onebyte[8],
183 183 char *dest, Py_ssize_t destlen, size_t destsize,
184 184 const char *src, Py_ssize_t len,
185 185 int encodedir)
186 186 {
187 187 enum path_state state = START;
188 188 Py_ssize_t i = 0;
189 189
190 190 /*
191 191 * Python strings end with a zero byte, which we use as a
192 192 * terminal token as they are not valid inside path names.
193 193 */
194 194
195 195 while (i < len) {
196 196 switch (state) {
197 197 case START:
198 198 switch (src[i]) {
199 199 case '/':
200 200 charcopy(dest, &destlen, destsize, src[i++]);
201 201 break;
202 202 case '.':
203 203 state = LDOT;
204 204 escape3(dest, &destlen, destsize, src[i++]);
205 205 break;
206 206 case ' ':
207 207 state = DEFAULT;
208 208 escape3(dest, &destlen, destsize, src[i++]);
209 209 break;
210 210 case 'a':
211 211 state = A;
212 212 charcopy(dest, &destlen, destsize, src[i++]);
213 213 break;
214 214 case 'c':
215 215 state = C;
216 216 charcopy(dest, &destlen, destsize, src[i++]);
217 217 break;
218 218 case 'l':
219 219 state = L;
220 220 charcopy(dest, &destlen, destsize, src[i++]);
221 221 break;
222 222 case 'n':
223 223 state = N;
224 224 charcopy(dest, &destlen, destsize, src[i++]);
225 225 break;
226 226 case 'p':
227 227 state = P;
228 228 charcopy(dest, &destlen, destsize, src[i++]);
229 229 break;
230 230 default:
231 231 state = DEFAULT;
232 232 break;
233 233 }
234 234 break;
235 235 case A:
236 236 if (src[i] == 'u') {
237 237 state = AU;
238 238 charcopy(dest, &destlen, destsize, src[i++]);
239 239 }
240 240 else state = DEFAULT;
241 241 break;
242 242 case AU:
243 243 if (src[i] == 'x') {
244 244 state = THIRD;
245 245 i++;
246 246 }
247 247 else state = DEFAULT;
248 248 break;
249 249 case THIRD:
250 250 state = DEFAULT;
251 251 switch (src[i]) {
252 252 case '.':
253 253 case '/':
254 254 case '\0':
255 255 escape3(dest, &destlen, destsize, src[i - 1]);
256 256 break;
257 257 default:
258 258 i--;
259 259 break;
260 260 }
261 261 break;
262 262 case C:
263 263 if (src[i] == 'o') {
264 264 state = CO;
265 265 charcopy(dest, &destlen, destsize, src[i++]);
266 266 }
267 267 else state = DEFAULT;
268 268 break;
269 269 case CO:
270 270 if (src[i] == 'm') {
271 271 state = COMLPT;
272 272 i++;
273 273 }
274 274 else if (src[i] == 'n') {
275 275 state = THIRD;
276 276 i++;
277 277 }
278 278 else state = DEFAULT;
279 279 break;
280 280 case COMLPT:
281 281 switch (src[i]) {
282 282 case '1': case '2': case '3': case '4': case '5':
283 283 case '6': case '7': case '8': case '9':
284 284 state = COMLPTn;
285 285 i++;
286 286 break;
287 287 default:
288 288 state = DEFAULT;
289 289 charcopy(dest, &destlen, destsize, src[i - 1]);
290 290 break;
291 291 }
292 292 break;
293 293 case COMLPTn:
294 294 state = DEFAULT;
295 295 switch (src[i]) {
296 296 case '.':
297 297 case '/':
298 298 case '\0':
299 299 escape3(dest, &destlen, destsize, src[i - 2]);
300 300 charcopy(dest, &destlen, destsize, src[i - 1]);
301 301 break;
302 302 default:
303 303 memcopy(dest, &destlen, destsize,
304 304 &src[i - 2], 2);
305 305 break;
306 306 }
307 307 break;
308 308 case L:
309 309 if (src[i] == 'p') {
310 310 state = LP;
311 311 charcopy(dest, &destlen, destsize, src[i++]);
312 312 }
313 313 else state = DEFAULT;
314 314 break;
315 315 case LP:
316 316 if (src[i] == 't') {
317 317 state = COMLPT;
318 318 i++;
319 319 }
320 320 else state = DEFAULT;
321 321 break;
322 322 case N:
323 323 if (src[i] == 'u') {
324 324 state = NU;
325 325 charcopy(dest, &destlen, destsize, src[i++]);
326 326 }
327 327 else state = DEFAULT;
328 328 break;
329 329 case NU:
330 330 if (src[i] == 'l') {
331 331 state = THIRD;
332 332 i++;
333 333 }
334 334 else state = DEFAULT;
335 335 break;
336 336 case P:
337 337 if (src[i] == 'r') {
338 338 state = PR;
339 339 charcopy(dest, &destlen, destsize, src[i++]);
340 340 }
341 341 else state = DEFAULT;
342 342 break;
343 343 case PR:
344 344 if (src[i] == 'n') {
345 345 state = THIRD;
346 346 i++;
347 347 }
348 348 else state = DEFAULT;
349 349 break;
350 350 case LDOT:
351 351 switch (src[i]) {
352 352 case 'd':
353 353 case 'i':
354 354 state = HGDI;
355 355 charcopy(dest, &destlen, destsize, src[i++]);
356 356 break;
357 357 case 'h':
358 358 state = H;
359 359 charcopy(dest, &destlen, destsize, src[i++]);
360 360 break;
361 361 default:
362 362 state = DEFAULT;
363 363 break;
364 364 }
365 365 break;
366 366 case DOT:
367 367 switch (src[i]) {
368 368 case '/':
369 369 case '\0':
370 370 state = START;
371 371 memcopy(dest, &destlen, destsize, "~2e", 3);
372 372 charcopy(dest, &destlen, destsize, src[i++]);
373 373 break;
374 374 case 'd':
375 375 case 'i':
376 376 state = HGDI;
377 377 charcopy(dest, &destlen, destsize, '.');
378 378 charcopy(dest, &destlen, destsize, src[i++]);
379 379 break;
380 380 case 'h':
381 381 state = H;
382 382 memcopy(dest, &destlen, destsize, ".h", 2);
383 383 i++;
384 384 break;
385 385 default:
386 386 state = DEFAULT;
387 387 charcopy(dest, &destlen, destsize, '.');
388 388 break;
389 389 }
390 390 break;
391 391 case H:
392 392 if (src[i] == 'g') {
393 393 state = HGDI;
394 394 charcopy(dest, &destlen, destsize, src[i++]);
395 395 }
396 396 else state = DEFAULT;
397 397 break;
398 398 case HGDI:
399 399 if (src[i] == '/') {
400 400 state = START;
401 401 if (encodedir)
402 402 memcopy(dest, &destlen, destsize, ".hg",
403 403 3);
404 404 charcopy(dest, &destlen, destsize, src[i++]);
405 405 }
406 406 else state = DEFAULT;
407 407 break;
408 408 case SPACE:
409 409 switch (src[i]) {
410 410 case '/':
411 411 case '\0':
412 412 state = START;
413 413 memcopy(dest, &destlen, destsize, "~20", 3);
414 414 charcopy(dest, &destlen, destsize, src[i++]);
415 415 break;
416 416 default:
417 417 state = DEFAULT;
418 418 charcopy(dest, &destlen, destsize, ' ');
419 419 break;
420 420 }
421 421 break;
422 422 case DEFAULT:
423 423 while (inset(onebyte, src[i])) {
424 424 charcopy(dest, &destlen, destsize, src[i++]);
425 425 if (i == len)
426 426 goto done;
427 427 }
428 428 switch (src[i]) {
429 429 case '.':
430 430 state = DOT;
431 431 i++;
432 432 break;
433 433 case ' ':
434 434 state = SPACE;
435 435 i++;
436 436 break;
437 437 case '/':
438 438 state = START;
439 439 charcopy(dest, &destlen, destsize, '/');
440 440 i++;
441 441 break;
442 442 default:
443 443 if (inset(onebyte, src[i])) {
444 444 do {
445 445 charcopy(dest, &destlen,
446 446 destsize, src[i++]);
447 447 } while (i < len &&
448 448 inset(onebyte, src[i]));
449 449 }
450 450 else if (inset(twobytes, src[i])) {
451 451 char c = src[i++];
452 452 charcopy(dest, &destlen, destsize, '_');
453 453 charcopy(dest, &destlen, destsize,
454 454 c == '_' ? '_' : c + 32);
455 455 }
456 456 else
457 457 escape3(dest, &destlen, destsize,
458 458 src[i++]);
459 459 break;
460 460 }
461 461 break;
462 462 }
463 463 }
464 464 done:
465 465 return destlen;
466 466 }
467 467
468 468 static Py_ssize_t basicencode(char *dest, size_t destsize,
469 469 const char *src, Py_ssize_t len)
470 470 {
471 471 static const uint32_t twobytes[8] = { 0, 0, 0x87fffffe };
472 472
473 473 static const uint32_t onebyte[8] = {
474 474 1, 0x2bff3bfa, 0x68000001, 0x2fffffff,
475 475 };
476 476
477 477 Py_ssize_t destlen = 0;
478 478
479 479 return _encode(twobytes, onebyte, dest, destlen, destsize,
480 480 src, len, 1);
481 481 }
482 482
483 483 static const Py_ssize_t maxstorepathlen = 120;
484 484
485 485 static Py_ssize_t _lowerencode(char *dest, size_t destsize,
486 486 const char *src, Py_ssize_t len)
487 487 {
488 488 static const uint32_t onebyte[8] = {
489 489 1, 0x2bfffbfb, 0xe8000001, 0x2fffffff
490 490 };
491 491
492 492 static const uint32_t lower[8] = { 0, 0, 0x7fffffe };
493 493
494 494 Py_ssize_t i, destlen = 0;
495 495
496 496 for (i = 0; i < len; i++) {
497 497 if (inset(onebyte, src[i]))
498 498 charcopy(dest, &destlen, destsize, src[i]);
499 499 else if (inset(lower, src[i]))
500 500 charcopy(dest, &destlen, destsize, src[i] + 32);
501 501 else
502 502 escape3(dest, &destlen, destsize, src[i]);
503 503 }
504 504
505 505 return destlen;
506 506 }
507 507
508 508 PyObject *lowerencode(PyObject *self, PyObject *args)
509 509 {
510 510 char *path;
511 511 Py_ssize_t len, newlen;
512 512 PyObject *ret;
513 513
514 514 if (!PyArg_ParseTuple(args, "s#:lowerencode", &path, &len))
515 515 return NULL;
516 516
517 517 newlen = _lowerencode(NULL, 0, path, len);
518 518 ret = PyString_FromStringAndSize(NULL, newlen);
519 519 if (ret)
520 520 newlen = _lowerencode(PyString_AS_STRING(ret), newlen,
521 521 path, len);
522 522
523 523 return ret;
524 524 }
525 525
526 526 /* See store.py:_auxencode for a description. */
527 527 static Py_ssize_t auxencode(char *dest, size_t destsize,
528 528 const char *src, Py_ssize_t len)
529 529 {
530 530 static const uint32_t twobytes[8];
531 531
532 532 static const uint32_t onebyte[8] = {
533 533 ~0, 0xffff3ffe, ~0, ~0, ~0, ~0, ~0, ~0,
534 534 };
535 535
536 536 return _encode(twobytes, onebyte, dest, 0, destsize, src, len, 0);
537 537 }
538 538
539 539 static PyObject *hashmangle(const char *src, Py_ssize_t len, const char sha[20])
540 540 {
541 541 static const Py_ssize_t dirprefixlen = 8;
542 542 static const Py_ssize_t maxshortdirslen = 68;
543 543 char *dest;
544 544 PyObject *ret;
545 545
546 546 Py_ssize_t i, d, p, lastslash = len - 1, lastdot = -1;
547 547 Py_ssize_t destsize, destlen = 0, slop, used;
548 548
549 549 while (lastslash >= 0 && src[lastslash] != '/') {
550 550 if (src[lastslash] == '.' && lastdot == -1)
551 551 lastdot = lastslash;
552 552 lastslash--;
553 553 }
554 554
555 555 #if 0
556 556 /* All paths should end in a suffix of ".i" or ".d".
557 557 Unfortunately, the file names in test-hybridencode.py
558 558 violate this rule. */
559 559 if (lastdot != len - 3) {
560 560 PyErr_SetString(PyExc_ValueError,
561 561 "suffix missing or wrong length");
562 562 return NULL;
563 563 }
564 564 #endif
565 565
566 566 /* If src contains a suffix, we will append it to the end of
567 567 the new string, so make room. */
568 568 destsize = 120;
569 569 if (lastdot >= 0)
570 570 destsize += len - lastdot - 1;
571 571
572 572 ret = PyString_FromStringAndSize(NULL, destsize);
573 573 if (ret == NULL)
574 574 return NULL;
575 575
576 576 dest = PyString_AS_STRING(ret);
577 577 memcopy(dest, &destlen, destsize, "dh/", 3);
578 578
579 579 /* Copy up to dirprefixlen bytes of each path component, up to
580 580 a limit of maxshortdirslen bytes. */
581 581 for (i = d = p = 0; i < lastslash; i++, p++) {
582 582 if (src[i] == '/') {
583 583 char d = dest[destlen - 1];
584 584 /* After truncation, a directory name may end
585 585 in a space or dot, which are unportable. */
586 586 if (d == '.' || d == ' ')
587 587 dest[destlen - 1] = '_';
588 if (destlen > maxshortdirslen)
588 /* The + 3 is to account for "dh/" in the beginning */
589 if (destlen > maxshortdirslen + 3)
589 590 break;
590 591 charcopy(dest, &destlen, destsize, src[i]);
591 592 p = -1;
592 593 }
593 594 else if (p < dirprefixlen)
594 595 charcopy(dest, &destlen, destsize, src[i]);
595 596 }
596 597
597 598 /* Rewind to just before the last slash copied. */
598 599 if (destlen > maxshortdirslen + 3)
599 600 do {
600 601 destlen--;
601 602 } while (destlen > 0 && dest[destlen] != '/');
602 603
603 604 if (destlen > 3) {
604 605 if (lastslash > 0) {
605 606 char d = dest[destlen - 1];
606 607 /* The last directory component may be
607 608 truncated, so make it safe. */
608 609 if (d == '.' || d == ' ')
609 610 dest[destlen - 1] = '_';
610 611 }
611 612
612 613 charcopy(dest, &destlen, destsize, '/');
613 614 }
614 615
615 616 /* Add a prefix of the original file's name. Its length
616 617 depends on the number of bytes left after accounting for
617 618 hash and suffix. */
618 619 used = destlen + 40;
619 620 if (lastdot >= 0)
620 621 used += len - lastdot - 1;
621 622 slop = maxstorepathlen - used;
622 623 if (slop > 0) {
623 624 Py_ssize_t basenamelen =
624 625 lastslash >= 0 ? len - lastslash - 2 : len - 1;
625 626
626 627 if (basenamelen > slop)
627 628 basenamelen = slop;
628 629 if (basenamelen > 0)
629 630 memcopy(dest, &destlen, destsize, &src[lastslash + 1],
630 631 basenamelen);
631 632 }
632 633
633 634 /* Add hash and suffix. */
634 635 for (i = 0; i < 20; i++)
635 636 hexencode(dest, &destlen, destsize, sha[i]);
636 637
637 638 if (lastdot >= 0)
638 639 memcopy(dest, &destlen, destsize, &src[lastdot],
639 640 len - lastdot - 1);
640 641
641 642 PyString_GET_SIZE(ret) = destlen;
642 643
643 644 return ret;
644 645 }
645 646
646 647 /*
647 648 * Avoiding a trip through Python would improve performance by 50%,
648 649 * but we don't encounter enough long names to be worth the code.
649 650 */
650 651 static int sha1hash(char hash[20], const char *str, Py_ssize_t len)
651 652 {
652 653 static PyObject *shafunc;
653 654 PyObject *shaobj, *hashobj;
654 655
655 656 if (shafunc == NULL) {
656 657 PyObject *util, *name = PyString_FromString("mercurial.util");
657 658
658 659 if (name == NULL)
659 660 return -1;
660 661
661 662 util = PyImport_Import(name);
662 663 Py_DECREF(name);
663 664
664 665 if (util == NULL) {
665 666 PyErr_SetString(PyExc_ImportError, "mercurial.util");
666 667 return -1;
667 668 }
668 669 shafunc = PyObject_GetAttrString(util, "sha1");
669 670 Py_DECREF(util);
670 671
671 672 if (shafunc == NULL) {
672 673 PyErr_SetString(PyExc_AttributeError,
673 674 "module 'mercurial.util' has no "
674 675 "attribute 'sha1'");
675 676 return -1;
676 677 }
677 678 }
678 679
679 680 shaobj = PyObject_CallFunction(shafunc, "s#", str, len);
680 681
681 682 if (shaobj == NULL)
682 683 return -1;
683 684
684 685 hashobj = PyObject_CallMethod(shaobj, "digest", "");
685 686 Py_DECREF(shaobj);
686 687
687 688 if (!PyString_Check(hashobj) || PyString_GET_SIZE(hashobj) != 20) {
688 689 PyErr_SetString(PyExc_TypeError,
689 690 "result of digest is not a 20-byte hash");
690 691 Py_DECREF(hashobj);
691 692 return -1;
692 693 }
693 694
694 695 memcpy(hash, PyString_AS_STRING(hashobj), 20);
695 696 Py_DECREF(hashobj);
696 697 return 0;
697 698 }
698 699
699 700 #define MAXENCODE 4096 * 4
700 701
701 702 static PyObject *hashencode(const char *src, Py_ssize_t len)
702 703 {
703 704 char dired[MAXENCODE];
704 705 char lowered[MAXENCODE];
705 706 char auxed[MAXENCODE];
706 707 Py_ssize_t dirlen, lowerlen, auxlen, baselen;
707 708 char sha[20];
708 709
709 710 baselen = (len - 5) * 3;
710 711 if (baselen >= MAXENCODE) {
711 712 PyErr_SetString(PyExc_ValueError, "string too long");
712 713 return NULL;
713 714 }
714 715
715 716 dirlen = _encodedir(dired, baselen, src, len);
716 717 if (sha1hash(sha, dired, dirlen - 1) == -1)
717 718 return NULL;
718 719 lowerlen = _lowerencode(lowered, baselen, dired + 5, dirlen - 5);
719 720 auxlen = auxencode(auxed, baselen, lowered, lowerlen);
720 721 return hashmangle(auxed, auxlen, sha);
721 722 }
722 723
723 724 PyObject *pathencode(PyObject *self, PyObject *args)
724 725 {
725 726 Py_ssize_t len, newlen;
726 727 PyObject *pathobj, *newobj;
727 728 char *path;
728 729
729 730 if (!PyArg_ParseTuple(args, "O:pathencode", &pathobj))
730 731 return NULL;
731 732
732 733 if (PyString_AsStringAndSize(pathobj, &path, &len) == -1) {
733 734 PyErr_SetString(PyExc_TypeError, "expected a string");
734 735 return NULL;
735 736 }
736 737
737 738 if (len > maxstorepathlen)
738 739 newlen = maxstorepathlen + 2;
739 740 else
740 741 newlen = len ? basicencode(NULL, 0, path, len + 1) : 1;
741 742
742 743 if (newlen <= maxstorepathlen + 1) {
743 744 if (newlen == len + 1) {
744 745 Py_INCREF(pathobj);
745 746 return pathobj;
746 747 }
747 748
748 749 newobj = PyString_FromStringAndSize(NULL, newlen);
749 750
750 751 if (newobj) {
751 752 PyString_GET_SIZE(newobj)--;
752 753 basicencode(PyString_AS_STRING(newobj), newlen, path,
753 754 len + 1);
754 755 }
755 756 }
756 757 else
757 758 newobj = hashencode(path, len + 1);
758 759
759 760 return newobj;
760 761 }
@@ -1,197 +1,198
1 1 # This is a randomized test that generates different pathnames every
2 2 # time it is invoked, and tests the encoding of those pathnames.
3 3 #
4 4 # It uses a simple probabilistic model to generate valid pathnames
5 5 # that have proven likely to expose bugs and divergent behaviour in
6 6 # different encoding implementations.
7 7
8 8 from mercurial import store
9 9 import binascii, itertools, math, os, random, sys, time
10 10 import collections
11 11
12 12 if sys.version_info[:2] < (2, 6):
13 13 sys.exit(0)
14 14
# Every single-byte character; NUL and '/' are removed just below.
validchars = set(map(chr, range(0, 256)))
# range() stops before its end point, so go one past 'Z' to include it.
alphanum = range(ord('A'), ord('Z') + 1)

for c in '\0/':
    validchars.remove(c)

# Reserved base names; case variants of these are generated further down.
winreserved = ('aux con prn nul'.split() +
               ['com%d' % i for i in range(1, 10)] +
               ['lpt%d' % i for i in range(1, 10)])
24 24
def casecombinations(names):
    '''Build all case-diddled combinations of names.

    For each name, every character is either kept as it is or
    upper-cased, in all possible combinations.  Returns the distinct
    results as a sorted list.
    '''

    combos = set()

    for name in names:
        # one (as-is, upper-cased) pair per character position
        choices = [(ch, ch.upper()) for ch in name]
        for picked in itertools.product(*choices):
            combos.add(''.join(picked))
    return sorted(combos)
38 38
def buildprobtable(fp, cmd='hg manifest tip'):
    '''Construct and print a table of probabilities for path name
    components. The numbers are percentages.

    cmd is run through the shell and must print one path per line.  A
    trailing '.i' or '.d' and a leading 'data/' are stripped from each
    line before its characters are counted.  The table is written to
    fp as Python source.'''

    counts = collections.defaultdict(int)
    pipe = os.popen(cmd)
    try:
        lines = pipe.read().splitlines()
    finally:
        # do not leave the child process and its pipe dangling
        pipe.close()
    for line in lines:
        if line[-2:] in ('.i', '.d'):
            line = line[:-2]
        if line.startswith('data/'):
            line = line[5:]
        for c in line:
            counts[c] += 1
    for c in '\r/\n':
        counts.pop(c, None)
    t = sum(counts.values()) / 100.0
    fp.write('probtable = (')
    ranked = sorted(counts.items(), key=lambda x: x[1], reverse=True)
    for i, (k, v) in enumerate(ranked):
        if (i % 5) == 0:
            fp.write('\n ')
        vt = v / t
        # entries are sorted, so everything from here on is negligible
        if vt < 0.0005:
            break
        fp.write('(%r, %.03f), ' % (k, vt))
    fp.write('\n )\n')
64 64
65 65 # A table of character frequencies (as percentages), gleaned by
66 66 # looking at filelog names from a real-world, very large repo.
67 67
68 68 probtable = (
69 69 ('t', 9.828), ('e', 9.042), ('s', 8.011), ('a', 6.801), ('i', 6.618),
70 70 ('g', 5.053), ('r', 5.030), ('o', 4.887), ('p', 4.363), ('n', 4.258),
71 71 ('l', 3.830), ('h', 3.693), ('_', 3.659), ('.', 3.377), ('m', 3.194),
72 72 ('u', 2.364), ('d', 2.296), ('c', 2.163), ('b', 1.739), ('f', 1.625),
73 73 ('6', 0.666), ('j', 0.610), ('y', 0.554), ('x', 0.487), ('w', 0.477),
74 74 ('k', 0.476), ('v', 0.473), ('3', 0.336), ('1', 0.335), ('2', 0.326),
75 75 ('4', 0.310), ('5', 0.305), ('9', 0.302), ('8', 0.300), ('7', 0.299),
76 76 ('q', 0.298), ('0', 0.250), ('z', 0.223), ('-', 0.118), ('C', 0.095),
77 77 ('T', 0.087), ('F', 0.085), ('B', 0.077), ('S', 0.076), ('P', 0.076),
78 78 ('L', 0.059), ('A', 0.058), ('N', 0.051), ('D', 0.049), ('M', 0.046),
79 79 ('E', 0.039), ('I', 0.035), ('R', 0.035), ('G', 0.028), ('U', 0.026),
80 80 ('W', 0.025), ('O', 0.017), ('V', 0.015), ('H', 0.013), ('Q', 0.011),
81 81 ('J', 0.007), ('K', 0.005), ('+', 0.004), ('X', 0.003), ('Y', 0.001),
82 82 )
83 83
# Whatever is left after removing the frequent characters is the pool
# of rare characters.
for c, _ in probtable:
    validchars.remove(c)
# Sort instead of relying on set iteration order, which is not
# guaranteed: rng.choice() must pick the same characters whenever a
# failing run is replayed with its reported seed.
validchars = sorted(validchars)
87 87
def pickfrom(rng, table):
    '''Pick an item from table, a sequence of (item, weight) pairs,
    with probability proportional to its weight.

    rng must provide random() returning a float in [0, 1).'''
    total = sum(weight for _, weight in table)
    threshold = rng.random() * total
    running = 0
    for item, weight in table:
        running += weight
        if running >= threshold:
            return item
95 95
reservedcombos = casecombinations(winreserved)

# Generators for name pieces.  Each takes the rng and returns a string.

def _frequentchar(rng):
    return pickfrom(rng, probtable)

def _rarechar(rng):
    return rng.choice(validchars)

def _reservedname(rng):
    return rng.choice(reservedcombos)

# The first component of a name following a slash.

firsttable = (
    (_frequentchar, 90),
    (_rarechar, 5),
    (_reservedname, 5),
    )

# Components of a name following the first: the same, minus the
# reserved names.

resttable = firsttable[:-1]

# Special suffixes.

internalsuffixcombos = casecombinations('.hg .i .d'.split())

def _nothing(rng):
    return ''

def _internalsuffix(rng):
    return rng.choice(internalsuffixcombos)

# The last component of a path, before a slash or at the end of a name.

lasttable = resttable + (
    (_nothing, 95),
    (_internalsuffix, 5),
    )
120 120
def makepart(rng, k):
    '''Construct a part of a pathname, without slashes.

    A target length is drawn from 1..k and pieces are appended until
    that length is reached, followed by one piece from lasttable.  A
    multi-character piece can overshoot the target.'''

    first = pickfrom(rng, firsttable)(rng)
    total = len(first)
    pieces = [first]
    # Draw the target after the first piece: the order of rng calls
    # determines which names a given seed produces.
    maxl = rng.randint(1, k)
    while total < maxl:
        piece = pickfrom(rng, resttable)(rng)
        total += len(piece)
        pieces.append(piece)
    pieces.append(pickfrom(rng, lasttable)(rng))
    return ''.join(pieces)
133 134
def makepath(rng, j, k):
    '''Construct a complete pathname.

    The result is 'data/', then j parts built by makepart(rng, k)
    joined with '/', then a '.d' or '.i' suffix.'''

    parts = [makepart(rng, k) for _ in xrange(j)]
    suffix = rng.choice(['.d', '.i'])
    return 'data/' + '/'.join(parts) + suffix
139 140
def genpath(rng, count):
    '''Generate random pathnames with gradually increasing lengths.

    Yields count paths.  The bound k on the number of parts and on the
    part length grows with the square root of the iteration number.'''

    mink, maxk = 1, 4096
    span = maxk - mink
    for i in xrange(count):
        k = mink + int(round(math.sqrt(span * float(i) / count)))
        x = rng.randint(1, k)
        y = rng.randint(1, k)
        yield makepath(rng, x, y)
152 153
def runtests(rng, seed, count):
    '''Encode count generated paths with both implementations and
    report every mismatch on stderr.

    The seed is printed once, before the first mismatch.  Returns the
    number of mismatches.'''
    nerrs = 0
    err = sys.stderr
    for p in genpath(rng, count):
        h = store._pathencode(p)    # uses C implementation, if available
        r = store._hybridencode(p, True) # reference implementation in Python
        if h == r:
            continue
        if nerrs == 0:
            err.write('seed: %s\n' % hex(seed)[:-1])
        err.write("\np: '%s'\n" % p.encode("string_escape"))
        err.write("h: '%s'\n" % h.encode("string_escape"))
        err.write("r: '%s'\n" % r.encode("string_escape"))
        nerrs += 1
    return nerrs
166 167
def main():
    '''Parse the command line and run the randomized comparison.

    -c/--count N   number of paths to generate (default 100)
    -s/--seed S    seed for the generator, base 10 or 16
    --build        print a probability table and exit

    Exits with status 1 if any path was encoded differently by the two
    implementations.'''
    import getopt

    # Empirically observed to take about a second to run
    count = 100
    seed = None
    opts, args = getopt.getopt(sys.argv[1:], 'c:s:',
                               ['build', 'count=', 'seed='])
    for o, a in opts:
        if o in ('-c', '--count'):
            count = int(a)
        elif o in ('-s', '--seed'):
            seed = long(a, base=0) # accepts base 10 or 16 strings
        elif o == '--build':
            buildprobtable(sys.stdout,
                           'find .hg/store/data -type f && '
                           'cat .hg/store/fncache 2>/dev/null')
            sys.exit(0)

    if seed is None:
        try:
            seed = long(binascii.hexlify(os.urandom(16)), 16)
        except (AttributeError, NotImplementedError):
            # os.urandom is missing, or no randomness source was found
            seed = long(time.time() * 1000)

    rng = random.Random(seed)
    if runtests(rng, seed, count):
        sys.exit(1)

if __name__ == '__main__':
    main()
General Comments 0
You need to be logged in to leave comments. Login now