##// END OF EJS Templates
pathencode: allow clang-format oversight...
Augie Fackler -
r36073:44cb058b default
parent child Browse files
Show More
@@ -1,60 +1,59
1 1 # Files that just need to be migrated to the formatter.
2 2 # Do not add new files here!
3 3 mercurial/cext/base85.c
4 4 mercurial/cext/charencode.c
5 5 mercurial/cext/charencode.h
6 6 mercurial/cext/diffhelpers.c
7 7 mercurial/cext/dirs.c
8 8 mercurial/cext/manifest.c
9 9 mercurial/cext/mpatch.c
10 10 mercurial/cext/osutil.c
11 mercurial/cext/pathencode.c
12 11 mercurial/cext/revlog.c
13 12 # Vendored code that we should never format:
14 13 contrib/python-zstandard/c-ext/bufferutil.c
15 14 contrib/python-zstandard/c-ext/compressiondict.c
16 15 contrib/python-zstandard/c-ext/compressionparams.c
17 16 contrib/python-zstandard/c-ext/compressionwriter.c
18 17 contrib/python-zstandard/c-ext/compressobj.c
19 18 contrib/python-zstandard/c-ext/compressor.c
20 19 contrib/python-zstandard/c-ext/compressoriterator.c
21 20 contrib/python-zstandard/c-ext/constants.c
22 21 contrib/python-zstandard/c-ext/decompressionwriter.c
23 22 contrib/python-zstandard/c-ext/decompressobj.c
24 23 contrib/python-zstandard/c-ext/decompressor.c
25 24 contrib/python-zstandard/c-ext/decompressoriterator.c
26 25 contrib/python-zstandard/c-ext/frameparams.c
27 26 contrib/python-zstandard/c-ext/python-zstandard.h
28 27 contrib/python-zstandard/zstd.c
29 28 contrib/python-zstandard/zstd/common/bitstream.h
30 29 contrib/python-zstandard/zstd/common/entropy_common.c
31 30 contrib/python-zstandard/zstd/common/error_private.c
32 31 contrib/python-zstandard/zstd/common/error_private.h
33 32 contrib/python-zstandard/zstd/common/fse.h
34 33 contrib/python-zstandard/zstd/common/fse_decompress.c
35 34 contrib/python-zstandard/zstd/common/huf.h
36 35 contrib/python-zstandard/zstd/common/mem.h
37 36 contrib/python-zstandard/zstd/common/pool.c
38 37 contrib/python-zstandard/zstd/common/pool.h
39 38 contrib/python-zstandard/zstd/common/threading.c
40 39 contrib/python-zstandard/zstd/common/threading.h
41 40 contrib/python-zstandard/zstd/common/xxhash.c
42 41 contrib/python-zstandard/zstd/common/xxhash.h
43 42 contrib/python-zstandard/zstd/common/zstd_common.c
44 43 contrib/python-zstandard/zstd/common/zstd_errors.h
45 44 contrib/python-zstandard/zstd/common/zstd_internal.h
46 45 contrib/python-zstandard/zstd/compress/fse_compress.c
47 46 contrib/python-zstandard/zstd/compress/huf_compress.c
48 47 contrib/python-zstandard/zstd/compress/zstd_compress.c
49 48 contrib/python-zstandard/zstd/compress/zstd_opt.h
50 49 contrib/python-zstandard/zstd/compress/zstdmt_compress.c
51 50 contrib/python-zstandard/zstd/compress/zstdmt_compress.h
52 51 contrib/python-zstandard/zstd/decompress/huf_decompress.c
53 52 contrib/python-zstandard/zstd/decompress/zstd_decompress.c
54 53 contrib/python-zstandard/zstd/dictBuilder/cover.c
55 54 contrib/python-zstandard/zstd/dictBuilder/divsufsort.c
56 55 contrib/python-zstandard/zstd/dictBuilder/divsufsort.h
57 56 contrib/python-zstandard/zstd/dictBuilder/zdict.c
58 57 contrib/python-zstandard/zstd/dictBuilder/zdict.h
59 58 contrib/python-zstandard/zstd/zstd.h
60 59 hgext/fsmonitor/pywatchman/bser.c
@@ -1,765 +1,763
1 1 /*
2 2 pathencode.c - efficient path name encoding
3 3
4 4 Copyright 2012 Facebook
5 5
6 6 This software may be used and distributed according to the terms of
7 7 the GNU General Public License, incorporated herein by reference.
8 8 */
9 9
10 10 /*
11 11 * An implementation of the name encoding scheme used by the fncache
12 12 * store. The common case is of a path < 120 bytes long, which is
13 13 * handled either in a single pass with no allocations or two passes
14 14 * with a single allocation. For longer paths, multiple passes are
15 15 * required.
16 16 */
17 17
18 18 #define PY_SSIZE_T_CLEAN
19 19 #include <Python.h>
20 20 #include <assert.h>
21 21 #include <ctype.h>
22 22 #include <stdlib.h>
23 23 #include <string.h>
24 24
25 25 #include "util.h"
26 26
/*
 * State machine for the fast path.
 *
 * Each state records how much of a special path component prefix has
 * been seen so far while scanning the input one byte at a time.
 */
enum path_state {
	START, /* first byte of a path component */
	A,     /* "AUX" */
	AU,
	THIRD, /* third of a 3-byte sequence, e.g. "AUX", "NUL" */
	C,     /* "CON" or "COMn" */
	CO,
	COMLPT, /* "COM" or "LPT" */
	COMLPTn,
	L,
	LP,
	N,
	NU,
	P, /* "PRN" */
	PR,
	LDOT, /* leading '.' */
	DOT,  /* '.' in a non-leading position */
	H,    /* ".h" */
	HGDI, /* ".hg", ".d", or ".i" */
	SPACE,
	DEFAULT, /* byte of a path component after the first */
};
50 50
/*
 * State machine for dir-encoding.
 *
 * Tracks progress through a ".hg", ".d" or ".i" suffix at the end of a
 * directory name.
 */
enum dir_state {
	DDOT,     /* just saw '.' */
	DH,       /* saw ".h" */
	DHGDI,    /* saw ".hg", ".d", or ".i" */
	DDEFAULT, /* any other position */
};
58 58
/*
 * Test whether byte c is a member of a 256-bit set.
 *
 * bitset holds eight 32-bit words; byte value v is represented by bit
 * (v & 31) of word (v >> 5).  Returns 1 when the bit is set and 0
 * otherwise.
 */
static inline int inset(const uint32_t bitset[], char c)
{
	const uint8_t byte = (uint8_t)c;

	/* Shift the word down instead of shifting a signed 1 up:
	   "1 << 31" overflows int, which is undefined behavior. */
	return (int)((bitset[byte >> 5] >> (byte & 31)) & 1U);
}
63 63
64 64 static inline void charcopy(char *dest, Py_ssize_t *destlen, size_t destsize,
65 65 char c)
66 66 {
67 67 if (dest) {
68 68 assert(*destlen < destsize);
69 69 dest[*destlen] = c;
70 70 }
71 71 (*destlen)++;
72 72 }
73 73
74 74 static inline void memcopy(char *dest, Py_ssize_t *destlen, size_t destsize,
75 75 const void *src, Py_ssize_t len)
76 76 {
77 77 if (dest) {
78 78 assert(*destlen + len < destsize);
79 79 memcpy((void *)&dest[*destlen], src, len);
80 80 }
81 81 *destlen += len;
82 82 }
83 83
84 84 static inline void hexencode(char *dest, Py_ssize_t *destlen, size_t destsize,
85 uint8_t c)
85 uint8_t c)
86 86 {
87 87 static const char hexdigit[] = "0123456789abcdef";
88 88
89 89 charcopy(dest, destlen, destsize, hexdigit[c >> 4]);
90 90 charcopy(dest, destlen, destsize, hexdigit[c & 15]);
91 91 }
92 92
93 93 /* 3-byte escape: tilde followed by two hex digits */
94 94 static inline void escape3(char *dest, Py_ssize_t *destlen, size_t destsize,
95 char c)
95 char c)
96 96 {
97 97 charcopy(dest, destlen, destsize, '~');
98 98 hexencode(dest, destlen, destsize, c);
99 99 }
100 100
101 static Py_ssize_t _encodedir(char *dest, size_t destsize,
102 const char *src, Py_ssize_t len)
101 static Py_ssize_t _encodedir(char *dest, size_t destsize, const char *src,
102 Py_ssize_t len)
103 103 {
104 104 enum dir_state state = DDEFAULT;
105 105 Py_ssize_t i = 0, destlen = 0;
106 106
107 107 while (i < len) {
108 108 switch (state) {
109 109 case DDOT:
110 110 switch (src[i]) {
111 111 case 'd':
112 112 case 'i':
113 113 state = DHGDI;
114 114 charcopy(dest, &destlen, destsize, src[i++]);
115 115 break;
116 116 case 'h':
117 117 state = DH;
118 118 charcopy(dest, &destlen, destsize, src[i++]);
119 119 break;
120 120 default:
121 121 state = DDEFAULT;
122 122 break;
123 123 }
124 124 break;
125 125 case DH:
126 126 if (src[i] == 'g') {
127 127 state = DHGDI;
128 128 charcopy(dest, &destlen, destsize, src[i++]);
129 }
130 else state = DDEFAULT;
129 } else
130 state = DDEFAULT;
131 131 break;
132 132 case DHGDI:
133 133 if (src[i] == '/') {
134 134 memcopy(dest, &destlen, destsize, ".hg", 3);
135 135 charcopy(dest, &destlen, destsize, src[i++]);
136 136 }
137 137 state = DDEFAULT;
138 138 break;
139 139 case DDEFAULT:
140 140 if (src[i] == '.')
141 141 state = DDOT;
142 142 charcopy(dest, &destlen, destsize, src[i++]);
143 143 break;
144 144 }
145 145 }
146 146
147 147 return destlen;
148 148 }
149 149
150 150 PyObject *encodedir(PyObject *self, PyObject *args)
151 151 {
152 152 Py_ssize_t len, newlen;
153 153 PyObject *pathobj, *newobj;
154 154 char *path;
155 155
156 156 if (!PyArg_ParseTuple(args, "O:encodedir", &pathobj))
157 157 return NULL;
158 158
159 159 if (PyBytes_AsStringAndSize(pathobj, &path, &len) == -1) {
160 160 PyErr_SetString(PyExc_TypeError, "expected a string");
161 161 return NULL;
162 162 }
163 163
164 164 newlen = len ? _encodedir(NULL, 0, path, len + 1) : 1;
165 165
166 166 if (newlen == len + 1) {
167 167 Py_INCREF(pathobj);
168 168 return pathobj;
169 169 }
170 170
171 171 newobj = PyBytes_FromStringAndSize(NULL, newlen);
172 172
173 173 if (newobj) {
174 174 assert(PyBytes_Check(newobj));
175 175 Py_SIZE(newobj)--;
176 _encodedir(PyBytes_AS_STRING(newobj), newlen, path,
177 len + 1);
176 _encodedir(PyBytes_AS_STRING(newobj), newlen, path, len + 1);
178 177 }
179 178
180 179 return newobj;
181 180 }
182 181
183 182 static Py_ssize_t _encode(const uint32_t twobytes[8], const uint32_t onebyte[8],
184 char *dest, Py_ssize_t destlen, size_t destsize,
185 const char *src, Py_ssize_t len,
186 int encodedir)
183 char *dest, Py_ssize_t destlen, size_t destsize,
184 const char *src, Py_ssize_t len, int encodedir)
187 185 {
188 186 enum path_state state = START;
189 187 Py_ssize_t i = 0;
190 188
191 189 /*
192 190 * Python strings end with a zero byte, which we use as a
193 191 * terminal token as they are not valid inside path names.
194 192 */
195 193
196 194 while (i < len) {
197 195 switch (state) {
198 196 case START:
199 197 switch (src[i]) {
200 198 case '/':
201 199 charcopy(dest, &destlen, destsize, src[i++]);
202 200 break;
203 201 case '.':
204 202 state = LDOT;
205 203 escape3(dest, &destlen, destsize, src[i++]);
206 204 break;
207 205 case ' ':
208 206 state = DEFAULT;
209 207 escape3(dest, &destlen, destsize, src[i++]);
210 208 break;
211 209 case 'a':
212 210 state = A;
213 211 charcopy(dest, &destlen, destsize, src[i++]);
214 212 break;
215 213 case 'c':
216 214 state = C;
217 215 charcopy(dest, &destlen, destsize, src[i++]);
218 216 break;
219 217 case 'l':
220 218 state = L;
221 219 charcopy(dest, &destlen, destsize, src[i++]);
222 220 break;
223 221 case 'n':
224 222 state = N;
225 223 charcopy(dest, &destlen, destsize, src[i++]);
226 224 break;
227 225 case 'p':
228 226 state = P;
229 227 charcopy(dest, &destlen, destsize, src[i++]);
230 228 break;
231 229 default:
232 230 state = DEFAULT;
233 231 break;
234 232 }
235 233 break;
236 234 case A:
237 235 if (src[i] == 'u') {
238 236 state = AU;
239 237 charcopy(dest, &destlen, destsize, src[i++]);
240 }
241 else state = DEFAULT;
238 } else
239 state = DEFAULT;
242 240 break;
243 241 case AU:
244 242 if (src[i] == 'x') {
245 243 state = THIRD;
246 244 i++;
247 }
248 else state = DEFAULT;
245 } else
246 state = DEFAULT;
249 247 break;
250 248 case THIRD:
251 249 state = DEFAULT;
252 250 switch (src[i]) {
253 251 case '.':
254 252 case '/':
255 253 case '\0':
256 254 escape3(dest, &destlen, destsize, src[i - 1]);
257 255 break;
258 256 default:
259 257 i--;
260 258 break;
261 259 }
262 260 break;
263 261 case C:
264 262 if (src[i] == 'o') {
265 263 state = CO;
266 264 charcopy(dest, &destlen, destsize, src[i++]);
267 }
268 else state = DEFAULT;
265 } else
266 state = DEFAULT;
269 267 break;
270 268 case CO:
271 269 if (src[i] == 'm') {
272 270 state = COMLPT;
273 271 i++;
274 }
275 else if (src[i] == 'n') {
272 } else if (src[i] == 'n') {
276 273 state = THIRD;
277 274 i++;
278 }
279 else state = DEFAULT;
275 } else
276 state = DEFAULT;
280 277 break;
281 278 case COMLPT:
282 279 switch (src[i]) {
283 case '1': case '2': case '3': case '4': case '5':
284 case '6': case '7': case '8': case '9':
280 case '1':
281 case '2':
282 case '3':
283 case '4':
284 case '5':
285 case '6':
286 case '7':
287 case '8':
288 case '9':
285 289 state = COMLPTn;
286 290 i++;
287 291 break;
288 292 default:
289 293 state = DEFAULT;
290 294 charcopy(dest, &destlen, destsize, src[i - 1]);
291 295 break;
292 296 }
293 297 break;
294 298 case COMLPTn:
295 299 state = DEFAULT;
296 300 switch (src[i]) {
297 301 case '.':
298 302 case '/':
299 303 case '\0':
300 304 escape3(dest, &destlen, destsize, src[i - 2]);
301 305 charcopy(dest, &destlen, destsize, src[i - 1]);
302 306 break;
303 307 default:
304 memcopy(dest, &destlen, destsize,
305 &src[i - 2], 2);
308 memcopy(dest, &destlen, destsize, &src[i - 2],
309 2);
306 310 break;
307 311 }
308 312 break;
309 313 case L:
310 314 if (src[i] == 'p') {
311 315 state = LP;
312 316 charcopy(dest, &destlen, destsize, src[i++]);
313 }
314 else state = DEFAULT;
317 } else
318 state = DEFAULT;
315 319 break;
316 320 case LP:
317 321 if (src[i] == 't') {
318 322 state = COMLPT;
319 323 i++;
320 }
321 else state = DEFAULT;
324 } else
325 state = DEFAULT;
322 326 break;
323 327 case N:
324 328 if (src[i] == 'u') {
325 329 state = NU;
326 330 charcopy(dest, &destlen, destsize, src[i++]);
327 }
328 else state = DEFAULT;
331 } else
332 state = DEFAULT;
329 333 break;
330 334 case NU:
331 335 if (src[i] == 'l') {
332 336 state = THIRD;
333 337 i++;
334 }
335 else state = DEFAULT;
338 } else
339 state = DEFAULT;
336 340 break;
337 341 case P:
338 342 if (src[i] == 'r') {
339 343 state = PR;
340 344 charcopy(dest, &destlen, destsize, src[i++]);
341 }
342 else state = DEFAULT;
345 } else
346 state = DEFAULT;
343 347 break;
344 348 case PR:
345 349 if (src[i] == 'n') {
346 350 state = THIRD;
347 351 i++;
348 }
349 else state = DEFAULT;
352 } else
353 state = DEFAULT;
350 354 break;
351 355 case LDOT:
352 356 switch (src[i]) {
353 357 case 'd':
354 358 case 'i':
355 359 state = HGDI;
356 360 charcopy(dest, &destlen, destsize, src[i++]);
357 361 break;
358 362 case 'h':
359 363 state = H;
360 364 charcopy(dest, &destlen, destsize, src[i++]);
361 365 break;
362 366 default:
363 367 state = DEFAULT;
364 368 break;
365 369 }
366 370 break;
367 371 case DOT:
368 372 switch (src[i]) {
369 373 case '/':
370 374 case '\0':
371 375 state = START;
372 376 memcopy(dest, &destlen, destsize, "~2e", 3);
373 377 charcopy(dest, &destlen, destsize, src[i++]);
374 378 break;
375 379 case 'd':
376 380 case 'i':
377 381 state = HGDI;
378 382 charcopy(dest, &destlen, destsize, '.');
379 383 charcopy(dest, &destlen, destsize, src[i++]);
380 384 break;
381 385 case 'h':
382 386 state = H;
383 387 memcopy(dest, &destlen, destsize, ".h", 2);
384 388 i++;
385 389 break;
386 390 default:
387 391 state = DEFAULT;
388 392 charcopy(dest, &destlen, destsize, '.');
389 393 break;
390 394 }
391 395 break;
392 396 case H:
393 397 if (src[i] == 'g') {
394 398 state = HGDI;
395 399 charcopy(dest, &destlen, destsize, src[i++]);
396 }
397 else state = DEFAULT;
400 } else
401 state = DEFAULT;
398 402 break;
399 403 case HGDI:
400 404 if (src[i] == '/') {
401 405 state = START;
402 406 if (encodedir)
403 407 memcopy(dest, &destlen, destsize, ".hg",
404 3);
408 3);
405 409 charcopy(dest, &destlen, destsize, src[i++]);
406 }
407 else state = DEFAULT;
410 } else
411 state = DEFAULT;
408 412 break;
409 413 case SPACE:
410 414 switch (src[i]) {
411 415 case '/':
412 416 case '\0':
413 417 state = START;
414 418 memcopy(dest, &destlen, destsize, "~20", 3);
415 419 charcopy(dest, &destlen, destsize, src[i++]);
416 420 break;
417 421 default:
418 422 state = DEFAULT;
419 423 charcopy(dest, &destlen, destsize, ' ');
420 424 break;
421 425 }
422 426 break;
423 427 case DEFAULT:
424 428 while (inset(onebyte, src[i])) {
425 429 charcopy(dest, &destlen, destsize, src[i++]);
426 430 if (i == len)
427 431 goto done;
428 432 }
429 433 switch (src[i]) {
430 434 case '.':
431 435 state = DOT;
432 436 i++;
433 437 break;
434 438 case ' ':
435 439 state = SPACE;
436 440 i++;
437 441 break;
438 442 case '/':
439 443 state = START;
440 444 charcopy(dest, &destlen, destsize, '/');
441 445 i++;
442 446 break;
443 447 default:
444 448 if (inset(onebyte, src[i])) {
445 449 do {
446 450 charcopy(dest, &destlen,
447 destsize, src[i++]);
451 destsize, src[i++]);
448 452 } while (i < len &&
449 inset(onebyte, src[i]));
450 }
451 else if (inset(twobytes, src[i])) {
453 inset(onebyte, src[i]));
454 } else if (inset(twobytes, src[i])) {
452 455 char c = src[i++];
453 456 charcopy(dest, &destlen, destsize, '_');
454 457 charcopy(dest, &destlen, destsize,
455 c == '_' ? '_' : c + 32);
456 }
457 else
458 c == '_' ? '_' : c + 32);
459 } else
458 460 escape3(dest, &destlen, destsize,
459 src[i++]);
461 src[i++]);
460 462 break;
461 463 }
462 464 break;
463 465 }
464 466 }
465 467 done:
466 468 return destlen;
467 469 }
468 470
469 static Py_ssize_t basicencode(char *dest, size_t destsize,
470 const char *src, Py_ssize_t len)
471 static Py_ssize_t basicencode(char *dest, size_t destsize, const char *src,
472 Py_ssize_t len)
471 473 {
472 static const uint32_t twobytes[8] = { 0, 0, 0x87fffffe };
474 static const uint32_t twobytes[8] = {0, 0, 0x87fffffe};
473 475
474 476 static const uint32_t onebyte[8] = {
475 1, 0x2bff3bfa, 0x68000001, 0x2fffffff,
477 1, 0x2bff3bfa, 0x68000001, 0x2fffffff,
476 478 };
477 479
478 480 Py_ssize_t destlen = 0;
479 481
480 return _encode(twobytes, onebyte, dest, destlen, destsize,
481 src, len, 1);
482 return _encode(twobytes, onebyte, dest, destlen, destsize, src, len, 1);
482 483 }
483 484
/* Longest encoded path that pathencode() returns without hashing;
   hashmangle() also uses it as the budget for hashed names. */
484 485 static const Py_ssize_t maxstorepathlen = 120;
485 486
486 static Py_ssize_t _lowerencode(char *dest, size_t destsize,
487 const char *src, Py_ssize_t len)
487 static Py_ssize_t _lowerencode(char *dest, size_t destsize, const char *src,
488 Py_ssize_t len)
488 489 {
489 static const uint32_t onebyte[8] = {
490 1, 0x2bfffbfb, 0xe8000001, 0x2fffffff
491 };
490 static const uint32_t onebyte[8] = {1, 0x2bfffbfb, 0xe8000001,
491 0x2fffffff};
492 492
493 static const uint32_t lower[8] = { 0, 0, 0x7fffffe };
493 static const uint32_t lower[8] = {0, 0, 0x7fffffe};
494 494
495 495 Py_ssize_t i, destlen = 0;
496 496
497 497 for (i = 0; i < len; i++) {
498 498 if (inset(onebyte, src[i]))
499 499 charcopy(dest, &destlen, destsize, src[i]);
500 500 else if (inset(lower, src[i]))
501 501 charcopy(dest, &destlen, destsize, src[i] + 32);
502 502 else
503 503 escape3(dest, &destlen, destsize, src[i]);
504 504 }
505 505
506 506 return destlen;
507 507 }
508 508
509 509 PyObject *lowerencode(PyObject *self, PyObject *args)
510 510 {
511 511 char *path;
512 512 Py_ssize_t len, newlen;
513 513 PyObject *ret;
514 514
515 515 if (!PyArg_ParseTuple(args, "s#:lowerencode", &path, &len))
516 516 return NULL;
517 517
518 518 newlen = _lowerencode(NULL, 0, path, len);
519 519 ret = PyBytes_FromStringAndSize(NULL, newlen);
520 520 if (ret)
521 521 _lowerencode(PyBytes_AS_STRING(ret), newlen, path, len);
522 522
523 523 return ret;
524 524 }
525 525
526 526 /* See store.py:_auxencode for a description. */
527 static Py_ssize_t auxencode(char *dest, size_t destsize,
528 const char *src, Py_ssize_t len)
527 static Py_ssize_t auxencode(char *dest, size_t destsize, const char *src,
528 Py_ssize_t len)
529 529 {
530 530 static const uint32_t twobytes[8];
531 531
532 532 static const uint32_t onebyte[8] = {
533 ~0U, 0xffff3ffe, ~0U, ~0U, ~0U, ~0U, ~0U, ~0U,
533 ~0U, 0xffff3ffe, ~0U, ~0U, ~0U, ~0U, ~0U, ~0U,
534 534 };
535 535
536 536 return _encode(twobytes, onebyte, dest, 0, destsize, src, len, 0);
537 537 }
538 538
539 539 static PyObject *hashmangle(const char *src, Py_ssize_t len, const char sha[20])
540 540 {
541 541 static const Py_ssize_t dirprefixlen = 8;
542 542 static const Py_ssize_t maxshortdirslen = 68;
543 543 char *dest;
544 544 PyObject *ret;
545 545
546 546 Py_ssize_t i, d, p, lastslash = len - 1, lastdot = -1;
547 547 Py_ssize_t destsize, destlen = 0, slop, used;
548 548
549 549 while (lastslash >= 0 && src[lastslash] != '/') {
550 550 if (src[lastslash] == '.' && lastdot == -1)
551 551 lastdot = lastslash;
552 552 lastslash--;
553 553 }
554 554
555 555 #if 0
556 556 /* All paths should end in a suffix of ".i" or ".d".
557 557 Unfortunately, the file names in test-hybridencode.py
558 558 violate this rule. */
559 559 if (lastdot != len - 3) {
560 560 PyErr_SetString(PyExc_ValueError,
561 561 "suffix missing or wrong length");
562 562 return NULL;
563 563 }
564 564 #endif
565 565
566 566 /* If src contains a suffix, we will append it to the end of
567 567 the new string, so make room. */
568 568 destsize = 120;
569 569 if (lastdot >= 0)
570 570 destsize += len - lastdot - 1;
571 571
572 572 ret = PyBytes_FromStringAndSize(NULL, destsize);
573 573 if (ret == NULL)
574 574 return NULL;
575 575
576 576 dest = PyBytes_AS_STRING(ret);
577 577 memcopy(dest, &destlen, destsize, "dh/", 3);
578 578
579 579 /* Copy up to dirprefixlen bytes of each path component, up to
580 580 a limit of maxshortdirslen bytes. */
581 581 for (i = d = p = 0; i < lastslash; i++, p++) {
582 582 if (src[i] == '/') {
583 583 char d = dest[destlen - 1];
584 584 /* After truncation, a directory name may end
585 585 in a space or dot, which are unportable. */
586 586 if (d == '.' || d == ' ')
587 587 dest[destlen - 1] = '_';
588 588 /* The + 3 is to account for "dh/" in the beginning */
589 589 if (destlen > maxshortdirslen + 3)
590 590 break;
591 591 charcopy(dest, &destlen, destsize, src[i]);
592 592 p = -1;
593 }
594 else if (p < dirprefixlen)
593 } else if (p < dirprefixlen)
595 594 charcopy(dest, &destlen, destsize, src[i]);
596 595 }
597 596
598 597 /* Rewind to just before the last slash copied. */
599 598 if (destlen > maxshortdirslen + 3)
600 599 do {
601 600 destlen--;
602 601 } while (destlen > 0 && dest[destlen] != '/');
603 602
604 603 if (destlen > 3) {
605 604 if (lastslash > 0) {
606 605 char d = dest[destlen - 1];
607 606 /* The last directory component may be
608 607 truncated, so make it safe. */
609 608 if (d == '.' || d == ' ')
610 609 dest[destlen - 1] = '_';
611 610 }
612 611
613 612 charcopy(dest, &destlen, destsize, '/');
614 613 }
615 614
616 615 /* Add a prefix of the original file's name. Its length
617 616 depends on the number of bytes left after accounting for
618 617 hash and suffix. */
619 618 used = destlen + 40;
620 619 if (lastdot >= 0)
621 620 used += len - lastdot - 1;
622 621 slop = maxstorepathlen - used;
623 622 if (slop > 0) {
624 623 Py_ssize_t basenamelen =
625 lastslash >= 0 ? len - lastslash - 2 : len - 1;
624 lastslash >= 0 ? len - lastslash - 2 : len - 1;
626 625
627 626 if (basenamelen > slop)
628 627 basenamelen = slop;
629 628 if (basenamelen > 0)
630 629 memcopy(dest, &destlen, destsize, &src[lastslash + 1],
631 basenamelen);
630 basenamelen);
632 631 }
633 632
634 633 /* Add hash and suffix. */
635 634 for (i = 0; i < 20; i++)
636 635 hexencode(dest, &destlen, destsize, sha[i]);
637 636
638 637 if (lastdot >= 0)
639 638 memcopy(dest, &destlen, destsize, &src[lastdot],
640 len - lastdot - 1);
639 len - lastdot - 1);
641 640
642 641 assert(PyBytes_Check(ret));
643 642 Py_SIZE(ret) = destlen;
644 643
645 644 return ret;
646 645 }
647 646
648 647 /*
649 648 * Avoiding a trip through Python would improve performance by 50%,
650 649 * but we don't encounter enough long names to be worth the code.
651 650 */
652 651 static int sha1hash(char hash[20], const char *str, Py_ssize_t len)
653 652 {
654 653 static PyObject *shafunc;
655 654 PyObject *shaobj, *hashobj;
656 655
657 656 if (shafunc == NULL) {
658 657 PyObject *hashlib, *name = PyBytes_FromString("hashlib");
659 658
660 659 if (name == NULL)
661 660 return -1;
662 661
663 662 hashlib = PyImport_Import(name);
664 663 Py_DECREF(name);
665 664
666 665 if (hashlib == NULL) {
667 666 PyErr_SetString(PyExc_ImportError, "hashlib");
668 667 return -1;
669 668 }
670 669 shafunc = PyObject_GetAttrString(hashlib, "sha1");
671 670 Py_DECREF(hashlib);
672 671
673 672 if (shafunc == NULL) {
674 673 PyErr_SetString(PyExc_AttributeError,
675 "module 'hashlib' has no "
676 "attribute 'sha1'");
674 "module 'hashlib' has no "
675 "attribute 'sha1'");
677 676 return -1;
678 677 }
679 678 }
680 679
681 680 shaobj = PyObject_CallFunction(shafunc, "s#", str, len);
682 681
683 682 if (shaobj == NULL)
684 683 return -1;
685 684
686 685 hashobj = PyObject_CallMethod(shaobj, "digest", "");
687 686 Py_DECREF(shaobj);
688 687 if (hashobj == NULL)
689 688 return -1;
690 689
691 690 if (!PyBytes_Check(hashobj) || PyBytes_GET_SIZE(hashobj) != 20) {
692 691 PyErr_SetString(PyExc_TypeError,
693 "result of digest is not a 20-byte hash");
692 "result of digest is not a 20-byte hash");
694 693 Py_DECREF(hashobj);
695 694 return -1;
696 695 }
697 696
698 697 memcpy(hash, PyBytes_AS_STRING(hashobj), 20);
699 698 Py_DECREF(hashobj);
700 699 return 0;
701 700 }
702 701
/* Size in bytes of each scratch buffer used by hashencode().  The
   parentheses keep the product intact inside larger expressions. */
#define MAXENCODE (4096 * 4)
704 703
705 704 static PyObject *hashencode(const char *src, Py_ssize_t len)
706 705 {
707 706 char dired[MAXENCODE];
708 707 char lowered[MAXENCODE];
709 708 char auxed[MAXENCODE];
710 709 Py_ssize_t dirlen, lowerlen, auxlen, baselen;
711 710 char sha[20];
712 711
713 712 baselen = (len - 5) * 3;
714 713 if (baselen >= MAXENCODE) {
715 714 PyErr_SetString(PyExc_ValueError, "string too long");
716 715 return NULL;
717 716 }
718 717
719 718 dirlen = _encodedir(dired, baselen, src, len);
720 719 if (sha1hash(sha, dired, dirlen - 1) == -1)
721 720 return NULL;
722 721 lowerlen = _lowerencode(lowered, baselen, dired + 5, dirlen - 5);
723 722 auxlen = auxencode(auxed, baselen, lowered, lowerlen);
724 723 return hashmangle(auxed, auxlen, sha);
725 724 }
726 725
727 726 PyObject *pathencode(PyObject *self, PyObject *args)
728 727 {
729 728 Py_ssize_t len, newlen;
730 729 PyObject *pathobj, *newobj;
731 730 char *path;
732 731
733 732 if (!PyArg_ParseTuple(args, "O:pathencode", &pathobj))
734 733 return NULL;
735 734
736 735 if (PyBytes_AsStringAndSize(pathobj, &path, &len) == -1) {
737 736 PyErr_SetString(PyExc_TypeError, "expected a string");
738 737 return NULL;
739 738 }
740 739
741 740 if (len > maxstorepathlen)
742 741 newlen = maxstorepathlen + 2;
743 742 else
744 743 newlen = len ? basicencode(NULL, 0, path, len + 1) : 1;
745 744
746 745 if (newlen <= maxstorepathlen + 1) {
747 746 if (newlen == len + 1) {
748 747 Py_INCREF(pathobj);
749 748 return pathobj;
750 749 }
751 750
752 751 newobj = PyBytes_FromStringAndSize(NULL, newlen);
753 752
754 753 if (newobj) {
755 754 assert(PyBytes_Check(newobj));
756 755 Py_SIZE(newobj)--;
757 756 basicencode(PyBytes_AS_STRING(newobj), newlen, path,
758 len + 1);
757 len + 1);
759 758 }
760 }
761 else
759 } else
762 760 newobj = hashencode(path, len + 1);
763 761
764 762 return newobj;
765 763 }
General Comments 0
You need to be logged in to leave comments. Login now