##// END OF EJS Templates
cext: reformat with clang-format 6.0...
Yuya Nishihara -
r38724:992e1082 default
parent child Browse files
Show More
@@ -1,758 +1,761 b''
1 /*
1 /*
2 pathencode.c - efficient path name encoding
2 pathencode.c - efficient path name encoding
3
3
4 Copyright 2012 Facebook
4 Copyright 2012 Facebook
5
5
6 This software may be used and distributed according to the terms of
6 This software may be used and distributed according to the terms of
7 the GNU General Public License, incorporated herein by reference.
7 the GNU General Public License, incorporated herein by reference.
8 */
8 */
9
9
10 /*
10 /*
11 * An implementation of the name encoding scheme used by the fncache
11 * An implementation of the name encoding scheme used by the fncache
12 * store. The common case is of a path < 120 bytes long, which is
12 * store. The common case is of a path < 120 bytes long, which is
13 * handled either in a single pass with no allocations or two passes
13 * handled either in a single pass with no allocations or two passes
14 * with a single allocation. For longer paths, multiple passes are
14 * with a single allocation. For longer paths, multiple passes are
15 * required.
15 * required.
16 */
16 */
17
17
18 #define PY_SSIZE_T_CLEAN
18 #define PY_SSIZE_T_CLEAN
19 #include <Python.h>
19 #include <Python.h>
20 #include <assert.h>
20 #include <assert.h>
21 #include <ctype.h>
21 #include <ctype.h>
22 #include <stdlib.h>
22 #include <stdlib.h>
23 #include <string.h>
23 #include <string.h>
24
24
25 #include "util.h"
25 #include "util.h"
26
26
/*
 * state machine for the fast path
 *
 * Each state records how much of a special prefix has been matched in
 * the path component being scanned by _encode. The reserved names are
 * matched in lowercase there, although they are spelled in uppercase
 * in the comments below.
 */
enum path_state {
	START, /* first byte of a path component */
	A,     /* "AUX" */
	AU,    /* "au" matched, waiting for 'x' */
	THIRD, /* third of a 3-byte sequence, e.g. "AUX", "NUL" */
	C,     /* "CON" or "COMn" */
	CO,    /* "co" matched, waiting for 'm' or 'n' */
	COMLPT, /* "COM" or "LPT" */
	COMLPTn, /* "COM" or "LPT" followed by a digit 1-9 */
	L,     /* 'l' matched, waiting for 'p' */
	LP,    /* "lp" matched, waiting for 't' */
	N,     /* 'n' matched, waiting for 'u' */
	NU,    /* "nu" matched, waiting for 'l' */
	P,     /* "PRN" */
	PR,    /* "pr" matched, waiting for 'n' */
	LDOT,  /* leading '.' */
	DOT,   /* '.' in a non-leading position */
	H,     /* ".h" */
	HGDI,  /* ".hg", ".d", or ".i" */
	SPACE, /* ' ' in a non-leading position, not yet written out */
	DEFAULT, /* byte of a path component after the first */
};
50
50
/* state machine for dir-encoding (used by _encodedir) */
enum dir_state {
	DDOT,     /* previous byte was '.' */
	DH,       /* ".h" matched, waiting for 'g' */
	DHGDI,    /* ".hg", ".d", or ".i" matched, waiting for '/' */
	DDEFAULT, /* any other position */
};
58
58
/*
 * Test whether byte c is a member of a 256-entry bit set.
 *
 * bitset holds eight 32-bit words; byte value v is represented by bit
 * (v & 31) of word (v >> 5). c is reinterpreted as an unsigned byte
 * first, so bytes >= 0x80 index words 4..7 rather than going negative.
 *
 * Returns 1 when the bit is set and 0 otherwise.
 */
static inline int inset(const uint32_t bitset[], char c)
{
	const uint8_t byte = (uint8_t)c;

	/*
	 * The mask is built from an unsigned one: shifting the signed
	 * constant 1 left by 31 is undefined behaviour.
	 */
	return (bitset[byte >> 5] & ((uint32_t)1 << (byte & 31))) != 0;
}
63
63
64 static inline void charcopy(char *dest, Py_ssize_t *destlen, size_t destsize,
64 static inline void charcopy(char *dest, Py_ssize_t *destlen, size_t destsize,
65 char c)
65 char c)
66 {
66 {
67 if (dest) {
67 if (dest) {
68 assert(*destlen < destsize);
68 assert(*destlen < destsize);
69 dest[*destlen] = c;
69 dest[*destlen] = c;
70 }
70 }
71 (*destlen)++;
71 (*destlen)++;
72 }
72 }
73
73
74 static inline void memcopy(char *dest, Py_ssize_t *destlen, size_t destsize,
74 static inline void memcopy(char *dest, Py_ssize_t *destlen, size_t destsize,
75 const void *src, Py_ssize_t len)
75 const void *src, Py_ssize_t len)
76 {
76 {
77 if (dest) {
77 if (dest) {
78 assert(*destlen + len < destsize);
78 assert(*destlen + len < destsize);
79 memcpy((void *)&dest[*destlen], src, len);
79 memcpy((void *)&dest[*destlen], src, len);
80 }
80 }
81 *destlen += len;
81 *destlen += len;
82 }
82 }
83
83
84 static inline void hexencode(char *dest, Py_ssize_t *destlen, size_t destsize,
84 static inline void hexencode(char *dest, Py_ssize_t *destlen, size_t destsize,
85 uint8_t c)
85 uint8_t c)
86 {
86 {
87 static const char hexdigit[] = "0123456789abcdef";
87 static const char hexdigit[] = "0123456789abcdef";
88
88
89 charcopy(dest, destlen, destsize, hexdigit[c >> 4]);
89 charcopy(dest, destlen, destsize, hexdigit[c >> 4]);
90 charcopy(dest, destlen, destsize, hexdigit[c & 15]);
90 charcopy(dest, destlen, destsize, hexdigit[c & 15]);
91 }
91 }
92
92
93 /* 3-byte escape: tilde followed by two hex digits */
93 /* 3-byte escape: tilde followed by two hex digits */
94 static inline void escape3(char *dest, Py_ssize_t *destlen, size_t destsize,
94 static inline void escape3(char *dest, Py_ssize_t *destlen, size_t destsize,
95 char c)
95 char c)
96 {
96 {
97 charcopy(dest, destlen, destsize, '~');
97 charcopy(dest, destlen, destsize, '~');
98 hexencode(dest, destlen, destsize, c);
98 hexencode(dest, destlen, destsize, c);
99 }
99 }
100
100
101 static Py_ssize_t _encodedir(char *dest, size_t destsize, const char *src,
101 static Py_ssize_t _encodedir(char *dest, size_t destsize, const char *src,
102 Py_ssize_t len)
102 Py_ssize_t len)
103 {
103 {
104 enum dir_state state = DDEFAULT;
104 enum dir_state state = DDEFAULT;
105 Py_ssize_t i = 0, destlen = 0;
105 Py_ssize_t i = 0, destlen = 0;
106
106
107 while (i < len) {
107 while (i < len) {
108 switch (state) {
108 switch (state) {
109 case DDOT:
109 case DDOT:
110 switch (src[i]) {
110 switch (src[i]) {
111 case 'd':
111 case 'd':
112 case 'i':
112 case 'i':
113 state = DHGDI;
113 state = DHGDI;
114 charcopy(dest, &destlen, destsize, src[i++]);
114 charcopy(dest, &destlen, destsize, src[i++]);
115 break;
115 break;
116 case 'h':
116 case 'h':
117 state = DH;
117 state = DH;
118 charcopy(dest, &destlen, destsize, src[i++]);
118 charcopy(dest, &destlen, destsize, src[i++]);
119 break;
119 break;
120 default:
120 default:
121 state = DDEFAULT;
121 state = DDEFAULT;
122 break;
122 break;
123 }
123 }
124 break;
124 break;
125 case DH:
125 case DH:
126 if (src[i] == 'g') {
126 if (src[i] == 'g') {
127 state = DHGDI;
127 state = DHGDI;
128 charcopy(dest, &destlen, destsize, src[i++]);
128 charcopy(dest, &destlen, destsize, src[i++]);
129 } else
129 } else
130 state = DDEFAULT;
130 state = DDEFAULT;
131 break;
131 break;
132 case DHGDI:
132 case DHGDI:
133 if (src[i] == '/') {
133 if (src[i] == '/') {
134 memcopy(dest, &destlen, destsize, ".hg", 3);
134 memcopy(dest, &destlen, destsize, ".hg", 3);
135 charcopy(dest, &destlen, destsize, src[i++]);
135 charcopy(dest, &destlen, destsize, src[i++]);
136 }
136 }
137 state = DDEFAULT;
137 state = DDEFAULT;
138 break;
138 break;
139 case DDEFAULT:
139 case DDEFAULT:
140 if (src[i] == '.')
140 if (src[i] == '.')
141 state = DDOT;
141 state = DDOT;
142 charcopy(dest, &destlen, destsize, src[i++]);
142 charcopy(dest, &destlen, destsize, src[i++]);
143 break;
143 break;
144 }
144 }
145 }
145 }
146
146
147 return destlen;
147 return destlen;
148 }
148 }
149
149
150 PyObject *encodedir(PyObject *self, PyObject *args)
150 PyObject *encodedir(PyObject *self, PyObject *args)
151 {
151 {
152 Py_ssize_t len, newlen;
152 Py_ssize_t len, newlen;
153 PyObject *pathobj, *newobj;
153 PyObject *pathobj, *newobj;
154 char *path;
154 char *path;
155
155
156 if (!PyArg_ParseTuple(args, "O:encodedir", &pathobj))
156 if (!PyArg_ParseTuple(args, "O:encodedir", &pathobj))
157 return NULL;
157 return NULL;
158
158
159 if (PyBytes_AsStringAndSize(pathobj, &path, &len) == -1) {
159 if (PyBytes_AsStringAndSize(pathobj, &path, &len) == -1) {
160 PyErr_SetString(PyExc_TypeError, "expected a string");
160 PyErr_SetString(PyExc_TypeError, "expected a string");
161 return NULL;
161 return NULL;
162 }
162 }
163
163
164 newlen = len ? _encodedir(NULL, 0, path, len + 1) : 1;
164 newlen = len ? _encodedir(NULL, 0, path, len + 1) : 1;
165
165
166 if (newlen == len + 1) {
166 if (newlen == len + 1) {
167 Py_INCREF(pathobj);
167 Py_INCREF(pathobj);
168 return pathobj;
168 return pathobj;
169 }
169 }
170
170
171 newobj = PyBytes_FromStringAndSize(NULL, newlen);
171 newobj = PyBytes_FromStringAndSize(NULL, newlen);
172
172
173 if (newobj) {
173 if (newobj) {
174 assert(PyBytes_Check(newobj));
174 assert(PyBytes_Check(newobj));
175 Py_SIZE(newobj)--;
175 Py_SIZE(newobj)--;
176 _encodedir(PyBytes_AS_STRING(newobj), newlen, path, len + 1);
176 _encodedir(PyBytes_AS_STRING(newobj), newlen, path, len + 1);
177 }
177 }
178
178
179 return newobj;
179 return newobj;
180 }
180 }
181
181
/*
 * Single-pass path encoder shared by basicencode and auxencode.
 *
 * twobytes:  set of bytes written as '_' plus a second byte ('_' stays
 *            '_', any other member is written as the byte value + 32)
 * onebyte:   set of bytes copied through unchanged
 * dest:      output buffer, or NULL to only count output bytes
 * destlen:   output offset to start from
 * destsize:  capacity of dest, used by the copy helpers' assertions
 * src, len:  input bytes; the terminating zero byte is treated as an
 *            end-of-component token, so callers are expected to
 *            include it in len
 * encodedir: when non-zero, ".hg" is inserted before a '/' that
 *            follows ".hg", ".d" or ".i"
 *
 * Within a component, bytes in neither set are written with escape3
 * ("~" plus two hex digits). Returns the final output offset.
 */
static Py_ssize_t _encode(const uint32_t twobytes[8], const uint32_t onebyte[8],
			  char *dest, Py_ssize_t destlen, size_t destsize,
			  const char *src, Py_ssize_t len, int encodedir)
{
	enum path_state state = START;
	Py_ssize_t i = 0;

	/*
	 * Python strings end with a zero byte, which we use as a
	 * terminal token as they are not valid inside path names.
	 */

	while (i < len) {
		switch (state) {
		case START:
			/* first byte of a component: leading '.' and ' ' are
			   escaped, possible reserved names start a match */
			switch (src[i]) {
			case '/':
				charcopy(dest, &destlen, destsize, src[i++]);
				break;
			case '.':
				state = LDOT;
				escape3(dest, &destlen, destsize, src[i++]);
				break;
			case ' ':
				state = DEFAULT;
				escape3(dest, &destlen, destsize, src[i++]);
				break;
			case 'a':
				state = A;
				charcopy(dest, &destlen, destsize, src[i++]);
				break;
			case 'c':
				state = C;
				charcopy(dest, &destlen, destsize, src[i++]);
				break;
			case 'l':
				state = L;
				charcopy(dest, &destlen, destsize, src[i++]);
				break;
			case 'n':
				state = N;
				charcopy(dest, &destlen, destsize, src[i++]);
				break;
			case 'p':
				state = P;
				charcopy(dest, &destlen, destsize, src[i++]);
				break;
			default:
				/* not consumed: DEFAULT handles this byte */
				state = DEFAULT;
				break;
			}
			break;
		case A:
			if (src[i] == 'u') {
				state = AU;
				charcopy(dest, &destlen, destsize, src[i++]);
			} else
				state = DEFAULT;
			break;
		case AU:
			if (src[i] == 'x') {
				/* third byte is held back until THIRD decides
				   whether it must be escaped */
				state = THIRD;
				i++;
			} else
				state = DEFAULT;
			break;
		case THIRD:
			state = DEFAULT;
			switch (src[i]) {
			case '.':
			case '/':
			case '\0':
				/* the 3-byte name ends here: escape its
				   last byte */
				escape3(dest, &destlen, destsize, src[i - 1]);
				break;
			default:
				/* name continues: step back so DEFAULT emits
				   the held-back byte normally */
				i--;
				break;
			}
			break;
		case C:
			if (src[i] == 'o') {
				state = CO;
				charcopy(dest, &destlen, destsize, src[i++]);
			} else
				state = DEFAULT;
			break;
		case CO:
			if (src[i] == 'm') {
				state = COMLPT;
				i++;
			} else if (src[i] == 'n') {
				state = THIRD;
				i++;
			} else
				state = DEFAULT;
			break;
		case COMLPT:
			switch (src[i]) {
			case '1':
			case '2':
			case '3':
			case '4':
			case '5':
			case '6':
			case '7':
			case '8':
			case '9':
				state = COMLPTn;
				i++;
				break;
			default:
				/* no digit: emit the held-back 'm' / 't' */
				state = DEFAULT;
				charcopy(dest, &destlen, destsize, src[i - 1]);
				break;
			}
			break;
		case COMLPTn:
			state = DEFAULT;
			switch (src[i]) {
			case '.':
			case '/':
			case '\0':
				/* name ends after the digit: escape the third
				   letter, keep the digit as is */
				escape3(dest, &destlen, destsize, src[i - 2]);
				charcopy(dest, &destlen, destsize, src[i - 1]);
				break;
			default:
				/* name continues: emit both held-back bytes */
				memcopy(dest, &destlen, destsize, &src[i - 2],
					2);
				break;
			}
			break;
		case L:
			if (src[i] == 'p') {
				state = LP;
				charcopy(dest, &destlen, destsize, src[i++]);
			} else
				state = DEFAULT;
			break;
		case LP:
			if (src[i] == 't') {
				state = COMLPT;
				i++;
			} else
				state = DEFAULT;
			break;
		case N:
			if (src[i] == 'u') {
				state = NU;
				charcopy(dest, &destlen, destsize, src[i++]);
			} else
				state = DEFAULT;
			break;
		case NU:
			if (src[i] == 'l') {
				state = THIRD;
				i++;
			} else
				state = DEFAULT;
			break;
		case P:
			if (src[i] == 'r') {
				state = PR;
				charcopy(dest, &destlen, destsize, src[i++]);
			} else
				state = DEFAULT;
			break;
		case PR:
			if (src[i] == 'n') {
				state = THIRD;
				i++;
			} else
				state = DEFAULT;
			break;
		case LDOT:
			/* the leading '.' has already been written escaped */
			switch (src[i]) {
			case 'd':
			case 'i':
				state = HGDI;
				charcopy(dest, &destlen, destsize, src[i++]);
				break;
			case 'h':
				state = H;
				charcopy(dest, &destlen, destsize, src[i++]);
				break;
			default:
				state = DEFAULT;
				break;
			}
			break;
		case DOT:
			/* a non-leading '.' was consumed but not yet written */
			switch (src[i]) {
			case '/':
			case '\0':
				/* trailing '.' of a component is escaped */
				state = START;
				memcopy(dest, &destlen, destsize, "~2e", 3);
				charcopy(dest, &destlen, destsize, src[i++]);
				break;
			case 'd':
			case 'i':
				state = HGDI;
				charcopy(dest, &destlen, destsize, '.');
				charcopy(dest, &destlen, destsize, src[i++]);
				break;
			case 'h':
				state = H;
				memcopy(dest, &destlen, destsize, ".h", 2);
				i++;
				break;
			default:
				state = DEFAULT;
				charcopy(dest, &destlen, destsize, '.');
				break;
			}
			break;
		case H:
			if (src[i] == 'g') {
				state = HGDI;
				charcopy(dest, &destlen, destsize, src[i++]);
			} else
				state = DEFAULT;
			break;
		case HGDI:
			if (src[i] == '/') {
				state = START;
				if (encodedir)
					memcopy(dest, &destlen, destsize, ".hg",
						3);
				charcopy(dest, &destlen, destsize, src[i++]);
			} else
				state = DEFAULT;
			break;
		case SPACE:
			/* a non-leading ' ' was consumed but not yet written */
			switch (src[i]) {
			case '/':
			case '\0':
				/* trailing ' ' of a component is escaped */
				state = START;
				memcopy(dest, &destlen, destsize, "~20", 3);
				charcopy(dest, &destlen, destsize, src[i++]);
				break;
			default:
				state = DEFAULT;
				charcopy(dest, &destlen, destsize, ' ');
				break;
			}
			break;
		case DEFAULT:
			/* copy the run of pass-through bytes first */
			while (inset(onebyte, src[i])) {
				charcopy(dest, &destlen, destsize, src[i++]);
				if (i == len)
					goto done;
			}
			switch (src[i]) {
			case '.':
				state = DOT;
				i++;
				break;
			case ' ':
				state = SPACE;
				i++;
				break;
			case '/':
				state = START;
				charcopy(dest, &destlen, destsize, '/');
				i++;
				break;
			default:
				if (inset(onebyte, src[i])) {
					do {
						charcopy(dest, &destlen,
							 destsize, src[i++]);
					} while (i < len &&
						 inset(onebyte, src[i]));
				} else if (inset(twobytes, src[i])) {
					char c = src[i++];
					charcopy(dest, &destlen, destsize, '_');
					charcopy(dest, &destlen, destsize,
						 c == '_' ? '_' : c + 32);
				} else
					escape3(dest, &destlen, destsize,
						src[i++]);
				break;
			}
			break;
		}
	}
done:
	return destlen;
}
470
470
471 static Py_ssize_t basicencode(char *dest, size_t destsize, const char *src,
471 static Py_ssize_t basicencode(char *dest, size_t destsize, const char *src,
472 Py_ssize_t len)
472 Py_ssize_t len)
473 {
473 {
474 static const uint32_t twobytes[8] = {0, 0, 0x87fffffe};
474 static const uint32_t twobytes[8] = {0, 0, 0x87fffffe};
475
475
476 static const uint32_t onebyte[8] = {
476 static const uint32_t onebyte[8] = {
477 1, 0x2bff3bfa, 0x68000001, 0x2fffffff,
477 1,
478 0x2bff3bfa,
479 0x68000001,
480 0x2fffffff,
478 };
481 };
479
482
480 Py_ssize_t destlen = 0;
483 Py_ssize_t destlen = 0;
481
484
482 return _encode(twobytes, onebyte, dest, destlen, destsize, src, len, 1);
485 return _encode(twobytes, onebyte, dest, destlen, destsize, src, len, 1);
483 }
486 }
484
487
/* Byte budget that hashmangle uses when assembling a hashed path. */
static const Py_ssize_t maxstorepathlen = 120;
486
489
487 static Py_ssize_t _lowerencode(char *dest, size_t destsize, const char *src,
490 static Py_ssize_t _lowerencode(char *dest, size_t destsize, const char *src,
488 Py_ssize_t len)
491 Py_ssize_t len)
489 {
492 {
490 static const uint32_t onebyte[8] = {1, 0x2bfffbfb, 0xe8000001,
493 static const uint32_t onebyte[8] = {1, 0x2bfffbfb, 0xe8000001,
491 0x2fffffff};
494 0x2fffffff};
492
495
493 static const uint32_t lower[8] = {0, 0, 0x7fffffe};
496 static const uint32_t lower[8] = {0, 0, 0x7fffffe};
494
497
495 Py_ssize_t i, destlen = 0;
498 Py_ssize_t i, destlen = 0;
496
499
497 for (i = 0; i < len; i++) {
500 for (i = 0; i < len; i++) {
498 if (inset(onebyte, src[i]))
501 if (inset(onebyte, src[i]))
499 charcopy(dest, &destlen, destsize, src[i]);
502 charcopy(dest, &destlen, destsize, src[i]);
500 else if (inset(lower, src[i]))
503 else if (inset(lower, src[i]))
501 charcopy(dest, &destlen, destsize, src[i] + 32);
504 charcopy(dest, &destlen, destsize, src[i] + 32);
502 else
505 else
503 escape3(dest, &destlen, destsize, src[i]);
506 escape3(dest, &destlen, destsize, src[i]);
504 }
507 }
505
508
506 return destlen;
509 return destlen;
507 }
510 }
508
511
509 PyObject *lowerencode(PyObject *self, PyObject *args)
512 PyObject *lowerencode(PyObject *self, PyObject *args)
510 {
513 {
511 char *path;
514 char *path;
512 Py_ssize_t len, newlen;
515 Py_ssize_t len, newlen;
513 PyObject *ret;
516 PyObject *ret;
514
517
515 if (!PyArg_ParseTuple(args, PY23("s#:lowerencode", "y#:lowerencode"),
518 if (!PyArg_ParseTuple(args, PY23("s#:lowerencode", "y#:lowerencode"),
516 &path, &len))
519 &path, &len))
517 return NULL;
520 return NULL;
518
521
519 newlen = _lowerencode(NULL, 0, path, len);
522 newlen = _lowerencode(NULL, 0, path, len);
520 ret = PyBytes_FromStringAndSize(NULL, newlen);
523 ret = PyBytes_FromStringAndSize(NULL, newlen);
521 if (ret)
524 if (ret)
522 _lowerencode(PyBytes_AS_STRING(ret), newlen, path, len);
525 _lowerencode(PyBytes_AS_STRING(ret), newlen, path, len);
523
526
524 return ret;
527 return ret;
525 }
528 }
526
529
527 /* See store.py:_auxencode for a description. */
530 /* See store.py:_auxencode for a description. */
528 static Py_ssize_t auxencode(char *dest, size_t destsize, const char *src,
531 static Py_ssize_t auxencode(char *dest, size_t destsize, const char *src,
529 Py_ssize_t len)
532 Py_ssize_t len)
530 {
533 {
531 static const uint32_t twobytes[8];
534 static const uint32_t twobytes[8];
532
535
533 static const uint32_t onebyte[8] = {
536 static const uint32_t onebyte[8] = {
534 ~0U, 0xffff3ffe, ~0U, ~0U, ~0U, ~0U, ~0U, ~0U,
537 ~0U, 0xffff3ffe, ~0U, ~0U, ~0U, ~0U, ~0U, ~0U,
535 };
538 };
536
539
537 return _encode(twobytes, onebyte, dest, 0, destsize, src, len, 0);
540 return _encode(twobytes, onebyte, dest, 0, destsize, src, len, 0);
538 }
541 }
539
542
540 static PyObject *hashmangle(const char *src, Py_ssize_t len, const char sha[20])
543 static PyObject *hashmangle(const char *src, Py_ssize_t len, const char sha[20])
541 {
544 {
542 static const Py_ssize_t dirprefixlen = 8;
545 static const Py_ssize_t dirprefixlen = 8;
543 static const Py_ssize_t maxshortdirslen = 68;
546 static const Py_ssize_t maxshortdirslen = 68;
544 char *dest;
547 char *dest;
545 PyObject *ret;
548 PyObject *ret;
546
549
547 Py_ssize_t i, d, p, lastslash = len - 1, lastdot = -1;
550 Py_ssize_t i, d, p, lastslash = len - 1, lastdot = -1;
548 Py_ssize_t destsize, destlen = 0, slop, used;
551 Py_ssize_t destsize, destlen = 0, slop, used;
549
552
550 while (lastslash >= 0 && src[lastslash] != '/') {
553 while (lastslash >= 0 && src[lastslash] != '/') {
551 if (src[lastslash] == '.' && lastdot == -1)
554 if (src[lastslash] == '.' && lastdot == -1)
552 lastdot = lastslash;
555 lastdot = lastslash;
553 lastslash--;
556 lastslash--;
554 }
557 }
555
558
556 #if 0
559 #if 0
557 /* All paths should end in a suffix of ".i" or ".d".
560 /* All paths should end in a suffix of ".i" or ".d".
558 Unfortunately, the file names in test-hybridencode.py
561 Unfortunately, the file names in test-hybridencode.py
559 violate this rule. */
562 violate this rule. */
560 if (lastdot != len - 3) {
563 if (lastdot != len - 3) {
561 PyErr_SetString(PyExc_ValueError,
564 PyErr_SetString(PyExc_ValueError,
562 "suffix missing or wrong length");
565 "suffix missing or wrong length");
563 return NULL;
566 return NULL;
564 }
567 }
565 #endif
568 #endif
566
569
567 /* If src contains a suffix, we will append it to the end of
570 /* If src contains a suffix, we will append it to the end of
568 the new string, so make room. */
571 the new string, so make room. */
569 destsize = 120;
572 destsize = 120;
570 if (lastdot >= 0)
573 if (lastdot >= 0)
571 destsize += len - lastdot - 1;
574 destsize += len - lastdot - 1;
572
575
573 ret = PyBytes_FromStringAndSize(NULL, destsize);
576 ret = PyBytes_FromStringAndSize(NULL, destsize);
574 if (ret == NULL)
577 if (ret == NULL)
575 return NULL;
578 return NULL;
576
579
577 dest = PyBytes_AS_STRING(ret);
580 dest = PyBytes_AS_STRING(ret);
578 memcopy(dest, &destlen, destsize, "dh/", 3);
581 memcopy(dest, &destlen, destsize, "dh/", 3);
579
582
580 /* Copy up to dirprefixlen bytes of each path component, up to
583 /* Copy up to dirprefixlen bytes of each path component, up to
581 a limit of maxshortdirslen bytes. */
584 a limit of maxshortdirslen bytes. */
582 for (i = d = p = 0; i < lastslash; i++, p++) {
585 for (i = d = p = 0; i < lastslash; i++, p++) {
583 if (src[i] == '/') {
586 if (src[i] == '/') {
584 char d = dest[destlen - 1];
587 char d = dest[destlen - 1];
585 /* After truncation, a directory name may end
588 /* After truncation, a directory name may end
586 in a space or dot, which are unportable. */
589 in a space or dot, which are unportable. */
587 if (d == '.' || d == ' ')
590 if (d == '.' || d == ' ')
588 dest[destlen - 1] = '_';
591 dest[destlen - 1] = '_';
589 /* The + 3 is to account for "dh/" in the beginning */
592 /* The + 3 is to account for "dh/" in the beginning */
590 if (destlen > maxshortdirslen + 3)
593 if (destlen > maxshortdirslen + 3)
591 break;
594 break;
592 charcopy(dest, &destlen, destsize, src[i]);
595 charcopy(dest, &destlen, destsize, src[i]);
593 p = -1;
596 p = -1;
594 } else if (p < dirprefixlen)
597 } else if (p < dirprefixlen)
595 charcopy(dest, &destlen, destsize, src[i]);
598 charcopy(dest, &destlen, destsize, src[i]);
596 }
599 }
597
600
598 /* Rewind to just before the last slash copied. */
601 /* Rewind to just before the last slash copied. */
599 if (destlen > maxshortdirslen + 3)
602 if (destlen > maxshortdirslen + 3)
600 do {
603 do {
601 destlen--;
604 destlen--;
602 } while (destlen > 0 && dest[destlen] != '/');
605 } while (destlen > 0 && dest[destlen] != '/');
603
606
604 if (destlen > 3) {
607 if (destlen > 3) {
605 if (lastslash > 0) {
608 if (lastslash > 0) {
606 char d = dest[destlen - 1];
609 char d = dest[destlen - 1];
607 /* The last directory component may be
610 /* The last directory component may be
608 truncated, so make it safe. */
611 truncated, so make it safe. */
609 if (d == '.' || d == ' ')
612 if (d == '.' || d == ' ')
610 dest[destlen - 1] = '_';
613 dest[destlen - 1] = '_';
611 }
614 }
612
615
613 charcopy(dest, &destlen, destsize, '/');
616 charcopy(dest, &destlen, destsize, '/');
614 }
617 }
615
618
616 /* Add a prefix of the original file's name. Its length
619 /* Add a prefix of the original file's name. Its length
617 depends on the number of bytes left after accounting for
620 depends on the number of bytes left after accounting for
618 hash and suffix. */
621 hash and suffix. */
619 used = destlen + 40;
622 used = destlen + 40;
620 if (lastdot >= 0)
623 if (lastdot >= 0)
621 used += len - lastdot - 1;
624 used += len - lastdot - 1;
622 slop = maxstorepathlen - used;
625 slop = maxstorepathlen - used;
623 if (slop > 0) {
626 if (slop > 0) {
624 Py_ssize_t basenamelen =
627 Py_ssize_t basenamelen =
625 lastslash >= 0 ? len - lastslash - 2 : len - 1;
628 lastslash >= 0 ? len - lastslash - 2 : len - 1;
626
629
627 if (basenamelen > slop)
630 if (basenamelen > slop)
628 basenamelen = slop;
631 basenamelen = slop;
629 if (basenamelen > 0)
632 if (basenamelen > 0)
630 memcopy(dest, &destlen, destsize, &src[lastslash + 1],
633 memcopy(dest, &destlen, destsize, &src[lastslash + 1],
631 basenamelen);
634 basenamelen);
632 }
635 }
633
636
634 /* Add hash and suffix. */
637 /* Add hash and suffix. */
635 for (i = 0; i < 20; i++)
638 for (i = 0; i < 20; i++)
636 hexencode(dest, &destlen, destsize, sha[i]);
639 hexencode(dest, &destlen, destsize, sha[i]);
637
640
638 if (lastdot >= 0)
641 if (lastdot >= 0)
639 memcopy(dest, &destlen, destsize, &src[lastdot],
642 memcopy(dest, &destlen, destsize, &src[lastdot],
640 len - lastdot - 1);
643 len - lastdot - 1);
641
644
642 assert(PyBytes_Check(ret));
645 assert(PyBytes_Check(ret));
643 Py_SIZE(ret) = destlen;
646 Py_SIZE(ret) = destlen;
644
647
645 return ret;
648 return ret;
646 }
649 }
647
650
648 /*
651 /*
649 * Avoiding a trip through Python would improve performance by 50%,
652 * Avoiding a trip through Python would improve performance by 50%,
650 * but we don't encounter enough long names to be worth the code.
653 * but we don't encounter enough long names to be worth the code.
651 */
654 */
652 static int sha1hash(char hash[20], const char *str, Py_ssize_t len)
655 static int sha1hash(char hash[20], const char *str, Py_ssize_t len)
653 {
656 {
654 static PyObject *shafunc;
657 static PyObject *shafunc;
655 PyObject *shaobj, *hashobj;
658 PyObject *shaobj, *hashobj;
656
659
657 if (shafunc == NULL) {
660 if (shafunc == NULL) {
658 PyObject *hashlib = PyImport_ImportModule("hashlib");
661 PyObject *hashlib = PyImport_ImportModule("hashlib");
659 if (hashlib == NULL) {
662 if (hashlib == NULL) {
660 PyErr_SetString(PyExc_ImportError,
663 PyErr_SetString(PyExc_ImportError,
661 "pathencode failed to find hashlib");
664 "pathencode failed to find hashlib");
662 return -1;
665 return -1;
663 }
666 }
664 shafunc = PyObject_GetAttrString(hashlib, "sha1");
667 shafunc = PyObject_GetAttrString(hashlib, "sha1");
665 Py_DECREF(hashlib);
668 Py_DECREF(hashlib);
666
669
667 if (shafunc == NULL) {
670 if (shafunc == NULL) {
668 PyErr_SetString(PyExc_AttributeError,
671 PyErr_SetString(PyExc_AttributeError,
669 "module 'hashlib' has no "
672 "module 'hashlib' has no "
670 "attribute 'sha1' in pathencode");
673 "attribute 'sha1' in pathencode");
671 return -1;
674 return -1;
672 }
675 }
673 }
676 }
674
677
675 shaobj = PyObject_CallFunction(shafunc, PY23("s#", "y#"), str, len);
678 shaobj = PyObject_CallFunction(shafunc, PY23("s#", "y#"), str, len);
676
679
677 if (shaobj == NULL)
680 if (shaobj == NULL)
678 return -1;
681 return -1;
679
682
680 hashobj = PyObject_CallMethod(shaobj, "digest", "");
683 hashobj = PyObject_CallMethod(shaobj, "digest", "");
681 Py_DECREF(shaobj);
684 Py_DECREF(shaobj);
682 if (hashobj == NULL)
685 if (hashobj == NULL)
683 return -1;
686 return -1;
684
687
685 if (!PyBytes_Check(hashobj) || PyBytes_GET_SIZE(hashobj) != 20) {
688 if (!PyBytes_Check(hashobj) || PyBytes_GET_SIZE(hashobj) != 20) {
686 PyErr_SetString(PyExc_TypeError,
689 PyErr_SetString(PyExc_TypeError,
687 "result of digest is not a 20-byte hash");
690 "result of digest is not a 20-byte hash");
688 Py_DECREF(hashobj);
691 Py_DECREF(hashobj);
689 return -1;
692 return -1;
690 }
693 }
691
694
692 memcpy(hash, PyBytes_AS_STRING(hashobj), 20);
695 memcpy(hash, PyBytes_AS_STRING(hashobj), 20);
693 Py_DECREF(hashobj);
696 Py_DECREF(hashobj);
694 return 0;
697 return 0;
695 }
698 }
696
699
/*
 * Size in bytes of each intermediate encoding buffer in hashencode().
 * The expansion is parenthesized so the macro behaves as a single value
 * in any expression (e.g. "x / MAXENCODE").
 */
#define MAXENCODE (4096 * 4)
698
701
699 static PyObject *hashencode(const char *src, Py_ssize_t len)
702 static PyObject *hashencode(const char *src, Py_ssize_t len)
700 {
703 {
701 char dired[MAXENCODE];
704 char dired[MAXENCODE];
702 char lowered[MAXENCODE];
705 char lowered[MAXENCODE];
703 char auxed[MAXENCODE];
706 char auxed[MAXENCODE];
704 Py_ssize_t dirlen, lowerlen, auxlen, baselen;
707 Py_ssize_t dirlen, lowerlen, auxlen, baselen;
705 char sha[20];
708 char sha[20];
706
709
707 baselen = (len - 5) * 3;
710 baselen = (len - 5) * 3;
708 if (baselen >= MAXENCODE) {
711 if (baselen >= MAXENCODE) {
709 PyErr_SetString(PyExc_ValueError, "string too long");
712 PyErr_SetString(PyExc_ValueError, "string too long");
710 return NULL;
713 return NULL;
711 }
714 }
712
715
713 dirlen = _encodedir(dired, baselen, src, len);
716 dirlen = _encodedir(dired, baselen, src, len);
714 if (sha1hash(sha, dired, dirlen - 1) == -1)
717 if (sha1hash(sha, dired, dirlen - 1) == -1)
715 return NULL;
718 return NULL;
716 lowerlen = _lowerencode(lowered, baselen, dired + 5, dirlen - 5);
719 lowerlen = _lowerencode(lowered, baselen, dired + 5, dirlen - 5);
717 auxlen = auxencode(auxed, baselen, lowered, lowerlen);
720 auxlen = auxencode(auxed, baselen, lowered, lowerlen);
718 return hashmangle(auxed, auxlen, sha);
721 return hashmangle(auxed, auxlen, sha);
719 }
722 }
720
723
721 PyObject *pathencode(PyObject *self, PyObject *args)
724 PyObject *pathencode(PyObject *self, PyObject *args)
722 {
725 {
723 Py_ssize_t len, newlen;
726 Py_ssize_t len, newlen;
724 PyObject *pathobj, *newobj;
727 PyObject *pathobj, *newobj;
725 char *path;
728 char *path;
726
729
727 if (!PyArg_ParseTuple(args, "O:pathencode", &pathobj))
730 if (!PyArg_ParseTuple(args, "O:pathencode", &pathobj))
728 return NULL;
731 return NULL;
729
732
730 if (PyBytes_AsStringAndSize(pathobj, &path, &len) == -1) {
733 if (PyBytes_AsStringAndSize(pathobj, &path, &len) == -1) {
731 PyErr_SetString(PyExc_TypeError, "expected a string");
734 PyErr_SetString(PyExc_TypeError, "expected a string");
732 return NULL;
735 return NULL;
733 }
736 }
734
737
735 if (len > maxstorepathlen)
738 if (len > maxstorepathlen)
736 newlen = maxstorepathlen + 2;
739 newlen = maxstorepathlen + 2;
737 else
740 else
738 newlen = len ? basicencode(NULL, 0, path, len + 1) : 1;
741 newlen = len ? basicencode(NULL, 0, path, len + 1) : 1;
739
742
740 if (newlen <= maxstorepathlen + 1) {
743 if (newlen <= maxstorepathlen + 1) {
741 if (newlen == len + 1) {
744 if (newlen == len + 1) {
742 Py_INCREF(pathobj);
745 Py_INCREF(pathobj);
743 return pathobj;
746 return pathobj;
744 }
747 }
745
748
746 newobj = PyBytes_FromStringAndSize(NULL, newlen);
749 newobj = PyBytes_FromStringAndSize(NULL, newlen);
747
750
748 if (newobj) {
751 if (newobj) {
749 assert(PyBytes_Check(newobj));
752 assert(PyBytes_Check(newobj));
750 Py_SIZE(newobj)--;
753 Py_SIZE(newobj)--;
751 basicencode(PyBytes_AS_STRING(newobj), newlen, path,
754 basicencode(PyBytes_AS_STRING(newobj), newlen, path,
752 len + 1);
755 len + 1);
753 }
756 }
754 } else
757 } else
755 newobj = hashencode(path, len + 1);
758 newobj = hashencode(path, len + 1);
756
759
757 return newobj;
760 return newobj;
758 }
761 }
General Comments 0
You need to be logged in to leave comments. Login now