##// END OF EJS Templates
pathencode: add a SHA-1 hash function...
Bryan O'Sullivan -
r18431:3aa9b213 default
parent child Browse files
Show More
@@ -1,573 +1,626 b''
1 /*
1 /*
2 pathencode.c - efficient path name encoding
2 pathencode.c - efficient path name encoding
3
3
4 Copyright 2012 Facebook
4 Copyright 2012 Facebook
5
5
6 This software may be used and distributed according to the terms of
6 This software may be used and distributed according to the terms of
7 the GNU General Public License, incorporated herein by reference.
7 the GNU General Public License, incorporated herein by reference.
8 */
8 */
9
9
10 /*
10 /*
11 * An implementation of the name encoding scheme used by the fncache
11 * An implementation of the name encoding scheme used by the fncache
12 * store. The common case is of a path < 120 bytes long, which is
12 * store. The common case is of a path < 120 bytes long, which is
13 * handled either in a single pass with no allocations or two passes
13 * handled either in a single pass with no allocations or two passes
14 * with a single allocation. For longer paths, multiple passes are
14 * with a single allocation. For longer paths, multiple passes are
15 * required.
15 * required.
16 */
16 */
17
17
18 #define PY_SSIZE_T_CLEAN
18 #define PY_SSIZE_T_CLEAN
19 #include <Python.h>
19 #include <Python.h>
20 #include <assert.h>
20 #include <assert.h>
21 #include <ctype.h>
21 #include <ctype.h>
22 #include <stdlib.h>
22 #include <stdlib.h>
23 #include <string.h>
23 #include <string.h>
24
24
25 #include "util.h"
25 #include "util.h"
26
26
/*
 * States of the fast-path encoder (_encode).
 *
 * Each state records how much of a possibly special prefix or suffix
 * of the current path component has been consumed so far.  The
 * enumerator order is kept stable.
 */
enum path_state {
	START,   /* at the first byte of a path component */
	A,       /* consumed "a" (candidate for "aux") */
	AU,      /* consumed "au" */
	THIRD,   /* consumed a full 3-byte name such as "aux" or "nul" */
	C,       /* consumed "c" (candidate for "con" or "comN") */
	CO,      /* consumed "co" */
	COMLPT,  /* consumed "com" or "lpt" */
	COMLPTn, /* consumed "com" or "lpt" followed by a digit 1-9 */
	L,       /* consumed "l" (candidate for "lptN") */
	LP,      /* consumed "lp" */
	N,       /* consumed "n" (candidate for "nul") */
	NU,      /* consumed "nu" */
	P,       /* consumed "p" (candidate for "prn") */
	PR,      /* consumed "pr" */
	LDOT,    /* consumed a leading '.' */
	DOT,     /* consumed a '.' that is not in leading position */
	H,       /* consumed ".h" */
	HGDI,    /* consumed ".hg", ".d", or ".i" */
	SPACE,   /* consumed a ' ' that is not in leading position */
	DEFAULT, /* any byte of a path component after the first */
};
50
50
/*
 * States of the directory encoder (_encodedir).
 */
enum dir_state {
	DDOT,     /* the previous byte was '.' */
	DH,       /* consumed ".h" */
	DHGDI,    /* consumed ".hg", ".d", or ".i" */
	DDEFAULT, /* nothing special pending */
};
58
58
/*
 * Test whether byte c is a member of a 256-bit set.
 *
 * bitset is an array of eight 32-bit words; byte value v is stored in
 * bit (v & 31) of word (v >> 5).  c is reinterpreted as an unsigned
 * byte so that values above 127 index correctly regardless of the
 * signedness of plain char.
 *
 * Returns 1 if c is in the set and 0 otherwise.
 */
static inline int inset(const uint32_t bitset[], char c)
{
	const uint8_t byte = (uint8_t)c;
	/*
	 * Shift an unsigned one: (1 << 31) on a signed int is undefined
	 * behaviour, and bit 31 is needed for bytes such as '_' (95).
	 */
	const uint32_t mask = (uint32_t)1 << (byte & 31);

	return (bitset[byte >> 5] & mask) != 0;
}
63
63
64 static inline void charcopy(char *dest, Py_ssize_t *destlen, size_t destsize,
64 static inline void charcopy(char *dest, Py_ssize_t *destlen, size_t destsize,
65 char c)
65 char c)
66 {
66 {
67 if (dest) {
67 if (dest) {
68 assert(*destlen < destsize);
68 assert(*destlen < destsize);
69 dest[*destlen] = c;
69 dest[*destlen] = c;
70 }
70 }
71 (*destlen)++;
71 (*destlen)++;
72 }
72 }
73
73
74 static inline void memcopy(char *dest, Py_ssize_t *destlen, size_t destsize,
74 static inline void memcopy(char *dest, Py_ssize_t *destlen, size_t destsize,
75 const void *src, Py_ssize_t len)
75 const void *src, Py_ssize_t len)
76 {
76 {
77 if (dest) {
77 if (dest) {
78 assert(*destlen + len < destsize);
78 assert(*destlen + len < destsize);
79 memcpy((void *)&dest[*destlen], src, len);
79 memcpy((void *)&dest[*destlen], src, len);
80 }
80 }
81 *destlen += len;
81 *destlen += len;
82 }
82 }
83
83
84 static inline void hexencode(char *dest, Py_ssize_t *destlen, size_t destsize,
84 static inline void hexencode(char *dest, Py_ssize_t *destlen, size_t destsize,
85 uint8_t c)
85 uint8_t c)
86 {
86 {
87 static const char hexdigit[] = "0123456789abcdef";
87 static const char hexdigit[] = "0123456789abcdef";
88
88
89 charcopy(dest, destlen, destsize, hexdigit[c >> 4]);
89 charcopy(dest, destlen, destsize, hexdigit[c >> 4]);
90 charcopy(dest, destlen, destsize, hexdigit[c & 15]);
90 charcopy(dest, destlen, destsize, hexdigit[c & 15]);
91 }
91 }
92
92
93 /* 3-byte escape: tilde followed by two hex digits */
93 /* 3-byte escape: tilde followed by two hex digits */
94 static inline void escape3(char *dest, Py_ssize_t *destlen, size_t destsize,
94 static inline void escape3(char *dest, Py_ssize_t *destlen, size_t destsize,
95 char c)
95 char c)
96 {
96 {
97 charcopy(dest, destlen, destsize, '~');
97 charcopy(dest, destlen, destsize, '~');
98 hexencode(dest, destlen, destsize, c);
98 hexencode(dest, destlen, destsize, c);
99 }
99 }
100
100
101 static Py_ssize_t _encodedir(char *dest, size_t destsize,
101 static Py_ssize_t _encodedir(char *dest, size_t destsize,
102 const char *src, Py_ssize_t len)
102 const char *src, Py_ssize_t len)
103 {
103 {
104 enum dir_state state = DDEFAULT;
104 enum dir_state state = DDEFAULT;
105 Py_ssize_t i = 0, destlen = 0;
105 Py_ssize_t i = 0, destlen = 0;
106
106
107 while (i < len) {
107 while (i < len) {
108 switch (state) {
108 switch (state) {
109 case DDOT:
109 case DDOT:
110 switch (src[i]) {
110 switch (src[i]) {
111 case 'd':
111 case 'd':
112 case 'i':
112 case 'i':
113 state = DHGDI;
113 state = DHGDI;
114 charcopy(dest, &destlen, destsize, src[i++]);
114 charcopy(dest, &destlen, destsize, src[i++]);
115 break;
115 break;
116 case 'h':
116 case 'h':
117 state = DH;
117 state = DH;
118 charcopy(dest, &destlen, destsize, src[i++]);
118 charcopy(dest, &destlen, destsize, src[i++]);
119 break;
119 break;
120 default:
120 default:
121 state = DDEFAULT;
121 state = DDEFAULT;
122 break;
122 break;
123 }
123 }
124 break;
124 break;
125 case DH:
125 case DH:
126 if (src[i] == 'g') {
126 if (src[i] == 'g') {
127 state = DHGDI;
127 state = DHGDI;
128 charcopy(dest, &destlen, destsize, src[i++]);
128 charcopy(dest, &destlen, destsize, src[i++]);
129 }
129 }
130 else state = DDEFAULT;
130 else state = DDEFAULT;
131 break;
131 break;
132 case DHGDI:
132 case DHGDI:
133 if (src[i] == '/') {
133 if (src[i] == '/') {
134 memcopy(dest, &destlen, destsize, ".hg", 3);
134 memcopy(dest, &destlen, destsize, ".hg", 3);
135 charcopy(dest, &destlen, destsize, src[i++]);
135 charcopy(dest, &destlen, destsize, src[i++]);
136 }
136 }
137 state = DDEFAULT;
137 state = DDEFAULT;
138 break;
138 break;
139 case DDEFAULT:
139 case DDEFAULT:
140 if (src[i] == '.')
140 if (src[i] == '.')
141 state = DDOT;
141 state = DDOT;
142 charcopy(dest, &destlen, destsize, src[i++]);
142 charcopy(dest, &destlen, destsize, src[i++]);
143 break;
143 break;
144 }
144 }
145 }
145 }
146
146
147 return destlen;
147 return destlen;
148 }
148 }
149
149
150 PyObject *encodedir(PyObject *self, PyObject *args)
150 PyObject *encodedir(PyObject *self, PyObject *args)
151 {
151 {
152 Py_ssize_t len, newlen;
152 Py_ssize_t len, newlen;
153 PyObject *pathobj, *newobj;
153 PyObject *pathobj, *newobj;
154 char *path;
154 char *path;
155
155
156 if (!PyArg_ParseTuple(args, "O:encodedir", &pathobj))
156 if (!PyArg_ParseTuple(args, "O:encodedir", &pathobj))
157 return NULL;
157 return NULL;
158
158
159 if (PyString_AsStringAndSize(pathobj, &path, &len) == -1) {
159 if (PyString_AsStringAndSize(pathobj, &path, &len) == -1) {
160 PyErr_SetString(PyExc_TypeError, "expected a string");
160 PyErr_SetString(PyExc_TypeError, "expected a string");
161 return NULL;
161 return NULL;
162 }
162 }
163
163
164 newlen = len ? _encodedir(NULL, 0, path, len + 1) : 1;
164 newlen = len ? _encodedir(NULL, 0, path, len + 1) : 1;
165
165
166 if (newlen == len + 1) {
166 if (newlen == len + 1) {
167 Py_INCREF(pathobj);
167 Py_INCREF(pathobj);
168 return pathobj;
168 return pathobj;
169 }
169 }
170
170
171 newobj = PyString_FromStringAndSize(NULL, newlen);
171 newobj = PyString_FromStringAndSize(NULL, newlen);
172
172
173 if (newobj) {
173 if (newobj) {
174 PyString_GET_SIZE(newobj)--;
174 PyString_GET_SIZE(newobj)--;
175 _encodedir(PyString_AS_STRING(newobj), newlen, path,
175 _encodedir(PyString_AS_STRING(newobj), newlen, path,
176 len + 1);
176 len + 1);
177 }
177 }
178
178
179 return newobj;
179 return newobj;
180 }
180 }
181
181
182 static Py_ssize_t _encode(const uint32_t twobytes[8], const uint32_t onebyte[8],
182 static Py_ssize_t _encode(const uint32_t twobytes[8], const uint32_t onebyte[8],
183 char *dest, Py_ssize_t destlen, size_t destsize,
183 char *dest, Py_ssize_t destlen, size_t destsize,
184 const char *src, Py_ssize_t len,
184 const char *src, Py_ssize_t len,
185 int encodedir)
185 int encodedir)
186 {
186 {
187 enum path_state state = START;
187 enum path_state state = START;
188 Py_ssize_t i = 0;
188 Py_ssize_t i = 0;
189
189
190 /*
190 /*
191 * Python strings end with a zero byte, which we use as a
191 * Python strings end with a zero byte, which we use as a
192 * terminal token as they are not valid inside path names.
192 * terminal token as they are not valid inside path names.
193 */
193 */
194
194
195 while (i < len) {
195 while (i < len) {
196 switch (state) {
196 switch (state) {
197 case START:
197 case START:
198 switch (src[i]) {
198 switch (src[i]) {
199 case '/':
199 case '/':
200 charcopy(dest, &destlen, destsize, src[i++]);
200 charcopy(dest, &destlen, destsize, src[i++]);
201 break;
201 break;
202 case '.':
202 case '.':
203 state = LDOT;
203 state = LDOT;
204 escape3(dest, &destlen, destsize, src[i++]);
204 escape3(dest, &destlen, destsize, src[i++]);
205 break;
205 break;
206 case ' ':
206 case ' ':
207 state = DEFAULT;
207 state = DEFAULT;
208 escape3(dest, &destlen, destsize, src[i++]);
208 escape3(dest, &destlen, destsize, src[i++]);
209 break;
209 break;
210 case 'a':
210 case 'a':
211 state = A;
211 state = A;
212 charcopy(dest, &destlen, destsize, src[i++]);
212 charcopy(dest, &destlen, destsize, src[i++]);
213 break;
213 break;
214 case 'c':
214 case 'c':
215 state = C;
215 state = C;
216 charcopy(dest, &destlen, destsize, src[i++]);
216 charcopy(dest, &destlen, destsize, src[i++]);
217 break;
217 break;
218 case 'l':
218 case 'l':
219 state = L;
219 state = L;
220 charcopy(dest, &destlen, destsize, src[i++]);
220 charcopy(dest, &destlen, destsize, src[i++]);
221 break;
221 break;
222 case 'n':
222 case 'n':
223 state = N;
223 state = N;
224 charcopy(dest, &destlen, destsize, src[i++]);
224 charcopy(dest, &destlen, destsize, src[i++]);
225 break;
225 break;
226 case 'p':
226 case 'p':
227 state = P;
227 state = P;
228 charcopy(dest, &destlen, destsize, src[i++]);
228 charcopy(dest, &destlen, destsize, src[i++]);
229 break;
229 break;
230 default:
230 default:
231 state = DEFAULT;
231 state = DEFAULT;
232 break;
232 break;
233 }
233 }
234 break;
234 break;
235 case A:
235 case A:
236 if (src[i] == 'u') {
236 if (src[i] == 'u') {
237 state = AU;
237 state = AU;
238 charcopy(dest, &destlen, destsize, src[i++]);
238 charcopy(dest, &destlen, destsize, src[i++]);
239 }
239 }
240 else state = DEFAULT;
240 else state = DEFAULT;
241 break;
241 break;
242 case AU:
242 case AU:
243 if (src[i] == 'x') {
243 if (src[i] == 'x') {
244 state = THIRD;
244 state = THIRD;
245 i++;
245 i++;
246 }
246 }
247 else state = DEFAULT;
247 else state = DEFAULT;
248 break;
248 break;
249 case THIRD:
249 case THIRD:
250 state = DEFAULT;
250 state = DEFAULT;
251 switch (src[i]) {
251 switch (src[i]) {
252 case '.':
252 case '.':
253 case '/':
253 case '/':
254 case '\0':
254 case '\0':
255 escape3(dest, &destlen, destsize, src[i - 1]);
255 escape3(dest, &destlen, destsize, src[i - 1]);
256 break;
256 break;
257 default:
257 default:
258 i--;
258 i--;
259 break;
259 break;
260 }
260 }
261 break;
261 break;
262 case C:
262 case C:
263 if (src[i] == 'o') {
263 if (src[i] == 'o') {
264 state = CO;
264 state = CO;
265 charcopy(dest, &destlen, destsize, src[i++]);
265 charcopy(dest, &destlen, destsize, src[i++]);
266 }
266 }
267 else state = DEFAULT;
267 else state = DEFAULT;
268 break;
268 break;
269 case CO:
269 case CO:
270 if (src[i] == 'm') {
270 if (src[i] == 'm') {
271 state = COMLPT;
271 state = COMLPT;
272 i++;
272 i++;
273 }
273 }
274 else if (src[i] == 'n') {
274 else if (src[i] == 'n') {
275 state = THIRD;
275 state = THIRD;
276 i++;
276 i++;
277 }
277 }
278 else state = DEFAULT;
278 else state = DEFAULT;
279 break;
279 break;
280 case COMLPT:
280 case COMLPT:
281 switch (src[i]) {
281 switch (src[i]) {
282 case '1': case '2': case '3': case '4': case '5':
282 case '1': case '2': case '3': case '4': case '5':
283 case '6': case '7': case '8': case '9':
283 case '6': case '7': case '8': case '9':
284 state = COMLPTn;
284 state = COMLPTn;
285 i++;
285 i++;
286 break;
286 break;
287 default:
287 default:
288 state = DEFAULT;
288 state = DEFAULT;
289 charcopy(dest, &destlen, destsize, src[i - 1]);
289 charcopy(dest, &destlen, destsize, src[i - 1]);
290 break;
290 break;
291 }
291 }
292 break;
292 break;
293 case COMLPTn:
293 case COMLPTn:
294 state = DEFAULT;
294 state = DEFAULT;
295 switch (src[i]) {
295 switch (src[i]) {
296 case '.':
296 case '.':
297 case '/':
297 case '/':
298 case '\0':
298 case '\0':
299 escape3(dest, &destlen, destsize, src[i - 2]);
299 escape3(dest, &destlen, destsize, src[i - 2]);
300 charcopy(dest, &destlen, destsize, src[i - 1]);
300 charcopy(dest, &destlen, destsize, src[i - 1]);
301 break;
301 break;
302 default:
302 default:
303 memcopy(dest, &destlen, destsize,
303 memcopy(dest, &destlen, destsize,
304 &src[i - 2], 2);
304 &src[i - 2], 2);
305 break;
305 break;
306 }
306 }
307 break;
307 break;
308 case L:
308 case L:
309 if (src[i] == 'p') {
309 if (src[i] == 'p') {
310 state = LP;
310 state = LP;
311 charcopy(dest, &destlen, destsize, src[i++]);
311 charcopy(dest, &destlen, destsize, src[i++]);
312 }
312 }
313 else state = DEFAULT;
313 else state = DEFAULT;
314 break;
314 break;
315 case LP:
315 case LP:
316 if (src[i] == 't') {
316 if (src[i] == 't') {
317 state = COMLPT;
317 state = COMLPT;
318 i++;
318 i++;
319 }
319 }
320 else state = DEFAULT;
320 else state = DEFAULT;
321 break;
321 break;
322 case N:
322 case N:
323 if (src[i] == 'u') {
323 if (src[i] == 'u') {
324 state = NU;
324 state = NU;
325 charcopy(dest, &destlen, destsize, src[i++]);
325 charcopy(dest, &destlen, destsize, src[i++]);
326 }
326 }
327 else state = DEFAULT;
327 else state = DEFAULT;
328 break;
328 break;
329 case NU:
329 case NU:
330 if (src[i] == 'l') {
330 if (src[i] == 'l') {
331 state = THIRD;
331 state = THIRD;
332 i++;
332 i++;
333 }
333 }
334 else state = DEFAULT;
334 else state = DEFAULT;
335 break;
335 break;
336 case P:
336 case P:
337 if (src[i] == 'r') {
337 if (src[i] == 'r') {
338 state = PR;
338 state = PR;
339 charcopy(dest, &destlen, destsize, src[i++]);
339 charcopy(dest, &destlen, destsize, src[i++]);
340 }
340 }
341 else state = DEFAULT;
341 else state = DEFAULT;
342 break;
342 break;
343 case PR:
343 case PR:
344 if (src[i] == 'n') {
344 if (src[i] == 'n') {
345 state = THIRD;
345 state = THIRD;
346 i++;
346 i++;
347 }
347 }
348 else state = DEFAULT;
348 else state = DEFAULT;
349 break;
349 break;
350 case LDOT:
350 case LDOT:
351 switch (src[i]) {
351 switch (src[i]) {
352 case 'd':
352 case 'd':
353 case 'i':
353 case 'i':
354 state = HGDI;
354 state = HGDI;
355 charcopy(dest, &destlen, destsize, src[i++]);
355 charcopy(dest, &destlen, destsize, src[i++]);
356 break;
356 break;
357 case 'h':
357 case 'h':
358 state = H;
358 state = H;
359 charcopy(dest, &destlen, destsize, src[i++]);
359 charcopy(dest, &destlen, destsize, src[i++]);
360 break;
360 break;
361 default:
361 default:
362 state = DEFAULT;
362 state = DEFAULT;
363 break;
363 break;
364 }
364 }
365 break;
365 break;
366 case DOT:
366 case DOT:
367 switch (src[i]) {
367 switch (src[i]) {
368 case '/':
368 case '/':
369 case '\0':
369 case '\0':
370 state = START;
370 state = START;
371 memcopy(dest, &destlen, destsize, "~2e", 3);
371 memcopy(dest, &destlen, destsize, "~2e", 3);
372 charcopy(dest, &destlen, destsize, src[i++]);
372 charcopy(dest, &destlen, destsize, src[i++]);
373 break;
373 break;
374 case 'd':
374 case 'd':
375 case 'i':
375 case 'i':
376 state = HGDI;
376 state = HGDI;
377 charcopy(dest, &destlen, destsize, '.');
377 charcopy(dest, &destlen, destsize, '.');
378 charcopy(dest, &destlen, destsize, src[i++]);
378 charcopy(dest, &destlen, destsize, src[i++]);
379 break;
379 break;
380 case 'h':
380 case 'h':
381 state = H;
381 state = H;
382 memcopy(dest, &destlen, destsize, ".h", 2);
382 memcopy(dest, &destlen, destsize, ".h", 2);
383 i++;
383 i++;
384 break;
384 break;
385 default:
385 default:
386 state = DEFAULT;
386 state = DEFAULT;
387 charcopy(dest, &destlen, destsize, '.');
387 charcopy(dest, &destlen, destsize, '.');
388 break;
388 break;
389 }
389 }
390 break;
390 break;
391 case H:
391 case H:
392 if (src[i] == 'g') {
392 if (src[i] == 'g') {
393 state = HGDI;
393 state = HGDI;
394 charcopy(dest, &destlen, destsize, src[i++]);
394 charcopy(dest, &destlen, destsize, src[i++]);
395 }
395 }
396 else state = DEFAULT;
396 else state = DEFAULT;
397 break;
397 break;
398 case HGDI:
398 case HGDI:
399 if (src[i] == '/') {
399 if (src[i] == '/') {
400 state = START;
400 state = START;
401 if (encodedir)
401 if (encodedir)
402 memcopy(dest, &destlen, destsize, ".hg",
402 memcopy(dest, &destlen, destsize, ".hg",
403 3);
403 3);
404 charcopy(dest, &destlen, destsize, src[i++]);
404 charcopy(dest, &destlen, destsize, src[i++]);
405 }
405 }
406 else state = DEFAULT;
406 else state = DEFAULT;
407 break;
407 break;
408 case SPACE:
408 case SPACE:
409 switch (src[i]) {
409 switch (src[i]) {
410 case '/':
410 case '/':
411 case '\0':
411 case '\0':
412 state = START;
412 state = START;
413 memcopy(dest, &destlen, destsize, "~20", 3);
413 memcopy(dest, &destlen, destsize, "~20", 3);
414 charcopy(dest, &destlen, destsize, src[i++]);
414 charcopy(dest, &destlen, destsize, src[i++]);
415 break;
415 break;
416 default:
416 default:
417 state = DEFAULT;
417 state = DEFAULT;
418 charcopy(dest, &destlen, destsize, ' ');
418 charcopy(dest, &destlen, destsize, ' ');
419 break;
419 break;
420 }
420 }
421 break;
421 break;
422 case DEFAULT:
422 case DEFAULT:
423 while (inset(onebyte, src[i])) {
423 while (inset(onebyte, src[i])) {
424 charcopy(dest, &destlen, destsize, src[i++]);
424 charcopy(dest, &destlen, destsize, src[i++]);
425 if (i == len)
425 if (i == len)
426 goto done;
426 goto done;
427 }
427 }
428 switch (src[i]) {
428 switch (src[i]) {
429 case '.':
429 case '.':
430 state = DOT;
430 state = DOT;
431 i++;
431 i++;
432 break;
432 break;
433 case ' ':
433 case ' ':
434 state = SPACE;
434 state = SPACE;
435 i++;
435 i++;
436 break;
436 break;
437 case '/':
437 case '/':
438 state = START;
438 state = START;
439 charcopy(dest, &destlen, destsize, '/');
439 charcopy(dest, &destlen, destsize, '/');
440 i++;
440 i++;
441 break;
441 break;
442 default:
442 default:
443 if (inset(onebyte, src[i])) {
443 if (inset(onebyte, src[i])) {
444 do {
444 do {
445 charcopy(dest, &destlen,
445 charcopy(dest, &destlen,
446 destsize, src[i++]);
446 destsize, src[i++]);
447 } while (i < len &&
447 } while (i < len &&
448 inset(onebyte, src[i]));
448 inset(onebyte, src[i]));
449 }
449 }
450 else if (inset(twobytes, src[i])) {
450 else if (inset(twobytes, src[i])) {
451 char c = src[i++];
451 char c = src[i++];
452 charcopy(dest, &destlen, destsize, '_');
452 charcopy(dest, &destlen, destsize, '_');
453 charcopy(dest, &destlen, destsize,
453 charcopy(dest, &destlen, destsize,
454 c == '_' ? '_' : c + 32);
454 c == '_' ? '_' : c + 32);
455 }
455 }
456 else
456 else
457 escape3(dest, &destlen, destsize,
457 escape3(dest, &destlen, destsize,
458 src[i++]);
458 src[i++]);
459 break;
459 break;
460 }
460 }
461 break;
461 break;
462 }
462 }
463 }
463 }
464 done:
464 done:
465 return destlen;
465 return destlen;
466 }
466 }
467
467
468 static Py_ssize_t basicencode(char *dest, size_t destsize,
468 static Py_ssize_t basicencode(char *dest, size_t destsize,
469 const char *src, Py_ssize_t len)
469 const char *src, Py_ssize_t len)
470 {
470 {
471 static const uint32_t twobytes[8] = { 0, 0, 0x87fffffe };
471 static const uint32_t twobytes[8] = { 0, 0, 0x87fffffe };
472
472
473 static const uint32_t onebyte[8] = {
473 static const uint32_t onebyte[8] = {
474 1, 0x2bff3bfa, 0x68000001, 0x2fffffff,
474 1, 0x2bff3bfa, 0x68000001, 0x2fffffff,
475 };
475 };
476
476
477 Py_ssize_t destlen = 0;
477 Py_ssize_t destlen = 0;
478
478
479 return _encode(twobytes, onebyte, dest, destlen, destsize,
479 return _encode(twobytes, onebyte, dest, destlen, destsize,
480 src, len, 1);
480 src, len, 1);
481 }
481 }
482
482
483 static const Py_ssize_t maxstorepathlen = 120;
483 static const Py_ssize_t maxstorepathlen = 120;
484
484
485 static Py_ssize_t _lowerencode(char *dest, size_t destsize,
485 static Py_ssize_t _lowerencode(char *dest, size_t destsize,
486 const char *src, Py_ssize_t len)
486 const char *src, Py_ssize_t len)
487 {
487 {
488 static const uint32_t onebyte[8] = {
488 static const uint32_t onebyte[8] = {
489 1, 0x2bfffbfb, 0xe8000001, 0x2fffffff
489 1, 0x2bfffbfb, 0xe8000001, 0x2fffffff
490 };
490 };
491
491
492 static const uint32_t lower[8] = { 0, 0, 0x7fffffe };
492 static const uint32_t lower[8] = { 0, 0, 0x7fffffe };
493
493
494 Py_ssize_t i, destlen = 0;
494 Py_ssize_t i, destlen = 0;
495
495
496 for (i = 0; i < len; i++) {
496 for (i = 0; i < len; i++) {
497 if (inset(onebyte, src[i]))
497 if (inset(onebyte, src[i]))
498 charcopy(dest, &destlen, destsize, src[i]);
498 charcopy(dest, &destlen, destsize, src[i]);
499 else if (inset(lower, src[i]))
499 else if (inset(lower, src[i]))
500 charcopy(dest, &destlen, destsize, src[i] + 32);
500 charcopy(dest, &destlen, destsize, src[i] + 32);
501 else
501 else
502 escape3(dest, &destlen, destsize, src[i]);
502 escape3(dest, &destlen, destsize, src[i]);
503 }
503 }
504
504
505 return destlen;
505 return destlen;
506 }
506 }
507
507
508 PyObject *lowerencode(PyObject *self, PyObject *args)
508 PyObject *lowerencode(PyObject *self, PyObject *args)
509 {
509 {
510 char *path;
510 char *path;
511 Py_ssize_t len, newlen;
511 Py_ssize_t len, newlen;
512 PyObject *ret;
512 PyObject *ret;
513
513
514 if (!PyArg_ParseTuple(args, "s#:lowerencode", &path, &len))
514 if (!PyArg_ParseTuple(args, "s#:lowerencode", &path, &len))
515 return NULL;
515 return NULL;
516
516
517 newlen = _lowerencode(NULL, 0, path, len);
517 newlen = _lowerencode(NULL, 0, path, len);
518 ret = PyString_FromStringAndSize(NULL, newlen);
518 ret = PyString_FromStringAndSize(NULL, newlen);
519 if (ret)
519 if (ret)
520 newlen = _lowerencode(PyString_AS_STRING(ret), newlen,
520 newlen = _lowerencode(PyString_AS_STRING(ret), newlen,
521 path, len);
521 path, len);
522
522
523 return ret;
523 return ret;
524 }
524 }
525
525
526 /*
526 /*
527 * Avoiding a trip through Python would improve performance by 50%,
528 * but we don't encounter enough long names to be worth the code.
529 */
530 static int sha1hash(char hash[20], const char *str, Py_ssize_t len)
531 {
532 static PyObject *shafunc;
533 PyObject *shaobj, *hashobj;
534
535 if (shafunc == NULL) {
536 PyObject *util, *name = PyString_FromString("mercurial.util");
537
538 if (name == NULL)
539 return -1;
540
541 util = PyImport_Import(name);
542 Py_DECREF(name);
543
544 if (util == NULL) {
545 PyErr_SetString(PyExc_ImportError, "mercurial.util");
546 return -1;
547 }
548 shafunc = PyObject_GetAttrString(util, "sha1");
549 Py_DECREF(util);
550
551 if (shafunc == NULL) {
552 PyErr_SetString(PyExc_AttributeError,
553 "module 'mercurial.util' has no "
554 "attribute 'sha1'");
555 return -1;
556 }
557 }
558
559 shaobj = PyObject_CallFunction(shafunc, "s#", str, len);
560
561 if (shaobj == NULL)
562 return -1;
563
564 hashobj = PyObject_CallMethod(shaobj, "digest", "");
565 Py_DECREF(shaobj);
566
567 if (!PyString_Check(hashobj) || PyString_GET_SIZE(hashobj) != 20) {
568 PyErr_SetString(PyExc_TypeError,
569 "result of digest is not a 20-byte hash");
570 Py_DECREF(hashobj);
571 return -1;
572 }
573
574 memcpy(hash, PyString_AS_STRING(hashobj), 20);
575 Py_DECREF(hashobj);
576 return 0;
577 }
578
579 /*
527 * We currently implement only basic encoding.
580 * We currently implement only basic encoding.
528 *
581 *
529 * If a name is too long to encode due to Windows path name limits,
582 * If a name is too long to encode due to Windows path name limits,
530 * this function returns None.
583 * this function returns None.
531 */
584 */
532 PyObject *pathencode(PyObject *self, PyObject *args)
585 PyObject *pathencode(PyObject *self, PyObject *args)
533 {
586 {
534 Py_ssize_t len, newlen;
587 Py_ssize_t len, newlen;
535 PyObject *pathobj, *newobj;
588 PyObject *pathobj, *newobj;
536 char *path;
589 char *path;
537
590
538 if (!PyArg_ParseTuple(args, "O:pathencode", &pathobj))
591 if (!PyArg_ParseTuple(args, "O:pathencode", &pathobj))
539 return NULL;
592 return NULL;
540
593
541 if (PyString_AsStringAndSize(pathobj, &path, &len) == -1) {
594 if (PyString_AsStringAndSize(pathobj, &path, &len) == -1) {
542 PyErr_SetString(PyExc_TypeError, "expected a string");
595 PyErr_SetString(PyExc_TypeError, "expected a string");
543 return NULL;
596 return NULL;
544 }
597 }
545
598
546 if (len > maxstorepathlen) {
599 if (len > maxstorepathlen) {
547 newobj = Py_None;
600 newobj = Py_None;
548 Py_INCREF(newobj);
601 Py_INCREF(newobj);
549 return newobj;
602 return newobj;
550 }
603 }
551
604
552 newlen = len ? basicencode(NULL, 0, path, len + 1) : 1;
605 newlen = len ? basicencode(NULL, 0, path, len + 1) : 1;
553
606
554 if (newlen <= maxstorepathlen + 1) {
607 if (newlen <= maxstorepathlen + 1) {
555 if (newlen == len + 1) {
608 if (newlen == len + 1) {
556 Py_INCREF(pathobj);
609 Py_INCREF(pathobj);
557 return pathobj;
610 return pathobj;
558 }
611 }
559
612
560 newobj = PyString_FromStringAndSize(NULL, newlen);
613 newobj = PyString_FromStringAndSize(NULL, newlen);
561
614
562 if (newobj) {
615 if (newobj) {
563 PyString_GET_SIZE(newobj)--;
616 PyString_GET_SIZE(newobj)--;
564 basicencode(PyString_AS_STRING(newobj), newlen, path,
617 basicencode(PyString_AS_STRING(newobj), newlen, path,
565 len + 1);
618 len + 1);
566 }
619 }
567 } else {
620 } else {
568 newobj = Py_None;
621 newobj = Py_None;
569 Py_INCREF(newobj);
622 Py_INCREF(newobj);
570 }
623 }
571
624
572 return newobj;
625 return newobj;
573 }
626 }
General Comments 0
You need to be logged in to leave comments. Login now