##// END OF EJS Templates
pathencode: simplify basicencode
Adrian Buehlmann -
r17691:c6c7e466 default
parent child Browse files
Show More
@@ -1,532 +1,525
/*
 pathencode.c - efficient path name encoding

 Copyright 2012 Facebook

 This software may be used and distributed according to the terms of
 the GNU General Public License, incorporated herein by reference.
*/
9
9
/*
 * An implementation of the name encoding scheme used by the fncache
 * store. The common case is of a path < 120 bytes long, which is
 * handled either in a single pass with no allocations or two passes
 * with a single allocation. For longer paths, multiple passes are
 * required.
 */
17
17
18 #include <Python.h>
18 #include <Python.h>
19 #include <assert.h>
19 #include <assert.h>
20 #include <ctype.h>
20 #include <ctype.h>
21 #include <stdlib.h>
21 #include <stdlib.h>
22 #include <string.h>
22 #include <string.h>
23
23
24 #include "util.h"
24 #include "util.h"
25
25
/*
 * State machine for the fast path.
 *
 * The enumerator order is significant only in that START must be the
 * initial state used by _encode(); values are otherwise opaque.
 */
enum path_state {
	START,   /* first byte of a path component */
	A,       /* "a" seen, may become "aux" */
	AU,      /* "au" seen */
	THIRD,   /* third of a 3-byte sequence, e.g. "AUX", "NUL" */
	C,       /* "c" seen, may become "con" or "comN" */
	CO,      /* "co" seen */
	COMLPT,  /* "com" or "lpt" seen, waiting for a digit 1-9 */
	COMLPTn, /* "comN" or "lptN" seen, waiting for a terminator */
	L,       /* "l" seen, may become "lptN" */
	LP,      /* "lp" seen */
	N,       /* "n" seen, may become "nul" */
	NU,      /* "nu" seen */
	P,       /* "p" seen, may become "prn" */
	PR,      /* "pr" seen */
	LDOT,    /* leading '.' */
	DOT,     /* '.' in a non-leading position */
	H,       /* ".h" */
	HGDI,    /* ".hg", ".d", or ".i" */
	SPACE,   /* ' ' in a non-leading position */
	DEFAULT, /* byte of a path component after the first */
};
49
49
/* state machine for dir-encoding */
enum dir_state {
	DDOT,     /* '.' seen */
	DH,       /* ".h" seen */
	DHGDI,    /* ".hg", ".d", or ".i" seen */
	DDEFAULT, /* any other position */
};
57
57
/*
 * Test whether byte c is a member of a 256-entry bit set stored as
 * eight 32-bit words (word index = c / 32, bit index = c % 32).
 *
 * Returns 1 if the bit for c is set, 0 otherwise.
 */
static inline int isset(const uint32_t bitset[], char c)
{
	/* go through uint8_t so that bytes >= 0x80 never index negatively */
	const uint8_t byte = (uint8_t)c;

	/*
	 * Shift the (unsigned) word down instead of shifting a signed 1 up:
	 * "1 << 31" overflows a 32-bit int, which is undefined behaviour.
	 */
	return (int)((bitset[byte >> 5] >> (byte & 31)) & 1u);
}
62
62
63 static inline void charcopy(char *dest, Py_ssize_t *destlen, size_t destsize,
63 static inline void charcopy(char *dest, Py_ssize_t *destlen, size_t destsize,
64 char c)
64 char c)
65 {
65 {
66 if (dest) {
66 if (dest) {
67 assert(*destlen < destsize);
67 assert(*destlen < destsize);
68 dest[*destlen] = c;
68 dest[*destlen] = c;
69 }
69 }
70 (*destlen)++;
70 (*destlen)++;
71 }
71 }
72
72
73 static inline void memcopy(char *dest, Py_ssize_t *destlen, size_t destsize,
73 static inline void memcopy(char *dest, Py_ssize_t *destlen, size_t destsize,
74 const void *src, Py_ssize_t len)
74 const void *src, Py_ssize_t len)
75 {
75 {
76 if (dest) {
76 if (dest) {
77 assert(*destlen + len < destsize);
77 assert(*destlen + len < destsize);
78 memcpy((void *)&dest[*destlen], src, len);
78 memcpy((void *)&dest[*destlen], src, len);
79 }
79 }
80 *destlen += len;
80 *destlen += len;
81 }
81 }
82
82
83 static inline void hexencode(char *dest, Py_ssize_t *destlen, size_t destsize,
83 static inline void hexencode(char *dest, Py_ssize_t *destlen, size_t destsize,
84 uint8_t c)
84 uint8_t c)
85 {
85 {
86 static const char hexdigit[] = "0123456789abcdef";
86 static const char hexdigit[] = "0123456789abcdef";
87
87
88 charcopy(dest, destlen, destsize, hexdigit[c >> 4]);
88 charcopy(dest, destlen, destsize, hexdigit[c >> 4]);
89 charcopy(dest, destlen, destsize, hexdigit[c & 15]);
89 charcopy(dest, destlen, destsize, hexdigit[c & 15]);
90 }
90 }
91
91
92 /* 3-byte escape: tilde followed by two hex digits */
92 /* 3-byte escape: tilde followed by two hex digits */
93 static inline void escape3(char *dest, Py_ssize_t *destlen, size_t destsize,
93 static inline void escape3(char *dest, Py_ssize_t *destlen, size_t destsize,
94 char c)
94 char c)
95 {
95 {
96 charcopy(dest, destlen, destsize, '~');
96 charcopy(dest, destlen, destsize, '~');
97 hexencode(dest, destlen, destsize, c);
97 hexencode(dest, destlen, destsize, c);
98 }
98 }
99
99
100 static Py_ssize_t _encodedir(char *dest, size_t destsize,
100 static Py_ssize_t _encodedir(char *dest, size_t destsize,
101 const char *src, Py_ssize_t len)
101 const char *src, Py_ssize_t len)
102 {
102 {
103 enum dir_state state = DDEFAULT;
103 enum dir_state state = DDEFAULT;
104 Py_ssize_t i = 0, destlen = 0;
104 Py_ssize_t i = 0, destlen = 0;
105
105
106 while (i < len) {
106 while (i < len) {
107 switch (state) {
107 switch (state) {
108 case DDOT:
108 case DDOT:
109 switch (src[i]) {
109 switch (src[i]) {
110 case 'd':
110 case 'd':
111 case 'i':
111 case 'i':
112 state = DHGDI;
112 state = DHGDI;
113 charcopy(dest, &destlen, destsize, src[i++]);
113 charcopy(dest, &destlen, destsize, src[i++]);
114 break;
114 break;
115 case 'h':
115 case 'h':
116 state = DH;
116 state = DH;
117 charcopy(dest, &destlen, destsize, src[i++]);
117 charcopy(dest, &destlen, destsize, src[i++]);
118 break;
118 break;
119 default:
119 default:
120 state = DDEFAULT;
120 state = DDEFAULT;
121 break;
121 break;
122 }
122 }
123 break;
123 break;
124 case DH:
124 case DH:
125 if (src[i] == 'g') {
125 if (src[i] == 'g') {
126 state = DHGDI;
126 state = DHGDI;
127 charcopy(dest, &destlen, destsize, src[i++]);
127 charcopy(dest, &destlen, destsize, src[i++]);
128 }
128 }
129 else state = DDEFAULT;
129 else state = DDEFAULT;
130 break;
130 break;
131 case DHGDI:
131 case DHGDI:
132 if (src[i] == '/') {
132 if (src[i] == '/') {
133 memcopy(dest, &destlen, destsize, ".hg", 3);
133 memcopy(dest, &destlen, destsize, ".hg", 3);
134 charcopy(dest, &destlen, destsize, src[i++]);
134 charcopy(dest, &destlen, destsize, src[i++]);
135 }
135 }
136 state = DDEFAULT;
136 state = DDEFAULT;
137 break;
137 break;
138 case DDEFAULT:
138 case DDEFAULT:
139 if (src[i] == '.')
139 if (src[i] == '.')
140 state = DDOT;
140 state = DDOT;
141 charcopy(dest, &destlen, destsize, src[i++]);
141 charcopy(dest, &destlen, destsize, src[i++]);
142 break;
142 break;
143 }
143 }
144 }
144 }
145
145
146 return destlen;
146 return destlen;
147 }
147 }
148
148
149 PyObject *encodedir(PyObject *self, PyObject *args)
149 PyObject *encodedir(PyObject *self, PyObject *args)
150 {
150 {
151 Py_ssize_t len, newlen;
151 Py_ssize_t len, newlen;
152 PyObject *pathobj, *newobj;
152 PyObject *pathobj, *newobj;
153 char *path;
153 char *path;
154
154
155 if (!PyArg_ParseTuple(args, "O:encodedir", &pathobj))
155 if (!PyArg_ParseTuple(args, "O:encodedir", &pathobj))
156 return NULL;
156 return NULL;
157
157
158 if (PyString_AsStringAndSize(pathobj, &path, &len) == -1) {
158 if (PyString_AsStringAndSize(pathobj, &path, &len) == -1) {
159 PyErr_SetString(PyExc_TypeError, "expected a string");
159 PyErr_SetString(PyExc_TypeError, "expected a string");
160 return NULL;
160 return NULL;
161 }
161 }
162
162
163 newlen = len ? _encodedir(NULL, 0, path, len + 1) : 1;
163 newlen = len ? _encodedir(NULL, 0, path, len + 1) : 1;
164
164
165 if (newlen == len + 1) {
165 if (newlen == len + 1) {
166 Py_INCREF(pathobj);
166 Py_INCREF(pathobj);
167 return pathobj;
167 return pathobj;
168 }
168 }
169
169
170 newobj = PyString_FromStringAndSize(NULL, newlen);
170 newobj = PyString_FromStringAndSize(NULL, newlen);
171
171
172 if (newobj) {
172 if (newobj) {
173 PyString_GET_SIZE(newobj)--;
173 PyString_GET_SIZE(newobj)--;
174 _encodedir(PyString_AS_STRING(newobj), newlen, path,
174 _encodedir(PyString_AS_STRING(newobj), newlen, path,
175 len + 1);
175 len + 1);
176 }
176 }
177
177
178 return newobj;
178 return newobj;
179 }
179 }
180
180
181 static Py_ssize_t _encode(const uint32_t twobytes[8], const uint32_t onebyte[8],
181 static Py_ssize_t _encode(const uint32_t twobytes[8], const uint32_t onebyte[8],
182 char *dest, Py_ssize_t destlen, size_t destsize,
182 char *dest, Py_ssize_t destlen, size_t destsize,
183 const char *src, Py_ssize_t len,
183 const char *src, Py_ssize_t len,
184 int encodedir)
184 int encodedir)
185 {
185 {
186 enum path_state state = START;
186 enum path_state state = START;
187 Py_ssize_t i = 0;
187 Py_ssize_t i = 0;
188
188
189 /*
189 /*
190 * Python strings end with a zero byte, which we use as a
190 * Python strings end with a zero byte, which we use as a
191 * terminal token as they are not valid inside path names.
191 * terminal token as they are not valid inside path names.
192 */
192 */
193
193
194 while (i < len) {
194 while (i < len) {
195 switch (state) {
195 switch (state) {
196 case START:
196 case START:
197 switch (src[i]) {
197 switch (src[i]) {
198 case '/':
198 case '/':
199 charcopy(dest, &destlen, destsize, src[i++]);
199 charcopy(dest, &destlen, destsize, src[i++]);
200 break;
200 break;
201 case '.':
201 case '.':
202 state = LDOT;
202 state = LDOT;
203 escape3(dest, &destlen, destsize, src[i++]);
203 escape3(dest, &destlen, destsize, src[i++]);
204 break;
204 break;
205 case ' ':
205 case ' ':
206 state = DEFAULT;
206 state = DEFAULT;
207 escape3(dest, &destlen, destsize, src[i++]);
207 escape3(dest, &destlen, destsize, src[i++]);
208 break;
208 break;
209 case 'a':
209 case 'a':
210 state = A;
210 state = A;
211 charcopy(dest, &destlen, destsize, src[i++]);
211 charcopy(dest, &destlen, destsize, src[i++]);
212 break;
212 break;
213 case 'c':
213 case 'c':
214 state = C;
214 state = C;
215 charcopy(dest, &destlen, destsize, src[i++]);
215 charcopy(dest, &destlen, destsize, src[i++]);
216 break;
216 break;
217 case 'l':
217 case 'l':
218 state = L;
218 state = L;
219 charcopy(dest, &destlen, destsize, src[i++]);
219 charcopy(dest, &destlen, destsize, src[i++]);
220 break;
220 break;
221 case 'n':
221 case 'n':
222 state = N;
222 state = N;
223 charcopy(dest, &destlen, destsize, src[i++]);
223 charcopy(dest, &destlen, destsize, src[i++]);
224 break;
224 break;
225 case 'p':
225 case 'p':
226 state = P;
226 state = P;
227 charcopy(dest, &destlen, destsize, src[i++]);
227 charcopy(dest, &destlen, destsize, src[i++]);
228 break;
228 break;
229 default:
229 default:
230 state = DEFAULT;
230 state = DEFAULT;
231 break;
231 break;
232 }
232 }
233 break;
233 break;
234 case A:
234 case A:
235 if (src[i] == 'u') {
235 if (src[i] == 'u') {
236 state = AU;
236 state = AU;
237 charcopy(dest, &destlen, destsize, src[i++]);
237 charcopy(dest, &destlen, destsize, src[i++]);
238 }
238 }
239 else state = DEFAULT;
239 else state = DEFAULT;
240 break;
240 break;
241 case AU:
241 case AU:
242 if (src[i] == 'x') {
242 if (src[i] == 'x') {
243 state = THIRD;
243 state = THIRD;
244 i++;
244 i++;
245 }
245 }
246 else state = DEFAULT;
246 else state = DEFAULT;
247 break;
247 break;
248 case THIRD:
248 case THIRD:
249 state = DEFAULT;
249 state = DEFAULT;
250 switch (src[i]) {
250 switch (src[i]) {
251 case '.':
251 case '.':
252 case '/':
252 case '/':
253 case '\0':
253 case '\0':
254 escape3(dest, &destlen, destsize, src[i - 1]);
254 escape3(dest, &destlen, destsize, src[i - 1]);
255 break;
255 break;
256 default:
256 default:
257 i--;
257 i--;
258 break;
258 break;
259 }
259 }
260 break;
260 break;
261 case C:
261 case C:
262 if (src[i] == 'o') {
262 if (src[i] == 'o') {
263 state = CO;
263 state = CO;
264 charcopy(dest, &destlen, destsize, src[i++]);
264 charcopy(dest, &destlen, destsize, src[i++]);
265 }
265 }
266 else state = DEFAULT;
266 else state = DEFAULT;
267 break;
267 break;
268 case CO:
268 case CO:
269 if (src[i] == 'm') {
269 if (src[i] == 'm') {
270 state = COMLPT;
270 state = COMLPT;
271 i++;
271 i++;
272 }
272 }
273 else if (src[i] == 'n') {
273 else if (src[i] == 'n') {
274 state = THIRD;
274 state = THIRD;
275 i++;
275 i++;
276 }
276 }
277 else state = DEFAULT;
277 else state = DEFAULT;
278 break;
278 break;
279 case COMLPT:
279 case COMLPT:
280 switch (src[i]) {
280 switch (src[i]) {
281 case '1': case '2': case '3': case '4': case '5':
281 case '1': case '2': case '3': case '4': case '5':
282 case '6': case '7': case '8': case '9':
282 case '6': case '7': case '8': case '9':
283 state = COMLPTn;
283 state = COMLPTn;
284 i++;
284 i++;
285 break;
285 break;
286 default:
286 default:
287 state = DEFAULT;
287 state = DEFAULT;
288 charcopy(dest, &destlen, destsize, src[i - 1]);
288 charcopy(dest, &destlen, destsize, src[i - 1]);
289 break;
289 break;
290 }
290 }
291 break;
291 break;
292 case COMLPTn:
292 case COMLPTn:
293 state = DEFAULT;
293 state = DEFAULT;
294 switch (src[i]) {
294 switch (src[i]) {
295 case '.':
295 case '.':
296 case '/':
296 case '/':
297 case '\0':
297 case '\0':
298 escape3(dest, &destlen, destsize, src[i - 2]);
298 escape3(dest, &destlen, destsize, src[i - 2]);
299 charcopy(dest, &destlen, destsize, src[i - 1]);
299 charcopy(dest, &destlen, destsize, src[i - 1]);
300 break;
300 break;
301 default:
301 default:
302 memcopy(dest, &destlen, destsize,
302 memcopy(dest, &destlen, destsize,
303 &src[i - 2], 2);
303 &src[i - 2], 2);
304 break;
304 break;
305 }
305 }
306 break;
306 break;
307 case L:
307 case L:
308 if (src[i] == 'p') {
308 if (src[i] == 'p') {
309 state = LP;
309 state = LP;
310 charcopy(dest, &destlen, destsize, src[i++]);
310 charcopy(dest, &destlen, destsize, src[i++]);
311 }
311 }
312 else state = DEFAULT;
312 else state = DEFAULT;
313 break;
313 break;
314 case LP:
314 case LP:
315 if (src[i] == 't') {
315 if (src[i] == 't') {
316 state = COMLPT;
316 state = COMLPT;
317 i++;
317 i++;
318 }
318 }
319 else state = DEFAULT;
319 else state = DEFAULT;
320 break;
320 break;
321 case N:
321 case N:
322 if (src[i] == 'u') {
322 if (src[i] == 'u') {
323 state = NU;
323 state = NU;
324 charcopy(dest, &destlen, destsize, src[i++]);
324 charcopy(dest, &destlen, destsize, src[i++]);
325 }
325 }
326 else state = DEFAULT;
326 else state = DEFAULT;
327 break;
327 break;
328 case NU:
328 case NU:
329 if (src[i] == 'l') {
329 if (src[i] == 'l') {
330 state = THIRD;
330 state = THIRD;
331 i++;
331 i++;
332 }
332 }
333 else state = DEFAULT;
333 else state = DEFAULT;
334 break;
334 break;
335 case P:
335 case P:
336 if (src[i] == 'r') {
336 if (src[i] == 'r') {
337 state = PR;
337 state = PR;
338 charcopy(dest, &destlen, destsize, src[i++]);
338 charcopy(dest, &destlen, destsize, src[i++]);
339 }
339 }
340 else state = DEFAULT;
340 else state = DEFAULT;
341 break;
341 break;
342 case PR:
342 case PR:
343 if (src[i] == 'n') {
343 if (src[i] == 'n') {
344 state = THIRD;
344 state = THIRD;
345 i++;
345 i++;
346 }
346 }
347 else state = DEFAULT;
347 else state = DEFAULT;
348 break;
348 break;
349 case LDOT:
349 case LDOT:
350 switch (src[i]) {
350 switch (src[i]) {
351 case 'd':
351 case 'd':
352 case 'i':
352 case 'i':
353 state = HGDI;
353 state = HGDI;
354 charcopy(dest, &destlen, destsize, src[i++]);
354 charcopy(dest, &destlen, destsize, src[i++]);
355 break;
355 break;
356 case 'h':
356 case 'h':
357 state = H;
357 state = H;
358 charcopy(dest, &destlen, destsize, src[i++]);
358 charcopy(dest, &destlen, destsize, src[i++]);
359 break;
359 break;
360 default:
360 default:
361 state = DEFAULT;
361 state = DEFAULT;
362 break;
362 break;
363 }
363 }
364 break;
364 break;
365 case DOT:
365 case DOT:
366 switch (src[i]) {
366 switch (src[i]) {
367 case '/':
367 case '/':
368 case '\0':
368 case '\0':
369 state = START;
369 state = START;
370 memcopy(dest, &destlen, destsize, "~2e", 3);
370 memcopy(dest, &destlen, destsize, "~2e", 3);
371 charcopy(dest, &destlen, destsize, src[i++]);
371 charcopy(dest, &destlen, destsize, src[i++]);
372 break;
372 break;
373 case 'd':
373 case 'd':
374 case 'i':
374 case 'i':
375 state = HGDI;
375 state = HGDI;
376 charcopy(dest, &destlen, destsize, '.');
376 charcopy(dest, &destlen, destsize, '.');
377 charcopy(dest, &destlen, destsize, src[i++]);
377 charcopy(dest, &destlen, destsize, src[i++]);
378 break;
378 break;
379 case 'h':
379 case 'h':
380 state = H;
380 state = H;
381 memcopy(dest, &destlen, destsize, ".h", 2);
381 memcopy(dest, &destlen, destsize, ".h", 2);
382 i++;
382 i++;
383 break;
383 break;
384 default:
384 default:
385 state = DEFAULT;
385 state = DEFAULT;
386 charcopy(dest, &destlen, destsize, '.');
386 charcopy(dest, &destlen, destsize, '.');
387 break;
387 break;
388 }
388 }
389 break;
389 break;
390 case H:
390 case H:
391 if (src[i] == 'g') {
391 if (src[i] == 'g') {
392 state = HGDI;
392 state = HGDI;
393 charcopy(dest, &destlen, destsize, src[i++]);
393 charcopy(dest, &destlen, destsize, src[i++]);
394 }
394 }
395 else state = DEFAULT;
395 else state = DEFAULT;
396 break;
396 break;
397 case HGDI:
397 case HGDI:
398 if (src[i] == '/') {
398 if (src[i] == '/') {
399 state = START;
399 state = START;
400 if (encodedir)
400 if (encodedir)
401 memcopy(dest, &destlen, destsize, ".hg",
401 memcopy(dest, &destlen, destsize, ".hg",
402 3);
402 3);
403 charcopy(dest, &destlen, destsize, src[i++]);
403 charcopy(dest, &destlen, destsize, src[i++]);
404 }
404 }
405 else state = DEFAULT;
405 else state = DEFAULT;
406 break;
406 break;
407 case SPACE:
407 case SPACE:
408 switch (src[i]) {
408 switch (src[i]) {
409 case '/':
409 case '/':
410 case '\0':
410 case '\0':
411 state = START;
411 state = START;
412 memcopy(dest, &destlen, destsize, "~20", 3);
412 memcopy(dest, &destlen, destsize, "~20", 3);
413 charcopy(dest, &destlen, destsize, src[i++]);
413 charcopy(dest, &destlen, destsize, src[i++]);
414 break;
414 break;
415 default:
415 default:
416 state = DEFAULT;
416 state = DEFAULT;
417 charcopy(dest, &destlen, destsize, ' ');
417 charcopy(dest, &destlen, destsize, ' ');
418 break;
418 break;
419 }
419 }
420 break;
420 break;
421 case DEFAULT:
421 case DEFAULT:
422 while (isset(onebyte, src[i])) {
422 while (isset(onebyte, src[i])) {
423 charcopy(dest, &destlen, destsize, src[i++]);
423 charcopy(dest, &destlen, destsize, src[i++]);
424 if (i == len)
424 if (i == len)
425 goto done;
425 goto done;
426 }
426 }
427 switch (src[i]) {
427 switch (src[i]) {
428 case '.':
428 case '.':
429 state = DOT;
429 state = DOT;
430 i++;
430 i++;
431 break;
431 break;
432 case ' ':
432 case ' ':
433 state = SPACE;
433 state = SPACE;
434 i++;
434 i++;
435 break;
435 break;
436 case '/':
436 case '/':
437 state = START;
437 state = START;
438 charcopy(dest, &destlen, destsize, '/');
438 charcopy(dest, &destlen, destsize, '/');
439 i++;
439 i++;
440 break;
440 break;
441 default:
441 default:
442 if (isset(onebyte, src[i])) {
442 if (isset(onebyte, src[i])) {
443 do {
443 do {
444 charcopy(dest, &destlen,
444 charcopy(dest, &destlen,
445 destsize, src[i++]);
445 destsize, src[i++]);
446 } while (i < len &&
446 } while (i < len &&
447 isset(onebyte, src[i]));
447 isset(onebyte, src[i]));
448 }
448 }
449 else if (isset(twobytes, src[i])) {
449 else if (isset(twobytes, src[i])) {
450 char c = src[i++];
450 char c = src[i++];
451 charcopy(dest, &destlen, destsize, '_');
451 charcopy(dest, &destlen, destsize, '_');
452 charcopy(dest, &destlen, destsize,
452 charcopy(dest, &destlen, destsize,
453 c == '_' ? '_' : c + 32);
453 c == '_' ? '_' : c + 32);
454 }
454 }
455 else
455 else
456 escape3(dest, &destlen, destsize,
456 escape3(dest, &destlen, destsize,
457 src[i++]);
457 src[i++]);
458 break;
458 break;
459 }
459 }
460 break;
460 break;
461 }
461 }
462 }
462 }
463 done:
463 done:
464 return destlen;
464 return destlen;
465 }
465 }
466
466
467 static Py_ssize_t basicencode(char *dest, size_t destsize,
467 static Py_ssize_t basicencode(char *dest, size_t destsize,
468 const char *src, Py_ssize_t len)
468 const char *src, Py_ssize_t len)
469 {
469 {
470 static const uint32_t twobytes[8] = { 0, 0, 0x87fffffe };
470 static const uint32_t twobytes[8] = { 0, 0, 0x87fffffe };
471
471
472 static const uint32_t onebyte[8] = {
472 static const uint32_t onebyte[8] = {
473 1, 0x2bff3bfa, 0x68000001, 0x2fffffff,
473 1, 0x2bff3bfa, 0x68000001, 0x2fffffff,
474 };
474 };
475
475
476 Py_ssize_t destlen = 0;
476 Py_ssize_t destlen = 0;
477
477
478 if (len < 5 || memcmp(src, "data/", 5) != 0) {
479 memcopy(dest, &destlen, destsize, src, len);
480 return destlen;
481 }
482
483 memcopy(dest, &destlen, destsize, "data/", 5);
484
485 return _encode(twobytes, onebyte, dest, destlen, destsize,
478 return _encode(twobytes, onebyte, dest, destlen, destsize,
486 src + 5, len - 5, 1);
479 src, len, 1);
487 }
480 }
488
481
489 static const Py_ssize_t maxstorepathlen = 120;
482 static const Py_ssize_t maxstorepathlen = 120;
490
483
491 /*
484 /*
492 * We currently implement only basic encoding.
485 * We currently implement only basic encoding.
493 *
486 *
494 * If a name is too long to encode due to Windows path name limits,
487 * If a name is too long to encode due to Windows path name limits,
495 * this function returns None.
488 * this function returns None.
496 */
489 */
497 PyObject *pathencode(PyObject *self, PyObject *args)
490 PyObject *pathencode(PyObject *self, PyObject *args)
498 {
491 {
499 Py_ssize_t len, newlen;
492 Py_ssize_t len, newlen;
500 PyObject *pathobj, *newobj;
493 PyObject *pathobj, *newobj;
501 char *path;
494 char *path;
502
495
503 if (!PyArg_ParseTuple(args, "O:pathencode", &pathobj))
496 if (!PyArg_ParseTuple(args, "O:pathencode", &pathobj))
504 return NULL;
497 return NULL;
505
498
506 if (PyString_AsStringAndSize(pathobj, &path, &len) == -1) {
499 if (PyString_AsStringAndSize(pathobj, &path, &len) == -1) {
507 PyErr_SetString(PyExc_TypeError, "expected a string");
500 PyErr_SetString(PyExc_TypeError, "expected a string");
508 return NULL;
501 return NULL;
509 }
502 }
510
503
511 newlen = len ? basicencode(NULL, 0, path, len + 1) : 1;
504 newlen = len ? basicencode(NULL, 0, path, len + 1) : 1;
512
505
513 if (newlen <= maxstorepathlen + 1) {
506 if (newlen <= maxstorepathlen + 1) {
514 if (newlen == len + 1) {
507 if (newlen == len + 1) {
515 Py_INCREF(pathobj);
508 Py_INCREF(pathobj);
516 return pathobj;
509 return pathobj;
517 }
510 }
518
511
519 newobj = PyString_FromStringAndSize(NULL, newlen);
512 newobj = PyString_FromStringAndSize(NULL, newlen);
520
513
521 if (newobj) {
514 if (newobj) {
522 PyString_GET_SIZE(newobj)--;
515 PyString_GET_SIZE(newobj)--;
523 basicencode(PyString_AS_STRING(newobj), newlen, path,
516 basicencode(PyString_AS_STRING(newobj), newlen, path,
524 len + 1);
517 len + 1);
525 }
518 }
526 } else {
519 } else {
527 newobj = Py_None;
520 newobj = Py_None;
528 Py_INCREF(newobj);
521 Py_INCREF(newobj);
529 }
522 }
530
523
531 return newobj;
524 return newobj;
532 }
525 }
General Comments 0
You need to be logged in to leave comments. Login now