##// END OF EJS Templates
store: implement lowerencode in C
Bryan O'Sullivan -
r18430:0459c655 default
parent child Browse files
Show More
@@ -1,1560 +1,1562
1 /*
1 /*
2 parsers.c - efficient content parsing
2 parsers.c - efficient content parsing
3
3
4 Copyright 2008 Matt Mackall <mpm@selenic.com> and others
4 Copyright 2008 Matt Mackall <mpm@selenic.com> and others
5
5
6 This software may be used and distributed according to the terms of
6 This software may be used and distributed according to the terms of
7 the GNU General Public License, incorporated herein by reference.
7 the GNU General Public License, incorporated herein by reference.
8 */
8 */
9
9
10 #include <Python.h>
10 #include <Python.h>
11 #include <ctype.h>
11 #include <ctype.h>
12 #include <stddef.h>
12 #include <stddef.h>
13 #include <string.h>
13 #include <string.h>
14
14
15 #include "util.h"
15 #include "util.h"
16
16
17 static inline int hexdigit(const char *p, Py_ssize_t off)
17 static inline int hexdigit(const char *p, Py_ssize_t off)
18 {
18 {
19 char c = p[off];
19 char c = p[off];
20
20
21 if (c >= '0' && c <= '9')
21 if (c >= '0' && c <= '9')
22 return c - '0';
22 return c - '0';
23 if (c >= 'a' && c <= 'f')
23 if (c >= 'a' && c <= 'f')
24 return c - 'a' + 10;
24 return c - 'a' + 10;
25 if (c >= 'A' && c <= 'F')
25 if (c >= 'A' && c <= 'F')
26 return c - 'A' + 10;
26 return c - 'A' + 10;
27
27
28 PyErr_SetString(PyExc_ValueError, "input contains non-hex character");
28 PyErr_SetString(PyExc_ValueError, "input contains non-hex character");
29 return 0;
29 return 0;
30 }
30 }
31
31
32 /*
32 /*
33 * Turn a hex-encoded string into binary.
33 * Turn a hex-encoded string into binary.
34 */
34 */
35 static PyObject *unhexlify(const char *str, int len)
35 static PyObject *unhexlify(const char *str, int len)
36 {
36 {
37 PyObject *ret;
37 PyObject *ret;
38 char *d;
38 char *d;
39 int i;
39 int i;
40
40
41 ret = PyBytes_FromStringAndSize(NULL, len / 2);
41 ret = PyBytes_FromStringAndSize(NULL, len / 2);
42
42
43 if (!ret)
43 if (!ret)
44 return NULL;
44 return NULL;
45
45
46 d = PyBytes_AsString(ret);
46 d = PyBytes_AsString(ret);
47
47
48 for (i = 0; i < len;) {
48 for (i = 0; i < len;) {
49 int hi = hexdigit(str, i++);
49 int hi = hexdigit(str, i++);
50 int lo = hexdigit(str, i++);
50 int lo = hexdigit(str, i++);
51 *d++ = (hi << 4) | lo;
51 *d++ = (hi << 4) | lo;
52 }
52 }
53
53
54 return ret;
54 return ret;
55 }
55 }
56
56
57 /*
57 /*
58 * This code assumes that a manifest is stitched together with newline
58 * This code assumes that a manifest is stitched together with newline
59 * ('\n') characters.
59 * ('\n') characters.
60 */
60 */
61 static PyObject *parse_manifest(PyObject *self, PyObject *args)
61 static PyObject *parse_manifest(PyObject *self, PyObject *args)
62 {
62 {
63 PyObject *mfdict, *fdict;
63 PyObject *mfdict, *fdict;
64 char *str, *cur, *start, *zero;
64 char *str, *cur, *start, *zero;
65 int len;
65 int len;
66
66
67 if (!PyArg_ParseTuple(args, "O!O!s#:parse_manifest",
67 if (!PyArg_ParseTuple(args, "O!O!s#:parse_manifest",
68 &PyDict_Type, &mfdict,
68 &PyDict_Type, &mfdict,
69 &PyDict_Type, &fdict,
69 &PyDict_Type, &fdict,
70 &str, &len))
70 &str, &len))
71 goto quit;
71 goto quit;
72
72
73 for (start = cur = str, zero = NULL; cur < str + len; cur++) {
73 for (start = cur = str, zero = NULL; cur < str + len; cur++) {
74 PyObject *file = NULL, *node = NULL;
74 PyObject *file = NULL, *node = NULL;
75 PyObject *flags = NULL;
75 PyObject *flags = NULL;
76 ptrdiff_t nlen;
76 ptrdiff_t nlen;
77
77
78 if (!*cur) {
78 if (!*cur) {
79 zero = cur;
79 zero = cur;
80 continue;
80 continue;
81 }
81 }
82 else if (*cur != '\n')
82 else if (*cur != '\n')
83 continue;
83 continue;
84
84
85 if (!zero) {
85 if (!zero) {
86 PyErr_SetString(PyExc_ValueError,
86 PyErr_SetString(PyExc_ValueError,
87 "manifest entry has no separator");
87 "manifest entry has no separator");
88 goto quit;
88 goto quit;
89 }
89 }
90
90
91 file = PyBytes_FromStringAndSize(start, zero - start);
91 file = PyBytes_FromStringAndSize(start, zero - start);
92
92
93 if (!file)
93 if (!file)
94 goto bail;
94 goto bail;
95
95
96 nlen = cur - zero - 1;
96 nlen = cur - zero - 1;
97
97
98 node = unhexlify(zero + 1, nlen > 40 ? 40 : (int)nlen);
98 node = unhexlify(zero + 1, nlen > 40 ? 40 : (int)nlen);
99 if (!node)
99 if (!node)
100 goto bail;
100 goto bail;
101
101
102 if (nlen > 40) {
102 if (nlen > 40) {
103 flags = PyBytes_FromStringAndSize(zero + 41,
103 flags = PyBytes_FromStringAndSize(zero + 41,
104 nlen - 40);
104 nlen - 40);
105 if (!flags)
105 if (!flags)
106 goto bail;
106 goto bail;
107
107
108 if (PyDict_SetItem(fdict, file, flags) == -1)
108 if (PyDict_SetItem(fdict, file, flags) == -1)
109 goto bail;
109 goto bail;
110 }
110 }
111
111
112 if (PyDict_SetItem(mfdict, file, node) == -1)
112 if (PyDict_SetItem(mfdict, file, node) == -1)
113 goto bail;
113 goto bail;
114
114
115 start = cur + 1;
115 start = cur + 1;
116 zero = NULL;
116 zero = NULL;
117
117
118 Py_XDECREF(flags);
118 Py_XDECREF(flags);
119 Py_XDECREF(node);
119 Py_XDECREF(node);
120 Py_XDECREF(file);
120 Py_XDECREF(file);
121 continue;
121 continue;
122 bail:
122 bail:
123 Py_XDECREF(flags);
123 Py_XDECREF(flags);
124 Py_XDECREF(node);
124 Py_XDECREF(node);
125 Py_XDECREF(file);
125 Py_XDECREF(file);
126 goto quit;
126 goto quit;
127 }
127 }
128
128
129 if (len > 0 && *(cur - 1) != '\n') {
129 if (len > 0 && *(cur - 1) != '\n') {
130 PyErr_SetString(PyExc_ValueError,
130 PyErr_SetString(PyExc_ValueError,
131 "manifest contains trailing garbage");
131 "manifest contains trailing garbage");
132 goto quit;
132 goto quit;
133 }
133 }
134
134
135 Py_INCREF(Py_None);
135 Py_INCREF(Py_None);
136 return Py_None;
136 return Py_None;
137 quit:
137 quit:
138 return NULL;
138 return NULL;
139 }
139 }
140
140
141 static PyObject *parse_dirstate(PyObject *self, PyObject *args)
141 static PyObject *parse_dirstate(PyObject *self, PyObject *args)
142 {
142 {
143 PyObject *dmap, *cmap, *parents = NULL, *ret = NULL;
143 PyObject *dmap, *cmap, *parents = NULL, *ret = NULL;
144 PyObject *fname = NULL, *cname = NULL, *entry = NULL;
144 PyObject *fname = NULL, *cname = NULL, *entry = NULL;
145 char *str, *cur, *end, *cpos;
145 char *str, *cur, *end, *cpos;
146 int state, mode, size, mtime;
146 int state, mode, size, mtime;
147 unsigned int flen;
147 unsigned int flen;
148 int len;
148 int len;
149
149
150 if (!PyArg_ParseTuple(args, "O!O!s#:parse_dirstate",
150 if (!PyArg_ParseTuple(args, "O!O!s#:parse_dirstate",
151 &PyDict_Type, &dmap,
151 &PyDict_Type, &dmap,
152 &PyDict_Type, &cmap,
152 &PyDict_Type, &cmap,
153 &str, &len))
153 &str, &len))
154 goto quit;
154 goto quit;
155
155
156 /* read parents */
156 /* read parents */
157 if (len < 40)
157 if (len < 40)
158 goto quit;
158 goto quit;
159
159
160 parents = Py_BuildValue("s#s#", str, 20, str + 20, 20);
160 parents = Py_BuildValue("s#s#", str, 20, str + 20, 20);
161 if (!parents)
161 if (!parents)
162 goto quit;
162 goto quit;
163
163
164 /* read filenames */
164 /* read filenames */
165 cur = str + 40;
165 cur = str + 40;
166 end = str + len;
166 end = str + len;
167
167
168 while (cur < end - 17) {
168 while (cur < end - 17) {
169 /* unpack header */
169 /* unpack header */
170 state = *cur;
170 state = *cur;
171 mode = getbe32(cur + 1);
171 mode = getbe32(cur + 1);
172 size = getbe32(cur + 5);
172 size = getbe32(cur + 5);
173 mtime = getbe32(cur + 9);
173 mtime = getbe32(cur + 9);
174 flen = getbe32(cur + 13);
174 flen = getbe32(cur + 13);
175 cur += 17;
175 cur += 17;
176 if (cur + flen > end || cur + flen < cur) {
176 if (cur + flen > end || cur + flen < cur) {
177 PyErr_SetString(PyExc_ValueError, "overflow in dirstate");
177 PyErr_SetString(PyExc_ValueError, "overflow in dirstate");
178 goto quit;
178 goto quit;
179 }
179 }
180
180
181 entry = Py_BuildValue("ciii", state, mode, size, mtime);
181 entry = Py_BuildValue("ciii", state, mode, size, mtime);
182 if (!entry)
182 if (!entry)
183 goto quit;
183 goto quit;
184 PyObject_GC_UnTrack(entry); /* don't waste time with this */
184 PyObject_GC_UnTrack(entry); /* don't waste time with this */
185
185
186 cpos = memchr(cur, 0, flen);
186 cpos = memchr(cur, 0, flen);
187 if (cpos) {
187 if (cpos) {
188 fname = PyBytes_FromStringAndSize(cur, cpos - cur);
188 fname = PyBytes_FromStringAndSize(cur, cpos - cur);
189 cname = PyBytes_FromStringAndSize(cpos + 1,
189 cname = PyBytes_FromStringAndSize(cpos + 1,
190 flen - (cpos - cur) - 1);
190 flen - (cpos - cur) - 1);
191 if (!fname || !cname ||
191 if (!fname || !cname ||
192 PyDict_SetItem(cmap, fname, cname) == -1 ||
192 PyDict_SetItem(cmap, fname, cname) == -1 ||
193 PyDict_SetItem(dmap, fname, entry) == -1)
193 PyDict_SetItem(dmap, fname, entry) == -1)
194 goto quit;
194 goto quit;
195 Py_DECREF(cname);
195 Py_DECREF(cname);
196 } else {
196 } else {
197 fname = PyBytes_FromStringAndSize(cur, flen);
197 fname = PyBytes_FromStringAndSize(cur, flen);
198 if (!fname ||
198 if (!fname ||
199 PyDict_SetItem(dmap, fname, entry) == -1)
199 PyDict_SetItem(dmap, fname, entry) == -1)
200 goto quit;
200 goto quit;
201 }
201 }
202 cur += flen;
202 cur += flen;
203 Py_DECREF(fname);
203 Py_DECREF(fname);
204 Py_DECREF(entry);
204 Py_DECREF(entry);
205 fname = cname = entry = NULL;
205 fname = cname = entry = NULL;
206 }
206 }
207
207
208 ret = parents;
208 ret = parents;
209 Py_INCREF(ret);
209 Py_INCREF(ret);
210 quit:
210 quit:
211 Py_XDECREF(fname);
211 Py_XDECREF(fname);
212 Py_XDECREF(cname);
212 Py_XDECREF(cname);
213 Py_XDECREF(entry);
213 Py_XDECREF(entry);
214 Py_XDECREF(parents);
214 Py_XDECREF(parents);
215 return ret;
215 return ret;
216 }
216 }
217
217
218 static inline int getintat(PyObject *tuple, int off, uint32_t *v)
218 static inline int getintat(PyObject *tuple, int off, uint32_t *v)
219 {
219 {
220 PyObject *o = PyTuple_GET_ITEM(tuple, off);
220 PyObject *o = PyTuple_GET_ITEM(tuple, off);
221 long val;
221 long val;
222
222
223 if (PyInt_Check(o))
223 if (PyInt_Check(o))
224 val = PyInt_AS_LONG(o);
224 val = PyInt_AS_LONG(o);
225 else if (PyLong_Check(o)) {
225 else if (PyLong_Check(o)) {
226 val = PyLong_AsLong(o);
226 val = PyLong_AsLong(o);
227 if (val == -1 && PyErr_Occurred())
227 if (val == -1 && PyErr_Occurred())
228 return -1;
228 return -1;
229 } else {
229 } else {
230 PyErr_SetString(PyExc_TypeError, "expected an int or long");
230 PyErr_SetString(PyExc_TypeError, "expected an int or long");
231 return -1;
231 return -1;
232 }
232 }
233 if (LONG_MAX > INT_MAX && (val > INT_MAX || val < INT_MIN)) {
233 if (LONG_MAX > INT_MAX && (val > INT_MAX || val < INT_MIN)) {
234 PyErr_SetString(PyExc_OverflowError,
234 PyErr_SetString(PyExc_OverflowError,
235 "Python value to large to convert to uint32_t");
235 "Python value to large to convert to uint32_t");
236 return -1;
236 return -1;
237 }
237 }
238 *v = (uint32_t)val;
238 *v = (uint32_t)val;
239 return 0;
239 return 0;
240 }
240 }
241
241
242 static PyObject *dirstate_unset;
242 static PyObject *dirstate_unset;
243
243
244 /*
244 /*
245 * Efficiently pack a dirstate object into its on-disk format.
245 * Efficiently pack a dirstate object into its on-disk format.
246 */
246 */
247 static PyObject *pack_dirstate(PyObject *self, PyObject *args)
247 static PyObject *pack_dirstate(PyObject *self, PyObject *args)
248 {
248 {
249 PyObject *packobj = NULL;
249 PyObject *packobj = NULL;
250 PyObject *map, *copymap, *pl;
250 PyObject *map, *copymap, *pl;
251 Py_ssize_t nbytes, pos, l;
251 Py_ssize_t nbytes, pos, l;
252 PyObject *k, *v, *pn;
252 PyObject *k, *v, *pn;
253 char *p, *s;
253 char *p, *s;
254 double now;
254 double now;
255
255
256 if (!PyArg_ParseTuple(args, "O!O!Od:pack_dirstate",
256 if (!PyArg_ParseTuple(args, "O!O!Od:pack_dirstate",
257 &PyDict_Type, &map, &PyDict_Type, &copymap,
257 &PyDict_Type, &map, &PyDict_Type, &copymap,
258 &pl, &now))
258 &pl, &now))
259 return NULL;
259 return NULL;
260
260
261 if (!PySequence_Check(pl) || PySequence_Size(pl) != 2) {
261 if (!PySequence_Check(pl) || PySequence_Size(pl) != 2) {
262 PyErr_SetString(PyExc_TypeError, "expected 2-element sequence");
262 PyErr_SetString(PyExc_TypeError, "expected 2-element sequence");
263 return NULL;
263 return NULL;
264 }
264 }
265
265
266 /* Figure out how much we need to allocate. */
266 /* Figure out how much we need to allocate. */
267 for (nbytes = 40, pos = 0; PyDict_Next(map, &pos, &k, &v);) {
267 for (nbytes = 40, pos = 0; PyDict_Next(map, &pos, &k, &v);) {
268 PyObject *c;
268 PyObject *c;
269 if (!PyString_Check(k)) {
269 if (!PyString_Check(k)) {
270 PyErr_SetString(PyExc_TypeError, "expected string key");
270 PyErr_SetString(PyExc_TypeError, "expected string key");
271 goto bail;
271 goto bail;
272 }
272 }
273 nbytes += PyString_GET_SIZE(k) + 17;
273 nbytes += PyString_GET_SIZE(k) + 17;
274 c = PyDict_GetItem(copymap, k);
274 c = PyDict_GetItem(copymap, k);
275 if (c) {
275 if (c) {
276 if (!PyString_Check(c)) {
276 if (!PyString_Check(c)) {
277 PyErr_SetString(PyExc_TypeError,
277 PyErr_SetString(PyExc_TypeError,
278 "expected string key");
278 "expected string key");
279 goto bail;
279 goto bail;
280 }
280 }
281 nbytes += PyString_GET_SIZE(c) + 1;
281 nbytes += PyString_GET_SIZE(c) + 1;
282 }
282 }
283 }
283 }
284
284
285 packobj = PyString_FromStringAndSize(NULL, nbytes);
285 packobj = PyString_FromStringAndSize(NULL, nbytes);
286 if (packobj == NULL)
286 if (packobj == NULL)
287 goto bail;
287 goto bail;
288
288
289 p = PyString_AS_STRING(packobj);
289 p = PyString_AS_STRING(packobj);
290
290
291 pn = PySequence_ITEM(pl, 0);
291 pn = PySequence_ITEM(pl, 0);
292 if (PyString_AsStringAndSize(pn, &s, &l) == -1 || l != 20) {
292 if (PyString_AsStringAndSize(pn, &s, &l) == -1 || l != 20) {
293 PyErr_SetString(PyExc_TypeError, "expected a 20-byte hash");
293 PyErr_SetString(PyExc_TypeError, "expected a 20-byte hash");
294 goto bail;
294 goto bail;
295 }
295 }
296 memcpy(p, s, l);
296 memcpy(p, s, l);
297 p += 20;
297 p += 20;
298 pn = PySequence_ITEM(pl, 1);
298 pn = PySequence_ITEM(pl, 1);
299 if (PyString_AsStringAndSize(pn, &s, &l) == -1 || l != 20) {
299 if (PyString_AsStringAndSize(pn, &s, &l) == -1 || l != 20) {
300 PyErr_SetString(PyExc_TypeError, "expected a 20-byte hash");
300 PyErr_SetString(PyExc_TypeError, "expected a 20-byte hash");
301 goto bail;
301 goto bail;
302 }
302 }
303 memcpy(p, s, l);
303 memcpy(p, s, l);
304 p += 20;
304 p += 20;
305
305
306 for (pos = 0; PyDict_Next(map, &pos, &k, &v); ) {
306 for (pos = 0; PyDict_Next(map, &pos, &k, &v); ) {
307 uint32_t mode, size, mtime;
307 uint32_t mode, size, mtime;
308 Py_ssize_t len, l;
308 Py_ssize_t len, l;
309 PyObject *o;
309 PyObject *o;
310 char *s, *t;
310 char *s, *t;
311
311
312 if (!PyTuple_Check(v) || PyTuple_GET_SIZE(v) != 4) {
312 if (!PyTuple_Check(v) || PyTuple_GET_SIZE(v) != 4) {
313 PyErr_SetString(PyExc_TypeError, "expected a 4-tuple");
313 PyErr_SetString(PyExc_TypeError, "expected a 4-tuple");
314 goto bail;
314 goto bail;
315 }
315 }
316 o = PyTuple_GET_ITEM(v, 0);
316 o = PyTuple_GET_ITEM(v, 0);
317 if (PyString_AsStringAndSize(o, &s, &l) == -1 || l != 1) {
317 if (PyString_AsStringAndSize(o, &s, &l) == -1 || l != 1) {
318 PyErr_SetString(PyExc_TypeError, "expected one byte");
318 PyErr_SetString(PyExc_TypeError, "expected one byte");
319 goto bail;
319 goto bail;
320 }
320 }
321 *p++ = *s;
321 *p++ = *s;
322 if (getintat(v, 1, &mode) == -1)
322 if (getintat(v, 1, &mode) == -1)
323 goto bail;
323 goto bail;
324 if (getintat(v, 2, &size) == -1)
324 if (getintat(v, 2, &size) == -1)
325 goto bail;
325 goto bail;
326 if (getintat(v, 3, &mtime) == -1)
326 if (getintat(v, 3, &mtime) == -1)
327 goto bail;
327 goto bail;
328 if (*s == 'n' && mtime == (uint32_t)now) {
328 if (*s == 'n' && mtime == (uint32_t)now) {
329 /* See dirstate.py:write for why we do this. */
329 /* See dirstate.py:write for why we do this. */
330 if (PyDict_SetItem(map, k, dirstate_unset) == -1)
330 if (PyDict_SetItem(map, k, dirstate_unset) == -1)
331 goto bail;
331 goto bail;
332 mode = 0, size = -1, mtime = -1;
332 mode = 0, size = -1, mtime = -1;
333 }
333 }
334 putbe32(mode, p);
334 putbe32(mode, p);
335 putbe32(size, p + 4);
335 putbe32(size, p + 4);
336 putbe32(mtime, p + 8);
336 putbe32(mtime, p + 8);
337 t = p + 12;
337 t = p + 12;
338 p += 16;
338 p += 16;
339 len = PyString_GET_SIZE(k);
339 len = PyString_GET_SIZE(k);
340 memcpy(p, PyString_AS_STRING(k), len);
340 memcpy(p, PyString_AS_STRING(k), len);
341 p += len;
341 p += len;
342 o = PyDict_GetItem(copymap, k);
342 o = PyDict_GetItem(copymap, k);
343 if (o) {
343 if (o) {
344 *p++ = '\0';
344 *p++ = '\0';
345 l = PyString_GET_SIZE(o);
345 l = PyString_GET_SIZE(o);
346 memcpy(p, PyString_AS_STRING(o), l);
346 memcpy(p, PyString_AS_STRING(o), l);
347 p += l;
347 p += l;
348 len += l + 1;
348 len += l + 1;
349 }
349 }
350 putbe32((uint32_t)len, t);
350 putbe32((uint32_t)len, t);
351 }
351 }
352
352
353 pos = p - PyString_AS_STRING(packobj);
353 pos = p - PyString_AS_STRING(packobj);
354 if (pos != nbytes) {
354 if (pos != nbytes) {
355 PyErr_Format(PyExc_SystemError, "bad dirstate size: %ld != %ld",
355 PyErr_Format(PyExc_SystemError, "bad dirstate size: %ld != %ld",
356 (long)pos, (long)nbytes);
356 (long)pos, (long)nbytes);
357 goto bail;
357 goto bail;
358 }
358 }
359
359
360 return packobj;
360 return packobj;
361 bail:
361 bail:
362 Py_XDECREF(packobj);
362 Py_XDECREF(packobj);
363 return NULL;
363 return NULL;
364 }
364 }
365
365
366 /*
366 /*
367 * A base-16 trie for fast node->rev mapping.
367 * A base-16 trie for fast node->rev mapping.
368 *
368 *
369 * Positive value is index of the next node in the trie
369 * Positive value is index of the next node in the trie
370 * Negative value is a leaf: -(rev + 1)
370 * Negative value is a leaf: -(rev + 1)
371 * Zero is empty
371 * Zero is empty
372 */
372 */
373 typedef struct {
373 typedef struct {
374 int children[16];
374 int children[16];
375 } nodetree;
375 } nodetree;
376
376
377 /*
377 /*
378 * This class has two behaviours.
378 * This class has two behaviours.
379 *
379 *
380 * When used in a list-like way (with integer keys), we decode an
380 * When used in a list-like way (with integer keys), we decode an
381 * entry in a RevlogNG index file on demand. Our last entry is a
381 * entry in a RevlogNG index file on demand. Our last entry is a
382 * sentinel, always a nullid. We have limited support for
382 * sentinel, always a nullid. We have limited support for
383 * integer-keyed insert and delete, only at elements right before the
383 * integer-keyed insert and delete, only at elements right before the
384 * sentinel.
384 * sentinel.
385 *
385 *
386 * With string keys, we lazily perform a reverse mapping from node to
386 * With string keys, we lazily perform a reverse mapping from node to
387 * rev, using a base-16 trie.
387 * rev, using a base-16 trie.
388 */
388 */
389 typedef struct {
389 typedef struct {
390 PyObject_HEAD
390 PyObject_HEAD
391 /* Type-specific fields go here. */
391 /* Type-specific fields go here. */
392 PyObject *data; /* raw bytes of index */
392 PyObject *data; /* raw bytes of index */
393 PyObject **cache; /* cached tuples */
393 PyObject **cache; /* cached tuples */
394 const char **offsets; /* populated on demand */
394 const char **offsets; /* populated on demand */
395 Py_ssize_t raw_length; /* original number of elements */
395 Py_ssize_t raw_length; /* original number of elements */
396 Py_ssize_t length; /* current number of elements */
396 Py_ssize_t length; /* current number of elements */
397 PyObject *added; /* populated on demand */
397 PyObject *added; /* populated on demand */
398 PyObject *headrevs; /* cache, invalidated on changes */
398 PyObject *headrevs; /* cache, invalidated on changes */
399 nodetree *nt; /* base-16 trie */
399 nodetree *nt; /* base-16 trie */
400 int ntlength; /* # nodes in use */
400 int ntlength; /* # nodes in use */
401 int ntcapacity; /* # nodes allocated */
401 int ntcapacity; /* # nodes allocated */
402 int ntdepth; /* maximum depth of tree */
402 int ntdepth; /* maximum depth of tree */
403 int ntsplits; /* # splits performed */
403 int ntsplits; /* # splits performed */
404 int ntrev; /* last rev scanned */
404 int ntrev; /* last rev scanned */
405 int ntlookups; /* # lookups */
405 int ntlookups; /* # lookups */
406 int ntmisses; /* # lookups that miss the cache */
406 int ntmisses; /* # lookups that miss the cache */
407 int inlined;
407 int inlined;
408 } indexObject;
408 } indexObject;
409
409
410 static Py_ssize_t index_length(const indexObject *self)
410 static Py_ssize_t index_length(const indexObject *self)
411 {
411 {
412 if (self->added == NULL)
412 if (self->added == NULL)
413 return self->length;
413 return self->length;
414 return self->length + PyList_GET_SIZE(self->added);
414 return self->length + PyList_GET_SIZE(self->added);
415 }
415 }
416
416
417 static PyObject *nullentry;
417 static PyObject *nullentry;
418 static const char nullid[20];
418 static const char nullid[20];
419
419
420 static long inline_scan(indexObject *self, const char **offsets);
420 static long inline_scan(indexObject *self, const char **offsets);
421
421
422 #if LONG_MAX == 0x7fffffffL
422 #if LONG_MAX == 0x7fffffffL
423 static char *tuple_format = "Kiiiiiis#";
423 static char *tuple_format = "Kiiiiiis#";
424 #else
424 #else
425 static char *tuple_format = "kiiiiiis#";
425 static char *tuple_format = "kiiiiiis#";
426 #endif
426 #endif
427
427
428 /* A RevlogNG v1 index entry is 64 bytes long. */
428 /* A RevlogNG v1 index entry is 64 bytes long. */
429 static const long v1_hdrsize = 64;
429 static const long v1_hdrsize = 64;
430
430
431 /*
431 /*
432 * Return a pointer to the beginning of a RevlogNG record.
432 * Return a pointer to the beginning of a RevlogNG record.
433 */
433 */
434 static const char *index_deref(indexObject *self, Py_ssize_t pos)
434 static const char *index_deref(indexObject *self, Py_ssize_t pos)
435 {
435 {
436 if (self->inlined && pos > 0) {
436 if (self->inlined && pos > 0) {
437 if (self->offsets == NULL) {
437 if (self->offsets == NULL) {
438 self->offsets = malloc(self->raw_length *
438 self->offsets = malloc(self->raw_length *
439 sizeof(*self->offsets));
439 sizeof(*self->offsets));
440 if (self->offsets == NULL)
440 if (self->offsets == NULL)
441 return (const char *)PyErr_NoMemory();
441 return (const char *)PyErr_NoMemory();
442 inline_scan(self, self->offsets);
442 inline_scan(self, self->offsets);
443 }
443 }
444 return self->offsets[pos];
444 return self->offsets[pos];
445 }
445 }
446
446
447 return PyString_AS_STRING(self->data) + pos * v1_hdrsize;
447 return PyString_AS_STRING(self->data) + pos * v1_hdrsize;
448 }
448 }
449
449
450 /*
450 /*
451 * RevlogNG format (all in big endian, data may be inlined):
451 * RevlogNG format (all in big endian, data may be inlined):
452 * 6 bytes: offset
452 * 6 bytes: offset
453 * 2 bytes: flags
453 * 2 bytes: flags
454 * 4 bytes: compressed length
454 * 4 bytes: compressed length
455 * 4 bytes: uncompressed length
455 * 4 bytes: uncompressed length
456 * 4 bytes: base revision
456 * 4 bytes: base revision
457 * 4 bytes: link revision
457 * 4 bytes: link revision
458 * 4 bytes: parent 1 revision
458 * 4 bytes: parent 1 revision
459 * 4 bytes: parent 2 revision
459 * 4 bytes: parent 2 revision
460 * 32 bytes: nodeid (only 20 bytes used)
460 * 32 bytes: nodeid (only 20 bytes used)
461 */
461 */
462 static PyObject *index_get(indexObject *self, Py_ssize_t pos)
462 static PyObject *index_get(indexObject *self, Py_ssize_t pos)
463 {
463 {
464 uint64_t offset_flags;
464 uint64_t offset_flags;
465 int comp_len, uncomp_len, base_rev, link_rev, parent_1, parent_2;
465 int comp_len, uncomp_len, base_rev, link_rev, parent_1, parent_2;
466 const char *c_node_id;
466 const char *c_node_id;
467 const char *data;
467 const char *data;
468 Py_ssize_t length = index_length(self);
468 Py_ssize_t length = index_length(self);
469 PyObject *entry;
469 PyObject *entry;
470
470
471 if (pos < 0)
471 if (pos < 0)
472 pos += length;
472 pos += length;
473
473
474 if (pos < 0 || pos >= length) {
474 if (pos < 0 || pos >= length) {
475 PyErr_SetString(PyExc_IndexError, "revlog index out of range");
475 PyErr_SetString(PyExc_IndexError, "revlog index out of range");
476 return NULL;
476 return NULL;
477 }
477 }
478
478
479 if (pos == length - 1) {
479 if (pos == length - 1) {
480 Py_INCREF(nullentry);
480 Py_INCREF(nullentry);
481 return nullentry;
481 return nullentry;
482 }
482 }
483
483
484 if (pos >= self->length - 1) {
484 if (pos >= self->length - 1) {
485 PyObject *obj;
485 PyObject *obj;
486 obj = PyList_GET_ITEM(self->added, pos - self->length + 1);
486 obj = PyList_GET_ITEM(self->added, pos - self->length + 1);
487 Py_INCREF(obj);
487 Py_INCREF(obj);
488 return obj;
488 return obj;
489 }
489 }
490
490
491 if (self->cache) {
491 if (self->cache) {
492 if (self->cache[pos]) {
492 if (self->cache[pos]) {
493 Py_INCREF(self->cache[pos]);
493 Py_INCREF(self->cache[pos]);
494 return self->cache[pos];
494 return self->cache[pos];
495 }
495 }
496 } else {
496 } else {
497 self->cache = calloc(self->raw_length, sizeof(PyObject *));
497 self->cache = calloc(self->raw_length, sizeof(PyObject *));
498 if (self->cache == NULL)
498 if (self->cache == NULL)
499 return PyErr_NoMemory();
499 return PyErr_NoMemory();
500 }
500 }
501
501
502 data = index_deref(self, pos);
502 data = index_deref(self, pos);
503 if (data == NULL)
503 if (data == NULL)
504 return NULL;
504 return NULL;
505
505
506 offset_flags = getbe32(data + 4);
506 offset_flags = getbe32(data + 4);
507 if (pos == 0) /* mask out version number for the first entry */
507 if (pos == 0) /* mask out version number for the first entry */
508 offset_flags &= 0xFFFF;
508 offset_flags &= 0xFFFF;
509 else {
509 else {
510 uint32_t offset_high = getbe32(data);
510 uint32_t offset_high = getbe32(data);
511 offset_flags |= ((uint64_t)offset_high) << 32;
511 offset_flags |= ((uint64_t)offset_high) << 32;
512 }
512 }
513
513
514 comp_len = getbe32(data + 8);
514 comp_len = getbe32(data + 8);
515 uncomp_len = getbe32(data + 12);
515 uncomp_len = getbe32(data + 12);
516 base_rev = getbe32(data + 16);
516 base_rev = getbe32(data + 16);
517 link_rev = getbe32(data + 20);
517 link_rev = getbe32(data + 20);
518 parent_1 = getbe32(data + 24);
518 parent_1 = getbe32(data + 24);
519 parent_2 = getbe32(data + 28);
519 parent_2 = getbe32(data + 28);
520 c_node_id = data + 32;
520 c_node_id = data + 32;
521
521
522 entry = Py_BuildValue(tuple_format, offset_flags, comp_len,
522 entry = Py_BuildValue(tuple_format, offset_flags, comp_len,
523 uncomp_len, base_rev, link_rev,
523 uncomp_len, base_rev, link_rev,
524 parent_1, parent_2, c_node_id, 20);
524 parent_1, parent_2, c_node_id, 20);
525
525
526 if (entry)
526 if (entry)
527 PyObject_GC_UnTrack(entry);
527 PyObject_GC_UnTrack(entry);
528
528
529 self->cache[pos] = entry;
529 self->cache[pos] = entry;
530 Py_INCREF(entry);
530 Py_INCREF(entry);
531
531
532 return entry;
532 return entry;
533 }
533 }
534
534
535 /*
535 /*
536 * Return the 20-byte SHA of the node corresponding to the given rev.
536 * Return the 20-byte SHA of the node corresponding to the given rev.
537 */
537 */
538 static const char *index_node(indexObject *self, Py_ssize_t pos)
538 static const char *index_node(indexObject *self, Py_ssize_t pos)
539 {
539 {
540 Py_ssize_t length = index_length(self);
540 Py_ssize_t length = index_length(self);
541 const char *data;
541 const char *data;
542
542
543 if (pos == length - 1 || pos == INT_MAX)
543 if (pos == length - 1 || pos == INT_MAX)
544 return nullid;
544 return nullid;
545
545
546 if (pos >= length)
546 if (pos >= length)
547 return NULL;
547 return NULL;
548
548
549 if (pos >= self->length - 1) {
549 if (pos >= self->length - 1) {
550 PyObject *tuple, *str;
550 PyObject *tuple, *str;
551 tuple = PyList_GET_ITEM(self->added, pos - self->length + 1);
551 tuple = PyList_GET_ITEM(self->added, pos - self->length + 1);
552 str = PyTuple_GetItem(tuple, 7);
552 str = PyTuple_GetItem(tuple, 7);
553 return str ? PyString_AS_STRING(str) : NULL;
553 return str ? PyString_AS_STRING(str) : NULL;
554 }
554 }
555
555
556 data = index_deref(self, pos);
556 data = index_deref(self, pos);
557 return data ? data + 32 : NULL;
557 return data ? data + 32 : NULL;
558 }
558 }
559
559
560 static int nt_insert(indexObject *self, const char *node, int rev);
560 static int nt_insert(indexObject *self, const char *node, int rev);
561
561
562 static int node_check(PyObject *obj, char **node, Py_ssize_t *nodelen)
562 static int node_check(PyObject *obj, char **node, Py_ssize_t *nodelen)
563 {
563 {
564 if (PyString_AsStringAndSize(obj, node, nodelen) == -1)
564 if (PyString_AsStringAndSize(obj, node, nodelen) == -1)
565 return -1;
565 return -1;
566 if (*nodelen == 20)
566 if (*nodelen == 20)
567 return 0;
567 return 0;
568 PyErr_SetString(PyExc_ValueError, "20-byte hash required");
568 PyErr_SetString(PyExc_ValueError, "20-byte hash required");
569 return -1;
569 return -1;
570 }
570 }
571
571
572 static PyObject *index_insert(indexObject *self, PyObject *args)
572 static PyObject *index_insert(indexObject *self, PyObject *args)
573 {
573 {
574 PyObject *obj;
574 PyObject *obj;
575 char *node;
575 char *node;
576 long offset;
576 long offset;
577 Py_ssize_t len, nodelen;
577 Py_ssize_t len, nodelen;
578
578
579 if (!PyArg_ParseTuple(args, "lO", &offset, &obj))
579 if (!PyArg_ParseTuple(args, "lO", &offset, &obj))
580 return NULL;
580 return NULL;
581
581
582 if (!PyTuple_Check(obj) || PyTuple_GET_SIZE(obj) != 8) {
582 if (!PyTuple_Check(obj) || PyTuple_GET_SIZE(obj) != 8) {
583 PyErr_SetString(PyExc_TypeError, "8-tuple required");
583 PyErr_SetString(PyExc_TypeError, "8-tuple required");
584 return NULL;
584 return NULL;
585 }
585 }
586
586
587 if (node_check(PyTuple_GET_ITEM(obj, 7), &node, &nodelen) == -1)
587 if (node_check(PyTuple_GET_ITEM(obj, 7), &node, &nodelen) == -1)
588 return NULL;
588 return NULL;
589
589
590 len = index_length(self);
590 len = index_length(self);
591
591
592 if (offset < 0)
592 if (offset < 0)
593 offset += len;
593 offset += len;
594
594
595 if (offset != len - 1) {
595 if (offset != len - 1) {
596 PyErr_SetString(PyExc_IndexError,
596 PyErr_SetString(PyExc_IndexError,
597 "insert only supported at index -1");
597 "insert only supported at index -1");
598 return NULL;
598 return NULL;
599 }
599 }
600
600
601 if (offset > INT_MAX) {
601 if (offset > INT_MAX) {
602 PyErr_SetString(PyExc_ValueError,
602 PyErr_SetString(PyExc_ValueError,
603 "currently only 2**31 revs supported");
603 "currently only 2**31 revs supported");
604 return NULL;
604 return NULL;
605 }
605 }
606
606
607 if (self->added == NULL) {
607 if (self->added == NULL) {
608 self->added = PyList_New(0);
608 self->added = PyList_New(0);
609 if (self->added == NULL)
609 if (self->added == NULL)
610 return NULL;
610 return NULL;
611 }
611 }
612
612
613 if (PyList_Append(self->added, obj) == -1)
613 if (PyList_Append(self->added, obj) == -1)
614 return NULL;
614 return NULL;
615
615
616 if (self->nt)
616 if (self->nt)
617 nt_insert(self, node, (int)offset);
617 nt_insert(self, node, (int)offset);
618
618
619 Py_CLEAR(self->headrevs);
619 Py_CLEAR(self->headrevs);
620 Py_RETURN_NONE;
620 Py_RETURN_NONE;
621 }
621 }
622
622
623 static void _index_clearcaches(indexObject *self)
623 static void _index_clearcaches(indexObject *self)
624 {
624 {
625 if (self->cache) {
625 if (self->cache) {
626 Py_ssize_t i;
626 Py_ssize_t i;
627
627
628 for (i = 0; i < self->raw_length; i++)
628 for (i = 0; i < self->raw_length; i++)
629 Py_CLEAR(self->cache[i]);
629 Py_CLEAR(self->cache[i]);
630 free(self->cache);
630 free(self->cache);
631 self->cache = NULL;
631 self->cache = NULL;
632 }
632 }
633 if (self->offsets) {
633 if (self->offsets) {
634 free(self->offsets);
634 free(self->offsets);
635 self->offsets = NULL;
635 self->offsets = NULL;
636 }
636 }
637 if (self->nt) {
637 if (self->nt) {
638 free(self->nt);
638 free(self->nt);
639 self->nt = NULL;
639 self->nt = NULL;
640 }
640 }
641 Py_CLEAR(self->headrevs);
641 Py_CLEAR(self->headrevs);
642 }
642 }
643
643
644 static PyObject *index_clearcaches(indexObject *self)
644 static PyObject *index_clearcaches(indexObject *self)
645 {
645 {
646 _index_clearcaches(self);
646 _index_clearcaches(self);
647 self->ntlength = self->ntcapacity = 0;
647 self->ntlength = self->ntcapacity = 0;
648 self->ntdepth = self->ntsplits = 0;
648 self->ntdepth = self->ntsplits = 0;
649 self->ntrev = -1;
649 self->ntrev = -1;
650 self->ntlookups = self->ntmisses = 0;
650 self->ntlookups = self->ntmisses = 0;
651 Py_RETURN_NONE;
651 Py_RETURN_NONE;
652 }
652 }
653
653
654 static PyObject *index_stats(indexObject *self)
654 static PyObject *index_stats(indexObject *self)
655 {
655 {
656 PyObject *obj = PyDict_New();
656 PyObject *obj = PyDict_New();
657
657
658 if (obj == NULL)
658 if (obj == NULL)
659 return NULL;
659 return NULL;
660
660
661 #define istat(__n, __d) \
661 #define istat(__n, __d) \
662 if (PyDict_SetItemString(obj, __d, PyInt_FromSsize_t(self->__n)) == -1) \
662 if (PyDict_SetItemString(obj, __d, PyInt_FromSsize_t(self->__n)) == -1) \
663 goto bail;
663 goto bail;
664
664
665 if (self->added) {
665 if (self->added) {
666 Py_ssize_t len = PyList_GET_SIZE(self->added);
666 Py_ssize_t len = PyList_GET_SIZE(self->added);
667 if (PyDict_SetItemString(obj, "index entries added",
667 if (PyDict_SetItemString(obj, "index entries added",
668 PyInt_FromSsize_t(len)) == -1)
668 PyInt_FromSsize_t(len)) == -1)
669 goto bail;
669 goto bail;
670 }
670 }
671
671
672 if (self->raw_length != self->length - 1)
672 if (self->raw_length != self->length - 1)
673 istat(raw_length, "revs on disk");
673 istat(raw_length, "revs on disk");
674 istat(length, "revs in memory");
674 istat(length, "revs in memory");
675 istat(ntcapacity, "node trie capacity");
675 istat(ntcapacity, "node trie capacity");
676 istat(ntdepth, "node trie depth");
676 istat(ntdepth, "node trie depth");
677 istat(ntlength, "node trie count");
677 istat(ntlength, "node trie count");
678 istat(ntlookups, "node trie lookups");
678 istat(ntlookups, "node trie lookups");
679 istat(ntmisses, "node trie misses");
679 istat(ntmisses, "node trie misses");
680 istat(ntrev, "node trie last rev scanned");
680 istat(ntrev, "node trie last rev scanned");
681 istat(ntsplits, "node trie splits");
681 istat(ntsplits, "node trie splits");
682
682
683 #undef istat
683 #undef istat
684
684
685 return obj;
685 return obj;
686
686
687 bail:
687 bail:
688 Py_XDECREF(obj);
688 Py_XDECREF(obj);
689 return NULL;
689 return NULL;
690 }
690 }
691
691
692 /*
692 /*
693 * When we cache a list, we want to be sure the caller can't mutate
693 * When we cache a list, we want to be sure the caller can't mutate
694 * the cached copy.
694 * the cached copy.
695 */
695 */
696 static PyObject *list_copy(PyObject *list)
696 static PyObject *list_copy(PyObject *list)
697 {
697 {
698 Py_ssize_t len = PyList_GET_SIZE(list);
698 Py_ssize_t len = PyList_GET_SIZE(list);
699 PyObject *newlist = PyList_New(len);
699 PyObject *newlist = PyList_New(len);
700 Py_ssize_t i;
700 Py_ssize_t i;
701
701
702 if (newlist == NULL)
702 if (newlist == NULL)
703 return NULL;
703 return NULL;
704
704
705 for (i = 0; i < len; i++) {
705 for (i = 0; i < len; i++) {
706 PyObject *obj = PyList_GET_ITEM(list, i);
706 PyObject *obj = PyList_GET_ITEM(list, i);
707 Py_INCREF(obj);
707 Py_INCREF(obj);
708 PyList_SET_ITEM(newlist, i, obj);
708 PyList_SET_ITEM(newlist, i, obj);
709 }
709 }
710
710
711 return newlist;
711 return newlist;
712 }
712 }
713
713
714 static PyObject *index_headrevs(indexObject *self)
714 static PyObject *index_headrevs(indexObject *self)
715 {
715 {
716 Py_ssize_t i, len, addlen;
716 Py_ssize_t i, len, addlen;
717 char *nothead = NULL;
717 char *nothead = NULL;
718 PyObject *heads;
718 PyObject *heads;
719
719
720 if (self->headrevs)
720 if (self->headrevs)
721 return list_copy(self->headrevs);
721 return list_copy(self->headrevs);
722
722
723 len = index_length(self) - 1;
723 len = index_length(self) - 1;
724 heads = PyList_New(0);
724 heads = PyList_New(0);
725 if (heads == NULL)
725 if (heads == NULL)
726 goto bail;
726 goto bail;
727 if (len == 0) {
727 if (len == 0) {
728 PyObject *nullid = PyInt_FromLong(-1);
728 PyObject *nullid = PyInt_FromLong(-1);
729 if (nullid == NULL || PyList_Append(heads, nullid) == -1) {
729 if (nullid == NULL || PyList_Append(heads, nullid) == -1) {
730 Py_XDECREF(nullid);
730 Py_XDECREF(nullid);
731 goto bail;
731 goto bail;
732 }
732 }
733 goto done;
733 goto done;
734 }
734 }
735
735
736 nothead = calloc(len, 1);
736 nothead = calloc(len, 1);
737 if (nothead == NULL)
737 if (nothead == NULL)
738 goto bail;
738 goto bail;
739
739
740 for (i = 0; i < self->raw_length; i++) {
740 for (i = 0; i < self->raw_length; i++) {
741 const char *data = index_deref(self, i);
741 const char *data = index_deref(self, i);
742 int parent_1 = getbe32(data + 24);
742 int parent_1 = getbe32(data + 24);
743 int parent_2 = getbe32(data + 28);
743 int parent_2 = getbe32(data + 28);
744 if (parent_1 >= 0)
744 if (parent_1 >= 0)
745 nothead[parent_1] = 1;
745 nothead[parent_1] = 1;
746 if (parent_2 >= 0)
746 if (parent_2 >= 0)
747 nothead[parent_2] = 1;
747 nothead[parent_2] = 1;
748 }
748 }
749
749
750 addlen = self->added ? PyList_GET_SIZE(self->added) : 0;
750 addlen = self->added ? PyList_GET_SIZE(self->added) : 0;
751
751
752 for (i = 0; i < addlen; i++) {
752 for (i = 0; i < addlen; i++) {
753 PyObject *rev = PyList_GET_ITEM(self->added, i);
753 PyObject *rev = PyList_GET_ITEM(self->added, i);
754 PyObject *p1 = PyTuple_GET_ITEM(rev, 5);
754 PyObject *p1 = PyTuple_GET_ITEM(rev, 5);
755 PyObject *p2 = PyTuple_GET_ITEM(rev, 6);
755 PyObject *p2 = PyTuple_GET_ITEM(rev, 6);
756 long parent_1, parent_2;
756 long parent_1, parent_2;
757
757
758 if (!PyInt_Check(p1) || !PyInt_Check(p2)) {
758 if (!PyInt_Check(p1) || !PyInt_Check(p2)) {
759 PyErr_SetString(PyExc_TypeError,
759 PyErr_SetString(PyExc_TypeError,
760 "revlog parents are invalid");
760 "revlog parents are invalid");
761 goto bail;
761 goto bail;
762 }
762 }
763 parent_1 = PyInt_AS_LONG(p1);
763 parent_1 = PyInt_AS_LONG(p1);
764 parent_2 = PyInt_AS_LONG(p2);
764 parent_2 = PyInt_AS_LONG(p2);
765 if (parent_1 >= 0)
765 if (parent_1 >= 0)
766 nothead[parent_1] = 1;
766 nothead[parent_1] = 1;
767 if (parent_2 >= 0)
767 if (parent_2 >= 0)
768 nothead[parent_2] = 1;
768 nothead[parent_2] = 1;
769 }
769 }
770
770
771 for (i = 0; i < len; i++) {
771 for (i = 0; i < len; i++) {
772 PyObject *head;
772 PyObject *head;
773
773
774 if (nothead[i])
774 if (nothead[i])
775 continue;
775 continue;
776 head = PyInt_FromLong(i);
776 head = PyInt_FromLong(i);
777 if (head == NULL || PyList_Append(heads, head) == -1) {
777 if (head == NULL || PyList_Append(heads, head) == -1) {
778 Py_XDECREF(head);
778 Py_XDECREF(head);
779 goto bail;
779 goto bail;
780 }
780 }
781 }
781 }
782
782
783 done:
783 done:
784 self->headrevs = heads;
784 self->headrevs = heads;
785 free(nothead);
785 free(nothead);
786 return list_copy(self->headrevs);
786 return list_copy(self->headrevs);
787 bail:
787 bail:
788 Py_XDECREF(heads);
788 Py_XDECREF(heads);
789 free(nothead);
789 free(nothead);
790 return NULL;
790 return NULL;
791 }
791 }
792
792
793 static inline int nt_level(const char *node, Py_ssize_t level)
793 static inline int nt_level(const char *node, Py_ssize_t level)
794 {
794 {
795 int v = node[level>>1];
795 int v = node[level>>1];
796 if (!(level & 1))
796 if (!(level & 1))
797 v >>= 4;
797 v >>= 4;
798 return v & 0xf;
798 return v & 0xf;
799 }
799 }
800
800
801 /*
801 /*
802 * Return values:
802 * Return values:
803 *
803 *
804 * -4: match is ambiguous (multiple candidates)
804 * -4: match is ambiguous (multiple candidates)
805 * -2: not found
805 * -2: not found
806 * rest: valid rev
806 * rest: valid rev
807 */
807 */
808 static int nt_find(indexObject *self, const char *node, Py_ssize_t nodelen,
808 static int nt_find(indexObject *self, const char *node, Py_ssize_t nodelen,
809 int hex)
809 int hex)
810 {
810 {
811 int (*getnybble)(const char *, Py_ssize_t) = hex ? hexdigit : nt_level;
811 int (*getnybble)(const char *, Py_ssize_t) = hex ? hexdigit : nt_level;
812 int level, maxlevel, off;
812 int level, maxlevel, off;
813
813
814 if (nodelen == 20 && node[0] == '\0' && memcmp(node, nullid, 20) == 0)
814 if (nodelen == 20 && node[0] == '\0' && memcmp(node, nullid, 20) == 0)
815 return -1;
815 return -1;
816
816
817 if (self->nt == NULL)
817 if (self->nt == NULL)
818 return -2;
818 return -2;
819
819
820 if (hex)
820 if (hex)
821 maxlevel = nodelen > 40 ? 40 : (int)nodelen;
821 maxlevel = nodelen > 40 ? 40 : (int)nodelen;
822 else
822 else
823 maxlevel = nodelen > 20 ? 40 : ((int)nodelen * 2);
823 maxlevel = nodelen > 20 ? 40 : ((int)nodelen * 2);
824
824
825 for (level = off = 0; level < maxlevel; level++) {
825 for (level = off = 0; level < maxlevel; level++) {
826 int k = getnybble(node, level);
826 int k = getnybble(node, level);
827 nodetree *n = &self->nt[off];
827 nodetree *n = &self->nt[off];
828 int v = n->children[k];
828 int v = n->children[k];
829
829
830 if (v < 0) {
830 if (v < 0) {
831 const char *n;
831 const char *n;
832 Py_ssize_t i;
832 Py_ssize_t i;
833
833
834 v = -v - 1;
834 v = -v - 1;
835 n = index_node(self, v);
835 n = index_node(self, v);
836 if (n == NULL)
836 if (n == NULL)
837 return -2;
837 return -2;
838 for (i = level; i < maxlevel; i++)
838 for (i = level; i < maxlevel; i++)
839 if (getnybble(node, i) != nt_level(n, i))
839 if (getnybble(node, i) != nt_level(n, i))
840 return -2;
840 return -2;
841 return v;
841 return v;
842 }
842 }
843 if (v == 0)
843 if (v == 0)
844 return -2;
844 return -2;
845 off = v;
845 off = v;
846 }
846 }
847 /* multiple matches against an ambiguous prefix */
847 /* multiple matches against an ambiguous prefix */
848 return -4;
848 return -4;
849 }
849 }
850
850
851 static int nt_new(indexObject *self)
851 static int nt_new(indexObject *self)
852 {
852 {
853 if (self->ntlength == self->ntcapacity) {
853 if (self->ntlength == self->ntcapacity) {
854 self->ntcapacity *= 2;
854 self->ntcapacity *= 2;
855 self->nt = realloc(self->nt,
855 self->nt = realloc(self->nt,
856 self->ntcapacity * sizeof(nodetree));
856 self->ntcapacity * sizeof(nodetree));
857 if (self->nt == NULL) {
857 if (self->nt == NULL) {
858 PyErr_SetString(PyExc_MemoryError, "out of memory");
858 PyErr_SetString(PyExc_MemoryError, "out of memory");
859 return -1;
859 return -1;
860 }
860 }
861 memset(&self->nt[self->ntlength], 0,
861 memset(&self->nt[self->ntlength], 0,
862 sizeof(nodetree) * (self->ntcapacity - self->ntlength));
862 sizeof(nodetree) * (self->ntcapacity - self->ntlength));
863 }
863 }
864 return self->ntlength++;
864 return self->ntlength++;
865 }
865 }
866
866
867 static int nt_insert(indexObject *self, const char *node, int rev)
867 static int nt_insert(indexObject *self, const char *node, int rev)
868 {
868 {
869 int level = 0;
869 int level = 0;
870 int off = 0;
870 int off = 0;
871
871
872 while (level < 40) {
872 while (level < 40) {
873 int k = nt_level(node, level);
873 int k = nt_level(node, level);
874 nodetree *n;
874 nodetree *n;
875 int v;
875 int v;
876
876
877 n = &self->nt[off];
877 n = &self->nt[off];
878 v = n->children[k];
878 v = n->children[k];
879
879
880 if (v == 0) {
880 if (v == 0) {
881 n->children[k] = -rev - 1;
881 n->children[k] = -rev - 1;
882 return 0;
882 return 0;
883 }
883 }
884 if (v < 0) {
884 if (v < 0) {
885 const char *oldnode = index_node(self, -v - 1);
885 const char *oldnode = index_node(self, -v - 1);
886 int noff;
886 int noff;
887
887
888 if (!oldnode || !memcmp(oldnode, node, 20)) {
888 if (!oldnode || !memcmp(oldnode, node, 20)) {
889 n->children[k] = -rev - 1;
889 n->children[k] = -rev - 1;
890 return 0;
890 return 0;
891 }
891 }
892 noff = nt_new(self);
892 noff = nt_new(self);
893 if (noff == -1)
893 if (noff == -1)
894 return -1;
894 return -1;
895 /* self->nt may have been changed by realloc */
895 /* self->nt may have been changed by realloc */
896 self->nt[off].children[k] = noff;
896 self->nt[off].children[k] = noff;
897 off = noff;
897 off = noff;
898 n = &self->nt[off];
898 n = &self->nt[off];
899 n->children[nt_level(oldnode, ++level)] = v;
899 n->children[nt_level(oldnode, ++level)] = v;
900 if (level > self->ntdepth)
900 if (level > self->ntdepth)
901 self->ntdepth = level;
901 self->ntdepth = level;
902 self->ntsplits += 1;
902 self->ntsplits += 1;
903 } else {
903 } else {
904 level += 1;
904 level += 1;
905 off = v;
905 off = v;
906 }
906 }
907 }
907 }
908
908
909 return -1;
909 return -1;
910 }
910 }
911
911
912 static int nt_init(indexObject *self)
912 static int nt_init(indexObject *self)
913 {
913 {
914 if (self->nt == NULL) {
914 if (self->nt == NULL) {
915 self->ntcapacity = self->raw_length < 4
915 self->ntcapacity = self->raw_length < 4
916 ? 4 : self->raw_length / 2;
916 ? 4 : self->raw_length / 2;
917 self->nt = calloc(self->ntcapacity, sizeof(nodetree));
917 self->nt = calloc(self->ntcapacity, sizeof(nodetree));
918 if (self->nt == NULL) {
918 if (self->nt == NULL) {
919 PyErr_NoMemory();
919 PyErr_NoMemory();
920 return -1;
920 return -1;
921 }
921 }
922 self->ntlength = 1;
922 self->ntlength = 1;
923 self->ntrev = (int)index_length(self) - 1;
923 self->ntrev = (int)index_length(self) - 1;
924 self->ntlookups = 1;
924 self->ntlookups = 1;
925 self->ntmisses = 0;
925 self->ntmisses = 0;
926 if (nt_insert(self, nullid, INT_MAX) == -1)
926 if (nt_insert(self, nullid, INT_MAX) == -1)
927 return -1;
927 return -1;
928 }
928 }
929 return 0;
929 return 0;
930 }
930 }
931
931
932 /*
932 /*
933 * Return values:
933 * Return values:
934 *
934 *
935 * -3: error (exception set)
935 * -3: error (exception set)
936 * -2: not found (no exception set)
936 * -2: not found (no exception set)
937 * rest: valid rev
937 * rest: valid rev
938 */
938 */
939 static int index_find_node(indexObject *self,
939 static int index_find_node(indexObject *self,
940 const char *node, Py_ssize_t nodelen)
940 const char *node, Py_ssize_t nodelen)
941 {
941 {
942 int rev;
942 int rev;
943
943
944 self->ntlookups++;
944 self->ntlookups++;
945 rev = nt_find(self, node, nodelen, 0);
945 rev = nt_find(self, node, nodelen, 0);
946 if (rev >= -1)
946 if (rev >= -1)
947 return rev;
947 return rev;
948
948
949 if (nt_init(self) == -1)
949 if (nt_init(self) == -1)
950 return -3;
950 return -3;
951
951
952 /*
952 /*
953 * For the first handful of lookups, we scan the entire index,
953 * For the first handful of lookups, we scan the entire index,
954 * and cache only the matching nodes. This optimizes for cases
954 * and cache only the matching nodes. This optimizes for cases
955 * like "hg tip", where only a few nodes are accessed.
955 * like "hg tip", where only a few nodes are accessed.
956 *
956 *
957 * After that, we cache every node we visit, using a single
957 * After that, we cache every node we visit, using a single
958 * scan amortized over multiple lookups. This gives the best
958 * scan amortized over multiple lookups. This gives the best
959 * bulk performance, e.g. for "hg log".
959 * bulk performance, e.g. for "hg log".
960 */
960 */
961 if (self->ntmisses++ < 4) {
961 if (self->ntmisses++ < 4) {
962 for (rev = self->ntrev - 1; rev >= 0; rev--) {
962 for (rev = self->ntrev - 1; rev >= 0; rev--) {
963 const char *n = index_node(self, rev);
963 const char *n = index_node(self, rev);
964 if (n == NULL)
964 if (n == NULL)
965 return -2;
965 return -2;
966 if (memcmp(node, n, nodelen > 20 ? 20 : nodelen) == 0) {
966 if (memcmp(node, n, nodelen > 20 ? 20 : nodelen) == 0) {
967 if (nt_insert(self, n, rev) == -1)
967 if (nt_insert(self, n, rev) == -1)
968 return -3;
968 return -3;
969 break;
969 break;
970 }
970 }
971 }
971 }
972 } else {
972 } else {
973 for (rev = self->ntrev - 1; rev >= 0; rev--) {
973 for (rev = self->ntrev - 1; rev >= 0; rev--) {
974 const char *n = index_node(self, rev);
974 const char *n = index_node(self, rev);
975 if (n == NULL) {
975 if (n == NULL) {
976 self->ntrev = rev + 1;
976 self->ntrev = rev + 1;
977 return -2;
977 return -2;
978 }
978 }
979 if (nt_insert(self, n, rev) == -1) {
979 if (nt_insert(self, n, rev) == -1) {
980 self->ntrev = rev + 1;
980 self->ntrev = rev + 1;
981 return -3;
981 return -3;
982 }
982 }
983 if (memcmp(node, n, nodelen > 20 ? 20 : nodelen) == 0) {
983 if (memcmp(node, n, nodelen > 20 ? 20 : nodelen) == 0) {
984 break;
984 break;
985 }
985 }
986 }
986 }
987 self->ntrev = rev;
987 self->ntrev = rev;
988 }
988 }
989
989
990 if (rev >= 0)
990 if (rev >= 0)
991 return rev;
991 return rev;
992 return -2;
992 return -2;
993 }
993 }
994
994
995 static PyObject *raise_revlog_error(void)
995 static PyObject *raise_revlog_error(void)
996 {
996 {
997 static PyObject *errclass;
997 static PyObject *errclass;
998 PyObject *mod = NULL, *errobj;
998 PyObject *mod = NULL, *errobj;
999
999
1000 if (errclass == NULL) {
1000 if (errclass == NULL) {
1001 PyObject *dict;
1001 PyObject *dict;
1002
1002
1003 mod = PyImport_ImportModule("mercurial.error");
1003 mod = PyImport_ImportModule("mercurial.error");
1004 if (mod == NULL)
1004 if (mod == NULL)
1005 goto classfail;
1005 goto classfail;
1006
1006
1007 dict = PyModule_GetDict(mod);
1007 dict = PyModule_GetDict(mod);
1008 if (dict == NULL)
1008 if (dict == NULL)
1009 goto classfail;
1009 goto classfail;
1010
1010
1011 errclass = PyDict_GetItemString(dict, "RevlogError");
1011 errclass = PyDict_GetItemString(dict, "RevlogError");
1012 if (errclass == NULL) {
1012 if (errclass == NULL) {
1013 PyErr_SetString(PyExc_SystemError,
1013 PyErr_SetString(PyExc_SystemError,
1014 "could not find RevlogError");
1014 "could not find RevlogError");
1015 goto classfail;
1015 goto classfail;
1016 }
1016 }
1017 Py_INCREF(errclass);
1017 Py_INCREF(errclass);
1018 }
1018 }
1019
1019
1020 errobj = PyObject_CallFunction(errclass, NULL);
1020 errobj = PyObject_CallFunction(errclass, NULL);
1021 if (errobj == NULL)
1021 if (errobj == NULL)
1022 return NULL;
1022 return NULL;
1023 PyErr_SetObject(errclass, errobj);
1023 PyErr_SetObject(errclass, errobj);
1024 return errobj;
1024 return errobj;
1025
1025
1026 classfail:
1026 classfail:
1027 Py_XDECREF(mod);
1027 Py_XDECREF(mod);
1028 return NULL;
1028 return NULL;
1029 }
1029 }
1030
1030
1031 static PyObject *index_getitem(indexObject *self, PyObject *value)
1031 static PyObject *index_getitem(indexObject *self, PyObject *value)
1032 {
1032 {
1033 char *node;
1033 char *node;
1034 Py_ssize_t nodelen;
1034 Py_ssize_t nodelen;
1035 int rev;
1035 int rev;
1036
1036
1037 if (PyInt_Check(value))
1037 if (PyInt_Check(value))
1038 return index_get(self, PyInt_AS_LONG(value));
1038 return index_get(self, PyInt_AS_LONG(value));
1039
1039
1040 if (node_check(value, &node, &nodelen) == -1)
1040 if (node_check(value, &node, &nodelen) == -1)
1041 return NULL;
1041 return NULL;
1042 rev = index_find_node(self, node, nodelen);
1042 rev = index_find_node(self, node, nodelen);
1043 if (rev >= -1)
1043 if (rev >= -1)
1044 return PyInt_FromLong(rev);
1044 return PyInt_FromLong(rev);
1045 if (rev == -2)
1045 if (rev == -2)
1046 raise_revlog_error();
1046 raise_revlog_error();
1047 return NULL;
1047 return NULL;
1048 }
1048 }
1049
1049
1050 static int nt_partialmatch(indexObject *self, const char *node,
1050 static int nt_partialmatch(indexObject *self, const char *node,
1051 Py_ssize_t nodelen)
1051 Py_ssize_t nodelen)
1052 {
1052 {
1053 int rev;
1053 int rev;
1054
1054
1055 if (nt_init(self) == -1)
1055 if (nt_init(self) == -1)
1056 return -3;
1056 return -3;
1057
1057
1058 if (self->ntrev > 0) {
1058 if (self->ntrev > 0) {
1059 /* ensure that the radix tree is fully populated */
1059 /* ensure that the radix tree is fully populated */
1060 for (rev = self->ntrev - 1; rev >= 0; rev--) {
1060 for (rev = self->ntrev - 1; rev >= 0; rev--) {
1061 const char *n = index_node(self, rev);
1061 const char *n = index_node(self, rev);
1062 if (n == NULL)
1062 if (n == NULL)
1063 return -2;
1063 return -2;
1064 if (nt_insert(self, n, rev) == -1)
1064 if (nt_insert(self, n, rev) == -1)
1065 return -3;
1065 return -3;
1066 }
1066 }
1067 self->ntrev = rev;
1067 self->ntrev = rev;
1068 }
1068 }
1069
1069
1070 return nt_find(self, node, nodelen, 1);
1070 return nt_find(self, node, nodelen, 1);
1071 }
1071 }
1072
1072
1073 static PyObject *index_partialmatch(indexObject *self, PyObject *args)
1073 static PyObject *index_partialmatch(indexObject *self, PyObject *args)
1074 {
1074 {
1075 const char *fullnode;
1075 const char *fullnode;
1076 int nodelen;
1076 int nodelen;
1077 char *node;
1077 char *node;
1078 int rev, i;
1078 int rev, i;
1079
1079
1080 if (!PyArg_ParseTuple(args, "s#", &node, &nodelen))
1080 if (!PyArg_ParseTuple(args, "s#", &node, &nodelen))
1081 return NULL;
1081 return NULL;
1082
1082
1083 if (nodelen < 4) {
1083 if (nodelen < 4) {
1084 PyErr_SetString(PyExc_ValueError, "key too short");
1084 PyErr_SetString(PyExc_ValueError, "key too short");
1085 return NULL;
1085 return NULL;
1086 }
1086 }
1087
1087
1088 if (nodelen > 40) {
1088 if (nodelen > 40) {
1089 PyErr_SetString(PyExc_ValueError, "key too long");
1089 PyErr_SetString(PyExc_ValueError, "key too long");
1090 return NULL;
1090 return NULL;
1091 }
1091 }
1092
1092
1093 for (i = 0; i < nodelen; i++)
1093 for (i = 0; i < nodelen; i++)
1094 hexdigit(node, i);
1094 hexdigit(node, i);
1095 if (PyErr_Occurred()) {
1095 if (PyErr_Occurred()) {
1096 /* input contains non-hex characters */
1096 /* input contains non-hex characters */
1097 PyErr_Clear();
1097 PyErr_Clear();
1098 Py_RETURN_NONE;
1098 Py_RETURN_NONE;
1099 }
1099 }
1100
1100
1101 rev = nt_partialmatch(self, node, nodelen);
1101 rev = nt_partialmatch(self, node, nodelen);
1102
1102
1103 switch (rev) {
1103 switch (rev) {
1104 case -4:
1104 case -4:
1105 raise_revlog_error();
1105 raise_revlog_error();
1106 case -3:
1106 case -3:
1107 return NULL;
1107 return NULL;
1108 case -2:
1108 case -2:
1109 Py_RETURN_NONE;
1109 Py_RETURN_NONE;
1110 case -1:
1110 case -1:
1111 return PyString_FromStringAndSize(nullid, 20);
1111 return PyString_FromStringAndSize(nullid, 20);
1112 }
1112 }
1113
1113
1114 fullnode = index_node(self, rev);
1114 fullnode = index_node(self, rev);
1115 if (fullnode == NULL) {
1115 if (fullnode == NULL) {
1116 PyErr_Format(PyExc_IndexError,
1116 PyErr_Format(PyExc_IndexError,
1117 "could not access rev %d", rev);
1117 "could not access rev %d", rev);
1118 return NULL;
1118 return NULL;
1119 }
1119 }
1120 return PyString_FromStringAndSize(fullnode, 20);
1120 return PyString_FromStringAndSize(fullnode, 20);
1121 }
1121 }
1122
1122
1123 static PyObject *index_m_get(indexObject *self, PyObject *args)
1123 static PyObject *index_m_get(indexObject *self, PyObject *args)
1124 {
1124 {
1125 Py_ssize_t nodelen;
1125 Py_ssize_t nodelen;
1126 PyObject *val;
1126 PyObject *val;
1127 char *node;
1127 char *node;
1128 int rev;
1128 int rev;
1129
1129
1130 if (!PyArg_ParseTuple(args, "O", &val))
1130 if (!PyArg_ParseTuple(args, "O", &val))
1131 return NULL;
1131 return NULL;
1132 if (node_check(val, &node, &nodelen) == -1)
1132 if (node_check(val, &node, &nodelen) == -1)
1133 return NULL;
1133 return NULL;
1134 rev = index_find_node(self, node, nodelen);
1134 rev = index_find_node(self, node, nodelen);
1135 if (rev == -3)
1135 if (rev == -3)
1136 return NULL;
1136 return NULL;
1137 if (rev == -2)
1137 if (rev == -2)
1138 Py_RETURN_NONE;
1138 Py_RETURN_NONE;
1139 return PyInt_FromLong(rev);
1139 return PyInt_FromLong(rev);
1140 }
1140 }
1141
1141
1142 static int index_contains(indexObject *self, PyObject *value)
1142 static int index_contains(indexObject *self, PyObject *value)
1143 {
1143 {
1144 char *node;
1144 char *node;
1145 Py_ssize_t nodelen;
1145 Py_ssize_t nodelen;
1146
1146
1147 if (PyInt_Check(value)) {
1147 if (PyInt_Check(value)) {
1148 long rev = PyInt_AS_LONG(value);
1148 long rev = PyInt_AS_LONG(value);
1149 return rev >= -1 && rev < index_length(self);
1149 return rev >= -1 && rev < index_length(self);
1150 }
1150 }
1151
1151
1152 if (node_check(value, &node, &nodelen) == -1)
1152 if (node_check(value, &node, &nodelen) == -1)
1153 return -1;
1153 return -1;
1154
1154
1155 switch (index_find_node(self, node, nodelen)) {
1155 switch (index_find_node(self, node, nodelen)) {
1156 case -3:
1156 case -3:
1157 return -1;
1157 return -1;
1158 case -2:
1158 case -2:
1159 return 0;
1159 return 0;
1160 default:
1160 default:
1161 return 1;
1161 return 1;
1162 }
1162 }
1163 }
1163 }
1164
1164
1165 /*
1165 /*
1166 * Invalidate any trie entries introduced by added revs.
1166 * Invalidate any trie entries introduced by added revs.
1167 */
1167 */
1168 static void nt_invalidate_added(indexObject *self, Py_ssize_t start)
1168 static void nt_invalidate_added(indexObject *self, Py_ssize_t start)
1169 {
1169 {
1170 Py_ssize_t i, len = PyList_GET_SIZE(self->added);
1170 Py_ssize_t i, len = PyList_GET_SIZE(self->added);
1171
1171
1172 for (i = start; i < len; i++) {
1172 for (i = start; i < len; i++) {
1173 PyObject *tuple = PyList_GET_ITEM(self->added, i);
1173 PyObject *tuple = PyList_GET_ITEM(self->added, i);
1174 PyObject *node = PyTuple_GET_ITEM(tuple, 7);
1174 PyObject *node = PyTuple_GET_ITEM(tuple, 7);
1175
1175
1176 nt_insert(self, PyString_AS_STRING(node), -1);
1176 nt_insert(self, PyString_AS_STRING(node), -1);
1177 }
1177 }
1178
1178
1179 if (start == 0)
1179 if (start == 0)
1180 Py_CLEAR(self->added);
1180 Py_CLEAR(self->added);
1181 }
1181 }
1182
1182
1183 /*
1183 /*
1184 * Delete a numeric range of revs, which must be at the end of the
1184 * Delete a numeric range of revs, which must be at the end of the
1185 * range, but exclude the sentinel nullid entry.
1185 * range, but exclude the sentinel nullid entry.
1186 */
1186 */
1187 static int index_slice_del(indexObject *self, PyObject *item)
1187 static int index_slice_del(indexObject *self, PyObject *item)
1188 {
1188 {
1189 Py_ssize_t start, stop, step, slicelength;
1189 Py_ssize_t start, stop, step, slicelength;
1190 Py_ssize_t length = index_length(self);
1190 Py_ssize_t length = index_length(self);
1191 int ret = 0;
1191 int ret = 0;
1192
1192
1193 if (PySlice_GetIndicesEx((PySliceObject*)item, length,
1193 if (PySlice_GetIndicesEx((PySliceObject*)item, length,
1194 &start, &stop, &step, &slicelength) < 0)
1194 &start, &stop, &step, &slicelength) < 0)
1195 return -1;
1195 return -1;
1196
1196
1197 if (slicelength <= 0)
1197 if (slicelength <= 0)
1198 return 0;
1198 return 0;
1199
1199
1200 if ((step < 0 && start < stop) || (step > 0 && start > stop))
1200 if ((step < 0 && start < stop) || (step > 0 && start > stop))
1201 stop = start;
1201 stop = start;
1202
1202
1203 if (step < 0) {
1203 if (step < 0) {
1204 stop = start + 1;
1204 stop = start + 1;
1205 start = stop + step*(slicelength - 1) - 1;
1205 start = stop + step*(slicelength - 1) - 1;
1206 step = -step;
1206 step = -step;
1207 }
1207 }
1208
1208
1209 if (step != 1) {
1209 if (step != 1) {
1210 PyErr_SetString(PyExc_ValueError,
1210 PyErr_SetString(PyExc_ValueError,
1211 "revlog index delete requires step size of 1");
1211 "revlog index delete requires step size of 1");
1212 return -1;
1212 return -1;
1213 }
1213 }
1214
1214
1215 if (stop != length - 1) {
1215 if (stop != length - 1) {
1216 PyErr_SetString(PyExc_IndexError,
1216 PyErr_SetString(PyExc_IndexError,
1217 "revlog index deletion indices are invalid");
1217 "revlog index deletion indices are invalid");
1218 return -1;
1218 return -1;
1219 }
1219 }
1220
1220
1221 if (start < self->length - 1) {
1221 if (start < self->length - 1) {
1222 if (self->nt) {
1222 if (self->nt) {
1223 Py_ssize_t i;
1223 Py_ssize_t i;
1224
1224
1225 for (i = start + 1; i < self->length - 1; i++) {
1225 for (i = start + 1; i < self->length - 1; i++) {
1226 const char *node = index_node(self, i);
1226 const char *node = index_node(self, i);
1227
1227
1228 if (node)
1228 if (node)
1229 nt_insert(self, node, -1);
1229 nt_insert(self, node, -1);
1230 }
1230 }
1231 if (self->added)
1231 if (self->added)
1232 nt_invalidate_added(self, 0);
1232 nt_invalidate_added(self, 0);
1233 if (self->ntrev > start)
1233 if (self->ntrev > start)
1234 self->ntrev = (int)start;
1234 self->ntrev = (int)start;
1235 }
1235 }
1236 self->length = start + 1;
1236 self->length = start + 1;
1237 if (start < self->raw_length)
1237 if (start < self->raw_length)
1238 self->raw_length = start;
1238 self->raw_length = start;
1239 goto done;
1239 goto done;
1240 }
1240 }
1241
1241
1242 if (self->nt) {
1242 if (self->nt) {
1243 nt_invalidate_added(self, start - self->length + 1);
1243 nt_invalidate_added(self, start - self->length + 1);
1244 if (self->ntrev > start)
1244 if (self->ntrev > start)
1245 self->ntrev = (int)start;
1245 self->ntrev = (int)start;
1246 }
1246 }
1247 if (self->added)
1247 if (self->added)
1248 ret = PyList_SetSlice(self->added, start - self->length + 1,
1248 ret = PyList_SetSlice(self->added, start - self->length + 1,
1249 PyList_GET_SIZE(self->added), NULL);
1249 PyList_GET_SIZE(self->added), NULL);
1250 done:
1250 done:
1251 Py_CLEAR(self->headrevs);
1251 Py_CLEAR(self->headrevs);
1252 return ret;
1252 return ret;
1253 }
1253 }
1254
1254
1255 /*
1255 /*
1256 * Supported ops:
1256 * Supported ops:
1257 *
1257 *
1258 * slice deletion
1258 * slice deletion
1259 * string assignment (extend node->rev mapping)
1259 * string assignment (extend node->rev mapping)
1260 * string deletion (shrink node->rev mapping)
1260 * string deletion (shrink node->rev mapping)
1261 */
1261 */
1262 static int index_assign_subscript(indexObject *self, PyObject *item,
1262 static int index_assign_subscript(indexObject *self, PyObject *item,
1263 PyObject *value)
1263 PyObject *value)
1264 {
1264 {
1265 char *node;
1265 char *node;
1266 Py_ssize_t nodelen;
1266 Py_ssize_t nodelen;
1267 long rev;
1267 long rev;
1268
1268
1269 if (PySlice_Check(item) && value == NULL)
1269 if (PySlice_Check(item) && value == NULL)
1270 return index_slice_del(self, item);
1270 return index_slice_del(self, item);
1271
1271
1272 if (node_check(item, &node, &nodelen) == -1)
1272 if (node_check(item, &node, &nodelen) == -1)
1273 return -1;
1273 return -1;
1274
1274
1275 if (value == NULL)
1275 if (value == NULL)
1276 return self->nt ? nt_insert(self, node, -1) : 0;
1276 return self->nt ? nt_insert(self, node, -1) : 0;
1277 rev = PyInt_AsLong(value);
1277 rev = PyInt_AsLong(value);
1278 if (rev > INT_MAX || rev < 0) {
1278 if (rev > INT_MAX || rev < 0) {
1279 if (!PyErr_Occurred())
1279 if (!PyErr_Occurred())
1280 PyErr_SetString(PyExc_ValueError, "rev out of range");
1280 PyErr_SetString(PyExc_ValueError, "rev out of range");
1281 return -1;
1281 return -1;
1282 }
1282 }
1283 return nt_insert(self, node, (int)rev);
1283 return nt_insert(self, node, (int)rev);
1284 }
1284 }
1285
1285
1286 /*
1286 /*
1287 * Find all RevlogNG entries in an index that has inline data. Update
1287 * Find all RevlogNG entries in an index that has inline data. Update
1288 * the optional "offsets" table with those entries.
1288 * the optional "offsets" table with those entries.
1289 */
1289 */
1290 static long inline_scan(indexObject *self, const char **offsets)
1290 static long inline_scan(indexObject *self, const char **offsets)
1291 {
1291 {
1292 const char *data = PyString_AS_STRING(self->data);
1292 const char *data = PyString_AS_STRING(self->data);
1293 const char *end = data + PyString_GET_SIZE(self->data);
1293 const char *end = data + PyString_GET_SIZE(self->data);
1294 long incr = v1_hdrsize;
1294 long incr = v1_hdrsize;
1295 Py_ssize_t len = 0;
1295 Py_ssize_t len = 0;
1296
1296
1297 while (data + v1_hdrsize <= end) {
1297 while (data + v1_hdrsize <= end) {
1298 uint32_t comp_len;
1298 uint32_t comp_len;
1299 const char *old_data;
1299 const char *old_data;
1300 /* 3rd element of header is length of compressed inline data */
1300 /* 3rd element of header is length of compressed inline data */
1301 comp_len = getbe32(data + 8);
1301 comp_len = getbe32(data + 8);
1302 incr = v1_hdrsize + comp_len;
1302 incr = v1_hdrsize + comp_len;
1303 if (incr < v1_hdrsize)
1303 if (incr < v1_hdrsize)
1304 break;
1304 break;
1305 if (offsets)
1305 if (offsets)
1306 offsets[len] = data;
1306 offsets[len] = data;
1307 len++;
1307 len++;
1308 old_data = data;
1308 old_data = data;
1309 data += incr;
1309 data += incr;
1310 if (data <= old_data)
1310 if (data <= old_data)
1311 break;
1311 break;
1312 }
1312 }
1313
1313
1314 if (data != end && data + v1_hdrsize != end) {
1314 if (data != end && data + v1_hdrsize != end) {
1315 if (!PyErr_Occurred())
1315 if (!PyErr_Occurred())
1316 PyErr_SetString(PyExc_ValueError, "corrupt index file");
1316 PyErr_SetString(PyExc_ValueError, "corrupt index file");
1317 return -1;
1317 return -1;
1318 }
1318 }
1319
1319
1320 return len;
1320 return len;
1321 }
1321 }
1322
1322
1323 static int index_init(indexObject *self, PyObject *args)
1323 static int index_init(indexObject *self, PyObject *args)
1324 {
1324 {
1325 PyObject *data_obj, *inlined_obj;
1325 PyObject *data_obj, *inlined_obj;
1326 Py_ssize_t size;
1326 Py_ssize_t size;
1327
1327
1328 if (!PyArg_ParseTuple(args, "OO", &data_obj, &inlined_obj))
1328 if (!PyArg_ParseTuple(args, "OO", &data_obj, &inlined_obj))
1329 return -1;
1329 return -1;
1330 if (!PyString_Check(data_obj)) {
1330 if (!PyString_Check(data_obj)) {
1331 PyErr_SetString(PyExc_TypeError, "data is not a string");
1331 PyErr_SetString(PyExc_TypeError, "data is not a string");
1332 return -1;
1332 return -1;
1333 }
1333 }
1334 size = PyString_GET_SIZE(data_obj);
1334 size = PyString_GET_SIZE(data_obj);
1335
1335
1336 self->inlined = inlined_obj && PyObject_IsTrue(inlined_obj);
1336 self->inlined = inlined_obj && PyObject_IsTrue(inlined_obj);
1337 self->data = data_obj;
1337 self->data = data_obj;
1338 self->cache = NULL;
1338 self->cache = NULL;
1339
1339
1340 self->added = NULL;
1340 self->added = NULL;
1341 self->headrevs = NULL;
1341 self->headrevs = NULL;
1342 self->offsets = NULL;
1342 self->offsets = NULL;
1343 self->nt = NULL;
1343 self->nt = NULL;
1344 self->ntlength = self->ntcapacity = 0;
1344 self->ntlength = self->ntcapacity = 0;
1345 self->ntdepth = self->ntsplits = 0;
1345 self->ntdepth = self->ntsplits = 0;
1346 self->ntlookups = self->ntmisses = 0;
1346 self->ntlookups = self->ntmisses = 0;
1347 self->ntrev = -1;
1347 self->ntrev = -1;
1348 Py_INCREF(self->data);
1348 Py_INCREF(self->data);
1349
1349
1350 if (self->inlined) {
1350 if (self->inlined) {
1351 long len = inline_scan(self, NULL);
1351 long len = inline_scan(self, NULL);
1352 if (len == -1)
1352 if (len == -1)
1353 goto bail;
1353 goto bail;
1354 self->raw_length = len;
1354 self->raw_length = len;
1355 self->length = len + 1;
1355 self->length = len + 1;
1356 } else {
1356 } else {
1357 if (size % v1_hdrsize) {
1357 if (size % v1_hdrsize) {
1358 PyErr_SetString(PyExc_ValueError, "corrupt index file");
1358 PyErr_SetString(PyExc_ValueError, "corrupt index file");
1359 goto bail;
1359 goto bail;
1360 }
1360 }
1361 self->raw_length = size / v1_hdrsize;
1361 self->raw_length = size / v1_hdrsize;
1362 self->length = self->raw_length + 1;
1362 self->length = self->raw_length + 1;
1363 }
1363 }
1364
1364
1365 return 0;
1365 return 0;
1366 bail:
1366 bail:
1367 return -1;
1367 return -1;
1368 }
1368 }
1369
1369
1370 static PyObject *index_nodemap(indexObject *self)
1370 static PyObject *index_nodemap(indexObject *self)
1371 {
1371 {
1372 Py_INCREF(self);
1372 Py_INCREF(self);
1373 return (PyObject *)self;
1373 return (PyObject *)self;
1374 }
1374 }
1375
1375
1376 static void index_dealloc(indexObject *self)
1376 static void index_dealloc(indexObject *self)
1377 {
1377 {
1378 _index_clearcaches(self);
1378 _index_clearcaches(self);
1379 Py_DECREF(self->data);
1379 Py_DECREF(self->data);
1380 Py_XDECREF(self->added);
1380 Py_XDECREF(self->added);
1381 PyObject_Del(self);
1381 PyObject_Del(self);
1382 }
1382 }
1383
1383
1384 static PySequenceMethods index_sequence_methods = {
1384 static PySequenceMethods index_sequence_methods = {
1385 (lenfunc)index_length, /* sq_length */
1385 (lenfunc)index_length, /* sq_length */
1386 0, /* sq_concat */
1386 0, /* sq_concat */
1387 0, /* sq_repeat */
1387 0, /* sq_repeat */
1388 (ssizeargfunc)index_get, /* sq_item */
1388 (ssizeargfunc)index_get, /* sq_item */
1389 0, /* sq_slice */
1389 0, /* sq_slice */
1390 0, /* sq_ass_item */
1390 0, /* sq_ass_item */
1391 0, /* sq_ass_slice */
1391 0, /* sq_ass_slice */
1392 (objobjproc)index_contains, /* sq_contains */
1392 (objobjproc)index_contains, /* sq_contains */
1393 };
1393 };
1394
1394
1395 static PyMappingMethods index_mapping_methods = {
1395 static PyMappingMethods index_mapping_methods = {
1396 (lenfunc)index_length, /* mp_length */
1396 (lenfunc)index_length, /* mp_length */
1397 (binaryfunc)index_getitem, /* mp_subscript */
1397 (binaryfunc)index_getitem, /* mp_subscript */
1398 (objobjargproc)index_assign_subscript, /* mp_ass_subscript */
1398 (objobjargproc)index_assign_subscript, /* mp_ass_subscript */
1399 };
1399 };
1400
1400
1401 static PyMethodDef index_methods[] = {
1401 static PyMethodDef index_methods[] = {
1402 {"clearcaches", (PyCFunction)index_clearcaches, METH_NOARGS,
1402 {"clearcaches", (PyCFunction)index_clearcaches, METH_NOARGS,
1403 "clear the index caches"},
1403 "clear the index caches"},
1404 {"get", (PyCFunction)index_m_get, METH_VARARGS,
1404 {"get", (PyCFunction)index_m_get, METH_VARARGS,
1405 "get an index entry"},
1405 "get an index entry"},
1406 {"headrevs", (PyCFunction)index_headrevs, METH_NOARGS,
1406 {"headrevs", (PyCFunction)index_headrevs, METH_NOARGS,
1407 "get head revisions"},
1407 "get head revisions"},
1408 {"insert", (PyCFunction)index_insert, METH_VARARGS,
1408 {"insert", (PyCFunction)index_insert, METH_VARARGS,
1409 "insert an index entry"},
1409 "insert an index entry"},
1410 {"partialmatch", (PyCFunction)index_partialmatch, METH_VARARGS,
1410 {"partialmatch", (PyCFunction)index_partialmatch, METH_VARARGS,
1411 "match a potentially ambiguous node ID"},
1411 "match a potentially ambiguous node ID"},
1412 {"stats", (PyCFunction)index_stats, METH_NOARGS,
1412 {"stats", (PyCFunction)index_stats, METH_NOARGS,
1413 "stats for the index"},
1413 "stats for the index"},
1414 {NULL} /* Sentinel */
1414 {NULL} /* Sentinel */
1415 };
1415 };
1416
1416
1417 static PyGetSetDef index_getset[] = {
1417 static PyGetSetDef index_getset[] = {
1418 {"nodemap", (getter)index_nodemap, NULL, "nodemap", NULL},
1418 {"nodemap", (getter)index_nodemap, NULL, "nodemap", NULL},
1419 {NULL} /* Sentinel */
1419 {NULL} /* Sentinel */
1420 };
1420 };
1421
1421
1422 static PyTypeObject indexType = {
1422 static PyTypeObject indexType = {
1423 PyObject_HEAD_INIT(NULL)
1423 PyObject_HEAD_INIT(NULL)
1424 0, /* ob_size */
1424 0, /* ob_size */
1425 "parsers.index", /* tp_name */
1425 "parsers.index", /* tp_name */
1426 sizeof(indexObject), /* tp_basicsize */
1426 sizeof(indexObject), /* tp_basicsize */
1427 0, /* tp_itemsize */
1427 0, /* tp_itemsize */
1428 (destructor)index_dealloc, /* tp_dealloc */
1428 (destructor)index_dealloc, /* tp_dealloc */
1429 0, /* tp_print */
1429 0, /* tp_print */
1430 0, /* tp_getattr */
1430 0, /* tp_getattr */
1431 0, /* tp_setattr */
1431 0, /* tp_setattr */
1432 0, /* tp_compare */
1432 0, /* tp_compare */
1433 0, /* tp_repr */
1433 0, /* tp_repr */
1434 0, /* tp_as_number */
1434 0, /* tp_as_number */
1435 &index_sequence_methods, /* tp_as_sequence */
1435 &index_sequence_methods, /* tp_as_sequence */
1436 &index_mapping_methods, /* tp_as_mapping */
1436 &index_mapping_methods, /* tp_as_mapping */
1437 0, /* tp_hash */
1437 0, /* tp_hash */
1438 0, /* tp_call */
1438 0, /* tp_call */
1439 0, /* tp_str */
1439 0, /* tp_str */
1440 0, /* tp_getattro */
1440 0, /* tp_getattro */
1441 0, /* tp_setattro */
1441 0, /* tp_setattro */
1442 0, /* tp_as_buffer */
1442 0, /* tp_as_buffer */
1443 Py_TPFLAGS_DEFAULT, /* tp_flags */
1443 Py_TPFLAGS_DEFAULT, /* tp_flags */
1444 "revlog index", /* tp_doc */
1444 "revlog index", /* tp_doc */
1445 0, /* tp_traverse */
1445 0, /* tp_traverse */
1446 0, /* tp_clear */
1446 0, /* tp_clear */
1447 0, /* tp_richcompare */
1447 0, /* tp_richcompare */
1448 0, /* tp_weaklistoffset */
1448 0, /* tp_weaklistoffset */
1449 0, /* tp_iter */
1449 0, /* tp_iter */
1450 0, /* tp_iternext */
1450 0, /* tp_iternext */
1451 index_methods, /* tp_methods */
1451 index_methods, /* tp_methods */
1452 0, /* tp_members */
1452 0, /* tp_members */
1453 index_getset, /* tp_getset */
1453 index_getset, /* tp_getset */
1454 0, /* tp_base */
1454 0, /* tp_base */
1455 0, /* tp_dict */
1455 0, /* tp_dict */
1456 0, /* tp_descr_get */
1456 0, /* tp_descr_get */
1457 0, /* tp_descr_set */
1457 0, /* tp_descr_set */
1458 0, /* tp_dictoffset */
1458 0, /* tp_dictoffset */
1459 (initproc)index_init, /* tp_init */
1459 (initproc)index_init, /* tp_init */
1460 0, /* tp_alloc */
1460 0, /* tp_alloc */
1461 };
1461 };
1462
1462
1463 /*
1463 /*
1464 * returns a tuple of the form (index, index, cache) with elements as
1464 * returns a tuple of the form (index, index, cache) with elements as
1465 * follows:
1465 * follows:
1466 *
1466 *
1467 * index: an index object that lazily parses RevlogNG records
1467 * index: an index object that lazily parses RevlogNG records
1468 * cache: if data is inlined, a tuple (index_file_content, 0), else None
1468 * cache: if data is inlined, a tuple (index_file_content, 0), else None
1469 *
1469 *
1470 * added complications are for backwards compatibility
1470 * added complications are for backwards compatibility
1471 */
1471 */
1472 static PyObject *parse_index2(PyObject *self, PyObject *args)
1472 static PyObject *parse_index2(PyObject *self, PyObject *args)
1473 {
1473 {
1474 PyObject *tuple = NULL, *cache = NULL;
1474 PyObject *tuple = NULL, *cache = NULL;
1475 indexObject *idx;
1475 indexObject *idx;
1476 int ret;
1476 int ret;
1477
1477
1478 idx = PyObject_New(indexObject, &indexType);
1478 idx = PyObject_New(indexObject, &indexType);
1479 if (idx == NULL)
1479 if (idx == NULL)
1480 goto bail;
1480 goto bail;
1481
1481
1482 ret = index_init(idx, args);
1482 ret = index_init(idx, args);
1483 if (ret == -1)
1483 if (ret == -1)
1484 goto bail;
1484 goto bail;
1485
1485
1486 if (idx->inlined) {
1486 if (idx->inlined) {
1487 cache = Py_BuildValue("iO", 0, idx->data);
1487 cache = Py_BuildValue("iO", 0, idx->data);
1488 if (cache == NULL)
1488 if (cache == NULL)
1489 goto bail;
1489 goto bail;
1490 } else {
1490 } else {
1491 cache = Py_None;
1491 cache = Py_None;
1492 Py_INCREF(cache);
1492 Py_INCREF(cache);
1493 }
1493 }
1494
1494
1495 tuple = Py_BuildValue("NN", idx, cache);
1495 tuple = Py_BuildValue("NN", idx, cache);
1496 if (!tuple)
1496 if (!tuple)
1497 goto bail;
1497 goto bail;
1498 return tuple;
1498 return tuple;
1499
1499
1500 bail:
1500 bail:
1501 Py_XDECREF(idx);
1501 Py_XDECREF(idx);
1502 Py_XDECREF(cache);
1502 Py_XDECREF(cache);
1503 Py_XDECREF(tuple);
1503 Py_XDECREF(tuple);
1504 return NULL;
1504 return NULL;
1505 }
1505 }
1506
1506
1507 static char parsers_doc[] = "Efficient content parsing.";
1507 static char parsers_doc[] = "Efficient content parsing.";
1508
1508
1509 PyObject *encodedir(PyObject *self, PyObject *args);
1509 PyObject *encodedir(PyObject *self, PyObject *args);
1510 PyObject *pathencode(PyObject *self, PyObject *args);
1510 PyObject *pathencode(PyObject *self, PyObject *args);
1511 PyObject *lowerencode(PyObject *self, PyObject *args);
1511
1512
1512 static PyMethodDef methods[] = {
1513 static PyMethodDef methods[] = {
1513 {"pack_dirstate", pack_dirstate, METH_VARARGS, "pack a dirstate\n"},
1514 {"pack_dirstate", pack_dirstate, METH_VARARGS, "pack a dirstate\n"},
1514 {"parse_manifest", parse_manifest, METH_VARARGS, "parse a manifest\n"},
1515 {"parse_manifest", parse_manifest, METH_VARARGS, "parse a manifest\n"},
1515 {"parse_dirstate", parse_dirstate, METH_VARARGS, "parse a dirstate\n"},
1516 {"parse_dirstate", parse_dirstate, METH_VARARGS, "parse a dirstate\n"},
1516 {"parse_index2", parse_index2, METH_VARARGS, "parse a revlog index\n"},
1517 {"parse_index2", parse_index2, METH_VARARGS, "parse a revlog index\n"},
1517 {"encodedir", encodedir, METH_VARARGS, "encodedir a path\n"},
1518 {"encodedir", encodedir, METH_VARARGS, "encodedir a path\n"},
1518 {"pathencode", pathencode, METH_VARARGS, "fncache-encode a path\n"},
1519 {"pathencode", pathencode, METH_VARARGS, "fncache-encode a path\n"},
1520 {"lowerencode", lowerencode, METH_VARARGS, "lower-encode a path\n"},
1519 {NULL, NULL}
1521 {NULL, NULL}
1520 };
1522 };
1521
1523
1522 static void module_init(PyObject *mod)
1524 static void module_init(PyObject *mod)
1523 {
1525 {
1524 indexType.tp_new = PyType_GenericNew;
1526 indexType.tp_new = PyType_GenericNew;
1525 if (PyType_Ready(&indexType) < 0)
1527 if (PyType_Ready(&indexType) < 0)
1526 return;
1528 return;
1527 Py_INCREF(&indexType);
1529 Py_INCREF(&indexType);
1528
1530
1529 PyModule_AddObject(mod, "index", (PyObject *)&indexType);
1531 PyModule_AddObject(mod, "index", (PyObject *)&indexType);
1530
1532
1531 nullentry = Py_BuildValue("iiiiiiis#", 0, 0, 0,
1533 nullentry = Py_BuildValue("iiiiiiis#", 0, 0, 0,
1532 -1, -1, -1, -1, nullid, 20);
1534 -1, -1, -1, -1, nullid, 20);
1533 if (nullentry)
1535 if (nullentry)
1534 PyObject_GC_UnTrack(nullentry);
1536 PyObject_GC_UnTrack(nullentry);
1535
1537
1536 dirstate_unset = Py_BuildValue("ciii", 'n', 0, -1, -1);
1538 dirstate_unset = Py_BuildValue("ciii", 'n', 0, -1, -1);
1537 }
1539 }
1538
1540
1539 #ifdef IS_PY3K
1541 #ifdef IS_PY3K
1540 static struct PyModuleDef parsers_module = {
1542 static struct PyModuleDef parsers_module = {
1541 PyModuleDef_HEAD_INIT,
1543 PyModuleDef_HEAD_INIT,
1542 "parsers",
1544 "parsers",
1543 parsers_doc,
1545 parsers_doc,
1544 -1,
1546 -1,
1545 methods
1547 methods
1546 };
1548 };
1547
1549
1548 PyMODINIT_FUNC PyInit_parsers(void)
1550 PyMODINIT_FUNC PyInit_parsers(void)
1549 {
1551 {
1550 PyObject *mod = PyModule_Create(&parsers_module);
1552 PyObject *mod = PyModule_Create(&parsers_module);
1551 module_init(mod);
1553 module_init(mod);
1552 return mod;
1554 return mod;
1553 }
1555 }
1554 #else
1556 #else
1555 PyMODINIT_FUNC initparsers(void)
1557 PyMODINIT_FUNC initparsers(void)
1556 {
1558 {
1557 PyObject *mod = Py_InitModule3("parsers", methods, parsers_doc);
1559 PyObject *mod = Py_InitModule3("parsers", methods, parsers_doc);
1558 module_init(mod);
1560 module_init(mod);
1559 }
1561 }
1560 #endif
1562 #endif
@@ -1,531 +1,573
1 /*
1 /*
2 pathencode.c - efficient path name encoding
2 pathencode.c - efficient path name encoding
3
3
4 Copyright 2012 Facebook
4 Copyright 2012 Facebook
5
5
6 This software may be used and distributed according to the terms of
6 This software may be used and distributed according to the terms of
7 the GNU General Public License, incorporated herein by reference.
7 the GNU General Public License, incorporated herein by reference.
8 */
8 */
9
9
10 /*
10 /*
11 * An implementation of the name encoding scheme used by the fncache
11 * An implementation of the name encoding scheme used by the fncache
12 * store. The common case is of a path < 120 bytes long, which is
12 * store. The common case is of a path < 120 bytes long, which is
13 * handled either in a single pass with no allocations or two passes
13 * handled either in a single pass with no allocations or two passes
14 * with a single allocation. For longer paths, multiple passes are
14 * with a single allocation. For longer paths, multiple passes are
15 * required.
15 * required.
16 */
16 */
17
17
18 #define PY_SSIZE_T_CLEAN
18 #include <Python.h>
19 #include <Python.h>
19 #include <assert.h>
20 #include <assert.h>
20 #include <ctype.h>
21 #include <ctype.h>
21 #include <stdlib.h>
22 #include <stdlib.h>
22 #include <string.h>
23 #include <string.h>
23
24
24 #include "util.h"
25 #include "util.h"
25
26
26 /* state machine for the fast path */
27 /* state machine for the fast path */
27 enum path_state {
28 enum path_state {
28 START, /* first byte of a path component */
29 START, /* first byte of a path component */
29 A, /* "AUX" */
30 A, /* "AUX" */
30 AU,
31 AU,
31 THIRD, /* third of a 3-byte sequence, e.g. "AUX", "NUL" */
32 THIRD, /* third of a 3-byte sequence, e.g. "AUX", "NUL" */
32 C, /* "CON" or "COMn" */
33 C, /* "CON" or "COMn" */
33 CO,
34 CO,
34 COMLPT, /* "COM" or "LPT" */
35 COMLPT, /* "COM" or "LPT" */
35 COMLPTn,
36 COMLPTn,
36 L,
37 L,
37 LP,
38 LP,
38 N,
39 N,
39 NU,
40 NU,
40 P, /* "PRN" */
41 P, /* "PRN" */
41 PR,
42 PR,
42 LDOT, /* leading '.' */
43 LDOT, /* leading '.' */
43 DOT, /* '.' in a non-leading position */
44 DOT, /* '.' in a non-leading position */
44 H, /* ".h" */
45 H, /* ".h" */
45 HGDI, /* ".hg", ".d", or ".i" */
46 HGDI, /* ".hg", ".d", or ".i" */
46 SPACE,
47 SPACE,
47 DEFAULT, /* byte of a path component after the first */
48 DEFAULT, /* byte of a path component after the first */
48 };
49 };
49
50
50 /* state machine for dir-encoding */
51 /* state machine for dir-encoding */
51 enum dir_state {
52 enum dir_state {
52 DDOT,
53 DDOT,
53 DH,
54 DH,
54 DHGDI,
55 DHGDI,
55 DDEFAULT,
56 DDEFAULT,
56 };
57 };
57
58
58 static inline int inset(const uint32_t bitset[], char c)
59 static inline int inset(const uint32_t bitset[], char c)
59 {
60 {
60 return bitset[((uint8_t)c) >> 5] & (1 << (((uint8_t)c) & 31));
61 return bitset[((uint8_t)c) >> 5] & (1 << (((uint8_t)c) & 31));
61 }
62 }
62
63
63 static inline void charcopy(char *dest, Py_ssize_t *destlen, size_t destsize,
64 static inline void charcopy(char *dest, Py_ssize_t *destlen, size_t destsize,
64 char c)
65 char c)
65 {
66 {
66 if (dest) {
67 if (dest) {
67 assert(*destlen < destsize);
68 assert(*destlen < destsize);
68 dest[*destlen] = c;
69 dest[*destlen] = c;
69 }
70 }
70 (*destlen)++;
71 (*destlen)++;
71 }
72 }
72
73
73 static inline void memcopy(char *dest, Py_ssize_t *destlen, size_t destsize,
74 static inline void memcopy(char *dest, Py_ssize_t *destlen, size_t destsize,
74 const void *src, Py_ssize_t len)
75 const void *src, Py_ssize_t len)
75 {
76 {
76 if (dest) {
77 if (dest) {
77 assert(*destlen + len < destsize);
78 assert(*destlen + len < destsize);
78 memcpy((void *)&dest[*destlen], src, len);
79 memcpy((void *)&dest[*destlen], src, len);
79 }
80 }
80 *destlen += len;
81 *destlen += len;
81 }
82 }
82
83
83 static inline void hexencode(char *dest, Py_ssize_t *destlen, size_t destsize,
84 static inline void hexencode(char *dest, Py_ssize_t *destlen, size_t destsize,
84 uint8_t c)
85 uint8_t c)
85 {
86 {
86 static const char hexdigit[] = "0123456789abcdef";
87 static const char hexdigit[] = "0123456789abcdef";
87
88
88 charcopy(dest, destlen, destsize, hexdigit[c >> 4]);
89 charcopy(dest, destlen, destsize, hexdigit[c >> 4]);
89 charcopy(dest, destlen, destsize, hexdigit[c & 15]);
90 charcopy(dest, destlen, destsize, hexdigit[c & 15]);
90 }
91 }
91
92
92 /* 3-byte escape: tilde followed by two hex digits */
93 /* 3-byte escape: tilde followed by two hex digits */
93 static inline void escape3(char *dest, Py_ssize_t *destlen, size_t destsize,
94 static inline void escape3(char *dest, Py_ssize_t *destlen, size_t destsize,
94 char c)
95 char c)
95 {
96 {
96 charcopy(dest, destlen, destsize, '~');
97 charcopy(dest, destlen, destsize, '~');
97 hexencode(dest, destlen, destsize, c);
98 hexencode(dest, destlen, destsize, c);
98 }
99 }
99
100
100 static Py_ssize_t _encodedir(char *dest, size_t destsize,
101 static Py_ssize_t _encodedir(char *dest, size_t destsize,
101 const char *src, Py_ssize_t len)
102 const char *src, Py_ssize_t len)
102 {
103 {
103 enum dir_state state = DDEFAULT;
104 enum dir_state state = DDEFAULT;
104 Py_ssize_t i = 0, destlen = 0;
105 Py_ssize_t i = 0, destlen = 0;
105
106
106 while (i < len) {
107 while (i < len) {
107 switch (state) {
108 switch (state) {
108 case DDOT:
109 case DDOT:
109 switch (src[i]) {
110 switch (src[i]) {
110 case 'd':
111 case 'd':
111 case 'i':
112 case 'i':
112 state = DHGDI;
113 state = DHGDI;
113 charcopy(dest, &destlen, destsize, src[i++]);
114 charcopy(dest, &destlen, destsize, src[i++]);
114 break;
115 break;
115 case 'h':
116 case 'h':
116 state = DH;
117 state = DH;
117 charcopy(dest, &destlen, destsize, src[i++]);
118 charcopy(dest, &destlen, destsize, src[i++]);
118 break;
119 break;
119 default:
120 default:
120 state = DDEFAULT;
121 state = DDEFAULT;
121 break;
122 break;
122 }
123 }
123 break;
124 break;
124 case DH:
125 case DH:
125 if (src[i] == 'g') {
126 if (src[i] == 'g') {
126 state = DHGDI;
127 state = DHGDI;
127 charcopy(dest, &destlen, destsize, src[i++]);
128 charcopy(dest, &destlen, destsize, src[i++]);
128 }
129 }
129 else state = DDEFAULT;
130 else state = DDEFAULT;
130 break;
131 break;
131 case DHGDI:
132 case DHGDI:
132 if (src[i] == '/') {
133 if (src[i] == '/') {
133 memcopy(dest, &destlen, destsize, ".hg", 3);
134 memcopy(dest, &destlen, destsize, ".hg", 3);
134 charcopy(dest, &destlen, destsize, src[i++]);
135 charcopy(dest, &destlen, destsize, src[i++]);
135 }
136 }
136 state = DDEFAULT;
137 state = DDEFAULT;
137 break;
138 break;
138 case DDEFAULT:
139 case DDEFAULT:
139 if (src[i] == '.')
140 if (src[i] == '.')
140 state = DDOT;
141 state = DDOT;
141 charcopy(dest, &destlen, destsize, src[i++]);
142 charcopy(dest, &destlen, destsize, src[i++]);
142 break;
143 break;
143 }
144 }
144 }
145 }
145
146
146 return destlen;
147 return destlen;
147 }
148 }
148
149
149 PyObject *encodedir(PyObject *self, PyObject *args)
150 PyObject *encodedir(PyObject *self, PyObject *args)
150 {
151 {
151 Py_ssize_t len, newlen;
152 Py_ssize_t len, newlen;
152 PyObject *pathobj, *newobj;
153 PyObject *pathobj, *newobj;
153 char *path;
154 char *path;
154
155
155 if (!PyArg_ParseTuple(args, "O:encodedir", &pathobj))
156 if (!PyArg_ParseTuple(args, "O:encodedir", &pathobj))
156 return NULL;
157 return NULL;
157
158
158 if (PyString_AsStringAndSize(pathobj, &path, &len) == -1) {
159 if (PyString_AsStringAndSize(pathobj, &path, &len) == -1) {
159 PyErr_SetString(PyExc_TypeError, "expected a string");
160 PyErr_SetString(PyExc_TypeError, "expected a string");
160 return NULL;
161 return NULL;
161 }
162 }
162
163
163 newlen = len ? _encodedir(NULL, 0, path, len + 1) : 1;
164 newlen = len ? _encodedir(NULL, 0, path, len + 1) : 1;
164
165
165 if (newlen == len + 1) {
166 if (newlen == len + 1) {
166 Py_INCREF(pathobj);
167 Py_INCREF(pathobj);
167 return pathobj;
168 return pathobj;
168 }
169 }
169
170
170 newobj = PyString_FromStringAndSize(NULL, newlen);
171 newobj = PyString_FromStringAndSize(NULL, newlen);
171
172
172 if (newobj) {
173 if (newobj) {
173 PyString_GET_SIZE(newobj)--;
174 PyString_GET_SIZE(newobj)--;
174 _encodedir(PyString_AS_STRING(newobj), newlen, path,
175 _encodedir(PyString_AS_STRING(newobj), newlen, path,
175 len + 1);
176 len + 1);
176 }
177 }
177
178
178 return newobj;
179 return newobj;
179 }
180 }
180
181
181 static Py_ssize_t _encode(const uint32_t twobytes[8], const uint32_t onebyte[8],
182 static Py_ssize_t _encode(const uint32_t twobytes[8], const uint32_t onebyte[8],
182 char *dest, Py_ssize_t destlen, size_t destsize,
183 char *dest, Py_ssize_t destlen, size_t destsize,
183 const char *src, Py_ssize_t len,
184 const char *src, Py_ssize_t len,
184 int encodedir)
185 int encodedir)
185 {
186 {
186 enum path_state state = START;
187 enum path_state state = START;
187 Py_ssize_t i = 0;
188 Py_ssize_t i = 0;
188
189
189 /*
190 /*
190 * Python strings end with a zero byte, which we use as a
191 * Python strings end with a zero byte, which we use as a
191 * terminal token as they are not valid inside path names.
192 * terminal token as they are not valid inside path names.
192 */
193 */
193
194
194 while (i < len) {
195 while (i < len) {
195 switch (state) {
196 switch (state) {
196 case START:
197 case START:
197 switch (src[i]) {
198 switch (src[i]) {
198 case '/':
199 case '/':
199 charcopy(dest, &destlen, destsize, src[i++]);
200 charcopy(dest, &destlen, destsize, src[i++]);
200 break;
201 break;
201 case '.':
202 case '.':
202 state = LDOT;
203 state = LDOT;
203 escape3(dest, &destlen, destsize, src[i++]);
204 escape3(dest, &destlen, destsize, src[i++]);
204 break;
205 break;
205 case ' ':
206 case ' ':
206 state = DEFAULT;
207 state = DEFAULT;
207 escape3(dest, &destlen, destsize, src[i++]);
208 escape3(dest, &destlen, destsize, src[i++]);
208 break;
209 break;
209 case 'a':
210 case 'a':
210 state = A;
211 state = A;
211 charcopy(dest, &destlen, destsize, src[i++]);
212 charcopy(dest, &destlen, destsize, src[i++]);
212 break;
213 break;
213 case 'c':
214 case 'c':
214 state = C;
215 state = C;
215 charcopy(dest, &destlen, destsize, src[i++]);
216 charcopy(dest, &destlen, destsize, src[i++]);
216 break;
217 break;
217 case 'l':
218 case 'l':
218 state = L;
219 state = L;
219 charcopy(dest, &destlen, destsize, src[i++]);
220 charcopy(dest, &destlen, destsize, src[i++]);
220 break;
221 break;
221 case 'n':
222 case 'n':
222 state = N;
223 state = N;
223 charcopy(dest, &destlen, destsize, src[i++]);
224 charcopy(dest, &destlen, destsize, src[i++]);
224 break;
225 break;
225 case 'p':
226 case 'p':
226 state = P;
227 state = P;
227 charcopy(dest, &destlen, destsize, src[i++]);
228 charcopy(dest, &destlen, destsize, src[i++]);
228 break;
229 break;
229 default:
230 default:
230 state = DEFAULT;
231 state = DEFAULT;
231 break;
232 break;
232 }
233 }
233 break;
234 break;
234 case A:
235 case A:
235 if (src[i] == 'u') {
236 if (src[i] == 'u') {
236 state = AU;
237 state = AU;
237 charcopy(dest, &destlen, destsize, src[i++]);
238 charcopy(dest, &destlen, destsize, src[i++]);
238 }
239 }
239 else state = DEFAULT;
240 else state = DEFAULT;
240 break;
241 break;
241 case AU:
242 case AU:
242 if (src[i] == 'x') {
243 if (src[i] == 'x') {
243 state = THIRD;
244 state = THIRD;
244 i++;
245 i++;
245 }
246 }
246 else state = DEFAULT;
247 else state = DEFAULT;
247 break;
248 break;
248 case THIRD:
249 case THIRD:
249 state = DEFAULT;
250 state = DEFAULT;
250 switch (src[i]) {
251 switch (src[i]) {
251 case '.':
252 case '.':
252 case '/':
253 case '/':
253 case '\0':
254 case '\0':
254 escape3(dest, &destlen, destsize, src[i - 1]);
255 escape3(dest, &destlen, destsize, src[i - 1]);
255 break;
256 break;
256 default:
257 default:
257 i--;
258 i--;
258 break;
259 break;
259 }
260 }
260 break;
261 break;
261 case C:
262 case C:
262 if (src[i] == 'o') {
263 if (src[i] == 'o') {
263 state = CO;
264 state = CO;
264 charcopy(dest, &destlen, destsize, src[i++]);
265 charcopy(dest, &destlen, destsize, src[i++]);
265 }
266 }
266 else state = DEFAULT;
267 else state = DEFAULT;
267 break;
268 break;
268 case CO:
269 case CO:
269 if (src[i] == 'm') {
270 if (src[i] == 'm') {
270 state = COMLPT;
271 state = COMLPT;
271 i++;
272 i++;
272 }
273 }
273 else if (src[i] == 'n') {
274 else if (src[i] == 'n') {
274 state = THIRD;
275 state = THIRD;
275 i++;
276 i++;
276 }
277 }
277 else state = DEFAULT;
278 else state = DEFAULT;
278 break;
279 break;
279 case COMLPT:
280 case COMLPT:
280 switch (src[i]) {
281 switch (src[i]) {
281 case '1': case '2': case '3': case '4': case '5':
282 case '1': case '2': case '3': case '4': case '5':
282 case '6': case '7': case '8': case '9':
283 case '6': case '7': case '8': case '9':
283 state = COMLPTn;
284 state = COMLPTn;
284 i++;
285 i++;
285 break;
286 break;
286 default:
287 default:
287 state = DEFAULT;
288 state = DEFAULT;
288 charcopy(dest, &destlen, destsize, src[i - 1]);
289 charcopy(dest, &destlen, destsize, src[i - 1]);
289 break;
290 break;
290 }
291 }
291 break;
292 break;
292 case COMLPTn:
293 case COMLPTn:
293 state = DEFAULT;
294 state = DEFAULT;
294 switch (src[i]) {
295 switch (src[i]) {
295 case '.':
296 case '.':
296 case '/':
297 case '/':
297 case '\0':
298 case '\0':
298 escape3(dest, &destlen, destsize, src[i - 2]);
299 escape3(dest, &destlen, destsize, src[i - 2]);
299 charcopy(dest, &destlen, destsize, src[i - 1]);
300 charcopy(dest, &destlen, destsize, src[i - 1]);
300 break;
301 break;
301 default:
302 default:
302 memcopy(dest, &destlen, destsize,
303 memcopy(dest, &destlen, destsize,
303 &src[i - 2], 2);
304 &src[i - 2], 2);
304 break;
305 break;
305 }
306 }
306 break;
307 break;
307 case L:
308 case L:
308 if (src[i] == 'p') {
309 if (src[i] == 'p') {
309 state = LP;
310 state = LP;
310 charcopy(dest, &destlen, destsize, src[i++]);
311 charcopy(dest, &destlen, destsize, src[i++]);
311 }
312 }
312 else state = DEFAULT;
313 else state = DEFAULT;
313 break;
314 break;
314 case LP:
315 case LP:
315 if (src[i] == 't') {
316 if (src[i] == 't') {
316 state = COMLPT;
317 state = COMLPT;
317 i++;
318 i++;
318 }
319 }
319 else state = DEFAULT;
320 else state = DEFAULT;
320 break;
321 break;
321 case N:
322 case N:
322 if (src[i] == 'u') {
323 if (src[i] == 'u') {
323 state = NU;
324 state = NU;
324 charcopy(dest, &destlen, destsize, src[i++]);
325 charcopy(dest, &destlen, destsize, src[i++]);
325 }
326 }
326 else state = DEFAULT;
327 else state = DEFAULT;
327 break;
328 break;
328 case NU:
329 case NU:
329 if (src[i] == 'l') {
330 if (src[i] == 'l') {
330 state = THIRD;
331 state = THIRD;
331 i++;
332 i++;
332 }
333 }
333 else state = DEFAULT;
334 else state = DEFAULT;
334 break;
335 break;
335 case P:
336 case P:
336 if (src[i] == 'r') {
337 if (src[i] == 'r') {
337 state = PR;
338 state = PR;
338 charcopy(dest, &destlen, destsize, src[i++]);
339 charcopy(dest, &destlen, destsize, src[i++]);
339 }
340 }
340 else state = DEFAULT;
341 else state = DEFAULT;
341 break;
342 break;
342 case PR:
343 case PR:
343 if (src[i] == 'n') {
344 if (src[i] == 'n') {
344 state = THIRD;
345 state = THIRD;
345 i++;
346 i++;
346 }
347 }
347 else state = DEFAULT;
348 else state = DEFAULT;
348 break;
349 break;
349 case LDOT:
350 case LDOT:
350 switch (src[i]) {
351 switch (src[i]) {
351 case 'd':
352 case 'd':
352 case 'i':
353 case 'i':
353 state = HGDI;
354 state = HGDI;
354 charcopy(dest, &destlen, destsize, src[i++]);
355 charcopy(dest, &destlen, destsize, src[i++]);
355 break;
356 break;
356 case 'h':
357 case 'h':
357 state = H;
358 state = H;
358 charcopy(dest, &destlen, destsize, src[i++]);
359 charcopy(dest, &destlen, destsize, src[i++]);
359 break;
360 break;
360 default:
361 default:
361 state = DEFAULT;
362 state = DEFAULT;
362 break;
363 break;
363 }
364 }
364 break;
365 break;
365 case DOT:
366 case DOT:
366 switch (src[i]) {
367 switch (src[i]) {
367 case '/':
368 case '/':
368 case '\0':
369 case '\0':
369 state = START;
370 state = START;
370 memcopy(dest, &destlen, destsize, "~2e", 3);
371 memcopy(dest, &destlen, destsize, "~2e", 3);
371 charcopy(dest, &destlen, destsize, src[i++]);
372 charcopy(dest, &destlen, destsize, src[i++]);
372 break;
373 break;
373 case 'd':
374 case 'd':
374 case 'i':
375 case 'i':
375 state = HGDI;
376 state = HGDI;
376 charcopy(dest, &destlen, destsize, '.');
377 charcopy(dest, &destlen, destsize, '.');
377 charcopy(dest, &destlen, destsize, src[i++]);
378 charcopy(dest, &destlen, destsize, src[i++]);
378 break;
379 break;
379 case 'h':
380 case 'h':
380 state = H;
381 state = H;
381 memcopy(dest, &destlen, destsize, ".h", 2);
382 memcopy(dest, &destlen, destsize, ".h", 2);
382 i++;
383 i++;
383 break;
384 break;
384 default:
385 default:
385 state = DEFAULT;
386 state = DEFAULT;
386 charcopy(dest, &destlen, destsize, '.');
387 charcopy(dest, &destlen, destsize, '.');
387 break;
388 break;
388 }
389 }
389 break;
390 break;
390 case H:
391 case H:
391 if (src[i] == 'g') {
392 if (src[i] == 'g') {
392 state = HGDI;
393 state = HGDI;
393 charcopy(dest, &destlen, destsize, src[i++]);
394 charcopy(dest, &destlen, destsize, src[i++]);
394 }
395 }
395 else state = DEFAULT;
396 else state = DEFAULT;
396 break;
397 break;
397 case HGDI:
398 case HGDI:
398 if (src[i] == '/') {
399 if (src[i] == '/') {
399 state = START;
400 state = START;
400 if (encodedir)
401 if (encodedir)
401 memcopy(dest, &destlen, destsize, ".hg",
402 memcopy(dest, &destlen, destsize, ".hg",
402 3);
403 3);
403 charcopy(dest, &destlen, destsize, src[i++]);
404 charcopy(dest, &destlen, destsize, src[i++]);
404 }
405 }
405 else state = DEFAULT;
406 else state = DEFAULT;
406 break;
407 break;
407 case SPACE:
408 case SPACE:
408 switch (src[i]) {
409 switch (src[i]) {
409 case '/':
410 case '/':
410 case '\0':
411 case '\0':
411 state = START;
412 state = START;
412 memcopy(dest, &destlen, destsize, "~20", 3);
413 memcopy(dest, &destlen, destsize, "~20", 3);
413 charcopy(dest, &destlen, destsize, src[i++]);
414 charcopy(dest, &destlen, destsize, src[i++]);
414 break;
415 break;
415 default:
416 default:
416 state = DEFAULT;
417 state = DEFAULT;
417 charcopy(dest, &destlen, destsize, ' ');
418 charcopy(dest, &destlen, destsize, ' ');
418 break;
419 break;
419 }
420 }
420 break;
421 break;
421 case DEFAULT:
422 case DEFAULT:
422 while (inset(onebyte, src[i])) {
423 while (inset(onebyte, src[i])) {
423 charcopy(dest, &destlen, destsize, src[i++]);
424 charcopy(dest, &destlen, destsize, src[i++]);
424 if (i == len)
425 if (i == len)
425 goto done;
426 goto done;
426 }
427 }
427 switch (src[i]) {
428 switch (src[i]) {
428 case '.':
429 case '.':
429 state = DOT;
430 state = DOT;
430 i++;
431 i++;
431 break;
432 break;
432 case ' ':
433 case ' ':
433 state = SPACE;
434 state = SPACE;
434 i++;
435 i++;
435 break;
436 break;
436 case '/':
437 case '/':
437 state = START;
438 state = START;
438 charcopy(dest, &destlen, destsize, '/');
439 charcopy(dest, &destlen, destsize, '/');
439 i++;
440 i++;
440 break;
441 break;
441 default:
442 default:
442 if (inset(onebyte, src[i])) {
443 if (inset(onebyte, src[i])) {
443 do {
444 do {
444 charcopy(dest, &destlen,
445 charcopy(dest, &destlen,
445 destsize, src[i++]);
446 destsize, src[i++]);
446 } while (i < len &&
447 } while (i < len &&
447 inset(onebyte, src[i]));
448 inset(onebyte, src[i]));
448 }
449 }
449 else if (inset(twobytes, src[i])) {
450 else if (inset(twobytes, src[i])) {
450 char c = src[i++];
451 char c = src[i++];
451 charcopy(dest, &destlen, destsize, '_');
452 charcopy(dest, &destlen, destsize, '_');
452 charcopy(dest, &destlen, destsize,
453 charcopy(dest, &destlen, destsize,
453 c == '_' ? '_' : c + 32);
454 c == '_' ? '_' : c + 32);
454 }
455 }
455 else
456 else
456 escape3(dest, &destlen, destsize,
457 escape3(dest, &destlen, destsize,
457 src[i++]);
458 src[i++]);
458 break;
459 break;
459 }
460 }
460 break;
461 break;
461 }
462 }
462 }
463 }
463 done:
464 done:
464 return destlen;
465 return destlen;
465 }
466 }
466
467
467 static Py_ssize_t basicencode(char *dest, size_t destsize,
468 static Py_ssize_t basicencode(char *dest, size_t destsize,
468 const char *src, Py_ssize_t len)
469 const char *src, Py_ssize_t len)
469 {
470 {
470 static const uint32_t twobytes[8] = { 0, 0, 0x87fffffe };
471 static const uint32_t twobytes[8] = { 0, 0, 0x87fffffe };
471
472
472 static const uint32_t onebyte[8] = {
473 static const uint32_t onebyte[8] = {
473 1, 0x2bff3bfa, 0x68000001, 0x2fffffff,
474 1, 0x2bff3bfa, 0x68000001, 0x2fffffff,
474 };
475 };
475
476
476 Py_ssize_t destlen = 0;
477 Py_ssize_t destlen = 0;
477
478
478 return _encode(twobytes, onebyte, dest, destlen, destsize,
479 return _encode(twobytes, onebyte, dest, destlen, destsize,
479 src, len, 1);
480 src, len, 1);
480 }
481 }
481
482
482 static const Py_ssize_t maxstorepathlen = 120;
483 static const Py_ssize_t maxstorepathlen = 120;
483
484
485 static Py_ssize_t _lowerencode(char *dest, size_t destsize,
486 const char *src, Py_ssize_t len)
487 {
488 static const uint32_t onebyte[8] = {
489 1, 0x2bfffbfb, 0xe8000001, 0x2fffffff
490 };
491
492 static const uint32_t lower[8] = { 0, 0, 0x7fffffe };
493
494 Py_ssize_t i, destlen = 0;
495
496 for (i = 0; i < len; i++) {
497 if (inset(onebyte, src[i]))
498 charcopy(dest, &destlen, destsize, src[i]);
499 else if (inset(lower, src[i]))
500 charcopy(dest, &destlen, destsize, src[i] + 32);
501 else
502 escape3(dest, &destlen, destsize, src[i]);
503 }
504
505 return destlen;
506 }
507
508 PyObject *lowerencode(PyObject *self, PyObject *args)
509 {
510 char *path;
511 Py_ssize_t len, newlen;
512 PyObject *ret;
513
514 if (!PyArg_ParseTuple(args, "s#:lowerencode", &path, &len))
515 return NULL;
516
517 newlen = _lowerencode(NULL, 0, path, len);
518 ret = PyString_FromStringAndSize(NULL, newlen);
519 if (ret)
520 newlen = _lowerencode(PyString_AS_STRING(ret), newlen,
521 path, len);
522
523 return ret;
524 }
525
484 /*
526 /*
485 * We currently implement only basic encoding.
527 * We currently implement only basic encoding.
486 *
528 *
487 * If a name is too long to encode due to Windows path name limits,
529 * If a name is too long to encode due to Windows path name limits,
488 * this function returns None.
530 * this function returns None.
489 */
531 */
490 PyObject *pathencode(PyObject *self, PyObject *args)
532 PyObject *pathencode(PyObject *self, PyObject *args)
491 {
533 {
492 Py_ssize_t len, newlen;
534 Py_ssize_t len, newlen;
493 PyObject *pathobj, *newobj;
535 PyObject *pathobj, *newobj;
494 char *path;
536 char *path;
495
537
496 if (!PyArg_ParseTuple(args, "O:pathencode", &pathobj))
538 if (!PyArg_ParseTuple(args, "O:pathencode", &pathobj))
497 return NULL;
539 return NULL;
498
540
499 if (PyString_AsStringAndSize(pathobj, &path, &len) == -1) {
541 if (PyString_AsStringAndSize(pathobj, &path, &len) == -1) {
500 PyErr_SetString(PyExc_TypeError, "expected a string");
542 PyErr_SetString(PyExc_TypeError, "expected a string");
501 return NULL;
543 return NULL;
502 }
544 }
503
545
504 if (len > maxstorepathlen) {
546 if (len > maxstorepathlen) {
505 newobj = Py_None;
547 newobj = Py_None;
506 Py_INCREF(newobj);
548 Py_INCREF(newobj);
507 return newobj;
549 return newobj;
508 }
550 }
509
551
510 newlen = len ? basicencode(NULL, 0, path, len + 1) : 1;
552 newlen = len ? basicencode(NULL, 0, path, len + 1) : 1;
511
553
512 if (newlen <= maxstorepathlen + 1) {
554 if (newlen <= maxstorepathlen + 1) {
513 if (newlen == len + 1) {
555 if (newlen == len + 1) {
514 Py_INCREF(pathobj);
556 Py_INCREF(pathobj);
515 return pathobj;
557 return pathobj;
516 }
558 }
517
559
518 newobj = PyString_FromStringAndSize(NULL, newlen);
560 newobj = PyString_FromStringAndSize(NULL, newlen);
519
561
520 if (newobj) {
562 if (newobj) {
521 PyString_GET_SIZE(newobj)--;
563 PyString_GET_SIZE(newobj)--;
522 basicencode(PyString_AS_STRING(newobj), newlen, path,
564 basicencode(PyString_AS_STRING(newobj), newlen, path,
523 len + 1);
565 len + 1);
524 }
566 }
525 } else {
567 } else {
526 newobj = Py_None;
568 newobj = Py_None;
527 Py_INCREF(newobj);
569 Py_INCREF(newobj);
528 }
570 }
529
571
530 return newobj;
572 return newobj;
531 }
573 }
@@ -1,538 +1,538
1 # store.py - repository store handling for Mercurial
1 # store.py - repository store handling for Mercurial
2 #
2 #
3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from i18n import _
8 from i18n import _
9 import scmutil, util, parsers
9 import scmutil, util, parsers
10 import os, stat, errno
10 import os, stat, errno
11
11
12 _sha = util.sha1
12 _sha = util.sha1
13
13
14 # This avoids a collision between a file named foo and a dir named
14 # This avoids a collision between a file named foo and a dir named
15 # foo.i or foo.d
15 # foo.i or foo.d
16 def _encodedir(path):
16 def _encodedir(path):
17 '''
17 '''
18 >>> _encodedir('data/foo.i')
18 >>> _encodedir('data/foo.i')
19 'data/foo.i'
19 'data/foo.i'
20 >>> _encodedir('data/foo.i/bla.i')
20 >>> _encodedir('data/foo.i/bla.i')
21 'data/foo.i.hg/bla.i'
21 'data/foo.i.hg/bla.i'
22 >>> _encodedir('data/foo.i.hg/bla.i')
22 >>> _encodedir('data/foo.i.hg/bla.i')
23 'data/foo.i.hg.hg/bla.i'
23 'data/foo.i.hg.hg/bla.i'
24 >>> _encodedir('data/foo.i\\ndata/foo.i/bla.i\\ndata/foo.i.hg/bla.i\\n')
24 >>> _encodedir('data/foo.i\\ndata/foo.i/bla.i\\ndata/foo.i.hg/bla.i\\n')
25 'data/foo.i\\ndata/foo.i.hg/bla.i\\ndata/foo.i.hg.hg/bla.i\\n'
25 'data/foo.i\\ndata/foo.i.hg/bla.i\\ndata/foo.i.hg.hg/bla.i\\n'
26 '''
26 '''
27 return (path
27 return (path
28 .replace(".hg/", ".hg.hg/")
28 .replace(".hg/", ".hg.hg/")
29 .replace(".i/", ".i.hg/")
29 .replace(".i/", ".i.hg/")
30 .replace(".d/", ".d.hg/"))
30 .replace(".d/", ".d.hg/"))
31
31
32 encodedir = getattr(parsers, 'encodedir', _encodedir)
32 encodedir = getattr(parsers, 'encodedir', _encodedir)
33
33
34 def decodedir(path):
34 def decodedir(path):
35 '''
35 '''
36 >>> decodedir('data/foo.i')
36 >>> decodedir('data/foo.i')
37 'data/foo.i'
37 'data/foo.i'
38 >>> decodedir('data/foo.i.hg/bla.i')
38 >>> decodedir('data/foo.i.hg/bla.i')
39 'data/foo.i/bla.i'
39 'data/foo.i/bla.i'
40 >>> decodedir('data/foo.i.hg.hg/bla.i')
40 >>> decodedir('data/foo.i.hg.hg/bla.i')
41 'data/foo.i.hg/bla.i'
41 'data/foo.i.hg/bla.i'
42 '''
42 '''
43 if ".hg/" not in path:
43 if ".hg/" not in path:
44 return path
44 return path
45 return (path
45 return (path
46 .replace(".d.hg/", ".d/")
46 .replace(".d.hg/", ".d/")
47 .replace(".i.hg/", ".i/")
47 .replace(".i.hg/", ".i/")
48 .replace(".hg.hg/", ".hg/"))
48 .replace(".hg.hg/", ".hg/"))
49
49
50 def _buildencodefun():
50 def _buildencodefun():
51 '''
51 '''
52 >>> enc, dec = _buildencodefun()
52 >>> enc, dec = _buildencodefun()
53
53
54 >>> enc('nothing/special.txt')
54 >>> enc('nothing/special.txt')
55 'nothing/special.txt'
55 'nothing/special.txt'
56 >>> dec('nothing/special.txt')
56 >>> dec('nothing/special.txt')
57 'nothing/special.txt'
57 'nothing/special.txt'
58
58
59 >>> enc('HELLO')
59 >>> enc('HELLO')
60 '_h_e_l_l_o'
60 '_h_e_l_l_o'
61 >>> dec('_h_e_l_l_o')
61 >>> dec('_h_e_l_l_o')
62 'HELLO'
62 'HELLO'
63
63
64 >>> enc('hello:world?')
64 >>> enc('hello:world?')
65 'hello~3aworld~3f'
65 'hello~3aworld~3f'
66 >>> dec('hello~3aworld~3f')
66 >>> dec('hello~3aworld~3f')
67 'hello:world?'
67 'hello:world?'
68
68
69 >>> enc('the\x07quick\xADshot')
69 >>> enc('the\x07quick\xADshot')
70 'the~07quick~adshot'
70 'the~07quick~adshot'
71 >>> dec('the~07quick~adshot')
71 >>> dec('the~07quick~adshot')
72 'the\\x07quick\\xadshot'
72 'the\\x07quick\\xadshot'
73 '''
73 '''
74 e = '_'
74 e = '_'
75 winreserved = [ord(x) for x in '\\:*?"<>|']
75 winreserved = [ord(x) for x in '\\:*?"<>|']
76 cmap = dict([(chr(x), chr(x)) for x in xrange(127)])
76 cmap = dict([(chr(x), chr(x)) for x in xrange(127)])
77 for x in (range(32) + range(126, 256) + winreserved):
77 for x in (range(32) + range(126, 256) + winreserved):
78 cmap[chr(x)] = "~%02x" % x
78 cmap[chr(x)] = "~%02x" % x
79 for x in range(ord("A"), ord("Z") + 1) + [ord(e)]:
79 for x in range(ord("A"), ord("Z") + 1) + [ord(e)]:
80 cmap[chr(x)] = e + chr(x).lower()
80 cmap[chr(x)] = e + chr(x).lower()
81 dmap = {}
81 dmap = {}
82 for k, v in cmap.iteritems():
82 for k, v in cmap.iteritems():
83 dmap[v] = k
83 dmap[v] = k
84 def decode(s):
84 def decode(s):
85 i = 0
85 i = 0
86 while i < len(s):
86 while i < len(s):
87 for l in xrange(1, 4):
87 for l in xrange(1, 4):
88 try:
88 try:
89 yield dmap[s[i:i + l]]
89 yield dmap[s[i:i + l]]
90 i += l
90 i += l
91 break
91 break
92 except KeyError:
92 except KeyError:
93 pass
93 pass
94 else:
94 else:
95 raise KeyError
95 raise KeyError
96 return (lambda s: ''.join([cmap[c] for c in s]),
96 return (lambda s: ''.join([cmap[c] for c in s]),
97 lambda s: ''.join(list(decode(s))))
97 lambda s: ''.join(list(decode(s))))
98
98
99 _encodefname, _decodefname = _buildencodefun()
99 _encodefname, _decodefname = _buildencodefun()
100
100
101 def encodefilename(s):
101 def encodefilename(s):
102 '''
102 '''
103 >>> encodefilename('foo.i/bar.d/bla.hg/hi:world?/HELLO')
103 >>> encodefilename('foo.i/bar.d/bla.hg/hi:world?/HELLO')
104 'foo.i.hg/bar.d.hg/bla.hg.hg/hi~3aworld~3f/_h_e_l_l_o'
104 'foo.i.hg/bar.d.hg/bla.hg.hg/hi~3aworld~3f/_h_e_l_l_o'
105 '''
105 '''
106 return _encodefname(encodedir(s))
106 return _encodefname(encodedir(s))
107
107
108 def decodefilename(s):
108 def decodefilename(s):
109 '''
109 '''
110 >>> decodefilename('foo.i.hg/bar.d.hg/bla.hg.hg/hi~3aworld~3f/_h_e_l_l_o')
110 >>> decodefilename('foo.i.hg/bar.d.hg/bla.hg.hg/hi~3aworld~3f/_h_e_l_l_o')
111 'foo.i/bar.d/bla.hg/hi:world?/HELLO'
111 'foo.i/bar.d/bla.hg/hi:world?/HELLO'
112 '''
112 '''
113 return decodedir(_decodefname(s))
113 return decodedir(_decodefname(s))
114
114
115 def _buildlowerencodefun():
115 def _buildlowerencodefun():
116 '''
116 '''
117 >>> f = _buildlowerencodefun()
117 >>> f = _buildlowerencodefun()
118 >>> f('nothing/special.txt')
118 >>> f('nothing/special.txt')
119 'nothing/special.txt'
119 'nothing/special.txt'
120 >>> f('HELLO')
120 >>> f('HELLO')
121 'hello'
121 'hello'
122 >>> f('hello:world?')
122 >>> f('hello:world?')
123 'hello~3aworld~3f'
123 'hello~3aworld~3f'
124 >>> f('the\x07quick\xADshot')
124 >>> f('the\x07quick\xADshot')
125 'the~07quick~adshot'
125 'the~07quick~adshot'
126 '''
126 '''
127 winreserved = [ord(x) for x in '\\:*?"<>|']
127 winreserved = [ord(x) for x in '\\:*?"<>|']
128 cmap = dict([(chr(x), chr(x)) for x in xrange(127)])
128 cmap = dict([(chr(x), chr(x)) for x in xrange(127)])
129 for x in (range(32) + range(126, 256) + winreserved):
129 for x in (range(32) + range(126, 256) + winreserved):
130 cmap[chr(x)] = "~%02x" % x
130 cmap[chr(x)] = "~%02x" % x
131 for x in range(ord("A"), ord("Z") + 1):
131 for x in range(ord("A"), ord("Z") + 1):
132 cmap[chr(x)] = chr(x).lower()
132 cmap[chr(x)] = chr(x).lower()
133 return lambda s: "".join([cmap[c] for c in s])
133 return lambda s: "".join([cmap[c] for c in s])
134
134
135 lowerencode = _buildlowerencodefun()
135 lowerencode = getattr(parsers, 'lowerencode', None) or _buildlowerencodefun()
136
136
137 # Windows reserved names: con, prn, aux, nul, com1..com9, lpt1..lpt9
137 # Windows reserved names: con, prn, aux, nul, com1..com9, lpt1..lpt9
138 _winres3 = ('aux', 'con', 'prn', 'nul') # length 3
138 _winres3 = ('aux', 'con', 'prn', 'nul') # length 3
139 _winres4 = ('com', 'lpt') # length 4 (with trailing 1..9)
139 _winres4 = ('com', 'lpt') # length 4 (with trailing 1..9)
140 def _auxencode(path, dotencode):
140 def _auxencode(path, dotencode):
141 '''
141 '''
142 Encodes filenames containing names reserved by Windows or which end in
142 Encodes filenames containing names reserved by Windows or which end in
143 period or space. Does not touch other single reserved characters c.
143 period or space. Does not touch other single reserved characters c.
144 Specifically, c in '\\:*?"<>|' or ord(c) <= 31 are *not* encoded here.
144 Specifically, c in '\\:*?"<>|' or ord(c) <= 31 are *not* encoded here.
145 Additionally encodes space or period at the beginning, if dotencode is
145 Additionally encodes space or period at the beginning, if dotencode is
146 True. Parameter path is assumed to be all lowercase.
146 True. Parameter path is assumed to be all lowercase.
147 A segment only needs encoding if a reserved name appears as a
147 A segment only needs encoding if a reserved name appears as a
148 basename (e.g. "aux", "aux.foo"). A directory or file named "foo.aux"
148 basename (e.g. "aux", "aux.foo"). A directory or file named "foo.aux"
149 doesn't need encoding.
149 doesn't need encoding.
150
150
151 >>> s = '.foo/aux.txt/txt.aux/con/prn/nul/foo.'
151 >>> s = '.foo/aux.txt/txt.aux/con/prn/nul/foo.'
152 >>> _auxencode(s.split('/'), True)
152 >>> _auxencode(s.split('/'), True)
153 ['~2efoo', 'au~78.txt', 'txt.aux', 'co~6e', 'pr~6e', 'nu~6c', 'foo~2e']
153 ['~2efoo', 'au~78.txt', 'txt.aux', 'co~6e', 'pr~6e', 'nu~6c', 'foo~2e']
154 >>> s = '.com1com2/lpt9.lpt4.lpt1/conprn/com0/lpt0/foo.'
154 >>> s = '.com1com2/lpt9.lpt4.lpt1/conprn/com0/lpt0/foo.'
155 >>> _auxencode(s.split('/'), False)
155 >>> _auxencode(s.split('/'), False)
156 ['.com1com2', 'lp~749.lpt4.lpt1', 'conprn', 'com0', 'lpt0', 'foo~2e']
156 ['.com1com2', 'lp~749.lpt4.lpt1', 'conprn', 'com0', 'lpt0', 'foo~2e']
157 >>> _auxencode(['foo. '], True)
157 >>> _auxencode(['foo. '], True)
158 ['foo.~20']
158 ['foo.~20']
159 >>> _auxencode([' .foo'], True)
159 >>> _auxencode([' .foo'], True)
160 ['~20.foo']
160 ['~20.foo']
161 '''
161 '''
162 for i, n in enumerate(path):
162 for i, n in enumerate(path):
163 if not n:
163 if not n:
164 continue
164 continue
165 if dotencode and n[0] in '. ':
165 if dotencode and n[0] in '. ':
166 n = "~%02x" % ord(n[0]) + n[1:]
166 n = "~%02x" % ord(n[0]) + n[1:]
167 path[i] = n
167 path[i] = n
168 else:
168 else:
169 l = n.find('.')
169 l = n.find('.')
170 if l == -1:
170 if l == -1:
171 l = len(n)
171 l = len(n)
172 if ((l == 3 and n[:3] in _winres3) or
172 if ((l == 3 and n[:3] in _winres3) or
173 (l == 4 and n[3] <= '9' and n[3] >= '1'
173 (l == 4 and n[3] <= '9' and n[3] >= '1'
174 and n[:3] in _winres4)):
174 and n[:3] in _winres4)):
175 # encode third letter ('aux' -> 'au~78')
175 # encode third letter ('aux' -> 'au~78')
176 ec = "~%02x" % ord(n[2])
176 ec = "~%02x" % ord(n[2])
177 n = n[0:2] + ec + n[3:]
177 n = n[0:2] + ec + n[3:]
178 path[i] = n
178 path[i] = n
179 if n[-1] in '. ':
179 if n[-1] in '. ':
180 # encode last period or space ('foo...' -> 'foo..~2e')
180 # encode last period or space ('foo...' -> 'foo..~2e')
181 path[i] = n[:-1] + "~%02x" % ord(n[-1])
181 path[i] = n[:-1] + "~%02x" % ord(n[-1])
182 return path
182 return path
183
183
184 _maxstorepathlen = 120
184 _maxstorepathlen = 120
185 _dirprefixlen = 8
185 _dirprefixlen = 8
186 _maxshortdirslen = 8 * (_dirprefixlen + 1) - 4
186 _maxshortdirslen = 8 * (_dirprefixlen + 1) - 4
187
187
188 def _hashencode(path, dotencode):
188 def _hashencode(path, dotencode):
189 digest = _sha(path).hexdigest()
189 digest = _sha(path).hexdigest()
190 le = lowerencode(path).split('/')[1:]
190 le = lowerencode(path).split('/')[1:]
191 parts = _auxencode(le, dotencode)
191 parts = _auxencode(le, dotencode)
192 basename = parts[-1]
192 basename = parts[-1]
193 _root, ext = os.path.splitext(basename)
193 _root, ext = os.path.splitext(basename)
194 sdirs = []
194 sdirs = []
195 sdirslen = 0
195 sdirslen = 0
196 for p in parts[:-1]:
196 for p in parts[:-1]:
197 d = p[:_dirprefixlen]
197 d = p[:_dirprefixlen]
198 if d[-1] in '. ':
198 if d[-1] in '. ':
199 # Windows can't access dirs ending in period or space
199 # Windows can't access dirs ending in period or space
200 d = d[:-1] + '_'
200 d = d[:-1] + '_'
201 if sdirslen == 0:
201 if sdirslen == 0:
202 t = len(d)
202 t = len(d)
203 else:
203 else:
204 t = sdirslen + 1 + len(d)
204 t = sdirslen + 1 + len(d)
205 if t > _maxshortdirslen:
205 if t > _maxshortdirslen:
206 break
206 break
207 sdirs.append(d)
207 sdirs.append(d)
208 sdirslen = t
208 sdirslen = t
209 dirs = '/'.join(sdirs)
209 dirs = '/'.join(sdirs)
210 if len(dirs) > 0:
210 if len(dirs) > 0:
211 dirs += '/'
211 dirs += '/'
212 res = 'dh/' + dirs + digest + ext
212 res = 'dh/' + dirs + digest + ext
213 spaceleft = _maxstorepathlen - len(res)
213 spaceleft = _maxstorepathlen - len(res)
214 if spaceleft > 0:
214 if spaceleft > 0:
215 filler = basename[:spaceleft]
215 filler = basename[:spaceleft]
216 res = 'dh/' + dirs + filler + digest + ext
216 res = 'dh/' + dirs + filler + digest + ext
217 return res
217 return res
218
218
219 def _hybridencode(path, dotencode):
219 def _hybridencode(path, dotencode):
220 '''encodes path with a length limit
220 '''encodes path with a length limit
221
221
222 Encodes all paths that begin with 'data/', according to the following.
222 Encodes all paths that begin with 'data/', according to the following.
223
223
224 Default encoding (reversible):
224 Default encoding (reversible):
225
225
226 Encodes all uppercase letters 'X' as '_x'. All reserved or illegal
226 Encodes all uppercase letters 'X' as '_x'. All reserved or illegal
227 characters are encoded as '~xx', where xx is the two digit hex code
227 characters are encoded as '~xx', where xx is the two digit hex code
228 of the character (see encodefilename).
228 of the character (see encodefilename).
229 Relevant path components consisting of Windows reserved filenames are
229 Relevant path components consisting of Windows reserved filenames are
230 masked by encoding the third character ('aux' -> 'au~78', see _auxencode).
230 masked by encoding the third character ('aux' -> 'au~78', see _auxencode).
231
231
232 Hashed encoding (not reversible):
232 Hashed encoding (not reversible):
233
233
234 If the default-encoded path is longer than _maxstorepathlen, a
234 If the default-encoded path is longer than _maxstorepathlen, a
235 non-reversible hybrid hashing of the path is done instead.
235 non-reversible hybrid hashing of the path is done instead.
236 This encoding uses up to _dirprefixlen characters of all directory
236 This encoding uses up to _dirprefixlen characters of all directory
237 levels of the lowerencoded path, but not more levels than can fit into
237 levels of the lowerencoded path, but not more levels than can fit into
238 _maxshortdirslen.
238 _maxshortdirslen.
239 Then follows the filler followed by the sha digest of the full path.
239 Then follows the filler followed by the sha digest of the full path.
240 The filler is the beginning of the basename of the lowerencoded path
240 The filler is the beginning of the basename of the lowerencoded path
241 (the basename is everything after the last path separator). The filler
241 (the basename is everything after the last path separator). The filler
242 is as long as possible, filling in characters from the basename until
242 is as long as possible, filling in characters from the basename until
243 the encoded path has _maxstorepathlen characters (or all chars of the
243 the encoded path has _maxstorepathlen characters (or all chars of the
244 basename have been taken).
244 basename have been taken).
245 The extension (e.g. '.i' or '.d') is preserved.
245 The extension (e.g. '.i' or '.d') is preserved.
246
246
247 The string 'data/' at the beginning is replaced with 'dh/', if the hashed
247 The string 'data/' at the beginning is replaced with 'dh/', if the hashed
248 encoding was used.
248 encoding was used.
249 '''
249 '''
250 path = encodedir(path)
250 path = encodedir(path)
251 ef = _encodefname(path).split('/')
251 ef = _encodefname(path).split('/')
252 res = '/'.join(_auxencode(ef, dotencode))
252 res = '/'.join(_auxencode(ef, dotencode))
253 if len(res) > _maxstorepathlen:
253 if len(res) > _maxstorepathlen:
254 res = _hashencode(path, dotencode)
254 res = _hashencode(path, dotencode)
255 return res
255 return res
256
256
257 def _pathencode(path):
257 def _pathencode(path):
258 if len(path) > _maxstorepathlen:
258 if len(path) > _maxstorepathlen:
259 return None
259 return None
260 ef = _encodefname(encodedir(path)).split('/')
260 ef = _encodefname(encodedir(path)).split('/')
261 res = '/'.join(_auxencode(ef, True))
261 res = '/'.join(_auxencode(ef, True))
262 if len(res) > _maxstorepathlen:
262 if len(res) > _maxstorepathlen:
263 return None
263 return None
264 return res
264 return res
265
265
266 _pathencode = getattr(parsers, 'pathencode', _pathencode)
266 _pathencode = getattr(parsers, 'pathencode', _pathencode)
267
267
268 def _dothybridencode(f):
268 def _dothybridencode(f):
269 ef = _pathencode(f)
269 ef = _pathencode(f)
270 if ef is None:
270 if ef is None:
271 return _hashencode(encodedir(f), True)
271 return _hashencode(encodedir(f), True)
272 return ef
272 return ef
273
273
274 def _plainhybridencode(f):
274 def _plainhybridencode(f):
275 return _hybridencode(f, False)
275 return _hybridencode(f, False)
276
276
277 def _calcmode(vfs):
277 def _calcmode(vfs):
278 try:
278 try:
279 # files in .hg/ will be created using this mode
279 # files in .hg/ will be created using this mode
280 mode = vfs.stat().st_mode
280 mode = vfs.stat().st_mode
281 # avoid some useless chmods
281 # avoid some useless chmods
282 if (0777 & ~util.umask) == (0777 & mode):
282 if (0777 & ~util.umask) == (0777 & mode):
283 mode = None
283 mode = None
284 except OSError:
284 except OSError:
285 mode = None
285 mode = None
286 return mode
286 return mode
287
287
288 _data = ('data 00manifest.d 00manifest.i 00changelog.d 00changelog.i'
288 _data = ('data 00manifest.d 00manifest.i 00changelog.d 00changelog.i'
289 ' phaseroots obsstore')
289 ' phaseroots obsstore')
290
290
291 class basicstore(object):
291 class basicstore(object):
292 '''base class for local repository stores'''
292 '''base class for local repository stores'''
293 def __init__(self, path, vfstype):
293 def __init__(self, path, vfstype):
294 vfs = vfstype(path)
294 vfs = vfstype(path)
295 self.path = vfs.base
295 self.path = vfs.base
296 self.createmode = _calcmode(vfs)
296 self.createmode = _calcmode(vfs)
297 vfs.createmode = self.createmode
297 vfs.createmode = self.createmode
298 self.rawvfs = vfs
298 self.rawvfs = vfs
299 self.vfs = scmutil.filtervfs(vfs, encodedir)
299 self.vfs = scmutil.filtervfs(vfs, encodedir)
300 self.opener = self.vfs
300 self.opener = self.vfs
301
301
302 def join(self, f):
302 def join(self, f):
303 return self.path + '/' + encodedir(f)
303 return self.path + '/' + encodedir(f)
304
304
305 def _walk(self, relpath, recurse):
305 def _walk(self, relpath, recurse):
306 '''yields (unencoded, encoded, size)'''
306 '''yields (unencoded, encoded, size)'''
307 path = self.path
307 path = self.path
308 if relpath:
308 if relpath:
309 path += '/' + relpath
309 path += '/' + relpath
310 striplen = len(self.path) + 1
310 striplen = len(self.path) + 1
311 l = []
311 l = []
312 if self.rawvfs.isdir(path):
312 if self.rawvfs.isdir(path):
313 visit = [path]
313 visit = [path]
314 readdir = self.rawvfs.readdir
314 readdir = self.rawvfs.readdir
315 while visit:
315 while visit:
316 p = visit.pop()
316 p = visit.pop()
317 for f, kind, st in readdir(p, stat=True):
317 for f, kind, st in readdir(p, stat=True):
318 fp = p + '/' + f
318 fp = p + '/' + f
319 if kind == stat.S_IFREG and f[-2:] in ('.d', '.i'):
319 if kind == stat.S_IFREG and f[-2:] in ('.d', '.i'):
320 n = util.pconvert(fp[striplen:])
320 n = util.pconvert(fp[striplen:])
321 l.append((decodedir(n), n, st.st_size))
321 l.append((decodedir(n), n, st.st_size))
322 elif kind == stat.S_IFDIR and recurse:
322 elif kind == stat.S_IFDIR and recurse:
323 visit.append(fp)
323 visit.append(fp)
324 l.sort()
324 l.sort()
325 return l
325 return l
326
326
327 def datafiles(self):
327 def datafiles(self):
328 return self._walk('data', True)
328 return self._walk('data', True)
329
329
330 def walk(self):
330 def walk(self):
331 '''yields (unencoded, encoded, size)'''
331 '''yields (unencoded, encoded, size)'''
332 # yield data files first
332 # yield data files first
333 for x in self.datafiles():
333 for x in self.datafiles():
334 yield x
334 yield x
335 # yield manifest before changelog
335 # yield manifest before changelog
336 for x in reversed(self._walk('', False)):
336 for x in reversed(self._walk('', False)):
337 yield x
337 yield x
338
338
339 def copylist(self):
339 def copylist(self):
340 return ['requires'] + _data.split()
340 return ['requires'] + _data.split()
341
341
342 def write(self):
342 def write(self):
343 pass
343 pass
344
344
345 def __contains__(self, path):
345 def __contains__(self, path):
346 '''Checks if the store contains path'''
346 '''Checks if the store contains path'''
347 path = "/".join(("data", path))
347 path = "/".join(("data", path))
348 # file?
348 # file?
349 if os.path.exists(self.join(path + ".i")):
349 if os.path.exists(self.join(path + ".i")):
350 return True
350 return True
351 # dir?
351 # dir?
352 if not path.endswith("/"):
352 if not path.endswith("/"):
353 path = path + "/"
353 path = path + "/"
354 return os.path.exists(self.join(path))
354 return os.path.exists(self.join(path))
355
355
356 class encodedstore(basicstore):
356 class encodedstore(basicstore):
357 def __init__(self, path, vfstype):
357 def __init__(self, path, vfstype):
358 vfs = vfstype(path + '/store')
358 vfs = vfstype(path + '/store')
359 self.path = vfs.base
359 self.path = vfs.base
360 self.createmode = _calcmode(vfs)
360 self.createmode = _calcmode(vfs)
361 vfs.createmode = self.createmode
361 vfs.createmode = self.createmode
362 self.rawvfs = vfs
362 self.rawvfs = vfs
363 self.vfs = scmutil.filtervfs(vfs, encodefilename)
363 self.vfs = scmutil.filtervfs(vfs, encodefilename)
364 self.opener = self.vfs
364 self.opener = self.vfs
365
365
366 def datafiles(self):
366 def datafiles(self):
367 for a, b, size in self._walk('data', True):
367 for a, b, size in self._walk('data', True):
368 try:
368 try:
369 a = decodefilename(a)
369 a = decodefilename(a)
370 except KeyError:
370 except KeyError:
371 a = None
371 a = None
372 yield a, b, size
372 yield a, b, size
373
373
374 def join(self, f):
374 def join(self, f):
375 return self.path + '/' + encodefilename(f)
375 return self.path + '/' + encodefilename(f)
376
376
377 def copylist(self):
377 def copylist(self):
378 return (['requires', '00changelog.i'] +
378 return (['requires', '00changelog.i'] +
379 ['store/' + f for f in _data.split()])
379 ['store/' + f for f in _data.split()])
380
380
381 class fncache(object):
381 class fncache(object):
382 # the filename used to be partially encoded
382 # the filename used to be partially encoded
383 # hence the encodedir/decodedir dance
383 # hence the encodedir/decodedir dance
384 def __init__(self, vfs):
384 def __init__(self, vfs):
385 self.vfs = vfs
385 self.vfs = vfs
386 self.entries = None
386 self.entries = None
387 self._dirty = False
387 self._dirty = False
388
388
389 def _load(self):
389 def _load(self):
390 '''fill the entries from the fncache file'''
390 '''fill the entries from the fncache file'''
391 self._dirty = False
391 self._dirty = False
392 try:
392 try:
393 fp = self.vfs('fncache', mode='rb')
393 fp = self.vfs('fncache', mode='rb')
394 except IOError:
394 except IOError:
395 # skip nonexistent file
395 # skip nonexistent file
396 self.entries = set()
396 self.entries = set()
397 return
397 return
398 self.entries = set(decodedir(fp.read()).splitlines())
398 self.entries = set(decodedir(fp.read()).splitlines())
399 if '' in self.entries:
399 if '' in self.entries:
400 fp.seek(0)
400 fp.seek(0)
401 for n, line in enumerate(fp):
401 for n, line in enumerate(fp):
402 if not line.rstrip('\n'):
402 if not line.rstrip('\n'):
403 t = _('invalid entry in fncache, line %s') % (n + 1)
403 t = _('invalid entry in fncache, line %s') % (n + 1)
404 raise util.Abort(t)
404 raise util.Abort(t)
405 fp.close()
405 fp.close()
406
406
407 def _write(self, files, atomictemp):
407 def _write(self, files, atomictemp):
408 fp = self.vfs('fncache', mode='wb', atomictemp=atomictemp)
408 fp = self.vfs('fncache', mode='wb', atomictemp=atomictemp)
409 if files:
409 if files:
410 fp.write(encodedir('\n'.join(files) + '\n'))
410 fp.write(encodedir('\n'.join(files) + '\n'))
411 fp.close()
411 fp.close()
412 self._dirty = False
412 self._dirty = False
413
413
414 def rewrite(self, files):
414 def rewrite(self, files):
415 self._write(files, False)
415 self._write(files, False)
416 self.entries = set(files)
416 self.entries = set(files)
417
417
418 def write(self):
418 def write(self):
419 if self._dirty:
419 if self._dirty:
420 self._write(self.entries, True)
420 self._write(self.entries, True)
421
421
422 def add(self, fn):
422 def add(self, fn):
423 if self.entries is None:
423 if self.entries is None:
424 self._load()
424 self._load()
425 if fn not in self.entries:
425 if fn not in self.entries:
426 self._dirty = True
426 self._dirty = True
427 self.entries.add(fn)
427 self.entries.add(fn)
428
428
429 def __contains__(self, fn):
429 def __contains__(self, fn):
430 if self.entries is None:
430 if self.entries is None:
431 self._load()
431 self._load()
432 return fn in self.entries
432 return fn in self.entries
433
433
434 def __iter__(self):
434 def __iter__(self):
435 if self.entries is None:
435 if self.entries is None:
436 self._load()
436 self._load()
437 return iter(self.entries)
437 return iter(self.entries)
438
438
439 class _fncachevfs(scmutil.abstractvfs, scmutil.auditvfs):
439 class _fncachevfs(scmutil.abstractvfs, scmutil.auditvfs):
440 def __init__(self, vfs, fnc, encode):
440 def __init__(self, vfs, fnc, encode):
441 scmutil.auditvfs.__init__(self, vfs)
441 scmutil.auditvfs.__init__(self, vfs)
442 self.fncache = fnc
442 self.fncache = fnc
443 self.encode = encode
443 self.encode = encode
444
444
445 def __call__(self, path, mode='r', *args, **kw):
445 def __call__(self, path, mode='r', *args, **kw):
446 if mode not in ('r', 'rb') and path.startswith('data/'):
446 if mode not in ('r', 'rb') and path.startswith('data/'):
447 self.fncache.add(path)
447 self.fncache.add(path)
448 return self.vfs(self.encode(path), mode, *args, **kw)
448 return self.vfs(self.encode(path), mode, *args, **kw)
449
449
450 def join(self, path):
450 def join(self, path):
451 if path:
451 if path:
452 return self.vfs.join(self.encode(path))
452 return self.vfs.join(self.encode(path))
453 else:
453 else:
454 return self.vfs.join(path)
454 return self.vfs.join(path)
455
455
456 class fncachestore(basicstore):
456 class fncachestore(basicstore):
457 def __init__(self, path, vfstype, dotencode):
457 def __init__(self, path, vfstype, dotencode):
458 if dotencode:
458 if dotencode:
459 encode = _dothybridencode
459 encode = _dothybridencode
460 else:
460 else:
461 encode = _plainhybridencode
461 encode = _plainhybridencode
462 self.encode = encode
462 self.encode = encode
463 vfs = vfstype(path + '/store')
463 vfs = vfstype(path + '/store')
464 self.path = vfs.base
464 self.path = vfs.base
465 self.pathsep = self.path + '/'
465 self.pathsep = self.path + '/'
466 self.createmode = _calcmode(vfs)
466 self.createmode = _calcmode(vfs)
467 vfs.createmode = self.createmode
467 vfs.createmode = self.createmode
468 self.rawvfs = vfs
468 self.rawvfs = vfs
469 fnc = fncache(vfs)
469 fnc = fncache(vfs)
470 self.fncache = fnc
470 self.fncache = fnc
471 self.vfs = _fncachevfs(vfs, fnc, encode)
471 self.vfs = _fncachevfs(vfs, fnc, encode)
472 self.opener = self.vfs
472 self.opener = self.vfs
473
473
474 def join(self, f):
474 def join(self, f):
475 return self.pathsep + self.encode(f)
475 return self.pathsep + self.encode(f)
476
476
477 def getsize(self, path):
477 def getsize(self, path):
478 return self.rawvfs.stat(path).st_size
478 return self.rawvfs.stat(path).st_size
479
479
480 def datafiles(self):
480 def datafiles(self):
481 rewrite = False
481 rewrite = False
482 existing = []
482 existing = []
483 for f in sorted(self.fncache):
483 for f in sorted(self.fncache):
484 ef = self.encode(f)
484 ef = self.encode(f)
485 try:
485 try:
486 yield f, ef, self.getsize(ef)
486 yield f, ef, self.getsize(ef)
487 existing.append(f)
487 existing.append(f)
488 except OSError, err:
488 except OSError, err:
489 if err.errno != errno.ENOENT:
489 if err.errno != errno.ENOENT:
490 raise
490 raise
491 # nonexistent entry
491 # nonexistent entry
492 rewrite = True
492 rewrite = True
493 if rewrite:
493 if rewrite:
494 # rewrite fncache to remove nonexistent entries
494 # rewrite fncache to remove nonexistent entries
495 # (may be caused by rollback / strip)
495 # (may be caused by rollback / strip)
496 self.fncache.rewrite(existing)
496 self.fncache.rewrite(existing)
497
497
498 def copylist(self):
498 def copylist(self):
499 d = ('data dh fncache phaseroots obsstore'
499 d = ('data dh fncache phaseroots obsstore'
500 ' 00manifest.d 00manifest.i 00changelog.d 00changelog.i')
500 ' 00manifest.d 00manifest.i 00changelog.d 00changelog.i')
501 return (['requires', '00changelog.i'] +
501 return (['requires', '00changelog.i'] +
502 ['store/' + f for f in d.split()])
502 ['store/' + f for f in d.split()])
503
503
504 def write(self):
504 def write(self):
505 self.fncache.write()
505 self.fncache.write()
506
506
507 def _exists(self, f):
507 def _exists(self, f):
508 ef = self.encode(f)
508 ef = self.encode(f)
509 try:
509 try:
510 self.getsize(ef)
510 self.getsize(ef)
511 return True
511 return True
512 except OSError, err:
512 except OSError, err:
513 if err.errno != errno.ENOENT:
513 if err.errno != errno.ENOENT:
514 raise
514 raise
515 # nonexistent entry
515 # nonexistent entry
516 return False
516 return False
517
517
518 def __contains__(self, path):
518 def __contains__(self, path):
519 '''Checks if the store contains path'''
519 '''Checks if the store contains path'''
520 path = "/".join(("data", path))
520 path = "/".join(("data", path))
521 # check for files (exact match)
521 # check for files (exact match)
522 e = path + '.i'
522 e = path + '.i'
523 if e in self.fncache and self._exists(e):
523 if e in self.fncache and self._exists(e):
524 return True
524 return True
525 # now check for directories (prefix match)
525 # now check for directories (prefix match)
526 if not path.endswith('/'):
526 if not path.endswith('/'):
527 path += '/'
527 path += '/'
528 for e in self.fncache:
528 for e in self.fncache:
529 if e.startswith(path) and self._exists(e):
529 if e.startswith(path) and self._exists(e):
530 return True
530 return True
531 return False
531 return False
532
532
533 def store(requirements, path, vfstype):
533 def store(requirements, path, vfstype):
534 if 'store' in requirements:
534 if 'store' in requirements:
535 if 'fncache' in requirements:
535 if 'fncache' in requirements:
536 return fncachestore(path, vfstype, 'dotencode' in requirements)
536 return fncachestore(path, vfstype, 'dotencode' in requirements)
537 return encodedstore(path, vfstype)
537 return encodedstore(path, vfstype)
538 return basicstore(path, vfstype)
538 return basicstore(path, vfstype)
General Comments 0
You need to be logged in to leave comments. Login now