##// END OF EJS Templates
mpatch: rewrite pointer overflow checks
Matt Mackall -
r20167:09e41ac6 stable
parent child Browse files
Show More
@@ -1,429 +1,420 b''
1 1 /*
2 2 mpatch.c - efficient binary patching for Mercurial
3 3
4 4 This implements a patch algorithm that's O(m + nlog n) where m is the
5 5 size of the output and n is the number of patches.
6 6
7 7 Given a list of binary patches, it unpacks each into a hunk list,
8 8 then combines the hunk lists with a treewise recursion to form a
9 9 single hunk list. This hunk list is then applied to the original
10 10 text.
11 11
12 12 The text (or binary) fragments are copied directly from their source
13 13 Python objects into a preallocated output string to avoid the
14 14 allocation of intermediate Python objects. Working memory is about 2x
15 15 the total number of hunks.
16 16
17 17 Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
18 18
19 19 This software may be used and distributed according to the terms
20 20 of the GNU General Public License, incorporated herein by reference.
21 21 */
22 22
23 23 #define PY_SSIZE_T_CLEAN
24 24 #include <Python.h>
25 25 #include <stdlib.h>
26 26 #include <string.h>
27 27
28 28 #include "util.h"
29 29
30 30 static char mpatch_doc[] = "Efficient binary patching.";
31 31 static PyObject *mpatch_Error;
32 32
33 33 struct frag {
34 34 int start, end, len;
35 35 const char *data;
36 36 };
37 37
38 38 struct flist {
39 39 struct frag *base, *head, *tail;
40 40 };
41 41
42 42 static struct flist *lalloc(Py_ssize_t size)
43 43 {
44 44 struct flist *a = NULL;
45 45
46 46 if (size < 1)
47 47 size = 1;
48 48
49 49 a = (struct flist *)malloc(sizeof(struct flist));
50 50 if (a) {
51 51 a->base = (struct frag *)malloc(sizeof(struct frag) * size);
52 52 if (a->base) {
53 53 a->head = a->tail = a->base;
54 54 return a;
55 55 }
56 56 free(a);
57 57 a = NULL;
58 58 }
59 59 if (!PyErr_Occurred())
60 60 PyErr_NoMemory();
61 61 return NULL;
62 62 }
63 63
64 64 static void lfree(struct flist *a)
65 65 {
66 66 if (a) {
67 67 free(a->base);
68 68 free(a);
69 69 }
70 70 }
71 71
72 72 static Py_ssize_t lsize(struct flist *a)
73 73 {
74 74 return a->tail - a->head;
75 75 }
76 76
77 77 /* move hunks in source that are less cut to dest, compensating
78 78 for changes in offset. the last hunk may be split if necessary.
79 79 */
80 80 static int gather(struct flist *dest, struct flist *src, int cut, int offset)
81 81 {
82 82 struct frag *d = dest->tail, *s = src->head;
83 83 int postend, c, l;
84 84
85 85 while (s != src->tail) {
86 86 if (s->start + offset >= cut)
87 87 break; /* we've gone far enough */
88 88
89 89 postend = offset + s->start + s->len;
90 90 if (postend <= cut) {
91 91 /* save this hunk */
92 92 offset += s->start + s->len - s->end;
93 93 *d++ = *s++;
94 94 }
95 95 else {
96 96 /* break up this hunk */
97 97 c = cut - offset;
98 98 if (s->end < c)
99 99 c = s->end;
100 100 l = cut - offset - s->start;
101 101 if (s->len < l)
102 102 l = s->len;
103 103
104 104 offset += s->start + l - c;
105 105
106 106 d->start = s->start;
107 107 d->end = c;
108 108 d->len = l;
109 109 d->data = s->data;
110 110 d++;
111 111 s->start = c;
112 112 s->len = s->len - l;
113 113 s->data = s->data + l;
114 114
115 115 break;
116 116 }
117 117 }
118 118
119 119 dest->tail = d;
120 120 src->head = s;
121 121 return offset;
122 122 }
123 123
124 124 /* like gather, but with no output list */
125 125 static int discard(struct flist *src, int cut, int offset)
126 126 {
127 127 struct frag *s = src->head;
128 128 int postend, c, l;
129 129
130 130 while (s != src->tail) {
131 131 if (s->start + offset >= cut)
132 132 break;
133 133
134 134 postend = offset + s->start + s->len;
135 135 if (postend <= cut) {
136 136 offset += s->start + s->len - s->end;
137 137 s++;
138 138 }
139 139 else {
140 140 c = cut - offset;
141 141 if (s->end < c)
142 142 c = s->end;
143 143 l = cut - offset - s->start;
144 144 if (s->len < l)
145 145 l = s->len;
146 146
147 147 offset += s->start + l - c;
148 148 s->start = c;
149 149 s->len = s->len - l;
150 150 s->data = s->data + l;
151 151
152 152 break;
153 153 }
154 154 }
155 155
156 156 src->head = s;
157 157 return offset;
158 158 }
159 159
160 160 /* combine hunk lists a and b, while adjusting b for offset changes in a/
161 161 this deletes a and b and returns the resultant list. */
162 162 static struct flist *combine(struct flist *a, struct flist *b)
163 163 {
164 164 struct flist *c = NULL;
165 165 struct frag *bh, *ct;
166 166 int offset = 0, post;
167 167
168 168 if (a && b)
169 169 c = lalloc((lsize(a) + lsize(b)) * 2);
170 170
171 171 if (c) {
172 172
173 173 for (bh = b->head; bh != b->tail; bh++) {
174 174 /* save old hunks */
175 175 offset = gather(c, a, bh->start, offset);
176 176
177 177 /* discard replaced hunks */
178 178 post = discard(a, bh->end, offset);
179 179
180 180 /* insert new hunk */
181 181 ct = c->tail;
182 182 ct->start = bh->start - offset;
183 183 ct->end = bh->end - post;
184 184 ct->len = bh->len;
185 185 ct->data = bh->data;
186 186 c->tail++;
187 187 offset = post;
188 188 }
189 189
190 190 /* hold on to tail from a */
191 191 memcpy(c->tail, a->head, sizeof(struct frag) * lsize(a));
192 192 c->tail += lsize(a);
193 193 }
194 194
195 195 lfree(a);
196 196 lfree(b);
197 197 return c;
198 198 }
199 199
200 200 /* decode a binary patch into a hunk list */
201 201 static struct flist *decode(const char *bin, Py_ssize_t len)
202 202 {
203 203 struct flist *l;
204 204 struct frag *lt;
205 const char *data = bin + 12, *end = bin + len;
205 int pos = 0;
206 206
207 207 /* assume worst case size, we won't have many of these lists */
208 208 l = lalloc(len / 12);
209 209 if (!l)
210 210 return NULL;
211 211
212 212 lt = l->tail;
213 213
214 while (data <= end) {
215 lt->start = getbe32(bin);
216 lt->end = getbe32(bin + 4);
217 lt->len = getbe32(bin + 8);
214 while (pos >= 0 && pos < len) {
215 lt->start = getbe32(bin + pos);
216 lt->end = getbe32(bin + pos + 4);
217 lt->len = getbe32(bin + pos + 8);
218 218 if (lt->start > lt->end)
219 219 break; /* sanity check */
220 bin = data + lt->len;
221 if (bin < data)
222 break; /* big data + big (bogus) len can wrap around */
223 lt->data = data;
224 data = bin + 12;
220 lt->data = bin + pos + 12;
221 pos += 12 + lt->len;
225 222 lt++;
226 223 }
227 224
228 if (bin != end) {
225 if (pos != len) {
229 226 if (!PyErr_Occurred())
230 227 PyErr_SetString(mpatch_Error, "patch cannot be decoded");
231 228 lfree(l);
232 229 return NULL;
233 230 }
234 231
235 232 l->tail = lt;
236 233 return l;
237 234 }
238 235
239 236 /* calculate the size of resultant text */
240 237 static Py_ssize_t calcsize(Py_ssize_t len, struct flist *l)
241 238 {
242 239 Py_ssize_t outlen = 0, last = 0;
243 240 struct frag *f = l->head;
244 241
245 242 while (f != l->tail) {
246 243 if (f->start < last || f->end > len) {
247 244 if (!PyErr_Occurred())
248 245 PyErr_SetString(mpatch_Error,
249 246 "invalid patch");
250 247 return -1;
251 248 }
252 249 outlen += f->start - last;
253 250 last = f->end;
254 251 outlen += f->len;
255 252 f++;
256 253 }
257 254
258 255 outlen += len - last;
259 256 return outlen;
260 257 }
261 258
262 259 static int apply(char *buf, const char *orig, Py_ssize_t len, struct flist *l)
263 260 {
264 261 struct frag *f = l->head;
265 262 int last = 0;
266 263 char *p = buf;
267 264
268 265 while (f != l->tail) {
269 266 if (f->start < last || f->end > len) {
270 267 if (!PyErr_Occurred())
271 268 PyErr_SetString(mpatch_Error,
272 269 "invalid patch");
273 270 return 0;
274 271 }
275 272 memcpy(p, orig + last, f->start - last);
276 273 p += f->start - last;
277 274 memcpy(p, f->data, f->len);
278 275 last = f->end;
279 276 p += f->len;
280 277 f++;
281 278 }
282 279 memcpy(p, orig + last, len - last);
283 280 return 1;
284 281 }
285 282
286 283 /* recursively generate a patch of all bins between start and end */
287 284 static struct flist *fold(PyObject *bins, Py_ssize_t start, Py_ssize_t end)
288 285 {
289 286 Py_ssize_t len, blen;
290 287 const char *buffer;
291 288
292 289 if (start + 1 == end) {
293 290 /* trivial case, output a decoded list */
294 291 PyObject *tmp = PyList_GetItem(bins, start);
295 292 if (!tmp)
296 293 return NULL;
297 294 if (PyObject_AsCharBuffer(tmp, &buffer, &blen))
298 295 return NULL;
299 296 return decode(buffer, blen);
300 297 }
301 298
302 299 /* divide and conquer, memory management is elsewhere */
303 300 len = (end - start) / 2;
304 301 return combine(fold(bins, start, start + len),
305 302 fold(bins, start + len, end));
306 303 }
307 304
308 305 static PyObject *
309 306 patches(PyObject *self, PyObject *args)
310 307 {
311 308 PyObject *text, *bins, *result;
312 309 struct flist *patch;
313 310 const char *in;
314 311 char *out;
315 312 Py_ssize_t len, outlen, inlen;
316 313
317 314 if (!PyArg_ParseTuple(args, "OO:mpatch", &text, &bins))
318 315 return NULL;
319 316
320 317 len = PyList_Size(bins);
321 318 if (!len) {
322 319 /* nothing to do */
323 320 Py_INCREF(text);
324 321 return text;
325 322 }
326 323
327 324 if (PyObject_AsCharBuffer(text, &in, &inlen))
328 325 return NULL;
329 326
330 327 patch = fold(bins, 0, len);
331 328 if (!patch)
332 329 return NULL;
333 330
334 331 outlen = calcsize(inlen, patch);
335 332 if (outlen < 0) {
336 333 result = NULL;
337 334 goto cleanup;
338 335 }
339 336 result = PyBytes_FromStringAndSize(NULL, outlen);
340 337 if (!result) {
341 338 result = NULL;
342 339 goto cleanup;
343 340 }
344 341 out = PyBytes_AsString(result);
345 342 if (!apply(out, in, inlen, patch)) {
346 343 Py_DECREF(result);
347 344 result = NULL;
348 345 }
349 346 cleanup:
350 347 lfree(patch);
351 348 return result;
352 349 }
353 350
354 351 /* calculate size of a patched file directly */
355 352 static PyObject *
356 353 patchedsize(PyObject *self, PyObject *args)
357 354 {
358 long orig, start, end, len, outlen = 0, last = 0;
355 long orig, start, end, len, outlen = 0, last = 0, pos = 0;
359 356 Py_ssize_t patchlen;
360 char *bin, *binend, *data;
357 char *bin;
361 358
362 359 if (!PyArg_ParseTuple(args, "ls#", &orig, &bin, &patchlen))
363 360 return NULL;
364 361
365 binend = bin + patchlen;
366 data = bin + 12;
367
368 while (data <= binend) {
369 start = getbe32(bin);
370 end = getbe32(bin + 4);
371 len = getbe32(bin + 8);
362 while (pos >= 0 && pos < patchlen) {
363 start = getbe32(bin + pos);
364 end = getbe32(bin + pos + 4);
365 len = getbe32(bin + pos + 8);
372 366 if (start > end)
373 367 break; /* sanity check */
374 bin = data + len;
375 if (bin < data)
376 break; /* big data + big (bogus) len can wrap around */
377 data = bin + 12;
368 pos += 12 + len;
378 369 outlen += start - last;
379 370 last = end;
380 371 outlen += len;
381 372 }
382 373
383 if (bin != binend) {
374 if (pos != patchlen) {
384 375 if (!PyErr_Occurred())
385 376 PyErr_SetString(mpatch_Error, "patch cannot be decoded");
386 377 return NULL;
387 378 }
388 379
389 380 outlen += orig - last;
390 381 return Py_BuildValue("l", outlen);
391 382 }
392 383
393 384 static PyMethodDef methods[] = {
394 385 {"patches", patches, METH_VARARGS, "apply a series of patches\n"},
395 386 {"patchedsize", patchedsize, METH_VARARGS, "calculed patched size\n"},
396 387 {NULL, NULL}
397 388 };
398 389
399 390 #ifdef IS_PY3K
400 391 static struct PyModuleDef mpatch_module = {
401 392 PyModuleDef_HEAD_INIT,
402 393 "mpatch",
403 394 mpatch_doc,
404 395 -1,
405 396 methods
406 397 };
407 398
408 399 PyMODINIT_FUNC PyInit_mpatch(void)
409 400 {
410 401 PyObject *m;
411 402
412 403 m = PyModule_Create(&mpatch_module);
413 404 if (m == NULL)
414 405 return NULL;
415 406
416 407 mpatch_Error = PyErr_NewException("mpatch.mpatchError", NULL, NULL);
417 408 Py_INCREF(mpatch_Error);
418 409 PyModule_AddObject(m, "mpatchError", mpatch_Error);
419 410
420 411 return m;
421 412 }
422 413 #else
423 414 PyMODINIT_FUNC
424 415 initmpatch(void)
425 416 {
426 417 Py_InitModule3("mpatch", methods, mpatch_doc);
427 418 mpatch_Error = PyErr_NewException("mpatch.mpatchError", NULL, NULL);
428 419 }
429 420 #endif
@@ -1,1960 +1,1954 b''
1 1 /*
2 2 parsers.c - efficient content parsing
3 3
4 4 Copyright 2008 Matt Mackall <mpm@selenic.com> and others
5 5
6 6 This software may be used and distributed according to the terms of
7 7 the GNU General Public License, incorporated herein by reference.
8 8 */
9 9
10 10 #include <Python.h>
11 11 #include <ctype.h>
12 12 #include <stddef.h>
13 13 #include <string.h>
14 14
15 15 #include "util.h"
16 16
17 17 static int8_t hextable[256] = {
18 18 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
19 19 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
20 20 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
21 21 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1, /* 0-9 */
22 22 -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* A-F */
23 23 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
24 24 -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* a-f */
25 25 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
26 26 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
27 27 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
28 28 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
29 29 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
30 30 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
31 31 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
32 32 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
33 33 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
34 34 };
35 35
36 36 static inline int hexdigit(const char *p, Py_ssize_t off)
37 37 {
38 38 int8_t val = hextable[(unsigned char)p[off]];
39 39
40 40 if (val >= 0) {
41 41 return val;
42 42 }
43 43
44 44 PyErr_SetString(PyExc_ValueError, "input contains non-hex character");
45 45 return 0;
46 46 }
47 47
48 48 /*
49 49 * Turn a hex-encoded string into binary.
50 50 */
51 51 static PyObject *unhexlify(const char *str, int len)
52 52 {
53 53 PyObject *ret;
54 54 char *d;
55 55 int i;
56 56
57 57 ret = PyBytes_FromStringAndSize(NULL, len / 2);
58 58
59 59 if (!ret)
60 60 return NULL;
61 61
62 62 d = PyBytes_AsString(ret);
63 63
64 64 for (i = 0; i < len;) {
65 65 int hi = hexdigit(str, i++);
66 66 int lo = hexdigit(str, i++);
67 67 *d++ = (hi << 4) | lo;
68 68 }
69 69
70 70 return ret;
71 71 }
72 72
73 73 /*
74 74 * This code assumes that a manifest is stitched together with newline
75 75 * ('\n') characters.
76 76 */
77 77 static PyObject *parse_manifest(PyObject *self, PyObject *args)
78 78 {
79 79 PyObject *mfdict, *fdict;
80 80 char *str, *start, *end;
81 81 int len;
82 82
83 83 if (!PyArg_ParseTuple(args, "O!O!s#:parse_manifest",
84 84 &PyDict_Type, &mfdict,
85 85 &PyDict_Type, &fdict,
86 86 &str, &len))
87 87 goto quit;
88 88
89 89 start = str;
90 90 end = str + len;
91 91 while (start < end) {
92 92 PyObject *file = NULL, *node = NULL;
93 93 PyObject *flags = NULL;
94 94 char *zero = NULL, *newline = NULL;
95 95 ptrdiff_t nlen;
96 96
97 97 zero = memchr(start, '\0', end - start);
98 98 if (!zero) {
99 99 PyErr_SetString(PyExc_ValueError,
100 100 "manifest entry has no separator");
101 101 goto quit;
102 102 }
103 103
104 104 newline = memchr(zero + 1, '\n', end - (zero + 1));
105 105 if (!newline) {
106 106 PyErr_SetString(PyExc_ValueError,
107 107 "manifest contains trailing garbage");
108 108 goto quit;
109 109 }
110 110
111 111 file = PyBytes_FromStringAndSize(start, zero - start);
112 112
113 113 if (!file)
114 114 goto bail;
115 115
116 116 nlen = newline - zero - 1;
117 117
118 118 node = unhexlify(zero + 1, nlen > 40 ? 40 : (int)nlen);
119 119 if (!node)
120 120 goto bail;
121 121
122 122 if (nlen > 40) {
123 123 flags = PyBytes_FromStringAndSize(zero + 41,
124 124 nlen - 40);
125 125 if (!flags)
126 126 goto bail;
127 127
128 128 if (PyDict_SetItem(fdict, file, flags) == -1)
129 129 goto bail;
130 130 }
131 131
132 132 if (PyDict_SetItem(mfdict, file, node) == -1)
133 133 goto bail;
134 134
135 135 start = newline + 1;
136 136
137 137 Py_XDECREF(flags);
138 138 Py_XDECREF(node);
139 139 Py_XDECREF(file);
140 140 continue;
141 141 bail:
142 142 Py_XDECREF(flags);
143 143 Py_XDECREF(node);
144 144 Py_XDECREF(file);
145 145 goto quit;
146 146 }
147 147
148 148 Py_INCREF(Py_None);
149 149 return Py_None;
150 150 quit:
151 151 return NULL;
152 152 }
153 153
154 154 static PyObject *parse_dirstate(PyObject *self, PyObject *args)
155 155 {
156 156 PyObject *dmap, *cmap, *parents = NULL, *ret = NULL;
157 157 PyObject *fname = NULL, *cname = NULL, *entry = NULL;
158 char state, *str, *cur, *end, *cpos;
158 char state, *cur, *str, *cpos;
159 159 int mode, size, mtime;
160 160 unsigned int flen;
161 int len;
161 int len, pos = 40;
162 162
163 163 if (!PyArg_ParseTuple(args, "O!O!s#:parse_dirstate",
164 164 &PyDict_Type, &dmap,
165 165 &PyDict_Type, &cmap,
166 166 &str, &len))
167 167 goto quit;
168 168
169 169 /* read parents */
170 170 if (len < 40)
171 171 goto quit;
172 172
173 173 parents = Py_BuildValue("s#s#", str, 20, str + 20, 20);
174 174 if (!parents)
175 175 goto quit;
176 176
177 177 /* read filenames */
178 cur = str + 40;
179 end = str + len;
180
181 while (cur < end - 17) {
178 while (pos >= 40 && pos < len) {
179 cur = str + pos;
182 180 /* unpack header */
183 181 state = *cur;
184 182 mode = getbe32(cur + 1);
185 183 size = getbe32(cur + 5);
186 184 mtime = getbe32(cur + 9);
187 185 flen = getbe32(cur + 13);
186 pos += 17;
188 187 cur += 17;
189 if (cur + flen > end || cur + flen < cur) {
188 if (flen > len - pos || flen < 0) {
190 189 PyErr_SetString(PyExc_ValueError, "overflow in dirstate");
191 190 goto quit;
192 191 }
193 192
194 193 entry = Py_BuildValue("ciii", state, mode, size, mtime);
195 194 if (!entry)
196 195 goto quit;
197 196 PyObject_GC_UnTrack(entry); /* don't waste time with this */
198 197
199 198 cpos = memchr(cur, 0, flen);
200 199 if (cpos) {
201 200 fname = PyBytes_FromStringAndSize(cur, cpos - cur);
202 201 cname = PyBytes_FromStringAndSize(cpos + 1,
203 202 flen - (cpos - cur) - 1);
204 203 if (!fname || !cname ||
205 204 PyDict_SetItem(cmap, fname, cname) == -1 ||
206 205 PyDict_SetItem(dmap, fname, entry) == -1)
207 206 goto quit;
208 207 Py_DECREF(cname);
209 208 } else {
210 209 fname = PyBytes_FromStringAndSize(cur, flen);
211 210 if (!fname ||
212 211 PyDict_SetItem(dmap, fname, entry) == -1)
213 212 goto quit;
214 213 }
215 cur += flen;
216 214 Py_DECREF(fname);
217 215 Py_DECREF(entry);
218 216 fname = cname = entry = NULL;
217 pos += flen;
219 218 }
220 219
221 220 ret = parents;
222 221 Py_INCREF(ret);
223 222 quit:
224 223 Py_XDECREF(fname);
225 224 Py_XDECREF(cname);
226 225 Py_XDECREF(entry);
227 226 Py_XDECREF(parents);
228 227 return ret;
229 228 }
230 229
231 230 static inline int getintat(PyObject *tuple, int off, uint32_t *v)
232 231 {
233 232 PyObject *o = PyTuple_GET_ITEM(tuple, off);
234 233 long val;
235 234
236 235 if (PyInt_Check(o))
237 236 val = PyInt_AS_LONG(o);
238 237 else if (PyLong_Check(o)) {
239 238 val = PyLong_AsLong(o);
240 239 if (val == -1 && PyErr_Occurred())
241 240 return -1;
242 241 } else {
243 242 PyErr_SetString(PyExc_TypeError, "expected an int or long");
244 243 return -1;
245 244 }
246 245 if (LONG_MAX > INT_MAX && (val > INT_MAX || val < INT_MIN)) {
247 246 PyErr_SetString(PyExc_OverflowError,
248 247 "Python value to large to convert to uint32_t");
249 248 return -1;
250 249 }
251 250 *v = (uint32_t)val;
252 251 return 0;
253 252 }
254 253
255 254 static PyObject *dirstate_unset;
256 255
257 256 /*
258 257 * Efficiently pack a dirstate object into its on-disk format.
259 258 */
260 259 static PyObject *pack_dirstate(PyObject *self, PyObject *args)
261 260 {
262 261 PyObject *packobj = NULL;
263 262 PyObject *map, *copymap, *pl;
264 263 Py_ssize_t nbytes, pos, l;
265 264 PyObject *k, *v, *pn;
266 265 char *p, *s;
267 266 double now;
268 267
269 268 if (!PyArg_ParseTuple(args, "O!O!Od:pack_dirstate",
270 269 &PyDict_Type, &map, &PyDict_Type, &copymap,
271 270 &pl, &now))
272 271 return NULL;
273 272
274 273 if (!PySequence_Check(pl) || PySequence_Size(pl) != 2) {
275 274 PyErr_SetString(PyExc_TypeError, "expected 2-element sequence");
276 275 return NULL;
277 276 }
278 277
279 278 /* Figure out how much we need to allocate. */
280 279 for (nbytes = 40, pos = 0; PyDict_Next(map, &pos, &k, &v);) {
281 280 PyObject *c;
282 281 if (!PyString_Check(k)) {
283 282 PyErr_SetString(PyExc_TypeError, "expected string key");
284 283 goto bail;
285 284 }
286 285 nbytes += PyString_GET_SIZE(k) + 17;
287 286 c = PyDict_GetItem(copymap, k);
288 287 if (c) {
289 288 if (!PyString_Check(c)) {
290 289 PyErr_SetString(PyExc_TypeError,
291 290 "expected string key");
292 291 goto bail;
293 292 }
294 293 nbytes += PyString_GET_SIZE(c) + 1;
295 294 }
296 295 }
297 296
298 297 packobj = PyString_FromStringAndSize(NULL, nbytes);
299 298 if (packobj == NULL)
300 299 goto bail;
301 300
302 301 p = PyString_AS_STRING(packobj);
303 302
304 303 pn = PySequence_ITEM(pl, 0);
305 304 if (PyString_AsStringAndSize(pn, &s, &l) == -1 || l != 20) {
306 305 PyErr_SetString(PyExc_TypeError, "expected a 20-byte hash");
307 306 goto bail;
308 307 }
309 308 memcpy(p, s, l);
310 309 p += 20;
311 310 pn = PySequence_ITEM(pl, 1);
312 311 if (PyString_AsStringAndSize(pn, &s, &l) == -1 || l != 20) {
313 312 PyErr_SetString(PyExc_TypeError, "expected a 20-byte hash");
314 313 goto bail;
315 314 }
316 315 memcpy(p, s, l);
317 316 p += 20;
318 317
319 318 for (pos = 0; PyDict_Next(map, &pos, &k, &v); ) {
320 319 uint32_t mode, size, mtime;
321 320 Py_ssize_t len, l;
322 321 PyObject *o;
323 322 char *s, *t;
324 323
325 324 if (!PyTuple_Check(v) || PyTuple_GET_SIZE(v) != 4) {
326 325 PyErr_SetString(PyExc_TypeError, "expected a 4-tuple");
327 326 goto bail;
328 327 }
329 328 o = PyTuple_GET_ITEM(v, 0);
330 329 if (PyString_AsStringAndSize(o, &s, &l) == -1 || l != 1) {
331 330 PyErr_SetString(PyExc_TypeError, "expected one byte");
332 331 goto bail;
333 332 }
334 333 *p++ = *s;
335 334 if (getintat(v, 1, &mode) == -1)
336 335 goto bail;
337 336 if (getintat(v, 2, &size) == -1)
338 337 goto bail;
339 338 if (getintat(v, 3, &mtime) == -1)
340 339 goto bail;
341 340 if (*s == 'n' && mtime == (uint32_t)now) {
342 341 /* See pure/parsers.py:pack_dirstate for why we do
343 342 * this. */
344 343 if (PyDict_SetItem(map, k, dirstate_unset) == -1)
345 344 goto bail;
346 345 mtime = -1;
347 346 }
348 347 putbe32(mode, p);
349 348 putbe32(size, p + 4);
350 349 putbe32(mtime, p + 8);
351 350 t = p + 12;
352 351 p += 16;
353 352 len = PyString_GET_SIZE(k);
354 353 memcpy(p, PyString_AS_STRING(k), len);
355 354 p += len;
356 355 o = PyDict_GetItem(copymap, k);
357 356 if (o) {
358 357 *p++ = '\0';
359 358 l = PyString_GET_SIZE(o);
360 359 memcpy(p, PyString_AS_STRING(o), l);
361 360 p += l;
362 361 len += l + 1;
363 362 }
364 363 putbe32((uint32_t)len, t);
365 364 }
366 365
367 366 pos = p - PyString_AS_STRING(packobj);
368 367 if (pos != nbytes) {
369 368 PyErr_Format(PyExc_SystemError, "bad dirstate size: %ld != %ld",
370 369 (long)pos, (long)nbytes);
371 370 goto bail;
372 371 }
373 372
374 373 return packobj;
375 374 bail:
376 375 Py_XDECREF(packobj);
377 376 return NULL;
378 377 }
379 378
380 379 /*
381 380 * A base-16 trie for fast node->rev mapping.
382 381 *
383 382 * Positive value is index of the next node in the trie
384 383 * Negative value is a leaf: -(rev + 1)
385 384 * Zero is empty
386 385 */
387 386 typedef struct {
388 387 int children[16];
389 388 } nodetree;
390 389
391 390 /*
392 391 * This class has two behaviours.
393 392 *
394 393 * When used in a list-like way (with integer keys), we decode an
395 394 * entry in a RevlogNG index file on demand. Our last entry is a
396 395 * sentinel, always a nullid. We have limited support for
397 396 * integer-keyed insert and delete, only at elements right before the
398 397 * sentinel.
399 398 *
400 399 * With string keys, we lazily perform a reverse mapping from node to
401 400 * rev, using a base-16 trie.
402 401 */
403 402 typedef struct {
404 403 PyObject_HEAD
405 404 /* Type-specific fields go here. */
406 405 PyObject *data; /* raw bytes of index */
407 406 PyObject **cache; /* cached tuples */
408 407 const char **offsets; /* populated on demand */
409 408 Py_ssize_t raw_length; /* original number of elements */
410 409 Py_ssize_t length; /* current number of elements */
411 410 PyObject *added; /* populated on demand */
412 411 PyObject *headrevs; /* cache, invalidated on changes */
413 412 nodetree *nt; /* base-16 trie */
414 413 int ntlength; /* # nodes in use */
415 414 int ntcapacity; /* # nodes allocated */
416 415 int ntdepth; /* maximum depth of tree */
417 416 int ntsplits; /* # splits performed */
418 417 int ntrev; /* last rev scanned */
419 418 int ntlookups; /* # lookups */
420 419 int ntmisses; /* # lookups that miss the cache */
421 420 int inlined;
422 421 } indexObject;
423 422
424 423 static Py_ssize_t index_length(const indexObject *self)
425 424 {
426 425 if (self->added == NULL)
427 426 return self->length;
428 427 return self->length + PyList_GET_SIZE(self->added);
429 428 }
430 429
431 430 static PyObject *nullentry;
432 431 static const char nullid[20];
433 432
434 433 static long inline_scan(indexObject *self, const char **offsets);
435 434
436 435 #if LONG_MAX == 0x7fffffffL
437 436 static char *tuple_format = "Kiiiiiis#";
438 437 #else
439 438 static char *tuple_format = "kiiiiiis#";
440 439 #endif
441 440
442 441 /* A RevlogNG v1 index entry is 64 bytes long. */
443 442 static const long v1_hdrsize = 64;
444 443
445 444 /*
446 445 * Return a pointer to the beginning of a RevlogNG record.
447 446 */
448 447 static const char *index_deref(indexObject *self, Py_ssize_t pos)
449 448 {
450 449 if (self->inlined && pos > 0) {
451 450 if (self->offsets == NULL) {
452 451 self->offsets = malloc(self->raw_length *
453 452 sizeof(*self->offsets));
454 453 if (self->offsets == NULL)
455 454 return (const char *)PyErr_NoMemory();
456 455 inline_scan(self, self->offsets);
457 456 }
458 457 return self->offsets[pos];
459 458 }
460 459
461 460 return PyString_AS_STRING(self->data) + pos * v1_hdrsize;
462 461 }
463 462
464 463 /*
465 464 * RevlogNG format (all in big endian, data may be inlined):
466 465 * 6 bytes: offset
467 466 * 2 bytes: flags
468 467 * 4 bytes: compressed length
469 468 * 4 bytes: uncompressed length
470 469 * 4 bytes: base revision
471 470 * 4 bytes: link revision
472 471 * 4 bytes: parent 1 revision
473 472 * 4 bytes: parent 2 revision
474 473 * 32 bytes: nodeid (only 20 bytes used)
475 474 */
476 475 static PyObject *index_get(indexObject *self, Py_ssize_t pos)
477 476 {
478 477 uint64_t offset_flags;
479 478 int comp_len, uncomp_len, base_rev, link_rev, parent_1, parent_2;
480 479 const char *c_node_id;
481 480 const char *data;
482 481 Py_ssize_t length = index_length(self);
483 482 PyObject *entry;
484 483
485 484 if (pos < 0)
486 485 pos += length;
487 486
488 487 if (pos < 0 || pos >= length) {
489 488 PyErr_SetString(PyExc_IndexError, "revlog index out of range");
490 489 return NULL;
491 490 }
492 491
493 492 if (pos == length - 1) {
494 493 Py_INCREF(nullentry);
495 494 return nullentry;
496 495 }
497 496
498 497 if (pos >= self->length - 1) {
499 498 PyObject *obj;
500 499 obj = PyList_GET_ITEM(self->added, pos - self->length + 1);
501 500 Py_INCREF(obj);
502 501 return obj;
503 502 }
504 503
505 504 if (self->cache) {
506 505 if (self->cache[pos]) {
507 506 Py_INCREF(self->cache[pos]);
508 507 return self->cache[pos];
509 508 }
510 509 } else {
511 510 self->cache = calloc(self->raw_length, sizeof(PyObject *));
512 511 if (self->cache == NULL)
513 512 return PyErr_NoMemory();
514 513 }
515 514
516 515 data = index_deref(self, pos);
517 516 if (data == NULL)
518 517 return NULL;
519 518
520 519 offset_flags = getbe32(data + 4);
521 520 if (pos == 0) /* mask out version number for the first entry */
522 521 offset_flags &= 0xFFFF;
523 522 else {
524 523 uint32_t offset_high = getbe32(data);
525 524 offset_flags |= ((uint64_t)offset_high) << 32;
526 525 }
527 526
528 527 comp_len = getbe32(data + 8);
529 528 uncomp_len = getbe32(data + 12);
530 529 base_rev = getbe32(data + 16);
531 530 link_rev = getbe32(data + 20);
532 531 parent_1 = getbe32(data + 24);
533 532 parent_2 = getbe32(data + 28);
534 533 c_node_id = data + 32;
535 534
536 535 entry = Py_BuildValue(tuple_format, offset_flags, comp_len,
537 536 uncomp_len, base_rev, link_rev,
538 537 parent_1, parent_2, c_node_id, 20);
539 538
540 539 if (entry) {
541 540 PyObject_GC_UnTrack(entry);
542 541 Py_INCREF(entry);
543 542 }
544 543
545 544 self->cache[pos] = entry;
546 545
547 546 return entry;
548 547 }
549 548
550 549 /*
551 550 * Return the 20-byte SHA of the node corresponding to the given rev.
552 551 */
553 552 static const char *index_node(indexObject *self, Py_ssize_t pos)
554 553 {
555 554 Py_ssize_t length = index_length(self);
556 555 const char *data;
557 556
558 557 if (pos == length - 1 || pos == INT_MAX)
559 558 return nullid;
560 559
561 560 if (pos >= length)
562 561 return NULL;
563 562
564 563 if (pos >= self->length - 1) {
565 564 PyObject *tuple, *str;
566 565 tuple = PyList_GET_ITEM(self->added, pos - self->length + 1);
567 566 str = PyTuple_GetItem(tuple, 7);
568 567 return str ? PyString_AS_STRING(str) : NULL;
569 568 }
570 569
571 570 data = index_deref(self, pos);
572 571 return data ? data + 32 : NULL;
573 572 }
574 573
575 574 static int nt_insert(indexObject *self, const char *node, int rev);
576 575
577 576 static int node_check(PyObject *obj, char **node, Py_ssize_t *nodelen)
578 577 {
579 578 if (PyString_AsStringAndSize(obj, node, nodelen) == -1)
580 579 return -1;
581 580 if (*nodelen == 20)
582 581 return 0;
583 582 PyErr_SetString(PyExc_ValueError, "20-byte hash required");
584 583 return -1;
585 584 }
586 585
587 586 static PyObject *index_insert(indexObject *self, PyObject *args)
588 587 {
589 588 PyObject *obj;
590 589 char *node;
591 590 long offset;
592 591 Py_ssize_t len, nodelen;
593 592
594 593 if (!PyArg_ParseTuple(args, "lO", &offset, &obj))
595 594 return NULL;
596 595
597 596 if (!PyTuple_Check(obj) || PyTuple_GET_SIZE(obj) != 8) {
598 597 PyErr_SetString(PyExc_TypeError, "8-tuple required");
599 598 return NULL;
600 599 }
601 600
602 601 if (node_check(PyTuple_GET_ITEM(obj, 7), &node, &nodelen) == -1)
603 602 return NULL;
604 603
605 604 len = index_length(self);
606 605
607 606 if (offset < 0)
608 607 offset += len;
609 608
610 609 if (offset != len - 1) {
611 610 PyErr_SetString(PyExc_IndexError,
612 611 "insert only supported at index -1");
613 612 return NULL;
614 613 }
615 614
616 615 if (offset > INT_MAX) {
617 616 PyErr_SetString(PyExc_ValueError,
618 617 "currently only 2**31 revs supported");
619 618 return NULL;
620 619 }
621 620
622 621 if (self->added == NULL) {
623 622 self->added = PyList_New(0);
624 623 if (self->added == NULL)
625 624 return NULL;
626 625 }
627 626
628 627 if (PyList_Append(self->added, obj) == -1)
629 628 return NULL;
630 629
631 630 if (self->nt)
632 631 nt_insert(self, node, (int)offset);
633 632
634 633 Py_CLEAR(self->headrevs);
635 634 Py_RETURN_NONE;
636 635 }
637 636
638 637 static void _index_clearcaches(indexObject *self)
639 638 {
640 639 if (self->cache) {
641 640 Py_ssize_t i;
642 641
643 642 for (i = 0; i < self->raw_length; i++)
644 643 Py_CLEAR(self->cache[i]);
645 644 free(self->cache);
646 645 self->cache = NULL;
647 646 }
648 647 if (self->offsets) {
649 648 free(self->offsets);
650 649 self->offsets = NULL;
651 650 }
652 651 if (self->nt) {
653 652 free(self->nt);
654 653 self->nt = NULL;
655 654 }
656 655 Py_CLEAR(self->headrevs);
657 656 }
658 657
659 658 static PyObject *index_clearcaches(indexObject *self)
660 659 {
661 660 _index_clearcaches(self);
662 661 self->ntlength = self->ntcapacity = 0;
663 662 self->ntdepth = self->ntsplits = 0;
664 663 self->ntrev = -1;
665 664 self->ntlookups = self->ntmisses = 0;
666 665 Py_RETURN_NONE;
667 666 }
668 667
669 668 static PyObject *index_stats(indexObject *self)
670 669 {
671 670 PyObject *obj = PyDict_New();
672 671
673 672 if (obj == NULL)
674 673 return NULL;
675 674
676 675 #define istat(__n, __d) \
677 676 if (PyDict_SetItemString(obj, __d, PyInt_FromSsize_t(self->__n)) == -1) \
678 677 goto bail;
679 678
680 679 if (self->added) {
681 680 Py_ssize_t len = PyList_GET_SIZE(self->added);
682 681 if (PyDict_SetItemString(obj, "index entries added",
683 682 PyInt_FromSsize_t(len)) == -1)
684 683 goto bail;
685 684 }
686 685
687 686 if (self->raw_length != self->length - 1)
688 687 istat(raw_length, "revs on disk");
689 688 istat(length, "revs in memory");
690 689 istat(ntcapacity, "node trie capacity");
691 690 istat(ntdepth, "node trie depth");
692 691 istat(ntlength, "node trie count");
693 692 istat(ntlookups, "node trie lookups");
694 693 istat(ntmisses, "node trie misses");
695 694 istat(ntrev, "node trie last rev scanned");
696 695 istat(ntsplits, "node trie splits");
697 696
698 697 #undef istat
699 698
700 699 return obj;
701 700
702 701 bail:
703 702 Py_XDECREF(obj);
704 703 return NULL;
705 704 }
706 705
707 706 /*
708 707 * When we cache a list, we want to be sure the caller can't mutate
709 708 * the cached copy.
710 709 */
711 710 static PyObject *list_copy(PyObject *list)
712 711 {
713 712 Py_ssize_t len = PyList_GET_SIZE(list);
714 713 PyObject *newlist = PyList_New(len);
715 714 Py_ssize_t i;
716 715
717 716 if (newlist == NULL)
718 717 return NULL;
719 718
720 719 for (i = 0; i < len; i++) {
721 720 PyObject *obj = PyList_GET_ITEM(list, i);
722 721 Py_INCREF(obj);
723 722 PyList_SET_ITEM(newlist, i, obj);
724 723 }
725 724
726 725 return newlist;
727 726 }
728 727
729 728 static PyObject *index_headrevs(indexObject *self)
730 729 {
731 730 Py_ssize_t i, len, addlen;
732 731 char *nothead = NULL;
733 732 PyObject *heads;
734 733
735 734 if (self->headrevs)
736 735 return list_copy(self->headrevs);
737 736
738 737 len = index_length(self) - 1;
739 738 heads = PyList_New(0);
740 739 if (heads == NULL)
741 740 goto bail;
742 741 if (len == 0) {
743 742 PyObject *nullid = PyInt_FromLong(-1);
744 743 if (nullid == NULL || PyList_Append(heads, nullid) == -1) {
745 744 Py_XDECREF(nullid);
746 745 goto bail;
747 746 }
748 747 goto done;
749 748 }
750 749
751 750 nothead = calloc(len, 1);
752 751 if (nothead == NULL)
753 752 goto bail;
754 753
755 754 for (i = 0; i < self->raw_length; i++) {
756 755 const char *data = index_deref(self, i);
757 756 int parent_1 = getbe32(data + 24);
758 757 int parent_2 = getbe32(data + 28);
759 758 if (parent_1 >= 0)
760 759 nothead[parent_1] = 1;
761 760 if (parent_2 >= 0)
762 761 nothead[parent_2] = 1;
763 762 }
764 763
765 764 addlen = self->added ? PyList_GET_SIZE(self->added) : 0;
766 765
767 766 for (i = 0; i < addlen; i++) {
768 767 PyObject *rev = PyList_GET_ITEM(self->added, i);
769 768 PyObject *p1 = PyTuple_GET_ITEM(rev, 5);
770 769 PyObject *p2 = PyTuple_GET_ITEM(rev, 6);
771 770 long parent_1, parent_2;
772 771
773 772 if (!PyInt_Check(p1) || !PyInt_Check(p2)) {
774 773 PyErr_SetString(PyExc_TypeError,
775 774 "revlog parents are invalid");
776 775 goto bail;
777 776 }
778 777 parent_1 = PyInt_AS_LONG(p1);
779 778 parent_2 = PyInt_AS_LONG(p2);
780 779 if (parent_1 >= 0)
781 780 nothead[parent_1] = 1;
782 781 if (parent_2 >= 0)
783 782 nothead[parent_2] = 1;
784 783 }
785 784
786 785 for (i = 0; i < len; i++) {
787 786 PyObject *head;
788 787
789 788 if (nothead[i])
790 789 continue;
791 790 head = PyInt_FromLong(i);
792 791 if (head == NULL || PyList_Append(heads, head) == -1) {
793 792 Py_XDECREF(head);
794 793 goto bail;
795 794 }
796 795 }
797 796
798 797 done:
799 798 self->headrevs = heads;
800 799 free(nothead);
801 800 return list_copy(self->headrevs);
802 801 bail:
803 802 Py_XDECREF(heads);
804 803 free(nothead);
805 804 return NULL;
806 805 }
807 806
808 807 static inline int nt_level(const char *node, Py_ssize_t level)
809 808 {
810 809 int v = node[level>>1];
811 810 if (!(level & 1))
812 811 v >>= 4;
813 812 return v & 0xf;
814 813 }
815 814
816 815 /*
817 816 * Return values:
818 817 *
819 818 * -4: match is ambiguous (multiple candidates)
820 819 * -2: not found
821 820 * rest: valid rev
822 821 */
823 822 static int nt_find(indexObject *self, const char *node, Py_ssize_t nodelen,
824 823 int hex)
825 824 {
826 825 int (*getnybble)(const char *, Py_ssize_t) = hex ? hexdigit : nt_level;
827 826 int level, maxlevel, off;
828 827
829 828 if (nodelen == 20 && node[0] == '\0' && memcmp(node, nullid, 20) == 0)
830 829 return -1;
831 830
832 831 if (self->nt == NULL)
833 832 return -2;
834 833
835 834 if (hex)
836 835 maxlevel = nodelen > 40 ? 40 : (int)nodelen;
837 836 else
838 837 maxlevel = nodelen > 20 ? 40 : ((int)nodelen * 2);
839 838
840 839 for (level = off = 0; level < maxlevel; level++) {
841 840 int k = getnybble(node, level);
842 841 nodetree *n = &self->nt[off];
843 842 int v = n->children[k];
844 843
845 844 if (v < 0) {
846 845 const char *n;
847 846 Py_ssize_t i;
848 847
849 848 v = -v - 1;
850 849 n = index_node(self, v);
851 850 if (n == NULL)
852 851 return -2;
853 852 for (i = level; i < maxlevel; i++)
854 853 if (getnybble(node, i) != nt_level(n, i))
855 854 return -2;
856 855 return v;
857 856 }
858 857 if (v == 0)
859 858 return -2;
860 859 off = v;
861 860 }
862 861 /* multiple matches against an ambiguous prefix */
863 862 return -4;
864 863 }
865 864
866 865 static int nt_new(indexObject *self)
867 866 {
868 867 if (self->ntlength == self->ntcapacity) {
869 868 self->ntcapacity *= 2;
870 869 self->nt = realloc(self->nt,
871 870 self->ntcapacity * sizeof(nodetree));
872 871 if (self->nt == NULL) {
873 872 PyErr_SetString(PyExc_MemoryError, "out of memory");
874 873 return -1;
875 874 }
876 875 memset(&self->nt[self->ntlength], 0,
877 876 sizeof(nodetree) * (self->ntcapacity - self->ntlength));
878 877 }
879 878 return self->ntlength++;
880 879 }
881 880
882 881 static int nt_insert(indexObject *self, const char *node, int rev)
883 882 {
884 883 int level = 0;
885 884 int off = 0;
886 885
887 886 while (level < 40) {
888 887 int k = nt_level(node, level);
889 888 nodetree *n;
890 889 int v;
891 890
892 891 n = &self->nt[off];
893 892 v = n->children[k];
894 893
895 894 if (v == 0) {
896 895 n->children[k] = -rev - 1;
897 896 return 0;
898 897 }
899 898 if (v < 0) {
900 899 const char *oldnode = index_node(self, -v - 1);
901 900 int noff;
902 901
903 902 if (!oldnode || !memcmp(oldnode, node, 20)) {
904 903 n->children[k] = -rev - 1;
905 904 return 0;
906 905 }
907 906 noff = nt_new(self);
908 907 if (noff == -1)
909 908 return -1;
910 909 /* self->nt may have been changed by realloc */
911 910 self->nt[off].children[k] = noff;
912 911 off = noff;
913 912 n = &self->nt[off];
914 913 n->children[nt_level(oldnode, ++level)] = v;
915 914 if (level > self->ntdepth)
916 915 self->ntdepth = level;
917 916 self->ntsplits += 1;
918 917 } else {
919 918 level += 1;
920 919 off = v;
921 920 }
922 921 }
923 922
924 923 return -1;
925 924 }
926 925
927 926 static int nt_init(indexObject *self)
928 927 {
929 928 if (self->nt == NULL) {
930 929 self->ntcapacity = self->raw_length < 4
931 930 ? 4 : self->raw_length / 2;
932 931 self->nt = calloc(self->ntcapacity, sizeof(nodetree));
933 932 if (self->nt == NULL) {
934 933 PyErr_NoMemory();
935 934 return -1;
936 935 }
937 936 self->ntlength = 1;
938 937 self->ntrev = (int)index_length(self) - 1;
939 938 self->ntlookups = 1;
940 939 self->ntmisses = 0;
941 940 if (nt_insert(self, nullid, INT_MAX) == -1)
942 941 return -1;
943 942 }
944 943 return 0;
945 944 }
946 945
947 946 /*
948 947 * Return values:
949 948 *
950 949 * -3: error (exception set)
951 950 * -2: not found (no exception set)
952 951 * rest: valid rev
953 952 */
954 953 static int index_find_node(indexObject *self,
955 954 const char *node, Py_ssize_t nodelen)
956 955 {
957 956 int rev;
958 957
959 958 self->ntlookups++;
960 959 rev = nt_find(self, node, nodelen, 0);
961 960 if (rev >= -1)
962 961 return rev;
963 962
964 963 if (nt_init(self) == -1)
965 964 return -3;
966 965
967 966 /*
968 967 * For the first handful of lookups, we scan the entire index,
969 968 * and cache only the matching nodes. This optimizes for cases
970 969 * like "hg tip", where only a few nodes are accessed.
971 970 *
972 971 * After that, we cache every node we visit, using a single
973 972 * scan amortized over multiple lookups. This gives the best
974 973 * bulk performance, e.g. for "hg log".
975 974 */
976 975 if (self->ntmisses++ < 4) {
977 976 for (rev = self->ntrev - 1; rev >= 0; rev--) {
978 977 const char *n = index_node(self, rev);
979 978 if (n == NULL)
980 979 return -2;
981 980 if (memcmp(node, n, nodelen > 20 ? 20 : nodelen) == 0) {
982 981 if (nt_insert(self, n, rev) == -1)
983 982 return -3;
984 983 break;
985 984 }
986 985 }
987 986 } else {
988 987 for (rev = self->ntrev - 1; rev >= 0; rev--) {
989 988 const char *n = index_node(self, rev);
990 989 if (n == NULL) {
991 990 self->ntrev = rev + 1;
992 991 return -2;
993 992 }
994 993 if (nt_insert(self, n, rev) == -1) {
995 994 self->ntrev = rev + 1;
996 995 return -3;
997 996 }
998 997 if (memcmp(node, n, nodelen > 20 ? 20 : nodelen) == 0) {
999 998 break;
1000 999 }
1001 1000 }
1002 1001 self->ntrev = rev;
1003 1002 }
1004 1003
1005 1004 if (rev >= 0)
1006 1005 return rev;
1007 1006 return -2;
1008 1007 }
1009 1008
1010 1009 static PyObject *raise_revlog_error(void)
1011 1010 {
1012 1011 static PyObject *errclass;
1013 1012 PyObject *mod = NULL, *errobj;
1014 1013
1015 1014 if (errclass == NULL) {
1016 1015 PyObject *dict;
1017 1016
1018 1017 mod = PyImport_ImportModule("mercurial.error");
1019 1018 if (mod == NULL)
1020 1019 goto classfail;
1021 1020
1022 1021 dict = PyModule_GetDict(mod);
1023 1022 if (dict == NULL)
1024 1023 goto classfail;
1025 1024
1026 1025 errclass = PyDict_GetItemString(dict, "RevlogError");
1027 1026 if (errclass == NULL) {
1028 1027 PyErr_SetString(PyExc_SystemError,
1029 1028 "could not find RevlogError");
1030 1029 goto classfail;
1031 1030 }
1032 1031 Py_INCREF(errclass);
1033 1032 }
1034 1033
1035 1034 errobj = PyObject_CallFunction(errclass, NULL);
1036 1035 if (errobj == NULL)
1037 1036 return NULL;
1038 1037 PyErr_SetObject(errclass, errobj);
1039 1038 return errobj;
1040 1039
1041 1040 classfail:
1042 1041 Py_XDECREF(mod);
1043 1042 return NULL;
1044 1043 }
1045 1044
1046 1045 static PyObject *index_getitem(indexObject *self, PyObject *value)
1047 1046 {
1048 1047 char *node;
1049 1048 Py_ssize_t nodelen;
1050 1049 int rev;
1051 1050
1052 1051 if (PyInt_Check(value))
1053 1052 return index_get(self, PyInt_AS_LONG(value));
1054 1053
1055 1054 if (node_check(value, &node, &nodelen) == -1)
1056 1055 return NULL;
1057 1056 rev = index_find_node(self, node, nodelen);
1058 1057 if (rev >= -1)
1059 1058 return PyInt_FromLong(rev);
1060 1059 if (rev == -2)
1061 1060 raise_revlog_error();
1062 1061 return NULL;
1063 1062 }
1064 1063
1065 1064 static int nt_partialmatch(indexObject *self, const char *node,
1066 1065 Py_ssize_t nodelen)
1067 1066 {
1068 1067 int rev;
1069 1068
1070 1069 if (nt_init(self) == -1)
1071 1070 return -3;
1072 1071
1073 1072 if (self->ntrev > 0) {
1074 1073 /* ensure that the radix tree is fully populated */
1075 1074 for (rev = self->ntrev - 1; rev >= 0; rev--) {
1076 1075 const char *n = index_node(self, rev);
1077 1076 if (n == NULL)
1078 1077 return -2;
1079 1078 if (nt_insert(self, n, rev) == -1)
1080 1079 return -3;
1081 1080 }
1082 1081 self->ntrev = rev;
1083 1082 }
1084 1083
1085 1084 return nt_find(self, node, nodelen, 1);
1086 1085 }
1087 1086
1088 1087 static PyObject *index_partialmatch(indexObject *self, PyObject *args)
1089 1088 {
1090 1089 const char *fullnode;
1091 1090 int nodelen;
1092 1091 char *node;
1093 1092 int rev, i;
1094 1093
1095 1094 if (!PyArg_ParseTuple(args, "s#", &node, &nodelen))
1096 1095 return NULL;
1097 1096
1098 1097 if (nodelen < 4) {
1099 1098 PyErr_SetString(PyExc_ValueError, "key too short");
1100 1099 return NULL;
1101 1100 }
1102 1101
1103 1102 if (nodelen > 40) {
1104 1103 PyErr_SetString(PyExc_ValueError, "key too long");
1105 1104 return NULL;
1106 1105 }
1107 1106
1108 1107 for (i = 0; i < nodelen; i++)
1109 1108 hexdigit(node, i);
1110 1109 if (PyErr_Occurred()) {
1111 1110 /* input contains non-hex characters */
1112 1111 PyErr_Clear();
1113 1112 Py_RETURN_NONE;
1114 1113 }
1115 1114
1116 1115 rev = nt_partialmatch(self, node, nodelen);
1117 1116
1118 1117 switch (rev) {
1119 1118 case -4:
1120 1119 raise_revlog_error();
1121 1120 case -3:
1122 1121 return NULL;
1123 1122 case -2:
1124 1123 Py_RETURN_NONE;
1125 1124 case -1:
1126 1125 return PyString_FromStringAndSize(nullid, 20);
1127 1126 }
1128 1127
1129 1128 fullnode = index_node(self, rev);
1130 1129 if (fullnode == NULL) {
1131 1130 PyErr_Format(PyExc_IndexError,
1132 1131 "could not access rev %d", rev);
1133 1132 return NULL;
1134 1133 }
1135 1134 return PyString_FromStringAndSize(fullnode, 20);
1136 1135 }
1137 1136
1138 1137 static PyObject *index_m_get(indexObject *self, PyObject *args)
1139 1138 {
1140 1139 Py_ssize_t nodelen;
1141 1140 PyObject *val;
1142 1141 char *node;
1143 1142 int rev;
1144 1143
1145 1144 if (!PyArg_ParseTuple(args, "O", &val))
1146 1145 return NULL;
1147 1146 if (node_check(val, &node, &nodelen) == -1)
1148 1147 return NULL;
1149 1148 rev = index_find_node(self, node, nodelen);
1150 1149 if (rev == -3)
1151 1150 return NULL;
1152 1151 if (rev == -2)
1153 1152 Py_RETURN_NONE;
1154 1153 return PyInt_FromLong(rev);
1155 1154 }
1156 1155
1157 1156 static int index_contains(indexObject *self, PyObject *value)
1158 1157 {
1159 1158 char *node;
1160 1159 Py_ssize_t nodelen;
1161 1160
1162 1161 if (PyInt_Check(value)) {
1163 1162 long rev = PyInt_AS_LONG(value);
1164 1163 return rev >= -1 && rev < index_length(self);
1165 1164 }
1166 1165
1167 1166 if (node_check(value, &node, &nodelen) == -1)
1168 1167 return -1;
1169 1168
1170 1169 switch (index_find_node(self, node, nodelen)) {
1171 1170 case -3:
1172 1171 return -1;
1173 1172 case -2:
1174 1173 return 0;
1175 1174 default:
1176 1175 return 1;
1177 1176 }
1178 1177 }
1179 1178
1180 1179 static inline void index_get_parents(indexObject *self, int rev, int *ps)
1181 1180 {
1182 1181 if (rev >= self->length - 1) {
1183 1182 PyObject *tuple = PyList_GET_ITEM(self->added,
1184 1183 rev - self->length + 1);
1185 1184 ps[0] = (int)PyInt_AS_LONG(PyTuple_GET_ITEM(tuple, 5));
1186 1185 ps[1] = (int)PyInt_AS_LONG(PyTuple_GET_ITEM(tuple, 6));
1187 1186 } else {
1188 1187 const char *data = index_deref(self, rev);
1189 1188 ps[0] = getbe32(data + 24);
1190 1189 ps[1] = getbe32(data + 28);
1191 1190 }
1192 1191 }
1193 1192
1194 1193 typedef uint64_t bitmask;
1195 1194
1196 1195 /*
1197 1196 * Given a disjoint set of revs, return all candidates for the
1198 1197 * greatest common ancestor. In revset notation, this is the set
1199 1198 * "heads(::a and ::b and ...)"
1200 1199 */
1201 1200 static PyObject *find_gca_candidates(indexObject *self, const int *revs,
1202 1201 int revcount)
1203 1202 {
1204 1203 const bitmask allseen = (1ull << revcount) - 1;
1205 1204 const bitmask poison = 1ull << revcount;
1206 1205 PyObject *gca = PyList_New(0);
1207 1206 int i, v, interesting, left;
1208 1207 int maxrev = -1;
1209 1208 long sp;
1210 1209 bitmask *seen;
1211 1210
1212 1211 if (gca == NULL)
1213 1212 return PyErr_NoMemory();
1214 1213
1215 1214 for (i = 0; i < revcount; i++) {
1216 1215 if (revs[i] > maxrev)
1217 1216 maxrev = revs[i];
1218 1217 }
1219 1218
1220 1219 seen = calloc(sizeof(*seen), maxrev + 1);
1221 1220 if (seen == NULL) {
1222 1221 Py_DECREF(gca);
1223 1222 return PyErr_NoMemory();
1224 1223 }
1225 1224
1226 1225 for (i = 0; i < revcount; i++)
1227 1226 seen[revs[i]] = 1ull << i;
1228 1227
1229 1228 interesting = left = revcount;
1230 1229
1231 1230 for (v = maxrev; v >= 0 && interesting; v--) {
1232 1231 long sv = seen[v];
1233 1232 int parents[2];
1234 1233
1235 1234 if (!sv)
1236 1235 continue;
1237 1236
1238 1237 if (sv < poison) {
1239 1238 interesting -= 1;
1240 1239 if (sv == allseen) {
1241 1240 PyObject *obj = PyInt_FromLong(v);
1242 1241 if (obj == NULL)
1243 1242 goto bail;
1244 1243 if (PyList_Append(gca, obj) == -1) {
1245 1244 Py_DECREF(obj);
1246 1245 goto bail;
1247 1246 }
1248 1247 sv |= poison;
1249 1248 for (i = 0; i < revcount; i++) {
1250 1249 if (revs[i] == v) {
1251 1250 if (--left <= 1)
1252 1251 goto done;
1253 1252 break;
1254 1253 }
1255 1254 }
1256 1255 }
1257 1256 }
1258 1257 index_get_parents(self, v, parents);
1259 1258
1260 1259 for (i = 0; i < 2; i++) {
1261 1260 int p = parents[i];
1262 1261 if (p == -1)
1263 1262 continue;
1264 1263 sp = seen[p];
1265 1264 if (sv < poison) {
1266 1265 if (sp == 0) {
1267 1266 seen[p] = sv;
1268 1267 interesting++;
1269 1268 }
1270 1269 else if (sp != sv)
1271 1270 seen[p] |= sv;
1272 1271 } else {
1273 1272 if (sp && sp < poison)
1274 1273 interesting--;
1275 1274 seen[p] = sv;
1276 1275 }
1277 1276 }
1278 1277 }
1279 1278
1280 1279 done:
1281 1280 free(seen);
1282 1281 return gca;
1283 1282 bail:
1284 1283 free(seen);
1285 1284 Py_XDECREF(gca);
1286 1285 return NULL;
1287 1286 }
1288 1287
1289 1288 /*
1290 1289 * Given a disjoint set of revs, return the subset with the longest
1291 1290 * path to the root.
1292 1291 */
1293 1292 static PyObject *find_deepest(indexObject *self, PyObject *revs)
1294 1293 {
1295 1294 const Py_ssize_t revcount = PyList_GET_SIZE(revs);
1296 1295 static const Py_ssize_t capacity = 24;
1297 1296 int *depth, *interesting = NULL;
1298 1297 int i, j, v, ninteresting;
1299 1298 PyObject *dict = NULL, *keys;
1300 1299 long *seen = NULL;
1301 1300 int maxrev = -1;
1302 1301 long final;
1303 1302
1304 1303 if (revcount > capacity) {
1305 1304 PyErr_Format(PyExc_OverflowError,
1306 1305 "bitset size (%ld) > capacity (%ld)",
1307 1306 (long)revcount, (long)capacity);
1308 1307 return NULL;
1309 1308 }
1310 1309
1311 1310 for (i = 0; i < revcount; i++) {
1312 1311 int n = (int)PyInt_AsLong(PyList_GET_ITEM(revs, i));
1313 1312 if (n > maxrev)
1314 1313 maxrev = n;
1315 1314 }
1316 1315
1317 1316 depth = calloc(sizeof(*depth), maxrev + 1);
1318 1317 if (depth == NULL)
1319 1318 return PyErr_NoMemory();
1320 1319
1321 1320 seen = calloc(sizeof(*seen), maxrev + 1);
1322 1321 if (seen == NULL) {
1323 1322 PyErr_NoMemory();
1324 1323 goto bail;
1325 1324 }
1326 1325
1327 1326 interesting = calloc(sizeof(*interesting), 2 << revcount);
1328 1327 if (interesting == NULL) {
1329 1328 PyErr_NoMemory();
1330 1329 goto bail;
1331 1330 }
1332 1331
1333 1332 if (PyList_Sort(revs) == -1)
1334 1333 goto bail;
1335 1334
1336 1335 for (i = 0; i < revcount; i++) {
1337 1336 int n = (int)PyInt_AsLong(PyList_GET_ITEM(revs, i));
1338 1337 long b = 1l << i;
1339 1338 depth[n] = 1;
1340 1339 seen[n] = b;
1341 1340 interesting[b] = 1;
1342 1341 }
1343 1342
1344 1343 ninteresting = (int)revcount;
1345 1344
1346 1345 for (v = maxrev; v >= 0 && ninteresting > 1; v--) {
1347 1346 int dv = depth[v];
1348 1347 int parents[2];
1349 1348 long sv;
1350 1349
1351 1350 if (dv == 0)
1352 1351 continue;
1353 1352
1354 1353 sv = seen[v];
1355 1354 index_get_parents(self, v, parents);
1356 1355
1357 1356 for (i = 0; i < 2; i++) {
1358 1357 int p = parents[i];
1359 1358 long nsp, sp;
1360 1359 int dp;
1361 1360
1362 1361 if (p == -1)
1363 1362 continue;
1364 1363
1365 1364 dp = depth[p];
1366 1365 nsp = sp = seen[p];
1367 1366 if (dp <= dv) {
1368 1367 depth[p] = dv + 1;
1369 1368 if (sp != sv) {
1370 1369 interesting[sv] += 1;
1371 1370 nsp = seen[p] = sv;
1372 1371 if (sp) {
1373 1372 interesting[sp] -= 1;
1374 1373 if (interesting[sp] == 0)
1375 1374 ninteresting -= 1;
1376 1375 }
1377 1376 }
1378 1377 }
1379 1378 else if (dv == dp - 1) {
1380 1379 nsp = sp | sv;
1381 1380 if (nsp == sp)
1382 1381 continue;
1383 1382 seen[p] = nsp;
1384 1383 interesting[sp] -= 1;
1385 1384 if (interesting[sp] == 0 && interesting[nsp] > 0)
1386 1385 ninteresting -= 1;
1387 1386 interesting[nsp] += 1;
1388 1387 }
1389 1388 }
1390 1389 interesting[sv] -= 1;
1391 1390 if (interesting[sv] == 0)
1392 1391 ninteresting -= 1;
1393 1392 }
1394 1393
1395 1394 final = 0;
1396 1395 j = ninteresting;
1397 1396 for (i = 0; i < (int)(2 << revcount) && j > 0; i++) {
1398 1397 if (interesting[i] == 0)
1399 1398 continue;
1400 1399 final |= i;
1401 1400 j -= 1;
1402 1401 }
1403 1402 if (final == 0)
1404 1403 return PyList_New(0);
1405 1404
1406 1405 dict = PyDict_New();
1407 1406 if (dict == NULL)
1408 1407 goto bail;
1409 1408
1410 1409 for (i = 0; i < revcount; i++) {
1411 1410 PyObject *key;
1412 1411
1413 1412 if ((final & (1 << i)) == 0)
1414 1413 continue;
1415 1414
1416 1415 key = PyList_GET_ITEM(revs, i);
1417 1416 Py_INCREF(key);
1418 1417 Py_INCREF(Py_None);
1419 1418 if (PyDict_SetItem(dict, key, Py_None) == -1) {
1420 1419 Py_DECREF(key);
1421 1420 Py_DECREF(Py_None);
1422 1421 goto bail;
1423 1422 }
1424 1423 }
1425 1424
1426 1425 keys = PyDict_Keys(dict);
1427 1426
1428 1427 free(depth);
1429 1428 free(seen);
1430 1429 free(interesting);
1431 1430 Py_DECREF(dict);
1432 1431
1433 1432 return keys;
1434 1433 bail:
1435 1434 free(depth);
1436 1435 free(seen);
1437 1436 free(interesting);
1438 1437 Py_XDECREF(dict);
1439 1438
1440 1439 return NULL;
1441 1440 }
1442 1441
1443 1442 /*
1444 1443 * Given a (possibly overlapping) set of revs, return the greatest
1445 1444 * common ancestors: those with the longest path to the root.
1446 1445 */
1447 1446 static PyObject *index_ancestors(indexObject *self, PyObject *args)
1448 1447 {
1449 1448 PyObject *ret = NULL, *gca = NULL;
1450 1449 Py_ssize_t argcount, i, len;
1451 1450 bitmask repeat = 0;
1452 1451 int revcount = 0;
1453 1452 int *revs;
1454 1453
1455 1454 argcount = PySequence_Length(args);
1456 1455 revs = malloc(argcount * sizeof(*revs));
1457 1456 if (argcount > 0 && revs == NULL)
1458 1457 return PyErr_NoMemory();
1459 1458 len = index_length(self) - 1;
1460 1459
1461 1460 for (i = 0; i < argcount; i++) {
1462 1461 static const int capacity = 24;
1463 1462 PyObject *obj = PySequence_GetItem(args, i);
1464 1463 bitmask x;
1465 1464 long val;
1466 1465
1467 1466 if (!PyInt_Check(obj)) {
1468 1467 PyErr_SetString(PyExc_TypeError,
1469 1468 "arguments must all be ints");
1470 1469 goto bail;
1471 1470 }
1472 1471 val = PyInt_AsLong(obj);
1473 1472 if (val == -1) {
1474 1473 ret = PyList_New(0);
1475 1474 goto done;
1476 1475 }
1477 1476 if (val < 0 || val >= len) {
1478 1477 PyErr_SetString(PyExc_IndexError,
1479 1478 "index out of range");
1480 1479 goto bail;
1481 1480 }
1482 1481 /* this cheesy bloom filter lets us avoid some more
1483 1482 * expensive duplicate checks in the common set-is-disjoint
1484 1483 * case */
1485 1484 x = 1ull << (val & 0x3f);
1486 1485 if (repeat & x) {
1487 1486 int k;
1488 1487 for (k = 0; k < revcount; k++) {
1489 1488 if (val == revs[k])
1490 1489 goto duplicate;
1491 1490 }
1492 1491 }
1493 1492 else repeat |= x;
1494 1493 if (revcount >= capacity) {
1495 1494 PyErr_Format(PyExc_OverflowError,
1496 1495 "bitset size (%d) > capacity (%d)",
1497 1496 revcount, capacity);
1498 1497 goto bail;
1499 1498 }
1500 1499 revs[revcount++] = (int)val;
1501 1500 duplicate:;
1502 1501 }
1503 1502
1504 1503 if (revcount == 0) {
1505 1504 ret = PyList_New(0);
1506 1505 goto done;
1507 1506 }
1508 1507 if (revcount == 1) {
1509 1508 PyObject *obj;
1510 1509 ret = PyList_New(1);
1511 1510 if (ret == NULL)
1512 1511 goto bail;
1513 1512 obj = PyInt_FromLong(revs[0]);
1514 1513 if (obj == NULL)
1515 1514 goto bail;
1516 1515 PyList_SET_ITEM(ret, 0, obj);
1517 1516 goto done;
1518 1517 }
1519 1518
1520 1519 gca = find_gca_candidates(self, revs, revcount);
1521 1520 if (gca == NULL)
1522 1521 goto bail;
1523 1522
1524 1523 if (PyList_GET_SIZE(gca) <= 1) {
1525 1524 ret = gca;
1526 1525 Py_INCREF(gca);
1527 1526 }
1528 1527 else if (PyList_GET_SIZE(gca) == 1) {
1529 1528 ret = PyList_GET_ITEM(gca, 0);
1530 1529 Py_INCREF(ret);
1531 1530 }
1532 1531 else ret = find_deepest(self, gca);
1533 1532
1534 1533 done:
1535 1534 free(revs);
1536 1535 Py_XDECREF(gca);
1537 1536
1538 1537 return ret;
1539 1538
1540 1539 bail:
1541 1540 free(revs);
1542 1541 Py_XDECREF(gca);
1543 1542 Py_XDECREF(ret);
1544 1543 return NULL;
1545 1544 }
1546 1545
1547 1546 /*
1548 1547 * Invalidate any trie entries introduced by added revs.
1549 1548 */
1550 1549 static void nt_invalidate_added(indexObject *self, Py_ssize_t start)
1551 1550 {
1552 1551 Py_ssize_t i, len = PyList_GET_SIZE(self->added);
1553 1552
1554 1553 for (i = start; i < len; i++) {
1555 1554 PyObject *tuple = PyList_GET_ITEM(self->added, i);
1556 1555 PyObject *node = PyTuple_GET_ITEM(tuple, 7);
1557 1556
1558 1557 nt_insert(self, PyString_AS_STRING(node), -1);
1559 1558 }
1560 1559
1561 1560 if (start == 0)
1562 1561 Py_CLEAR(self->added);
1563 1562 }
1564 1563
1565 1564 /*
1566 1565 * Delete a numeric range of revs, which must be at the end of the
1567 1566 * range, but exclude the sentinel nullid entry.
1568 1567 */
1569 1568 static int index_slice_del(indexObject *self, PyObject *item)
1570 1569 {
1571 1570 Py_ssize_t start, stop, step, slicelength;
1572 1571 Py_ssize_t length = index_length(self);
1573 1572 int ret = 0;
1574 1573
1575 1574 if (PySlice_GetIndicesEx((PySliceObject*)item, length,
1576 1575 &start, &stop, &step, &slicelength) < 0)
1577 1576 return -1;
1578 1577
1579 1578 if (slicelength <= 0)
1580 1579 return 0;
1581 1580
1582 1581 if ((step < 0 && start < stop) || (step > 0 && start > stop))
1583 1582 stop = start;
1584 1583
1585 1584 if (step < 0) {
1586 1585 stop = start + 1;
1587 1586 start = stop + step*(slicelength - 1) - 1;
1588 1587 step = -step;
1589 1588 }
1590 1589
1591 1590 if (step != 1) {
1592 1591 PyErr_SetString(PyExc_ValueError,
1593 1592 "revlog index delete requires step size of 1");
1594 1593 return -1;
1595 1594 }
1596 1595
1597 1596 if (stop != length - 1) {
1598 1597 PyErr_SetString(PyExc_IndexError,
1599 1598 "revlog index deletion indices are invalid");
1600 1599 return -1;
1601 1600 }
1602 1601
1603 1602 if (start < self->length - 1) {
1604 1603 if (self->nt) {
1605 1604 Py_ssize_t i;
1606 1605
1607 1606 for (i = start + 1; i < self->length - 1; i++) {
1608 1607 const char *node = index_node(self, i);
1609 1608
1610 1609 if (node)
1611 1610 nt_insert(self, node, -1);
1612 1611 }
1613 1612 if (self->added)
1614 1613 nt_invalidate_added(self, 0);
1615 1614 if (self->ntrev > start)
1616 1615 self->ntrev = (int)start;
1617 1616 }
1618 1617 self->length = start + 1;
1619 1618 if (start < self->raw_length) {
1620 1619 if (self->cache) {
1621 1620 Py_ssize_t i;
1622 1621 for (i = start; i < self->raw_length; i++)
1623 1622 Py_CLEAR(self->cache[i]);
1624 1623 }
1625 1624 self->raw_length = start;
1626 1625 }
1627 1626 goto done;
1628 1627 }
1629 1628
1630 1629 if (self->nt) {
1631 1630 nt_invalidate_added(self, start - self->length + 1);
1632 1631 if (self->ntrev > start)
1633 1632 self->ntrev = (int)start;
1634 1633 }
1635 1634 if (self->added)
1636 1635 ret = PyList_SetSlice(self->added, start - self->length + 1,
1637 1636 PyList_GET_SIZE(self->added), NULL);
1638 1637 done:
1639 1638 Py_CLEAR(self->headrevs);
1640 1639 return ret;
1641 1640 }
1642 1641
1643 1642 /*
1644 1643 * Supported ops:
1645 1644 *
1646 1645 * slice deletion
1647 1646 * string assignment (extend node->rev mapping)
1648 1647 * string deletion (shrink node->rev mapping)
1649 1648 */
1650 1649 static int index_assign_subscript(indexObject *self, PyObject *item,
1651 1650 PyObject *value)
1652 1651 {
1653 1652 char *node;
1654 1653 Py_ssize_t nodelen;
1655 1654 long rev;
1656 1655
1657 1656 if (PySlice_Check(item) && value == NULL)
1658 1657 return index_slice_del(self, item);
1659 1658
1660 1659 if (node_check(item, &node, &nodelen) == -1)
1661 1660 return -1;
1662 1661
1663 1662 if (value == NULL)
1664 1663 return self->nt ? nt_insert(self, node, -1) : 0;
1665 1664 rev = PyInt_AsLong(value);
1666 1665 if (rev > INT_MAX || rev < 0) {
1667 1666 if (!PyErr_Occurred())
1668 1667 PyErr_SetString(PyExc_ValueError, "rev out of range");
1669 1668 return -1;
1670 1669 }
1671 1670 return nt_insert(self, node, (int)rev);
1672 1671 }
1673 1672
1674 1673 /*
1675 1674 * Find all RevlogNG entries in an index that has inline data. Update
1676 1675 * the optional "offsets" table with those entries.
1677 1676 */
1678 1677 static long inline_scan(indexObject *self, const char **offsets)
1679 1678 {
1680 1679 const char *data = PyString_AS_STRING(self->data);
1681 const char *end = data + PyString_GET_SIZE(self->data);
1680 Py_ssize_t pos = 0;
1681 Py_ssize_t end = PyString_GET_SIZE(self->data);
1682 1682 long incr = v1_hdrsize;
1683 1683 Py_ssize_t len = 0;
1684 1684
1685 while (data + v1_hdrsize <= end) {
1685 while (pos + v1_hdrsize <= end && pos >= 0) {
1686 1686 uint32_t comp_len;
1687 const char *old_data;
1688 1687 /* 3rd element of header is length of compressed inline data */
1689 comp_len = getbe32(data + 8);
1688 comp_len = getbe32(data + pos + 8);
1690 1689 incr = v1_hdrsize + comp_len;
1691 if (incr < v1_hdrsize)
1692 break;
1693 1690 if (offsets)
1694 offsets[len] = data;
1691 offsets[len] = data + pos;
1695 1692 len++;
1696 old_data = data;
1697 data += incr;
1698 if (data <= old_data)
1699 break;
1693 pos += incr;
1700 1694 }
1701 1695
1702 if (data != end && data + v1_hdrsize != end) {
1696 if (pos != end) {
1703 1697 if (!PyErr_Occurred())
1704 1698 PyErr_SetString(PyExc_ValueError, "corrupt index file");
1705 1699 return -1;
1706 1700 }
1707 1701
1708 1702 return len;
1709 1703 }
1710 1704
1711 1705 static int index_init(indexObject *self, PyObject *args)
1712 1706 {
1713 1707 PyObject *data_obj, *inlined_obj;
1714 1708 Py_ssize_t size;
1715 1709
1716 1710 /* Initialize before argument-checking to avoid index_dealloc() crash. */
1717 1711 self->raw_length = 0;
1718 1712 self->added = NULL;
1719 1713 self->cache = NULL;
1720 1714 self->data = NULL;
1721 1715 self->headrevs = NULL;
1722 1716 self->nt = NULL;
1723 1717 self->offsets = NULL;
1724 1718
1725 1719 if (!PyArg_ParseTuple(args, "OO", &data_obj, &inlined_obj))
1726 1720 return -1;
1727 1721 if (!PyString_Check(data_obj)) {
1728 1722 PyErr_SetString(PyExc_TypeError, "data is not a string");
1729 1723 return -1;
1730 1724 }
1731 1725 size = PyString_GET_SIZE(data_obj);
1732 1726
1733 1727 self->inlined = inlined_obj && PyObject_IsTrue(inlined_obj);
1734 1728 self->data = data_obj;
1735 1729
1736 1730 self->ntlength = self->ntcapacity = 0;
1737 1731 self->ntdepth = self->ntsplits = 0;
1738 1732 self->ntlookups = self->ntmisses = 0;
1739 1733 self->ntrev = -1;
1740 1734 Py_INCREF(self->data);
1741 1735
1742 1736 if (self->inlined) {
1743 1737 long len = inline_scan(self, NULL);
1744 1738 if (len == -1)
1745 1739 goto bail;
1746 1740 self->raw_length = len;
1747 1741 self->length = len + 1;
1748 1742 } else {
1749 1743 if (size % v1_hdrsize) {
1750 1744 PyErr_SetString(PyExc_ValueError, "corrupt index file");
1751 1745 goto bail;
1752 1746 }
1753 1747 self->raw_length = size / v1_hdrsize;
1754 1748 self->length = self->raw_length + 1;
1755 1749 }
1756 1750
1757 1751 return 0;
1758 1752 bail:
1759 1753 return -1;
1760 1754 }
1761 1755
1762 1756 static PyObject *index_nodemap(indexObject *self)
1763 1757 {
1764 1758 Py_INCREF(self);
1765 1759 return (PyObject *)self;
1766 1760 }
1767 1761
1768 1762 static void index_dealloc(indexObject *self)
1769 1763 {
1770 1764 _index_clearcaches(self);
1771 1765 Py_XDECREF(self->data);
1772 1766 Py_XDECREF(self->added);
1773 1767 PyObject_Del(self);
1774 1768 }
1775 1769
1776 1770 static PySequenceMethods index_sequence_methods = {
1777 1771 (lenfunc)index_length, /* sq_length */
1778 1772 0, /* sq_concat */
1779 1773 0, /* sq_repeat */
1780 1774 (ssizeargfunc)index_get, /* sq_item */
1781 1775 0, /* sq_slice */
1782 1776 0, /* sq_ass_item */
1783 1777 0, /* sq_ass_slice */
1784 1778 (objobjproc)index_contains, /* sq_contains */
1785 1779 };
1786 1780
1787 1781 static PyMappingMethods index_mapping_methods = {
1788 1782 (lenfunc)index_length, /* mp_length */
1789 1783 (binaryfunc)index_getitem, /* mp_subscript */
1790 1784 (objobjargproc)index_assign_subscript, /* mp_ass_subscript */
1791 1785 };
1792 1786
1793 1787 static PyMethodDef index_methods[] = {
1794 1788 {"ancestors", (PyCFunction)index_ancestors, METH_VARARGS,
1795 1789 "return the gca set of the given revs"},
1796 1790 {"clearcaches", (PyCFunction)index_clearcaches, METH_NOARGS,
1797 1791 "clear the index caches"},
1798 1792 {"get", (PyCFunction)index_m_get, METH_VARARGS,
1799 1793 "get an index entry"},
1800 1794 {"headrevs", (PyCFunction)index_headrevs, METH_NOARGS,
1801 1795 "get head revisions"},
1802 1796 {"insert", (PyCFunction)index_insert, METH_VARARGS,
1803 1797 "insert an index entry"},
1804 1798 {"partialmatch", (PyCFunction)index_partialmatch, METH_VARARGS,
1805 1799 "match a potentially ambiguous node ID"},
1806 1800 {"stats", (PyCFunction)index_stats, METH_NOARGS,
1807 1801 "stats for the index"},
1808 1802 {NULL} /* Sentinel */
1809 1803 };
1810 1804
1811 1805 static PyGetSetDef index_getset[] = {
1812 1806 {"nodemap", (getter)index_nodemap, NULL, "nodemap", NULL},
1813 1807 {NULL} /* Sentinel */
1814 1808 };
1815 1809
1816 1810 static PyTypeObject indexType = {
1817 1811 PyObject_HEAD_INIT(NULL)
1818 1812 0, /* ob_size */
1819 1813 "parsers.index", /* tp_name */
1820 1814 sizeof(indexObject), /* tp_basicsize */
1821 1815 0, /* tp_itemsize */
1822 1816 (destructor)index_dealloc, /* tp_dealloc */
1823 1817 0, /* tp_print */
1824 1818 0, /* tp_getattr */
1825 1819 0, /* tp_setattr */
1826 1820 0, /* tp_compare */
1827 1821 0, /* tp_repr */
1828 1822 0, /* tp_as_number */
1829 1823 &index_sequence_methods, /* tp_as_sequence */
1830 1824 &index_mapping_methods, /* tp_as_mapping */
1831 1825 0, /* tp_hash */
1832 1826 0, /* tp_call */
1833 1827 0, /* tp_str */
1834 1828 0, /* tp_getattro */
1835 1829 0, /* tp_setattro */
1836 1830 0, /* tp_as_buffer */
1837 1831 Py_TPFLAGS_DEFAULT, /* tp_flags */
1838 1832 "revlog index", /* tp_doc */
1839 1833 0, /* tp_traverse */
1840 1834 0, /* tp_clear */
1841 1835 0, /* tp_richcompare */
1842 1836 0, /* tp_weaklistoffset */
1843 1837 0, /* tp_iter */
1844 1838 0, /* tp_iternext */
1845 1839 index_methods, /* tp_methods */
1846 1840 0, /* tp_members */
1847 1841 index_getset, /* tp_getset */
1848 1842 0, /* tp_base */
1849 1843 0, /* tp_dict */
1850 1844 0, /* tp_descr_get */
1851 1845 0, /* tp_descr_set */
1852 1846 0, /* tp_dictoffset */
1853 1847 (initproc)index_init, /* tp_init */
1854 1848 0, /* tp_alloc */
1855 1849 };
1856 1850
1857 1851 /*
1858 1852 * returns a tuple of the form (index, index, cache) with elements as
1859 1853 * follows:
1860 1854 *
1861 1855 * index: an index object that lazily parses RevlogNG records
1862 1856 * cache: if data is inlined, a tuple (index_file_content, 0), else None
1863 1857 *
1864 1858 * added complications are for backwards compatibility
1865 1859 */
1866 1860 static PyObject *parse_index2(PyObject *self, PyObject *args)
1867 1861 {
1868 1862 PyObject *tuple = NULL, *cache = NULL;
1869 1863 indexObject *idx;
1870 1864 int ret;
1871 1865
1872 1866 idx = PyObject_New(indexObject, &indexType);
1873 1867 if (idx == NULL)
1874 1868 goto bail;
1875 1869
1876 1870 ret = index_init(idx, args);
1877 1871 if (ret == -1)
1878 1872 goto bail;
1879 1873
1880 1874 if (idx->inlined) {
1881 1875 cache = Py_BuildValue("iO", 0, idx->data);
1882 1876 if (cache == NULL)
1883 1877 goto bail;
1884 1878 } else {
1885 1879 cache = Py_None;
1886 1880 Py_INCREF(cache);
1887 1881 }
1888 1882
1889 1883 tuple = Py_BuildValue("NN", idx, cache);
1890 1884 if (!tuple)
1891 1885 goto bail;
1892 1886 return tuple;
1893 1887
1894 1888 bail:
1895 1889 Py_XDECREF(idx);
1896 1890 Py_XDECREF(cache);
1897 1891 Py_XDECREF(tuple);
1898 1892 return NULL;
1899 1893 }
1900 1894
1901 1895 static char parsers_doc[] = "Efficient content parsing.";
1902 1896
1903 1897 PyObject *encodedir(PyObject *self, PyObject *args);
1904 1898 PyObject *pathencode(PyObject *self, PyObject *args);
1905 1899 PyObject *lowerencode(PyObject *self, PyObject *args);
1906 1900
1907 1901 static PyMethodDef methods[] = {
1908 1902 {"pack_dirstate", pack_dirstate, METH_VARARGS, "pack a dirstate\n"},
1909 1903 {"parse_manifest", parse_manifest, METH_VARARGS, "parse a manifest\n"},
1910 1904 {"parse_dirstate", parse_dirstate, METH_VARARGS, "parse a dirstate\n"},
1911 1905 {"parse_index2", parse_index2, METH_VARARGS, "parse a revlog index\n"},
1912 1906 {"encodedir", encodedir, METH_VARARGS, "encodedir a path\n"},
1913 1907 {"pathencode", pathencode, METH_VARARGS, "fncache-encode a path\n"},
1914 1908 {"lowerencode", lowerencode, METH_VARARGS, "lower-encode a path\n"},
1915 1909 {NULL, NULL}
1916 1910 };
1917 1911
1918 1912 void dirs_module_init(PyObject *mod);
1919 1913
1920 1914 static void module_init(PyObject *mod)
1921 1915 {
1922 1916 dirs_module_init(mod);
1923 1917
1924 1918 indexType.tp_new = PyType_GenericNew;
1925 1919 if (PyType_Ready(&indexType) < 0)
1926 1920 return;
1927 1921 Py_INCREF(&indexType);
1928 1922
1929 1923 PyModule_AddObject(mod, "index", (PyObject *)&indexType);
1930 1924
1931 1925 nullentry = Py_BuildValue("iiiiiiis#", 0, 0, 0,
1932 1926 -1, -1, -1, -1, nullid, 20);
1933 1927 if (nullentry)
1934 1928 PyObject_GC_UnTrack(nullentry);
1935 1929
1936 1930 dirstate_unset = Py_BuildValue("ciii", 'n', 0, -1, -1);
1937 1931 }
1938 1932
1939 1933 #ifdef IS_PY3K
1940 1934 static struct PyModuleDef parsers_module = {
1941 1935 PyModuleDef_HEAD_INIT,
1942 1936 "parsers",
1943 1937 parsers_doc,
1944 1938 -1,
1945 1939 methods
1946 1940 };
1947 1941
1948 1942 PyMODINIT_FUNC PyInit_parsers(void)
1949 1943 {
1950 1944 PyObject *mod = PyModule_Create(&parsers_module);
1951 1945 module_init(mod);
1952 1946 return mod;
1953 1947 }
1954 1948 #else
1955 1949 PyMODINIT_FUNC initparsers(void)
1956 1950 {
1957 1951 PyObject *mod = Py_InitModule3("parsers", methods, parsers_doc);
1958 1952 module_init(mod);
1959 1953 }
1960 1954 #endif
General Comments 0
You need to be logged in to leave comments. Login now