##// END OF EJS Templates
parsers: a C implementation of the new ancestors algorithm...
Bryan O'Sullivan -
r18988:5bae9367 default
parent child Browse files
Show More
@@ -1,1573 +1,1935 b''
1 /*
1 /*
2 parsers.c - efficient content parsing
2 parsers.c - efficient content parsing
3
3
4 Copyright 2008 Matt Mackall <mpm@selenic.com> and others
4 Copyright 2008 Matt Mackall <mpm@selenic.com> and others
5
5
6 This software may be used and distributed according to the terms of
6 This software may be used and distributed according to the terms of
7 the GNU General Public License, incorporated herein by reference.
7 the GNU General Public License, incorporated herein by reference.
8 */
8 */
9
9
10 #include <Python.h>
10 #include <Python.h>
11 #include <ctype.h>
11 #include <ctype.h>
12 #include <stddef.h>
12 #include <stddef.h>
13 #include <string.h>
13 #include <string.h>
14
14
15 #include "util.h"
15 #include "util.h"
16
16
17 static inline int hexdigit(const char *p, Py_ssize_t off)
17 static inline int hexdigit(const char *p, Py_ssize_t off)
18 {
18 {
19 char c = p[off];
19 char c = p[off];
20
20
21 if (c >= '0' && c <= '9')
21 if (c >= '0' && c <= '9')
22 return c - '0';
22 return c - '0';
23 if (c >= 'a' && c <= 'f')
23 if (c >= 'a' && c <= 'f')
24 return c - 'a' + 10;
24 return c - 'a' + 10;
25 if (c >= 'A' && c <= 'F')
25 if (c >= 'A' && c <= 'F')
26 return c - 'A' + 10;
26 return c - 'A' + 10;
27
27
28 PyErr_SetString(PyExc_ValueError, "input contains non-hex character");
28 PyErr_SetString(PyExc_ValueError, "input contains non-hex character");
29 return 0;
29 return 0;
30 }
30 }
31
31
32 /*
32 /*
33 * Turn a hex-encoded string into binary.
33 * Turn a hex-encoded string into binary.
34 */
34 */
35 static PyObject *unhexlify(const char *str, int len)
35 static PyObject *unhexlify(const char *str, int len)
36 {
36 {
37 PyObject *ret;
37 PyObject *ret;
38 char *d;
38 char *d;
39 int i;
39 int i;
40
40
41 ret = PyBytes_FromStringAndSize(NULL, len / 2);
41 ret = PyBytes_FromStringAndSize(NULL, len / 2);
42
42
43 if (!ret)
43 if (!ret)
44 return NULL;
44 return NULL;
45
45
46 d = PyBytes_AsString(ret);
46 d = PyBytes_AsString(ret);
47
47
48 for (i = 0; i < len;) {
48 for (i = 0; i < len;) {
49 int hi = hexdigit(str, i++);
49 int hi = hexdigit(str, i++);
50 int lo = hexdigit(str, i++);
50 int lo = hexdigit(str, i++);
51 *d++ = (hi << 4) | lo;
51 *d++ = (hi << 4) | lo;
52 }
52 }
53
53
54 return ret;
54 return ret;
55 }
55 }
56
56
57 /*
57 /*
58 * This code assumes that a manifest is stitched together with newline
58 * This code assumes that a manifest is stitched together with newline
59 * ('\n') characters.
59 * ('\n') characters.
60 */
60 */
61 static PyObject *parse_manifest(PyObject *self, PyObject *args)
61 static PyObject *parse_manifest(PyObject *self, PyObject *args)
62 {
62 {
63 PyObject *mfdict, *fdict;
63 PyObject *mfdict, *fdict;
64 char *str, *cur, *start, *zero;
64 char *str, *cur, *start, *zero;
65 int len;
65 int len;
66
66
67 if (!PyArg_ParseTuple(args, "O!O!s#:parse_manifest",
67 if (!PyArg_ParseTuple(args, "O!O!s#:parse_manifest",
68 &PyDict_Type, &mfdict,
68 &PyDict_Type, &mfdict,
69 &PyDict_Type, &fdict,
69 &PyDict_Type, &fdict,
70 &str, &len))
70 &str, &len))
71 goto quit;
71 goto quit;
72
72
73 for (start = cur = str, zero = NULL; cur < str + len; cur++) {
73 for (start = cur = str, zero = NULL; cur < str + len; cur++) {
74 PyObject *file = NULL, *node = NULL;
74 PyObject *file = NULL, *node = NULL;
75 PyObject *flags = NULL;
75 PyObject *flags = NULL;
76 ptrdiff_t nlen;
76 ptrdiff_t nlen;
77
77
78 if (!*cur) {
78 if (!*cur) {
79 zero = cur;
79 zero = cur;
80 continue;
80 continue;
81 }
81 }
82 else if (*cur != '\n')
82 else if (*cur != '\n')
83 continue;
83 continue;
84
84
85 if (!zero) {
85 if (!zero) {
86 PyErr_SetString(PyExc_ValueError,
86 PyErr_SetString(PyExc_ValueError,
87 "manifest entry has no separator");
87 "manifest entry has no separator");
88 goto quit;
88 goto quit;
89 }
89 }
90
90
91 file = PyBytes_FromStringAndSize(start, zero - start);
91 file = PyBytes_FromStringAndSize(start, zero - start);
92
92
93 if (!file)
93 if (!file)
94 goto bail;
94 goto bail;
95
95
96 nlen = cur - zero - 1;
96 nlen = cur - zero - 1;
97
97
98 node = unhexlify(zero + 1, nlen > 40 ? 40 : (int)nlen);
98 node = unhexlify(zero + 1, nlen > 40 ? 40 : (int)nlen);
99 if (!node)
99 if (!node)
100 goto bail;
100 goto bail;
101
101
102 if (nlen > 40) {
102 if (nlen > 40) {
103 flags = PyBytes_FromStringAndSize(zero + 41,
103 flags = PyBytes_FromStringAndSize(zero + 41,
104 nlen - 40);
104 nlen - 40);
105 if (!flags)
105 if (!flags)
106 goto bail;
106 goto bail;
107
107
108 if (PyDict_SetItem(fdict, file, flags) == -1)
108 if (PyDict_SetItem(fdict, file, flags) == -1)
109 goto bail;
109 goto bail;
110 }
110 }
111
111
112 if (PyDict_SetItem(mfdict, file, node) == -1)
112 if (PyDict_SetItem(mfdict, file, node) == -1)
113 goto bail;
113 goto bail;
114
114
115 start = cur + 1;
115 start = cur + 1;
116 zero = NULL;
116 zero = NULL;
117
117
118 Py_XDECREF(flags);
118 Py_XDECREF(flags);
119 Py_XDECREF(node);
119 Py_XDECREF(node);
120 Py_XDECREF(file);
120 Py_XDECREF(file);
121 continue;
121 continue;
122 bail:
122 bail:
123 Py_XDECREF(flags);
123 Py_XDECREF(flags);
124 Py_XDECREF(node);
124 Py_XDECREF(node);
125 Py_XDECREF(file);
125 Py_XDECREF(file);
126 goto quit;
126 goto quit;
127 }
127 }
128
128
129 if (len > 0 && *(cur - 1) != '\n') {
129 if (len > 0 && *(cur - 1) != '\n') {
130 PyErr_SetString(PyExc_ValueError,
130 PyErr_SetString(PyExc_ValueError,
131 "manifest contains trailing garbage");
131 "manifest contains trailing garbage");
132 goto quit;
132 goto quit;
133 }
133 }
134
134
135 Py_INCREF(Py_None);
135 Py_INCREF(Py_None);
136 return Py_None;
136 return Py_None;
137 quit:
137 quit:
138 return NULL;
138 return NULL;
139 }
139 }
140
140
/*
 * Parse a dirstate buffer into its two parent nodes plus maps of
 * entries (dmap) and copy sources (cmap).
 *
 * On-disk layout: 40 bytes of parent hashes, then for each file a
 * 17-byte header (1-byte state, 3 big-endian u32s: mode/size/mtime,
 * plus a u32 name length) followed by the name, optionally
 * "<name>\0<copysource>".  Returns a (p1, p2) tuple or NULL on error.
 */
static PyObject *parse_dirstate(PyObject *self, PyObject *args)
{
	PyObject *dmap, *cmap, *parents = NULL, *ret = NULL;
	PyObject *fname = NULL, *cname = NULL, *entry = NULL;
	char *str, *cur, *end, *cpos;
	int state, mode, size, mtime;
	unsigned int flen;
	int len;

	if (!PyArg_ParseTuple(args, "O!O!s#:parse_dirstate",
			      &PyDict_Type, &dmap,
			      &PyDict_Type, &cmap,
			      &str, &len))
		goto quit;

	/* read parents */
	if (len < 40)
		goto quit;

	parents = Py_BuildValue("s#s#", str, 20, str + 20, 20);
	if (!parents)
		goto quit;

	/* read filenames */
	cur = str + 40;
	end = str + len;

	while (cur < end - 17) {
		/* unpack header */
		state = *cur;
		mode = getbe32(cur + 1);
		size = getbe32(cur + 5);
		mtime = getbe32(cur + 9);
		flen = getbe32(cur + 13);
		cur += 17;
		/* second clause catches unsigned wrap-around on huge flen */
		if (cur + flen > end || cur + flen < cur) {
			PyErr_SetString(PyExc_ValueError, "overflow in dirstate");
			goto quit;
		}

		entry = Py_BuildValue("ciii", state, mode, size, mtime);
		if (!entry)
			goto quit;
		PyObject_GC_UnTrack(entry); /* don't waste time with this */

		/* an embedded NUL separates the name from a copy source */
		cpos = memchr(cur, 0, flen);
		if (cpos) {
			fname = PyBytes_FromStringAndSize(cur, cpos - cur);
			cname = PyBytes_FromStringAndSize(cpos + 1,
							   flen - (cpos - cur) - 1);
			if (!fname || !cname ||
			    PyDict_SetItem(cmap, fname, cname) == -1 ||
			    PyDict_SetItem(dmap, fname, entry) == -1)
				goto quit;
			Py_DECREF(cname);
		} else {
			fname = PyBytes_FromStringAndSize(cur, flen);
			if (!fname ||
			    PyDict_SetItem(dmap, fname, entry) == -1)
				goto quit;
		}
		cur += flen;
		Py_DECREF(fname);
		Py_DECREF(entry);
		fname = cname = entry = NULL;
	}

	ret = parents;
	Py_INCREF(ret);
quit:
	/* Py_XDECREF handles both success (all NULL but parents) and bail */
	Py_XDECREF(fname);
	Py_XDECREF(cname);
	Py_XDECREF(entry);
	Py_XDECREF(parents);
	return ret;
}
217
217
218 static inline int getintat(PyObject *tuple, int off, uint32_t *v)
218 static inline int getintat(PyObject *tuple, int off, uint32_t *v)
219 {
219 {
220 PyObject *o = PyTuple_GET_ITEM(tuple, off);
220 PyObject *o = PyTuple_GET_ITEM(tuple, off);
221 long val;
221 long val;
222
222
223 if (PyInt_Check(o))
223 if (PyInt_Check(o))
224 val = PyInt_AS_LONG(o);
224 val = PyInt_AS_LONG(o);
225 else if (PyLong_Check(o)) {
225 else if (PyLong_Check(o)) {
226 val = PyLong_AsLong(o);
226 val = PyLong_AsLong(o);
227 if (val == -1 && PyErr_Occurred())
227 if (val == -1 && PyErr_Occurred())
228 return -1;
228 return -1;
229 } else {
229 } else {
230 PyErr_SetString(PyExc_TypeError, "expected an int or long");
230 PyErr_SetString(PyExc_TypeError, "expected an int or long");
231 return -1;
231 return -1;
232 }
232 }
233 if (LONG_MAX > INT_MAX && (val > INT_MAX || val < INT_MIN)) {
233 if (LONG_MAX > INT_MAX && (val > INT_MAX || val < INT_MIN)) {
234 PyErr_SetString(PyExc_OverflowError,
234 PyErr_SetString(PyExc_OverflowError,
235 "Python value to large to convert to uint32_t");
235 "Python value to large to convert to uint32_t");
236 return -1;
236 return -1;
237 }
237 }
238 *v = (uint32_t)val;
238 *v = (uint32_t)val;
239 return 0;
239 return 0;
240 }
240 }
241
241
242 static PyObject *dirstate_unset;
242 static PyObject *dirstate_unset;
243
243
244 /*
244 /*
245 * Efficiently pack a dirstate object into its on-disk format.
245 * Efficiently pack a dirstate object into its on-disk format.
246 */
246 */
247 static PyObject *pack_dirstate(PyObject *self, PyObject *args)
247 static PyObject *pack_dirstate(PyObject *self, PyObject *args)
248 {
248 {
249 PyObject *packobj = NULL;
249 PyObject *packobj = NULL;
250 PyObject *map, *copymap, *pl;
250 PyObject *map, *copymap, *pl;
251 Py_ssize_t nbytes, pos, l;
251 Py_ssize_t nbytes, pos, l;
252 PyObject *k, *v, *pn;
252 PyObject *k, *v, *pn;
253 char *p, *s;
253 char *p, *s;
254 double now;
254 double now;
255
255
256 if (!PyArg_ParseTuple(args, "O!O!Od:pack_dirstate",
256 if (!PyArg_ParseTuple(args, "O!O!Od:pack_dirstate",
257 &PyDict_Type, &map, &PyDict_Type, &copymap,
257 &PyDict_Type, &map, &PyDict_Type, &copymap,
258 &pl, &now))
258 &pl, &now))
259 return NULL;
259 return NULL;
260
260
261 if (!PySequence_Check(pl) || PySequence_Size(pl) != 2) {
261 if (!PySequence_Check(pl) || PySequence_Size(pl) != 2) {
262 PyErr_SetString(PyExc_TypeError, "expected 2-element sequence");
262 PyErr_SetString(PyExc_TypeError, "expected 2-element sequence");
263 return NULL;
263 return NULL;
264 }
264 }
265
265
266 /* Figure out how much we need to allocate. */
266 /* Figure out how much we need to allocate. */
267 for (nbytes = 40, pos = 0; PyDict_Next(map, &pos, &k, &v);) {
267 for (nbytes = 40, pos = 0; PyDict_Next(map, &pos, &k, &v);) {
268 PyObject *c;
268 PyObject *c;
269 if (!PyString_Check(k)) {
269 if (!PyString_Check(k)) {
270 PyErr_SetString(PyExc_TypeError, "expected string key");
270 PyErr_SetString(PyExc_TypeError, "expected string key");
271 goto bail;
271 goto bail;
272 }
272 }
273 nbytes += PyString_GET_SIZE(k) + 17;
273 nbytes += PyString_GET_SIZE(k) + 17;
274 c = PyDict_GetItem(copymap, k);
274 c = PyDict_GetItem(copymap, k);
275 if (c) {
275 if (c) {
276 if (!PyString_Check(c)) {
276 if (!PyString_Check(c)) {
277 PyErr_SetString(PyExc_TypeError,
277 PyErr_SetString(PyExc_TypeError,
278 "expected string key");
278 "expected string key");
279 goto bail;
279 goto bail;
280 }
280 }
281 nbytes += PyString_GET_SIZE(c) + 1;
281 nbytes += PyString_GET_SIZE(c) + 1;
282 }
282 }
283 }
283 }
284
284
285 packobj = PyString_FromStringAndSize(NULL, nbytes);
285 packobj = PyString_FromStringAndSize(NULL, nbytes);
286 if (packobj == NULL)
286 if (packobj == NULL)
287 goto bail;
287 goto bail;
288
288
289 p = PyString_AS_STRING(packobj);
289 p = PyString_AS_STRING(packobj);
290
290
291 pn = PySequence_ITEM(pl, 0);
291 pn = PySequence_ITEM(pl, 0);
292 if (PyString_AsStringAndSize(pn, &s, &l) == -1 || l != 20) {
292 if (PyString_AsStringAndSize(pn, &s, &l) == -1 || l != 20) {
293 PyErr_SetString(PyExc_TypeError, "expected a 20-byte hash");
293 PyErr_SetString(PyExc_TypeError, "expected a 20-byte hash");
294 goto bail;
294 goto bail;
295 }
295 }
296 memcpy(p, s, l);
296 memcpy(p, s, l);
297 p += 20;
297 p += 20;
298 pn = PySequence_ITEM(pl, 1);
298 pn = PySequence_ITEM(pl, 1);
299 if (PyString_AsStringAndSize(pn, &s, &l) == -1 || l != 20) {
299 if (PyString_AsStringAndSize(pn, &s, &l) == -1 || l != 20) {
300 PyErr_SetString(PyExc_TypeError, "expected a 20-byte hash");
300 PyErr_SetString(PyExc_TypeError, "expected a 20-byte hash");
301 goto bail;
301 goto bail;
302 }
302 }
303 memcpy(p, s, l);
303 memcpy(p, s, l);
304 p += 20;
304 p += 20;
305
305
306 for (pos = 0; PyDict_Next(map, &pos, &k, &v); ) {
306 for (pos = 0; PyDict_Next(map, &pos, &k, &v); ) {
307 uint32_t mode, size, mtime;
307 uint32_t mode, size, mtime;
308 Py_ssize_t len, l;
308 Py_ssize_t len, l;
309 PyObject *o;
309 PyObject *o;
310 char *s, *t;
310 char *s, *t;
311
311
312 if (!PyTuple_Check(v) || PyTuple_GET_SIZE(v) != 4) {
312 if (!PyTuple_Check(v) || PyTuple_GET_SIZE(v) != 4) {
313 PyErr_SetString(PyExc_TypeError, "expected a 4-tuple");
313 PyErr_SetString(PyExc_TypeError, "expected a 4-tuple");
314 goto bail;
314 goto bail;
315 }
315 }
316 o = PyTuple_GET_ITEM(v, 0);
316 o = PyTuple_GET_ITEM(v, 0);
317 if (PyString_AsStringAndSize(o, &s, &l) == -1 || l != 1) {
317 if (PyString_AsStringAndSize(o, &s, &l) == -1 || l != 1) {
318 PyErr_SetString(PyExc_TypeError, "expected one byte");
318 PyErr_SetString(PyExc_TypeError, "expected one byte");
319 goto bail;
319 goto bail;
320 }
320 }
321 *p++ = *s;
321 *p++ = *s;
322 if (getintat(v, 1, &mode) == -1)
322 if (getintat(v, 1, &mode) == -1)
323 goto bail;
323 goto bail;
324 if (getintat(v, 2, &size) == -1)
324 if (getintat(v, 2, &size) == -1)
325 goto bail;
325 goto bail;
326 if (getintat(v, 3, &mtime) == -1)
326 if (getintat(v, 3, &mtime) == -1)
327 goto bail;
327 goto bail;
328 if (*s == 'n' && mtime == (uint32_t)now) {
328 if (*s == 'n' && mtime == (uint32_t)now) {
329 /* See pure/parsers.py:pack_dirstate for why we do
329 /* See pure/parsers.py:pack_dirstate for why we do
330 * this. */
330 * this. */
331 if (PyDict_SetItem(map, k, dirstate_unset) == -1)
331 if (PyDict_SetItem(map, k, dirstate_unset) == -1)
332 goto bail;
332 goto bail;
333 mode = 0, size = -1, mtime = -1;
333 mode = 0, size = -1, mtime = -1;
334 }
334 }
335 putbe32(mode, p);
335 putbe32(mode, p);
336 putbe32(size, p + 4);
336 putbe32(size, p + 4);
337 putbe32(mtime, p + 8);
337 putbe32(mtime, p + 8);
338 t = p + 12;
338 t = p + 12;
339 p += 16;
339 p += 16;
340 len = PyString_GET_SIZE(k);
340 len = PyString_GET_SIZE(k);
341 memcpy(p, PyString_AS_STRING(k), len);
341 memcpy(p, PyString_AS_STRING(k), len);
342 p += len;
342 p += len;
343 o = PyDict_GetItem(copymap, k);
343 o = PyDict_GetItem(copymap, k);
344 if (o) {
344 if (o) {
345 *p++ = '\0';
345 *p++ = '\0';
346 l = PyString_GET_SIZE(o);
346 l = PyString_GET_SIZE(o);
347 memcpy(p, PyString_AS_STRING(o), l);
347 memcpy(p, PyString_AS_STRING(o), l);
348 p += l;
348 p += l;
349 len += l + 1;
349 len += l + 1;
350 }
350 }
351 putbe32((uint32_t)len, t);
351 putbe32((uint32_t)len, t);
352 }
352 }
353
353
354 pos = p - PyString_AS_STRING(packobj);
354 pos = p - PyString_AS_STRING(packobj);
355 if (pos != nbytes) {
355 if (pos != nbytes) {
356 PyErr_Format(PyExc_SystemError, "bad dirstate size: %ld != %ld",
356 PyErr_Format(PyExc_SystemError, "bad dirstate size: %ld != %ld",
357 (long)pos, (long)nbytes);
357 (long)pos, (long)nbytes);
358 goto bail;
358 goto bail;
359 }
359 }
360
360
361 return packobj;
361 return packobj;
362 bail:
362 bail:
363 Py_XDECREF(packobj);
363 Py_XDECREF(packobj);
364 return NULL;
364 return NULL;
365 }
365 }
366
366
/*
 * A base-16 trie for fast node->rev mapping.
 *
 * Each level branches on one hex nybble of the node hash.
 * Positive value is index of the next node in the trie
 * Negative value is a leaf: -(rev + 1)
 * Zero is empty
 */
typedef struct {
	int children[16];
} nodetree;
377
377
/*
 * This class has two behaviours.
 *
 * When used in a list-like way (with integer keys), we decode an
 * entry in a RevlogNG index file on demand. Our last entry is a
 * sentinel, always a nullid.  We have limited support for
 * integer-keyed insert and delete, only at elements right before the
 * sentinel.
 *
 * With string keys, we lazily perform a reverse mapping from node to
 * rev, using a base-16 trie.
 */
typedef struct {
	PyObject_HEAD
	/* Type-specific fields go here. */
	PyObject *data;        /* raw bytes of index */
	PyObject **cache;      /* cached tuples */
	const char **offsets;  /* populated on demand */
	Py_ssize_t raw_length; /* original number of elements */
	Py_ssize_t length;     /* current number of elements */
	PyObject *added;       /* populated on demand */
	PyObject *headrevs;    /* cache, invalidated on changes */
	nodetree *nt;          /* base-16 trie */
	int ntlength;          /* # nodes in use */
	int ntcapacity;        /* # nodes allocated */
	int ntdepth;           /* maximum depth of tree */
	int ntsplits;          /* # splits performed */
	int ntrev;             /* last rev scanned */
	int ntlookups;         /* # lookups */
	int ntmisses;          /* # lookups that miss the cache */
	int inlined;           /* nonzero if index data is inlined */
} indexObject;
410
410
411 static Py_ssize_t index_length(const indexObject *self)
411 static Py_ssize_t index_length(const indexObject *self)
412 {
412 {
413 if (self->added == NULL)
413 if (self->added == NULL)
414 return self->length;
414 return self->length;
415 return self->length + PyList_GET_SIZE(self->added);
415 return self->length + PyList_GET_SIZE(self->added);
416 }
416 }
417
417
418 static PyObject *nullentry;
418 static PyObject *nullentry;
419 static const char nullid[20];
419 static const char nullid[20];
420
420
421 static long inline_scan(indexObject *self, const char **offsets);
421 static long inline_scan(indexObject *self, const char **offsets);
422
422
423 #if LONG_MAX == 0x7fffffffL
423 #if LONG_MAX == 0x7fffffffL
424 static char *tuple_format = "Kiiiiiis#";
424 static char *tuple_format = "Kiiiiiis#";
425 #else
425 #else
426 static char *tuple_format = "kiiiiiis#";
426 static char *tuple_format = "kiiiiiis#";
427 #endif
427 #endif
428
428
429 /* A RevlogNG v1 index entry is 64 bytes long. */
429 /* A RevlogNG v1 index entry is 64 bytes long. */
430 static const long v1_hdrsize = 64;
430 static const long v1_hdrsize = 64;
431
431
432 /*
432 /*
433 * Return a pointer to the beginning of a RevlogNG record.
433 * Return a pointer to the beginning of a RevlogNG record.
434 */
434 */
435 static const char *index_deref(indexObject *self, Py_ssize_t pos)
435 static const char *index_deref(indexObject *self, Py_ssize_t pos)
436 {
436 {
437 if (self->inlined && pos > 0) {
437 if (self->inlined && pos > 0) {
438 if (self->offsets == NULL) {
438 if (self->offsets == NULL) {
439 self->offsets = malloc(self->raw_length *
439 self->offsets = malloc(self->raw_length *
440 sizeof(*self->offsets));
440 sizeof(*self->offsets));
441 if (self->offsets == NULL)
441 if (self->offsets == NULL)
442 return (const char *)PyErr_NoMemory();
442 return (const char *)PyErr_NoMemory();
443 inline_scan(self, self->offsets);
443 inline_scan(self, self->offsets);
444 }
444 }
445 return self->offsets[pos];
445 return self->offsets[pos];
446 }
446 }
447
447
448 return PyString_AS_STRING(self->data) + pos * v1_hdrsize;
448 return PyString_AS_STRING(self->data) + pos * v1_hdrsize;
449 }
449 }
450
450
451 /*
451 /*
452 * RevlogNG format (all in big endian, data may be inlined):
452 * RevlogNG format (all in big endian, data may be inlined):
453 * 6 bytes: offset
453 * 6 bytes: offset
454 * 2 bytes: flags
454 * 2 bytes: flags
455 * 4 bytes: compressed length
455 * 4 bytes: compressed length
456 * 4 bytes: uncompressed length
456 * 4 bytes: uncompressed length
457 * 4 bytes: base revision
457 * 4 bytes: base revision
458 * 4 bytes: link revision
458 * 4 bytes: link revision
459 * 4 bytes: parent 1 revision
459 * 4 bytes: parent 1 revision
460 * 4 bytes: parent 2 revision
460 * 4 bytes: parent 2 revision
461 * 32 bytes: nodeid (only 20 bytes used)
461 * 32 bytes: nodeid (only 20 bytes used)
462 */
462 */
463 static PyObject *index_get(indexObject *self, Py_ssize_t pos)
463 static PyObject *index_get(indexObject *self, Py_ssize_t pos)
464 {
464 {
465 uint64_t offset_flags;
465 uint64_t offset_flags;
466 int comp_len, uncomp_len, base_rev, link_rev, parent_1, parent_2;
466 int comp_len, uncomp_len, base_rev, link_rev, parent_1, parent_2;
467 const char *c_node_id;
467 const char *c_node_id;
468 const char *data;
468 const char *data;
469 Py_ssize_t length = index_length(self);
469 Py_ssize_t length = index_length(self);
470 PyObject *entry;
470 PyObject *entry;
471
471
472 if (pos < 0)
472 if (pos < 0)
473 pos += length;
473 pos += length;
474
474
475 if (pos < 0 || pos >= length) {
475 if (pos < 0 || pos >= length) {
476 PyErr_SetString(PyExc_IndexError, "revlog index out of range");
476 PyErr_SetString(PyExc_IndexError, "revlog index out of range");
477 return NULL;
477 return NULL;
478 }
478 }
479
479
480 if (pos == length - 1) {
480 if (pos == length - 1) {
481 Py_INCREF(nullentry);
481 Py_INCREF(nullentry);
482 return nullentry;
482 return nullentry;
483 }
483 }
484
484
485 if (pos >= self->length - 1) {
485 if (pos >= self->length - 1) {
486 PyObject *obj;
486 PyObject *obj;
487 obj = PyList_GET_ITEM(self->added, pos - self->length + 1);
487 obj = PyList_GET_ITEM(self->added, pos - self->length + 1);
488 Py_INCREF(obj);
488 Py_INCREF(obj);
489 return obj;
489 return obj;
490 }
490 }
491
491
492 if (self->cache) {
492 if (self->cache) {
493 if (self->cache[pos]) {
493 if (self->cache[pos]) {
494 Py_INCREF(self->cache[pos]);
494 Py_INCREF(self->cache[pos]);
495 return self->cache[pos];
495 return self->cache[pos];
496 }
496 }
497 } else {
497 } else {
498 self->cache = calloc(self->raw_length, sizeof(PyObject *));
498 self->cache = calloc(self->raw_length, sizeof(PyObject *));
499 if (self->cache == NULL)
499 if (self->cache == NULL)
500 return PyErr_NoMemory();
500 return PyErr_NoMemory();
501 }
501 }
502
502
503 data = index_deref(self, pos);
503 data = index_deref(self, pos);
504 if (data == NULL)
504 if (data == NULL)
505 return NULL;
505 return NULL;
506
506
507 offset_flags = getbe32(data + 4);
507 offset_flags = getbe32(data + 4);
508 if (pos == 0) /* mask out version number for the first entry */
508 if (pos == 0) /* mask out version number for the first entry */
509 offset_flags &= 0xFFFF;
509 offset_flags &= 0xFFFF;
510 else {
510 else {
511 uint32_t offset_high = getbe32(data);
511 uint32_t offset_high = getbe32(data);
512 offset_flags |= ((uint64_t)offset_high) << 32;
512 offset_flags |= ((uint64_t)offset_high) << 32;
513 }
513 }
514
514
515 comp_len = getbe32(data + 8);
515 comp_len = getbe32(data + 8);
516 uncomp_len = getbe32(data + 12);
516 uncomp_len = getbe32(data + 12);
517 base_rev = getbe32(data + 16);
517 base_rev = getbe32(data + 16);
518 link_rev = getbe32(data + 20);
518 link_rev = getbe32(data + 20);
519 parent_1 = getbe32(data + 24);
519 parent_1 = getbe32(data + 24);
520 parent_2 = getbe32(data + 28);
520 parent_2 = getbe32(data + 28);
521 c_node_id = data + 32;
521 c_node_id = data + 32;
522
522
523 entry = Py_BuildValue(tuple_format, offset_flags, comp_len,
523 entry = Py_BuildValue(tuple_format, offset_flags, comp_len,
524 uncomp_len, base_rev, link_rev,
524 uncomp_len, base_rev, link_rev,
525 parent_1, parent_2, c_node_id, 20);
525 parent_1, parent_2, c_node_id, 20);
526
526
527 if (entry)
527 if (entry)
528 PyObject_GC_UnTrack(entry);
528 PyObject_GC_UnTrack(entry);
529
529
530 self->cache[pos] = entry;
530 self->cache[pos] = entry;
531 Py_INCREF(entry);
531 Py_INCREF(entry);
532
532
533 return entry;
533 return entry;
534 }
534 }
535
535
536 /*
536 /*
537 * Return the 20-byte SHA of the node corresponding to the given rev.
537 * Return the 20-byte SHA of the node corresponding to the given rev.
538 */
538 */
539 static const char *index_node(indexObject *self, Py_ssize_t pos)
539 static const char *index_node(indexObject *self, Py_ssize_t pos)
540 {
540 {
541 Py_ssize_t length = index_length(self);
541 Py_ssize_t length = index_length(self);
542 const char *data;
542 const char *data;
543
543
544 if (pos == length - 1 || pos == INT_MAX)
544 if (pos == length - 1 || pos == INT_MAX)
545 return nullid;
545 return nullid;
546
546
547 if (pos >= length)
547 if (pos >= length)
548 return NULL;
548 return NULL;
549
549
550 if (pos >= self->length - 1) {
550 if (pos >= self->length - 1) {
551 PyObject *tuple, *str;
551 PyObject *tuple, *str;
552 tuple = PyList_GET_ITEM(self->added, pos - self->length + 1);
552 tuple = PyList_GET_ITEM(self->added, pos - self->length + 1);
553 str = PyTuple_GetItem(tuple, 7);
553 str = PyTuple_GetItem(tuple, 7);
554 return str ? PyString_AS_STRING(str) : NULL;
554 return str ? PyString_AS_STRING(str) : NULL;
555 }
555 }
556
556
557 data = index_deref(self, pos);
557 data = index_deref(self, pos);
558 return data ? data + 32 : NULL;
558 return data ? data + 32 : NULL;
559 }
559 }
560
560
561 static int nt_insert(indexObject *self, const char *node, int rev);
561 static int nt_insert(indexObject *self, const char *node, int rev);
562
562
563 static int node_check(PyObject *obj, char **node, Py_ssize_t *nodelen)
563 static int node_check(PyObject *obj, char **node, Py_ssize_t *nodelen)
564 {
564 {
565 if (PyString_AsStringAndSize(obj, node, nodelen) == -1)
565 if (PyString_AsStringAndSize(obj, node, nodelen) == -1)
566 return -1;
566 return -1;
567 if (*nodelen == 20)
567 if (*nodelen == 20)
568 return 0;
568 return 0;
569 PyErr_SetString(PyExc_ValueError, "20-byte hash required");
569 PyErr_SetString(PyExc_ValueError, "20-byte hash required");
570 return -1;
570 return -1;
571 }
571 }
572
572
573 static PyObject *index_insert(indexObject *self, PyObject *args)
573 static PyObject *index_insert(indexObject *self, PyObject *args)
574 {
574 {
575 PyObject *obj;
575 PyObject *obj;
576 char *node;
576 char *node;
577 long offset;
577 long offset;
578 Py_ssize_t len, nodelen;
578 Py_ssize_t len, nodelen;
579
579
580 if (!PyArg_ParseTuple(args, "lO", &offset, &obj))
580 if (!PyArg_ParseTuple(args, "lO", &offset, &obj))
581 return NULL;
581 return NULL;
582
582
583 if (!PyTuple_Check(obj) || PyTuple_GET_SIZE(obj) != 8) {
583 if (!PyTuple_Check(obj) || PyTuple_GET_SIZE(obj) != 8) {
584 PyErr_SetString(PyExc_TypeError, "8-tuple required");
584 PyErr_SetString(PyExc_TypeError, "8-tuple required");
585 return NULL;
585 return NULL;
586 }
586 }
587
587
588 if (node_check(PyTuple_GET_ITEM(obj, 7), &node, &nodelen) == -1)
588 if (node_check(PyTuple_GET_ITEM(obj, 7), &node, &nodelen) == -1)
589 return NULL;
589 return NULL;
590
590
591 len = index_length(self);
591 len = index_length(self);
592
592
593 if (offset < 0)
593 if (offset < 0)
594 offset += len;
594 offset += len;
595
595
596 if (offset != len - 1) {
596 if (offset != len - 1) {
597 PyErr_SetString(PyExc_IndexError,
597 PyErr_SetString(PyExc_IndexError,
598 "insert only supported at index -1");
598 "insert only supported at index -1");
599 return NULL;
599 return NULL;
600 }
600 }
601
601
602 if (offset > INT_MAX) {
602 if (offset > INT_MAX) {
603 PyErr_SetString(PyExc_ValueError,
603 PyErr_SetString(PyExc_ValueError,
604 "currently only 2**31 revs supported");
604 "currently only 2**31 revs supported");
605 return NULL;
605 return NULL;
606 }
606 }
607
607
608 if (self->added == NULL) {
608 if (self->added == NULL) {
609 self->added = PyList_New(0);
609 self->added = PyList_New(0);
610 if (self->added == NULL)
610 if (self->added == NULL)
611 return NULL;
611 return NULL;
612 }
612 }
613
613
614 if (PyList_Append(self->added, obj) == -1)
614 if (PyList_Append(self->added, obj) == -1)
615 return NULL;
615 return NULL;
616
616
617 if (self->nt)
617 if (self->nt)
618 nt_insert(self, node, (int)offset);
618 nt_insert(self, node, (int)offset);
619
619
620 Py_CLEAR(self->headrevs);
620 Py_CLEAR(self->headrevs);
621 Py_RETURN_NONE;
621 Py_RETURN_NONE;
622 }
622 }
623
623
624 static void _index_clearcaches(indexObject *self)
624 static void _index_clearcaches(indexObject *self)
625 {
625 {
626 if (self->cache) {
626 if (self->cache) {
627 Py_ssize_t i;
627 Py_ssize_t i;
628
628
629 for (i = 0; i < self->raw_length; i++)
629 for (i = 0; i < self->raw_length; i++)
630 Py_CLEAR(self->cache[i]);
630 Py_CLEAR(self->cache[i]);
631 free(self->cache);
631 free(self->cache);
632 self->cache = NULL;
632 self->cache = NULL;
633 }
633 }
634 if (self->offsets) {
634 if (self->offsets) {
635 free(self->offsets);
635 free(self->offsets);
636 self->offsets = NULL;
636 self->offsets = NULL;
637 }
637 }
638 if (self->nt) {
638 if (self->nt) {
639 free(self->nt);
639 free(self->nt);
640 self->nt = NULL;
640 self->nt = NULL;
641 }
641 }
642 Py_CLEAR(self->headrevs);
642 Py_CLEAR(self->headrevs);
643 }
643 }
644
644
645 static PyObject *index_clearcaches(indexObject *self)
645 static PyObject *index_clearcaches(indexObject *self)
646 {
646 {
647 _index_clearcaches(self);
647 _index_clearcaches(self);
648 self->ntlength = self->ntcapacity = 0;
648 self->ntlength = self->ntcapacity = 0;
649 self->ntdepth = self->ntsplits = 0;
649 self->ntdepth = self->ntsplits = 0;
650 self->ntrev = -1;
650 self->ntrev = -1;
651 self->ntlookups = self->ntmisses = 0;
651 self->ntlookups = self->ntmisses = 0;
652 Py_RETURN_NONE;
652 Py_RETURN_NONE;
653 }
653 }
654
654
655 static PyObject *index_stats(indexObject *self)
655 static PyObject *index_stats(indexObject *self)
656 {
656 {
657 PyObject *obj = PyDict_New();
657 PyObject *obj = PyDict_New();
658
658
659 if (obj == NULL)
659 if (obj == NULL)
660 return NULL;
660 return NULL;
661
661
662 #define istat(__n, __d) \
662 #define istat(__n, __d) \
663 if (PyDict_SetItemString(obj, __d, PyInt_FromSsize_t(self->__n)) == -1) \
663 if (PyDict_SetItemString(obj, __d, PyInt_FromSsize_t(self->__n)) == -1) \
664 goto bail;
664 goto bail;
665
665
666 if (self->added) {
666 if (self->added) {
667 Py_ssize_t len = PyList_GET_SIZE(self->added);
667 Py_ssize_t len = PyList_GET_SIZE(self->added);
668 if (PyDict_SetItemString(obj, "index entries added",
668 if (PyDict_SetItemString(obj, "index entries added",
669 PyInt_FromSsize_t(len)) == -1)
669 PyInt_FromSsize_t(len)) == -1)
670 goto bail;
670 goto bail;
671 }
671 }
672
672
673 if (self->raw_length != self->length - 1)
673 if (self->raw_length != self->length - 1)
674 istat(raw_length, "revs on disk");
674 istat(raw_length, "revs on disk");
675 istat(length, "revs in memory");
675 istat(length, "revs in memory");
676 istat(ntcapacity, "node trie capacity");
676 istat(ntcapacity, "node trie capacity");
677 istat(ntdepth, "node trie depth");
677 istat(ntdepth, "node trie depth");
678 istat(ntlength, "node trie count");
678 istat(ntlength, "node trie count");
679 istat(ntlookups, "node trie lookups");
679 istat(ntlookups, "node trie lookups");
680 istat(ntmisses, "node trie misses");
680 istat(ntmisses, "node trie misses");
681 istat(ntrev, "node trie last rev scanned");
681 istat(ntrev, "node trie last rev scanned");
682 istat(ntsplits, "node trie splits");
682 istat(ntsplits, "node trie splits");
683
683
684 #undef istat
684 #undef istat
685
685
686 return obj;
686 return obj;
687
687
688 bail:
688 bail:
689 Py_XDECREF(obj);
689 Py_XDECREF(obj);
690 return NULL;
690 return NULL;
691 }
691 }
692
692
693 /*
693 /*
694 * When we cache a list, we want to be sure the caller can't mutate
694 * When we cache a list, we want to be sure the caller can't mutate
695 * the cached copy.
695 * the cached copy.
696 */
696 */
697 static PyObject *list_copy(PyObject *list)
697 static PyObject *list_copy(PyObject *list)
698 {
698 {
699 Py_ssize_t len = PyList_GET_SIZE(list);
699 Py_ssize_t len = PyList_GET_SIZE(list);
700 PyObject *newlist = PyList_New(len);
700 PyObject *newlist = PyList_New(len);
701 Py_ssize_t i;
701 Py_ssize_t i;
702
702
703 if (newlist == NULL)
703 if (newlist == NULL)
704 return NULL;
704 return NULL;
705
705
706 for (i = 0; i < len; i++) {
706 for (i = 0; i < len; i++) {
707 PyObject *obj = PyList_GET_ITEM(list, i);
707 PyObject *obj = PyList_GET_ITEM(list, i);
708 Py_INCREF(obj);
708 Py_INCREF(obj);
709 PyList_SET_ITEM(newlist, i, obj);
709 PyList_SET_ITEM(newlist, i, obj);
710 }
710 }
711
711
712 return newlist;
712 return newlist;
713 }
713 }
714
714
715 static PyObject *index_headrevs(indexObject *self)
715 static PyObject *index_headrevs(indexObject *self)
716 {
716 {
717 Py_ssize_t i, len, addlen;
717 Py_ssize_t i, len, addlen;
718 char *nothead = NULL;
718 char *nothead = NULL;
719 PyObject *heads;
719 PyObject *heads;
720
720
721 if (self->headrevs)
721 if (self->headrevs)
722 return list_copy(self->headrevs);
722 return list_copy(self->headrevs);
723
723
724 len = index_length(self) - 1;
724 len = index_length(self) - 1;
725 heads = PyList_New(0);
725 heads = PyList_New(0);
726 if (heads == NULL)
726 if (heads == NULL)
727 goto bail;
727 goto bail;
728 if (len == 0) {
728 if (len == 0) {
729 PyObject *nullid = PyInt_FromLong(-1);
729 PyObject *nullid = PyInt_FromLong(-1);
730 if (nullid == NULL || PyList_Append(heads, nullid) == -1) {
730 if (nullid == NULL || PyList_Append(heads, nullid) == -1) {
731 Py_XDECREF(nullid);
731 Py_XDECREF(nullid);
732 goto bail;
732 goto bail;
733 }
733 }
734 goto done;
734 goto done;
735 }
735 }
736
736
737 nothead = calloc(len, 1);
737 nothead = calloc(len, 1);
738 if (nothead == NULL)
738 if (nothead == NULL)
739 goto bail;
739 goto bail;
740
740
741 for (i = 0; i < self->raw_length; i++) {
741 for (i = 0; i < self->raw_length; i++) {
742 const char *data = index_deref(self, i);
742 const char *data = index_deref(self, i);
743 int parent_1 = getbe32(data + 24);
743 int parent_1 = getbe32(data + 24);
744 int parent_2 = getbe32(data + 28);
744 int parent_2 = getbe32(data + 28);
745 if (parent_1 >= 0)
745 if (parent_1 >= 0)
746 nothead[parent_1] = 1;
746 nothead[parent_1] = 1;
747 if (parent_2 >= 0)
747 if (parent_2 >= 0)
748 nothead[parent_2] = 1;
748 nothead[parent_2] = 1;
749 }
749 }
750
750
751 addlen = self->added ? PyList_GET_SIZE(self->added) : 0;
751 addlen = self->added ? PyList_GET_SIZE(self->added) : 0;
752
752
753 for (i = 0; i < addlen; i++) {
753 for (i = 0; i < addlen; i++) {
754 PyObject *rev = PyList_GET_ITEM(self->added, i);
754 PyObject *rev = PyList_GET_ITEM(self->added, i);
755 PyObject *p1 = PyTuple_GET_ITEM(rev, 5);
755 PyObject *p1 = PyTuple_GET_ITEM(rev, 5);
756 PyObject *p2 = PyTuple_GET_ITEM(rev, 6);
756 PyObject *p2 = PyTuple_GET_ITEM(rev, 6);
757 long parent_1, parent_2;
757 long parent_1, parent_2;
758
758
759 if (!PyInt_Check(p1) || !PyInt_Check(p2)) {
759 if (!PyInt_Check(p1) || !PyInt_Check(p2)) {
760 PyErr_SetString(PyExc_TypeError,
760 PyErr_SetString(PyExc_TypeError,
761 "revlog parents are invalid");
761 "revlog parents are invalid");
762 goto bail;
762 goto bail;
763 }
763 }
764 parent_1 = PyInt_AS_LONG(p1);
764 parent_1 = PyInt_AS_LONG(p1);
765 parent_2 = PyInt_AS_LONG(p2);
765 parent_2 = PyInt_AS_LONG(p2);
766 if (parent_1 >= 0)
766 if (parent_1 >= 0)
767 nothead[parent_1] = 1;
767 nothead[parent_1] = 1;
768 if (parent_2 >= 0)
768 if (parent_2 >= 0)
769 nothead[parent_2] = 1;
769 nothead[parent_2] = 1;
770 }
770 }
771
771
772 for (i = 0; i < len; i++) {
772 for (i = 0; i < len; i++) {
773 PyObject *head;
773 PyObject *head;
774
774
775 if (nothead[i])
775 if (nothead[i])
776 continue;
776 continue;
777 head = PyInt_FromLong(i);
777 head = PyInt_FromLong(i);
778 if (head == NULL || PyList_Append(heads, head) == -1) {
778 if (head == NULL || PyList_Append(heads, head) == -1) {
779 Py_XDECREF(head);
779 Py_XDECREF(head);
780 goto bail;
780 goto bail;
781 }
781 }
782 }
782 }
783
783
784 done:
784 done:
785 self->headrevs = heads;
785 self->headrevs = heads;
786 free(nothead);
786 free(nothead);
787 return list_copy(self->headrevs);
787 return list_copy(self->headrevs);
788 bail:
788 bail:
789 Py_XDECREF(heads);
789 Py_XDECREF(heads);
790 free(nothead);
790 free(nothead);
791 return NULL;
791 return NULL;
792 }
792 }
793
793
794 static inline int nt_level(const char *node, Py_ssize_t level)
794 static inline int nt_level(const char *node, Py_ssize_t level)
795 {
795 {
796 int v = node[level>>1];
796 int v = node[level>>1];
797 if (!(level & 1))
797 if (!(level & 1))
798 v >>= 4;
798 v >>= 4;
799 return v & 0xf;
799 return v & 0xf;
800 }
800 }
801
801
802 /*
802 /*
803 * Return values:
803 * Return values:
804 *
804 *
805 * -4: match is ambiguous (multiple candidates)
805 * -4: match is ambiguous (multiple candidates)
806 * -2: not found
806 * -2: not found
807 * rest: valid rev
807 * rest: valid rev
808 */
808 */
809 static int nt_find(indexObject *self, const char *node, Py_ssize_t nodelen,
809 static int nt_find(indexObject *self, const char *node, Py_ssize_t nodelen,
810 int hex)
810 int hex)
811 {
811 {
812 int (*getnybble)(const char *, Py_ssize_t) = hex ? hexdigit : nt_level;
812 int (*getnybble)(const char *, Py_ssize_t) = hex ? hexdigit : nt_level;
813 int level, maxlevel, off;
813 int level, maxlevel, off;
814
814
815 if (nodelen == 20 && node[0] == '\0' && memcmp(node, nullid, 20) == 0)
815 if (nodelen == 20 && node[0] == '\0' && memcmp(node, nullid, 20) == 0)
816 return -1;
816 return -1;
817
817
818 if (self->nt == NULL)
818 if (self->nt == NULL)
819 return -2;
819 return -2;
820
820
821 if (hex)
821 if (hex)
822 maxlevel = nodelen > 40 ? 40 : (int)nodelen;
822 maxlevel = nodelen > 40 ? 40 : (int)nodelen;
823 else
823 else
824 maxlevel = nodelen > 20 ? 40 : ((int)nodelen * 2);
824 maxlevel = nodelen > 20 ? 40 : ((int)nodelen * 2);
825
825
826 for (level = off = 0; level < maxlevel; level++) {
826 for (level = off = 0; level < maxlevel; level++) {
827 int k = getnybble(node, level);
827 int k = getnybble(node, level);
828 nodetree *n = &self->nt[off];
828 nodetree *n = &self->nt[off];
829 int v = n->children[k];
829 int v = n->children[k];
830
830
831 if (v < 0) {
831 if (v < 0) {
832 const char *n;
832 const char *n;
833 Py_ssize_t i;
833 Py_ssize_t i;
834
834
835 v = -v - 1;
835 v = -v - 1;
836 n = index_node(self, v);
836 n = index_node(self, v);
837 if (n == NULL)
837 if (n == NULL)
838 return -2;
838 return -2;
839 for (i = level; i < maxlevel; i++)
839 for (i = level; i < maxlevel; i++)
840 if (getnybble(node, i) != nt_level(n, i))
840 if (getnybble(node, i) != nt_level(n, i))
841 return -2;
841 return -2;
842 return v;
842 return v;
843 }
843 }
844 if (v == 0)
844 if (v == 0)
845 return -2;
845 return -2;
846 off = v;
846 off = v;
847 }
847 }
848 /* multiple matches against an ambiguous prefix */
848 /* multiple matches against an ambiguous prefix */
849 return -4;
849 return -4;
850 }
850 }
851
851
852 static int nt_new(indexObject *self)
852 static int nt_new(indexObject *self)
853 {
853 {
854 if (self->ntlength == self->ntcapacity) {
854 if (self->ntlength == self->ntcapacity) {
855 self->ntcapacity *= 2;
855 self->ntcapacity *= 2;
856 self->nt = realloc(self->nt,
856 self->nt = realloc(self->nt,
857 self->ntcapacity * sizeof(nodetree));
857 self->ntcapacity * sizeof(nodetree));
858 if (self->nt == NULL) {
858 if (self->nt == NULL) {
859 PyErr_SetString(PyExc_MemoryError, "out of memory");
859 PyErr_SetString(PyExc_MemoryError, "out of memory");
860 return -1;
860 return -1;
861 }
861 }
862 memset(&self->nt[self->ntlength], 0,
862 memset(&self->nt[self->ntlength], 0,
863 sizeof(nodetree) * (self->ntcapacity - self->ntlength));
863 sizeof(nodetree) * (self->ntcapacity - self->ntlength));
864 }
864 }
865 return self->ntlength++;
865 return self->ntlength++;
866 }
866 }
867
867
868 static int nt_insert(indexObject *self, const char *node, int rev)
868 static int nt_insert(indexObject *self, const char *node, int rev)
869 {
869 {
870 int level = 0;
870 int level = 0;
871 int off = 0;
871 int off = 0;
872
872
873 while (level < 40) {
873 while (level < 40) {
874 int k = nt_level(node, level);
874 int k = nt_level(node, level);
875 nodetree *n;
875 nodetree *n;
876 int v;
876 int v;
877
877
878 n = &self->nt[off];
878 n = &self->nt[off];
879 v = n->children[k];
879 v = n->children[k];
880
880
881 if (v == 0) {
881 if (v == 0) {
882 n->children[k] = -rev - 1;
882 n->children[k] = -rev - 1;
883 return 0;
883 return 0;
884 }
884 }
885 if (v < 0) {
885 if (v < 0) {
886 const char *oldnode = index_node(self, -v - 1);
886 const char *oldnode = index_node(self, -v - 1);
887 int noff;
887 int noff;
888
888
889 if (!oldnode || !memcmp(oldnode, node, 20)) {
889 if (!oldnode || !memcmp(oldnode, node, 20)) {
890 n->children[k] = -rev - 1;
890 n->children[k] = -rev - 1;
891 return 0;
891 return 0;
892 }
892 }
893 noff = nt_new(self);
893 noff = nt_new(self);
894 if (noff == -1)
894 if (noff == -1)
895 return -1;
895 return -1;
896 /* self->nt may have been changed by realloc */
896 /* self->nt may have been changed by realloc */
897 self->nt[off].children[k] = noff;
897 self->nt[off].children[k] = noff;
898 off = noff;
898 off = noff;
899 n = &self->nt[off];
899 n = &self->nt[off];
900 n->children[nt_level(oldnode, ++level)] = v;
900 n->children[nt_level(oldnode, ++level)] = v;
901 if (level > self->ntdepth)
901 if (level > self->ntdepth)
902 self->ntdepth = level;
902 self->ntdepth = level;
903 self->ntsplits += 1;
903 self->ntsplits += 1;
904 } else {
904 } else {
905 level += 1;
905 level += 1;
906 off = v;
906 off = v;
907 }
907 }
908 }
908 }
909
909
910 return -1;
910 return -1;
911 }
911 }
912
912
913 static int nt_init(indexObject *self)
913 static int nt_init(indexObject *self)
914 {
914 {
915 if (self->nt == NULL) {
915 if (self->nt == NULL) {
916 self->ntcapacity = self->raw_length < 4
916 self->ntcapacity = self->raw_length < 4
917 ? 4 : self->raw_length / 2;
917 ? 4 : self->raw_length / 2;
918 self->nt = calloc(self->ntcapacity, sizeof(nodetree));
918 self->nt = calloc(self->ntcapacity, sizeof(nodetree));
919 if (self->nt == NULL) {
919 if (self->nt == NULL) {
920 PyErr_NoMemory();
920 PyErr_NoMemory();
921 return -1;
921 return -1;
922 }
922 }
923 self->ntlength = 1;
923 self->ntlength = 1;
924 self->ntrev = (int)index_length(self) - 1;
924 self->ntrev = (int)index_length(self) - 1;
925 self->ntlookups = 1;
925 self->ntlookups = 1;
926 self->ntmisses = 0;
926 self->ntmisses = 0;
927 if (nt_insert(self, nullid, INT_MAX) == -1)
927 if (nt_insert(self, nullid, INT_MAX) == -1)
928 return -1;
928 return -1;
929 }
929 }
930 return 0;
930 return 0;
931 }
931 }
932
932
933 /*
933 /*
934 * Return values:
934 * Return values:
935 *
935 *
936 * -3: error (exception set)
936 * -3: error (exception set)
937 * -2: not found (no exception set)
937 * -2: not found (no exception set)
938 * rest: valid rev
938 * rest: valid rev
939 */
939 */
940 static int index_find_node(indexObject *self,
940 static int index_find_node(indexObject *self,
941 const char *node, Py_ssize_t nodelen)
941 const char *node, Py_ssize_t nodelen)
942 {
942 {
943 int rev;
943 int rev;
944
944
945 self->ntlookups++;
945 self->ntlookups++;
946 rev = nt_find(self, node, nodelen, 0);
946 rev = nt_find(self, node, nodelen, 0);
947 if (rev >= -1)
947 if (rev >= -1)
948 return rev;
948 return rev;
949
949
950 if (nt_init(self) == -1)
950 if (nt_init(self) == -1)
951 return -3;
951 return -3;
952
952
953 /*
953 /*
954 * For the first handful of lookups, we scan the entire index,
954 * For the first handful of lookups, we scan the entire index,
955 * and cache only the matching nodes. This optimizes for cases
955 * and cache only the matching nodes. This optimizes for cases
956 * like "hg tip", where only a few nodes are accessed.
956 * like "hg tip", where only a few nodes are accessed.
957 *
957 *
958 * After that, we cache every node we visit, using a single
958 * After that, we cache every node we visit, using a single
959 * scan amortized over multiple lookups. This gives the best
959 * scan amortized over multiple lookups. This gives the best
960 * bulk performance, e.g. for "hg log".
960 * bulk performance, e.g. for "hg log".
961 */
961 */
962 if (self->ntmisses++ < 4) {
962 if (self->ntmisses++ < 4) {
963 for (rev = self->ntrev - 1; rev >= 0; rev--) {
963 for (rev = self->ntrev - 1; rev >= 0; rev--) {
964 const char *n = index_node(self, rev);
964 const char *n = index_node(self, rev);
965 if (n == NULL)
965 if (n == NULL)
966 return -2;
966 return -2;
967 if (memcmp(node, n, nodelen > 20 ? 20 : nodelen) == 0) {
967 if (memcmp(node, n, nodelen > 20 ? 20 : nodelen) == 0) {
968 if (nt_insert(self, n, rev) == -1)
968 if (nt_insert(self, n, rev) == -1)
969 return -3;
969 return -3;
970 break;
970 break;
971 }
971 }
972 }
972 }
973 } else {
973 } else {
974 for (rev = self->ntrev - 1; rev >= 0; rev--) {
974 for (rev = self->ntrev - 1; rev >= 0; rev--) {
975 const char *n = index_node(self, rev);
975 const char *n = index_node(self, rev);
976 if (n == NULL) {
976 if (n == NULL) {
977 self->ntrev = rev + 1;
977 self->ntrev = rev + 1;
978 return -2;
978 return -2;
979 }
979 }
980 if (nt_insert(self, n, rev) == -1) {
980 if (nt_insert(self, n, rev) == -1) {
981 self->ntrev = rev + 1;
981 self->ntrev = rev + 1;
982 return -3;
982 return -3;
983 }
983 }
984 if (memcmp(node, n, nodelen > 20 ? 20 : nodelen) == 0) {
984 if (memcmp(node, n, nodelen > 20 ? 20 : nodelen) == 0) {
985 break;
985 break;
986 }
986 }
987 }
987 }
988 self->ntrev = rev;
988 self->ntrev = rev;
989 }
989 }
990
990
991 if (rev >= 0)
991 if (rev >= 0)
992 return rev;
992 return rev;
993 return -2;
993 return -2;
994 }
994 }
995
995
996 static PyObject *raise_revlog_error(void)
996 static PyObject *raise_revlog_error(void)
997 {
997 {
998 static PyObject *errclass;
998 static PyObject *errclass;
999 PyObject *mod = NULL, *errobj;
999 PyObject *mod = NULL, *errobj;
1000
1000
1001 if (errclass == NULL) {
1001 if (errclass == NULL) {
1002 PyObject *dict;
1002 PyObject *dict;
1003
1003
1004 mod = PyImport_ImportModule("mercurial.error");
1004 mod = PyImport_ImportModule("mercurial.error");
1005 if (mod == NULL)
1005 if (mod == NULL)
1006 goto classfail;
1006 goto classfail;
1007
1007
1008 dict = PyModule_GetDict(mod);
1008 dict = PyModule_GetDict(mod);
1009 if (dict == NULL)
1009 if (dict == NULL)
1010 goto classfail;
1010 goto classfail;
1011
1011
1012 errclass = PyDict_GetItemString(dict, "RevlogError");
1012 errclass = PyDict_GetItemString(dict, "RevlogError");
1013 if (errclass == NULL) {
1013 if (errclass == NULL) {
1014 PyErr_SetString(PyExc_SystemError,
1014 PyErr_SetString(PyExc_SystemError,
1015 "could not find RevlogError");
1015 "could not find RevlogError");
1016 goto classfail;
1016 goto classfail;
1017 }
1017 }
1018 Py_INCREF(errclass);
1018 Py_INCREF(errclass);
1019 }
1019 }
1020
1020
1021 errobj = PyObject_CallFunction(errclass, NULL);
1021 errobj = PyObject_CallFunction(errclass, NULL);
1022 if (errobj == NULL)
1022 if (errobj == NULL)
1023 return NULL;
1023 return NULL;
1024 PyErr_SetObject(errclass, errobj);
1024 PyErr_SetObject(errclass, errobj);
1025 return errobj;
1025 return errobj;
1026
1026
1027 classfail:
1027 classfail:
1028 Py_XDECREF(mod);
1028 Py_XDECREF(mod);
1029 return NULL;
1029 return NULL;
1030 }
1030 }
1031
1031
1032 static PyObject *index_getitem(indexObject *self, PyObject *value)
1032 static PyObject *index_getitem(indexObject *self, PyObject *value)
1033 {
1033 {
1034 char *node;
1034 char *node;
1035 Py_ssize_t nodelen;
1035 Py_ssize_t nodelen;
1036 int rev;
1036 int rev;
1037
1037
1038 if (PyInt_Check(value))
1038 if (PyInt_Check(value))
1039 return index_get(self, PyInt_AS_LONG(value));
1039 return index_get(self, PyInt_AS_LONG(value));
1040
1040
1041 if (node_check(value, &node, &nodelen) == -1)
1041 if (node_check(value, &node, &nodelen) == -1)
1042 return NULL;
1042 return NULL;
1043 rev = index_find_node(self, node, nodelen);
1043 rev = index_find_node(self, node, nodelen);
1044 if (rev >= -1)
1044 if (rev >= -1)
1045 return PyInt_FromLong(rev);
1045 return PyInt_FromLong(rev);
1046 if (rev == -2)
1046 if (rev == -2)
1047 raise_revlog_error();
1047 raise_revlog_error();
1048 return NULL;
1048 return NULL;
1049 }
1049 }
1050
1050
1051 static int nt_partialmatch(indexObject *self, const char *node,
1051 static int nt_partialmatch(indexObject *self, const char *node,
1052 Py_ssize_t nodelen)
1052 Py_ssize_t nodelen)
1053 {
1053 {
1054 int rev;
1054 int rev;
1055
1055
1056 if (nt_init(self) == -1)
1056 if (nt_init(self) == -1)
1057 return -3;
1057 return -3;
1058
1058
1059 if (self->ntrev > 0) {
1059 if (self->ntrev > 0) {
1060 /* ensure that the radix tree is fully populated */
1060 /* ensure that the radix tree is fully populated */
1061 for (rev = self->ntrev - 1; rev >= 0; rev--) {
1061 for (rev = self->ntrev - 1; rev >= 0; rev--) {
1062 const char *n = index_node(self, rev);
1062 const char *n = index_node(self, rev);
1063 if (n == NULL)
1063 if (n == NULL)
1064 return -2;
1064 return -2;
1065 if (nt_insert(self, n, rev) == -1)
1065 if (nt_insert(self, n, rev) == -1)
1066 return -3;
1066 return -3;
1067 }
1067 }
1068 self->ntrev = rev;
1068 self->ntrev = rev;
1069 }
1069 }
1070
1070
1071 return nt_find(self, node, nodelen, 1);
1071 return nt_find(self, node, nodelen, 1);
1072 }
1072 }
1073
1073
1074 static PyObject *index_partialmatch(indexObject *self, PyObject *args)
1074 static PyObject *index_partialmatch(indexObject *self, PyObject *args)
1075 {
1075 {
1076 const char *fullnode;
1076 const char *fullnode;
1077 int nodelen;
1077 int nodelen;
1078 char *node;
1078 char *node;
1079 int rev, i;
1079 int rev, i;
1080
1080
1081 if (!PyArg_ParseTuple(args, "s#", &node, &nodelen))
1081 if (!PyArg_ParseTuple(args, "s#", &node, &nodelen))
1082 return NULL;
1082 return NULL;
1083
1083
1084 if (nodelen < 4) {
1084 if (nodelen < 4) {
1085 PyErr_SetString(PyExc_ValueError, "key too short");
1085 PyErr_SetString(PyExc_ValueError, "key too short");
1086 return NULL;
1086 return NULL;
1087 }
1087 }
1088
1088
1089 if (nodelen > 40) {
1089 if (nodelen > 40) {
1090 PyErr_SetString(PyExc_ValueError, "key too long");
1090 PyErr_SetString(PyExc_ValueError, "key too long");
1091 return NULL;
1091 return NULL;
1092 }
1092 }
1093
1093
1094 for (i = 0; i < nodelen; i++)
1094 for (i = 0; i < nodelen; i++)
1095 hexdigit(node, i);
1095 hexdigit(node, i);
1096 if (PyErr_Occurred()) {
1096 if (PyErr_Occurred()) {
1097 /* input contains non-hex characters */
1097 /* input contains non-hex characters */
1098 PyErr_Clear();
1098 PyErr_Clear();
1099 Py_RETURN_NONE;
1099 Py_RETURN_NONE;
1100 }
1100 }
1101
1101
1102 rev = nt_partialmatch(self, node, nodelen);
1102 rev = nt_partialmatch(self, node, nodelen);
1103
1103
1104 switch (rev) {
1104 switch (rev) {
1105 case -4:
1105 case -4:
1106 raise_revlog_error();
1106 raise_revlog_error();
1107 case -3:
1107 case -3:
1108 return NULL;
1108 return NULL;
1109 case -2:
1109 case -2:
1110 Py_RETURN_NONE;
1110 Py_RETURN_NONE;
1111 case -1:
1111 case -1:
1112 return PyString_FromStringAndSize(nullid, 20);
1112 return PyString_FromStringAndSize(nullid, 20);
1113 }
1113 }
1114
1114
1115 fullnode = index_node(self, rev);
1115 fullnode = index_node(self, rev);
1116 if (fullnode == NULL) {
1116 if (fullnode == NULL) {
1117 PyErr_Format(PyExc_IndexError,
1117 PyErr_Format(PyExc_IndexError,
1118 "could not access rev %d", rev);
1118 "could not access rev %d", rev);
1119 return NULL;
1119 return NULL;
1120 }
1120 }
1121 return PyString_FromStringAndSize(fullnode, 20);
1121 return PyString_FromStringAndSize(fullnode, 20);
1122 }
1122 }
1123
1123
1124 static PyObject *index_m_get(indexObject *self, PyObject *args)
1124 static PyObject *index_m_get(indexObject *self, PyObject *args)
1125 {
1125 {
1126 Py_ssize_t nodelen;
1126 Py_ssize_t nodelen;
1127 PyObject *val;
1127 PyObject *val;
1128 char *node;
1128 char *node;
1129 int rev;
1129 int rev;
1130
1130
1131 if (!PyArg_ParseTuple(args, "O", &val))
1131 if (!PyArg_ParseTuple(args, "O", &val))
1132 return NULL;
1132 return NULL;
1133 if (node_check(val, &node, &nodelen) == -1)
1133 if (node_check(val, &node, &nodelen) == -1)
1134 return NULL;
1134 return NULL;
1135 rev = index_find_node(self, node, nodelen);
1135 rev = index_find_node(self, node, nodelen);
1136 if (rev == -3)
1136 if (rev == -3)
1137 return NULL;
1137 return NULL;
1138 if (rev == -2)
1138 if (rev == -2)
1139 Py_RETURN_NONE;
1139 Py_RETURN_NONE;
1140 return PyInt_FromLong(rev);
1140 return PyInt_FromLong(rev);
1141 }
1141 }
1142
1142
1143 static int index_contains(indexObject *self, PyObject *value)
1143 static int index_contains(indexObject *self, PyObject *value)
1144 {
1144 {
1145 char *node;
1145 char *node;
1146 Py_ssize_t nodelen;
1146 Py_ssize_t nodelen;
1147
1147
1148 if (PyInt_Check(value)) {
1148 if (PyInt_Check(value)) {
1149 long rev = PyInt_AS_LONG(value);
1149 long rev = PyInt_AS_LONG(value);
1150 return rev >= -1 && rev < index_length(self);
1150 return rev >= -1 && rev < index_length(self);
1151 }
1151 }
1152
1152
1153 if (node_check(value, &node, &nodelen) == -1)
1153 if (node_check(value, &node, &nodelen) == -1)
1154 return -1;
1154 return -1;
1155
1155
1156 switch (index_find_node(self, node, nodelen)) {
1156 switch (index_find_node(self, node, nodelen)) {
1157 case -3:
1157 case -3:
1158 return -1;
1158 return -1;
1159 case -2:
1159 case -2:
1160 return 0;
1160 return 0;
1161 default:
1161 default:
1162 return 1;
1162 return 1;
1163 }
1163 }
1164 }
1164 }
1165
1165
1166 static inline void index_get_parents(indexObject *self, int rev, int *ps)
1167 {
1168 if (rev >= self->length - 1) {
1169 PyObject *tuple = PyList_GET_ITEM(self->added,
1170 rev - self->length + 1);
1171 ps[0] = (int)PyInt_AS_LONG(PyTuple_GET_ITEM(tuple, 5));
1172 ps[1] = (int)PyInt_AS_LONG(PyTuple_GET_ITEM(tuple, 6));
1173 } else {
1174 const char *data = index_deref(self, rev);
1175 ps[0] = getbe32(data + 24);
1176 ps[1] = getbe32(data + 28);
1177 }
1178 }
1179
1180 typedef uint64_t bitmask;
1181
1182 /*
1183 * Given a disjoint set of revs, return all candidates for the
1184 * greatest common ancestor. In revset notation, this is the set
1185 * "heads(::a and ::b and ...)"
1186 */
1187 static PyObject *find_gca_candidates(indexObject *self, const int *revs,
1188 int revcount)
1189 {
1190 const bitmask allseen = (1ull << revcount) - 1;
1191 const bitmask poison = 1ull << revcount;
1192 PyObject *gca = PyList_New(0);
1193 int i, v, interesting, left;
1194 int maxrev = -1;
1195 bitmask *seen;
1196
1197 for (i = 0; i < revcount; i++) {
1198 if (revs[i] > maxrev)
1199 maxrev = revs[i];
1200 }
1201
1202 seen = calloc(sizeof(*seen), maxrev + 1);
1203 if (seen == NULL)
1204 return PyErr_NoMemory();
1205
1206 for (i = 0; i < revcount; i++)
1207 seen[revs[i]] = 1ull << i;
1208
1209 interesting = left = revcount;
1210
1211 for (v = maxrev; v >= 0 && interesting; v--) {
1212 long sv = seen[v];
1213 int parents[2];
1214
1215 if (!sv)
1216 continue;
1217
1218 if (sv < poison) {
1219 interesting -= 1;
1220 if (sv == allseen) {
1221 PyObject *obj = PyInt_FromLong(v);
1222 if (obj == NULL)
1223 goto bail;
1224 if (PyList_Append(gca, obj) == -1) {
1225 Py_DECREF(obj);
1226 goto bail;
1227 }
1228 sv |= poison;
1229 for (i = 0; i < revcount; i++) {
1230 if (revs[i] == v) {
1231 if (--left <= 1)
1232 goto done;
1233 break;
1234 }
1235 }
1236 }
1237 }
1238 index_get_parents(self, v, parents);
1239
1240 for (i = 0; i < 2; i++) {
1241 int p = parents[i];
1242 if (p == -1)
1243 continue;
1244 const long sp = seen[p];
1245 if (sv < poison) {
1246 if (sp == 0) {
1247 seen[p] = sv;
1248 interesting++;
1249 }
1250 else if (sp != sv)
1251 seen[p] |= sv;
1252 } else {
1253 if (sp && sp < poison)
1254 interesting--;
1255 seen[p] = sv;
1256 }
1257 }
1258 }
1259
1260 done:
1261 free(seen);
1262 return gca;
1263 bail:
1264 free(seen);
1265 Py_XDECREF(gca);
1266 return NULL;
1267 }
1268
1269 /*
1270 * Given a disjoint set of revs, return the subset with the longest
1271 * path to the root.
1272 */
1273 static PyObject *find_deepest(indexObject *self, PyObject *revs)
1274 {
1275 const Py_ssize_t revcount = PyList_GET_SIZE(revs);
1276 static const Py_ssize_t capacity = 24;
1277 int *depth, *interesting = NULL;
1278 int i, j, v, ninteresting;
1279 PyObject *dict = NULL, *keys;
1280 long *seen = NULL;
1281 int maxrev = -1;
1282 long final;
1283
1284 if (revcount > capacity) {
1285 PyErr_Format(PyExc_OverflowError,
1286 "bitset size (%ld) > capacity (%ld)",
1287 revcount, capacity);
1288 return NULL;
1289 }
1290
1291 for (i = 0; i < revcount; i++) {
1292 int n = (int)PyInt_AsLong(PyList_GET_ITEM(revs, i));
1293 if (n > maxrev)
1294 maxrev = n;
1295 }
1296
1297 depth = calloc(sizeof(*depth), maxrev + 1);
1298 if (depth == NULL)
1299 return PyErr_NoMemory();
1300
1301 seen = calloc(sizeof(*seen), maxrev + 1);
1302 if (seen == NULL) {
1303 PyErr_NoMemory();
1304 goto bail;
1305 }
1306
1307 interesting = calloc(sizeof(*interesting), 2 << revcount);
1308 if (interesting == NULL) {
1309 PyErr_NoMemory();
1310 goto bail;
1311 }
1312
1313 for (i = 0; i < revcount; i++) {
1314 int n = (int)PyInt_AsLong(PyList_GET_ITEM(revs, i));
1315 long b = 1l << i;
1316 depth[n] = 1;
1317 seen[n] = b;
1318 interesting[b] = 1;
1319 }
1320
1321 ninteresting = (int)revcount;
1322
1323 for (v = maxrev; v >= 0 && ninteresting > 1; v--) {
1324 int dv = depth[v];
1325 int parents[2];
1326 long sv;
1327
1328 if (dv == 0)
1329 continue;
1330
1331 sv = seen[v];
1332 index_get_parents(self, v, parents);
1333
1334 for (i = 0; i < 2; i++) {
1335 int p = parents[i];
1336 long nsp, sp;
1337 int dp;
1338
1339 if (p == -1)
1340 continue;
1341
1342 dp = depth[p];
1343 nsp = sp = seen[p];
1344 if (dp <= dv) {
1345 depth[p] = dv + 1;
1346 if (sp != sv) {
1347 interesting[sv] += 1;
1348 nsp = seen[p] = sv;
1349 if (sp) {
1350 interesting[sp] -= 1;
1351 if (interesting[sp] == 0)
1352 ninteresting -= 1;
1353 }
1354 }
1355 }
1356 else if (dv == dp - 1) {
1357 nsp = sp | sv;
1358 if (nsp == sp)
1359 continue;
1360 seen[p] = nsp;
1361 interesting[nsp] += 1;
1362 interesting[sp] -= 1;
1363 if (interesting[sp] == 0)
1364 ninteresting -= 1;
1365 }
1366 }
1367 interesting[sv] -= 1;
1368 if (interesting[sv] == 0)
1369 ninteresting -= 1;
1370 }
1371
1372 final = 0;
1373 j = ninteresting;
1374 for (i = 0; i < (int)(2 << revcount) && j > 0; i++) {
1375 if (interesting[i] == 0)
1376 continue;
1377 final |= i;
1378 j -= 1;
1379 }
1380 if (final == 0)
1381 return PyList_New(0);
1382
1383 dict = PyDict_New();
1384 if (dict == NULL)
1385 goto bail;
1386
1387 j = ninteresting;
1388 for (i = 0; i < revcount && j > 0; i++) {
1389 PyObject *key;
1390
1391 if ((final & (1 << i)) == 0)
1392 continue;
1393
1394 key = PyList_GET_ITEM(revs, i);
1395 Py_INCREF(key);
1396 Py_INCREF(Py_None);
1397 if (PyDict_SetItem(dict, key, Py_None) == -1) {
1398 Py_DECREF(key);
1399 Py_DECREF(Py_None);
1400 goto bail;
1401 }
1402 j -= 1;
1403 }
1404
1405 keys = PyDict_Keys(dict);
1406
1407 free(depth);
1408 free(seen);
1409 free(interesting);
1410 Py_DECREF(dict);
1411
1412 return keys;
1413 bail:
1414 free(depth);
1415 free(seen);
1416 free(interesting);
1417 Py_XDECREF(dict);
1418
1419 return NULL;
1420 }
1421
1422 /*
1423 * Given a (possibly overlapping) set of revs, return the greatest
1424 * common ancestors: those with the longest path to the root.
1425 */
1426 static PyObject *index_ancestors(indexObject *self, PyObject *args)
1427 {
1428 PyObject *ret = NULL, *gca = NULL;
1429 Py_ssize_t argcount, i, len;
1430 bitmask repeat = 0;
1431 int revcount = 0;
1432 int *revs;
1433
1434 argcount = PySequence_Length(args);
1435 revs = malloc(argcount * sizeof(*revs));
1436 if (argcount > 0 && revs == NULL)
1437 return PyErr_NoMemory();
1438 len = index_length(self) - 1;
1439
1440 for (i = 0; i < argcount; i++) {
1441 static const int capacity = 24;
1442 PyObject *obj = PySequence_GetItem(args, i);
1443 bitmask x;
1444 long val;
1445
1446 if (!PyInt_Check(obj)) {
1447 PyErr_SetString(PyExc_TypeError,
1448 "arguments must all be ints");
1449 goto bail;
1450 }
1451 val = PyInt_AsLong(obj);
1452 if (val == -1) {
1453 ret = PyList_New(0);
1454 goto done;
1455 }
1456 if (val < 0 || val >= len) {
1457 PyErr_SetString(PyExc_IndexError,
1458 "index out of range");
1459 goto bail;
1460 }
1461 /* this cheesy bloom filter lets us avoid some more
1462 * expensive duplicate checks in the common set-is-disjoint
1463 * case */
1464 x = 1ull << (val & 0x3f);
1465 if (repeat & x) {
1466 int k;
1467 for (k = 0; k < revcount; k++) {
1468 if (val == revs[k])
1469 goto duplicate;
1470 }
1471 }
1472 else repeat |= x;
1473 if (revcount >= capacity) {
1474 PyErr_Format(PyExc_OverflowError,
1475 "bitset size (%d) > capacity (%d)",
1476 revcount, capacity);
1477 goto bail;
1478 }
1479 revs[revcount++] = (int)val;
1480 duplicate:;
1481 }
1482
1483 if (revcount == 0) {
1484 ret = PyList_New(0);
1485 goto done;
1486 }
1487 if (revcount == 1) {
1488 PyObject *obj;
1489 ret = PyList_New(1);
1490 if (ret == NULL)
1491 goto bail;
1492 obj = PyInt_FromLong(revs[0]);
1493 if (obj == NULL)
1494 goto bail;
1495 PyList_SET_ITEM(ret, 0, obj);
1496 goto done;
1497 }
1498
1499 gca = find_gca_candidates(self, revs, revcount);
1500 if (gca == NULL)
1501 goto bail;
1502
1503 if (PyList_GET_SIZE(gca) <= 1) {
1504 ret = gca;
1505 Py_INCREF(gca);
1506 }
1507 else if (PyList_GET_SIZE(gca) == 1) {
1508 ret = PyList_GET_ITEM(gca, 0);
1509 Py_INCREF(ret);
1510 }
1511 else ret = find_deepest(self, gca);
1512
1513 done:
1514 free(revs);
1515 Py_XDECREF(gca);
1516
1517 return ret;
1518
1519 bail:
1520 free(revs);
1521 Py_XDECREF(gca);
1522 Py_XDECREF(ret);
1523 return NULL;
1524 }
1525
1166 /*
1526 /*
1167 * Invalidate any trie entries introduced by added revs.
1527 * Invalidate any trie entries introduced by added revs.
1168 */
1528 */
1169 static void nt_invalidate_added(indexObject *self, Py_ssize_t start)
1529 static void nt_invalidate_added(indexObject *self, Py_ssize_t start)
1170 {
1530 {
1171 Py_ssize_t i, len = PyList_GET_SIZE(self->added);
1531 Py_ssize_t i, len = PyList_GET_SIZE(self->added);
1172
1532
1173 for (i = start; i < len; i++) {
1533 for (i = start; i < len; i++) {
1174 PyObject *tuple = PyList_GET_ITEM(self->added, i);
1534 PyObject *tuple = PyList_GET_ITEM(self->added, i);
1175 PyObject *node = PyTuple_GET_ITEM(tuple, 7);
1535 PyObject *node = PyTuple_GET_ITEM(tuple, 7);
1176
1536
1177 nt_insert(self, PyString_AS_STRING(node), -1);
1537 nt_insert(self, PyString_AS_STRING(node), -1);
1178 }
1538 }
1179
1539
1180 if (start == 0)
1540 if (start == 0)
1181 Py_CLEAR(self->added);
1541 Py_CLEAR(self->added);
1182 }
1542 }
1183
1543
1184 /*
1544 /*
1185 * Delete a numeric range of revs, which must be at the end of the
1545 * Delete a numeric range of revs, which must be at the end of the
1186 * range, but exclude the sentinel nullid entry.
1546 * range, but exclude the sentinel nullid entry.
1187 */
1547 */
1188 static int index_slice_del(indexObject *self, PyObject *item)
1548 static int index_slice_del(indexObject *self, PyObject *item)
1189 {
1549 {
1190 Py_ssize_t start, stop, step, slicelength;
1550 Py_ssize_t start, stop, step, slicelength;
1191 Py_ssize_t length = index_length(self);
1551 Py_ssize_t length = index_length(self);
1192 int ret = 0;
1552 int ret = 0;
1193
1553
1194 if (PySlice_GetIndicesEx((PySliceObject*)item, length,
1554 if (PySlice_GetIndicesEx((PySliceObject*)item, length,
1195 &start, &stop, &step, &slicelength) < 0)
1555 &start, &stop, &step, &slicelength) < 0)
1196 return -1;
1556 return -1;
1197
1557
1198 if (slicelength <= 0)
1558 if (slicelength <= 0)
1199 return 0;
1559 return 0;
1200
1560
1201 if ((step < 0 && start < stop) || (step > 0 && start > stop))
1561 if ((step < 0 && start < stop) || (step > 0 && start > stop))
1202 stop = start;
1562 stop = start;
1203
1563
1204 if (step < 0) {
1564 if (step < 0) {
1205 stop = start + 1;
1565 stop = start + 1;
1206 start = stop + step*(slicelength - 1) - 1;
1566 start = stop + step*(slicelength - 1) - 1;
1207 step = -step;
1567 step = -step;
1208 }
1568 }
1209
1569
1210 if (step != 1) {
1570 if (step != 1) {
1211 PyErr_SetString(PyExc_ValueError,
1571 PyErr_SetString(PyExc_ValueError,
1212 "revlog index delete requires step size of 1");
1572 "revlog index delete requires step size of 1");
1213 return -1;
1573 return -1;
1214 }
1574 }
1215
1575
1216 if (stop != length - 1) {
1576 if (stop != length - 1) {
1217 PyErr_SetString(PyExc_IndexError,
1577 PyErr_SetString(PyExc_IndexError,
1218 "revlog index deletion indices are invalid");
1578 "revlog index deletion indices are invalid");
1219 return -1;
1579 return -1;
1220 }
1580 }
1221
1581
1222 if (start < self->length - 1) {
1582 if (start < self->length - 1) {
1223 if (self->nt) {
1583 if (self->nt) {
1224 Py_ssize_t i;
1584 Py_ssize_t i;
1225
1585
1226 for (i = start + 1; i < self->length - 1; i++) {
1586 for (i = start + 1; i < self->length - 1; i++) {
1227 const char *node = index_node(self, i);
1587 const char *node = index_node(self, i);
1228
1588
1229 if (node)
1589 if (node)
1230 nt_insert(self, node, -1);
1590 nt_insert(self, node, -1);
1231 }
1591 }
1232 if (self->added)
1592 if (self->added)
1233 nt_invalidate_added(self, 0);
1593 nt_invalidate_added(self, 0);
1234 if (self->ntrev > start)
1594 if (self->ntrev > start)
1235 self->ntrev = (int)start;
1595 self->ntrev = (int)start;
1236 }
1596 }
1237 self->length = start + 1;
1597 self->length = start + 1;
1238 if (start < self->raw_length) {
1598 if (start < self->raw_length) {
1239 if (self->cache) {
1599 if (self->cache) {
1240 Py_ssize_t i;
1600 Py_ssize_t i;
1241 for (i = start; i < self->raw_length; i++)
1601 for (i = start; i < self->raw_length; i++)
1242 Py_CLEAR(self->cache[i]);
1602 Py_CLEAR(self->cache[i]);
1243 }
1603 }
1244 self->raw_length = start;
1604 self->raw_length = start;
1245 }
1605 }
1246 goto done;
1606 goto done;
1247 }
1607 }
1248
1608
1249 if (self->nt) {
1609 if (self->nt) {
1250 nt_invalidate_added(self, start - self->length + 1);
1610 nt_invalidate_added(self, start - self->length + 1);
1251 if (self->ntrev > start)
1611 if (self->ntrev > start)
1252 self->ntrev = (int)start;
1612 self->ntrev = (int)start;
1253 }
1613 }
1254 if (self->added)
1614 if (self->added)
1255 ret = PyList_SetSlice(self->added, start - self->length + 1,
1615 ret = PyList_SetSlice(self->added, start - self->length + 1,
1256 PyList_GET_SIZE(self->added), NULL);
1616 PyList_GET_SIZE(self->added), NULL);
1257 done:
1617 done:
1258 Py_CLEAR(self->headrevs);
1618 Py_CLEAR(self->headrevs);
1259 return ret;
1619 return ret;
1260 }
1620 }
1261
1621
1262 /*
1622 /*
1263 * Supported ops:
1623 * Supported ops:
1264 *
1624 *
1265 * slice deletion
1625 * slice deletion
1266 * string assignment (extend node->rev mapping)
1626 * string assignment (extend node->rev mapping)
1267 * string deletion (shrink node->rev mapping)
1627 * string deletion (shrink node->rev mapping)
1268 */
1628 */
1269 static int index_assign_subscript(indexObject *self, PyObject *item,
1629 static int index_assign_subscript(indexObject *self, PyObject *item,
1270 PyObject *value)
1630 PyObject *value)
1271 {
1631 {
1272 char *node;
1632 char *node;
1273 Py_ssize_t nodelen;
1633 Py_ssize_t nodelen;
1274 long rev;
1634 long rev;
1275
1635
1276 if (PySlice_Check(item) && value == NULL)
1636 if (PySlice_Check(item) && value == NULL)
1277 return index_slice_del(self, item);
1637 return index_slice_del(self, item);
1278
1638
1279 if (node_check(item, &node, &nodelen) == -1)
1639 if (node_check(item, &node, &nodelen) == -1)
1280 return -1;
1640 return -1;
1281
1641
1282 if (value == NULL)
1642 if (value == NULL)
1283 return self->nt ? nt_insert(self, node, -1) : 0;
1643 return self->nt ? nt_insert(self, node, -1) : 0;
1284 rev = PyInt_AsLong(value);
1644 rev = PyInt_AsLong(value);
1285 if (rev > INT_MAX || rev < 0) {
1645 if (rev > INT_MAX || rev < 0) {
1286 if (!PyErr_Occurred())
1646 if (!PyErr_Occurred())
1287 PyErr_SetString(PyExc_ValueError, "rev out of range");
1647 PyErr_SetString(PyExc_ValueError, "rev out of range");
1288 return -1;
1648 return -1;
1289 }
1649 }
1290 return nt_insert(self, node, (int)rev);
1650 return nt_insert(self, node, (int)rev);
1291 }
1651 }
1292
1652
1293 /*
1653 /*
1294 * Find all RevlogNG entries in an index that has inline data. Update
1654 * Find all RevlogNG entries in an index that has inline data. Update
1295 * the optional "offsets" table with those entries.
1655 * the optional "offsets" table with those entries.
1296 */
1656 */
1297 static long inline_scan(indexObject *self, const char **offsets)
1657 static long inline_scan(indexObject *self, const char **offsets)
1298 {
1658 {
1299 const char *data = PyString_AS_STRING(self->data);
1659 const char *data = PyString_AS_STRING(self->data);
1300 const char *end = data + PyString_GET_SIZE(self->data);
1660 const char *end = data + PyString_GET_SIZE(self->data);
1301 long incr = v1_hdrsize;
1661 long incr = v1_hdrsize;
1302 Py_ssize_t len = 0;
1662 Py_ssize_t len = 0;
1303
1663
1304 while (data + v1_hdrsize <= end) {
1664 while (data + v1_hdrsize <= end) {
1305 uint32_t comp_len;
1665 uint32_t comp_len;
1306 const char *old_data;
1666 const char *old_data;
1307 /* 3rd element of header is length of compressed inline data */
1667 /* 3rd element of header is length of compressed inline data */
1308 comp_len = getbe32(data + 8);
1668 comp_len = getbe32(data + 8);
1309 incr = v1_hdrsize + comp_len;
1669 incr = v1_hdrsize + comp_len;
1310 if (incr < v1_hdrsize)
1670 if (incr < v1_hdrsize)
1311 break;
1671 break;
1312 if (offsets)
1672 if (offsets)
1313 offsets[len] = data;
1673 offsets[len] = data;
1314 len++;
1674 len++;
1315 old_data = data;
1675 old_data = data;
1316 data += incr;
1676 data += incr;
1317 if (data <= old_data)
1677 if (data <= old_data)
1318 break;
1678 break;
1319 }
1679 }
1320
1680
1321 if (data != end && data + v1_hdrsize != end) {
1681 if (data != end && data + v1_hdrsize != end) {
1322 if (!PyErr_Occurred())
1682 if (!PyErr_Occurred())
1323 PyErr_SetString(PyExc_ValueError, "corrupt index file");
1683 PyErr_SetString(PyExc_ValueError, "corrupt index file");
1324 return -1;
1684 return -1;
1325 }
1685 }
1326
1686
1327 return len;
1687 return len;
1328 }
1688 }
1329
1689
1330 static int index_init(indexObject *self, PyObject *args)
1690 static int index_init(indexObject *self, PyObject *args)
1331 {
1691 {
1332 PyObject *data_obj, *inlined_obj;
1692 PyObject *data_obj, *inlined_obj;
1333 Py_ssize_t size;
1693 Py_ssize_t size;
1334
1694
1335 if (!PyArg_ParseTuple(args, "OO", &data_obj, &inlined_obj))
1695 if (!PyArg_ParseTuple(args, "OO", &data_obj, &inlined_obj))
1336 return -1;
1696 return -1;
1337 if (!PyString_Check(data_obj)) {
1697 if (!PyString_Check(data_obj)) {
1338 PyErr_SetString(PyExc_TypeError, "data is not a string");
1698 PyErr_SetString(PyExc_TypeError, "data is not a string");
1339 return -1;
1699 return -1;
1340 }
1700 }
1341 size = PyString_GET_SIZE(data_obj);
1701 size = PyString_GET_SIZE(data_obj);
1342
1702
1343 self->inlined = inlined_obj && PyObject_IsTrue(inlined_obj);
1703 self->inlined = inlined_obj && PyObject_IsTrue(inlined_obj);
1344 self->data = data_obj;
1704 self->data = data_obj;
1345 self->cache = NULL;
1705 self->cache = NULL;
1346
1706
1347 self->added = NULL;
1707 self->added = NULL;
1348 self->headrevs = NULL;
1708 self->headrevs = NULL;
1349 self->offsets = NULL;
1709 self->offsets = NULL;
1350 self->nt = NULL;
1710 self->nt = NULL;
1351 self->ntlength = self->ntcapacity = 0;
1711 self->ntlength = self->ntcapacity = 0;
1352 self->ntdepth = self->ntsplits = 0;
1712 self->ntdepth = self->ntsplits = 0;
1353 self->ntlookups = self->ntmisses = 0;
1713 self->ntlookups = self->ntmisses = 0;
1354 self->ntrev = -1;
1714 self->ntrev = -1;
1355 Py_INCREF(self->data);
1715 Py_INCREF(self->data);
1356
1716
1357 if (self->inlined) {
1717 if (self->inlined) {
1358 long len = inline_scan(self, NULL);
1718 long len = inline_scan(self, NULL);
1359 if (len == -1)
1719 if (len == -1)
1360 goto bail;
1720 goto bail;
1361 self->raw_length = len;
1721 self->raw_length = len;
1362 self->length = len + 1;
1722 self->length = len + 1;
1363 } else {
1723 } else {
1364 if (size % v1_hdrsize) {
1724 if (size % v1_hdrsize) {
1365 PyErr_SetString(PyExc_ValueError, "corrupt index file");
1725 PyErr_SetString(PyExc_ValueError, "corrupt index file");
1366 goto bail;
1726 goto bail;
1367 }
1727 }
1368 self->raw_length = size / v1_hdrsize;
1728 self->raw_length = size / v1_hdrsize;
1369 self->length = self->raw_length + 1;
1729 self->length = self->raw_length + 1;
1370 }
1730 }
1371
1731
1372 return 0;
1732 return 0;
1373 bail:
1733 bail:
1374 return -1;
1734 return -1;
1375 }
1735 }
1376
1736
1377 static PyObject *index_nodemap(indexObject *self)
1737 static PyObject *index_nodemap(indexObject *self)
1378 {
1738 {
1379 Py_INCREF(self);
1739 Py_INCREF(self);
1380 return (PyObject *)self;
1740 return (PyObject *)self;
1381 }
1741 }
1382
1742
1383 static void index_dealloc(indexObject *self)
1743 static void index_dealloc(indexObject *self)
1384 {
1744 {
1385 _index_clearcaches(self);
1745 _index_clearcaches(self);
1386 Py_DECREF(self->data);
1746 Py_DECREF(self->data);
1387 Py_XDECREF(self->added);
1747 Py_XDECREF(self->added);
1388 PyObject_Del(self);
1748 PyObject_Del(self);
1389 }
1749 }
1390
1750
1391 static PySequenceMethods index_sequence_methods = {
1751 static PySequenceMethods index_sequence_methods = {
1392 (lenfunc)index_length, /* sq_length */
1752 (lenfunc)index_length, /* sq_length */
1393 0, /* sq_concat */
1753 0, /* sq_concat */
1394 0, /* sq_repeat */
1754 0, /* sq_repeat */
1395 (ssizeargfunc)index_get, /* sq_item */
1755 (ssizeargfunc)index_get, /* sq_item */
1396 0, /* sq_slice */
1756 0, /* sq_slice */
1397 0, /* sq_ass_item */
1757 0, /* sq_ass_item */
1398 0, /* sq_ass_slice */
1758 0, /* sq_ass_slice */
1399 (objobjproc)index_contains, /* sq_contains */
1759 (objobjproc)index_contains, /* sq_contains */
1400 };
1760 };
1401
1761
1402 static PyMappingMethods index_mapping_methods = {
1762 static PyMappingMethods index_mapping_methods = {
1403 (lenfunc)index_length, /* mp_length */
1763 (lenfunc)index_length, /* mp_length */
1404 (binaryfunc)index_getitem, /* mp_subscript */
1764 (binaryfunc)index_getitem, /* mp_subscript */
1405 (objobjargproc)index_assign_subscript, /* mp_ass_subscript */
1765 (objobjargproc)index_assign_subscript, /* mp_ass_subscript */
1406 };
1766 };
1407
1767
1408 static PyMethodDef index_methods[] = {
1768 static PyMethodDef index_methods[] = {
1769 {"ancestors", (PyCFunction)index_ancestors, METH_VARARGS,
1770 "return the gca set of the given revs"},
1409 {"clearcaches", (PyCFunction)index_clearcaches, METH_NOARGS,
1771 {"clearcaches", (PyCFunction)index_clearcaches, METH_NOARGS,
1410 "clear the index caches"},
1772 "clear the index caches"},
1411 {"get", (PyCFunction)index_m_get, METH_VARARGS,
1773 {"get", (PyCFunction)index_m_get, METH_VARARGS,
1412 "get an index entry"},
1774 "get an index entry"},
1413 {"headrevs", (PyCFunction)index_headrevs, METH_NOARGS,
1775 {"headrevs", (PyCFunction)index_headrevs, METH_NOARGS,
1414 "get head revisions"},
1776 "get head revisions"},
1415 {"insert", (PyCFunction)index_insert, METH_VARARGS,
1777 {"insert", (PyCFunction)index_insert, METH_VARARGS,
1416 "insert an index entry"},
1778 "insert an index entry"},
1417 {"partialmatch", (PyCFunction)index_partialmatch, METH_VARARGS,
1779 {"partialmatch", (PyCFunction)index_partialmatch, METH_VARARGS,
1418 "match a potentially ambiguous node ID"},
1780 "match a potentially ambiguous node ID"},
1419 {"stats", (PyCFunction)index_stats, METH_NOARGS,
1781 {"stats", (PyCFunction)index_stats, METH_NOARGS,
1420 "stats for the index"},
1782 "stats for the index"},
1421 {NULL} /* Sentinel */
1783 {NULL} /* Sentinel */
1422 };
1784 };
1423
1785
1424 static PyGetSetDef index_getset[] = {
1786 static PyGetSetDef index_getset[] = {
1425 {"nodemap", (getter)index_nodemap, NULL, "nodemap", NULL},
1787 {"nodemap", (getter)index_nodemap, NULL, "nodemap", NULL},
1426 {NULL} /* Sentinel */
1788 {NULL} /* Sentinel */
1427 };
1789 };
1428
1790
1429 static PyTypeObject indexType = {
1791 static PyTypeObject indexType = {
1430 PyObject_HEAD_INIT(NULL)
1792 PyObject_HEAD_INIT(NULL)
1431 0, /* ob_size */
1793 0, /* ob_size */
1432 "parsers.index", /* tp_name */
1794 "parsers.index", /* tp_name */
1433 sizeof(indexObject), /* tp_basicsize */
1795 sizeof(indexObject), /* tp_basicsize */
1434 0, /* tp_itemsize */
1796 0, /* tp_itemsize */
1435 (destructor)index_dealloc, /* tp_dealloc */
1797 (destructor)index_dealloc, /* tp_dealloc */
1436 0, /* tp_print */
1798 0, /* tp_print */
1437 0, /* tp_getattr */
1799 0, /* tp_getattr */
1438 0, /* tp_setattr */
1800 0, /* tp_setattr */
1439 0, /* tp_compare */
1801 0, /* tp_compare */
1440 0, /* tp_repr */
1802 0, /* tp_repr */
1441 0, /* tp_as_number */
1803 0, /* tp_as_number */
1442 &index_sequence_methods, /* tp_as_sequence */
1804 &index_sequence_methods, /* tp_as_sequence */
1443 &index_mapping_methods, /* tp_as_mapping */
1805 &index_mapping_methods, /* tp_as_mapping */
1444 0, /* tp_hash */
1806 0, /* tp_hash */
1445 0, /* tp_call */
1807 0, /* tp_call */
1446 0, /* tp_str */
1808 0, /* tp_str */
1447 0, /* tp_getattro */
1809 0, /* tp_getattro */
1448 0, /* tp_setattro */
1810 0, /* tp_setattro */
1449 0, /* tp_as_buffer */
1811 0, /* tp_as_buffer */
1450 Py_TPFLAGS_DEFAULT, /* tp_flags */
1812 Py_TPFLAGS_DEFAULT, /* tp_flags */
1451 "revlog index", /* tp_doc */
1813 "revlog index", /* tp_doc */
1452 0, /* tp_traverse */
1814 0, /* tp_traverse */
1453 0, /* tp_clear */
1815 0, /* tp_clear */
1454 0, /* tp_richcompare */
1816 0, /* tp_richcompare */
1455 0, /* tp_weaklistoffset */
1817 0, /* tp_weaklistoffset */
1456 0, /* tp_iter */
1818 0, /* tp_iter */
1457 0, /* tp_iternext */
1819 0, /* tp_iternext */
1458 index_methods, /* tp_methods */
1820 index_methods, /* tp_methods */
1459 0, /* tp_members */
1821 0, /* tp_members */
1460 index_getset, /* tp_getset */
1822 index_getset, /* tp_getset */
1461 0, /* tp_base */
1823 0, /* tp_base */
1462 0, /* tp_dict */
1824 0, /* tp_dict */
1463 0, /* tp_descr_get */
1825 0, /* tp_descr_get */
1464 0, /* tp_descr_set */
1826 0, /* tp_descr_set */
1465 0, /* tp_dictoffset */
1827 0, /* tp_dictoffset */
1466 (initproc)index_init, /* tp_init */
1828 (initproc)index_init, /* tp_init */
1467 0, /* tp_alloc */
1829 0, /* tp_alloc */
1468 };
1830 };
1469
1831
1470 /*
1832 /*
1471 * returns a tuple of the form (index, index, cache) with elements as
1833 * returns a tuple of the form (index, index, cache) with elements as
1472 * follows:
1834 * follows:
1473 *
1835 *
1474 * index: an index object that lazily parses RevlogNG records
1836 * index: an index object that lazily parses RevlogNG records
1475 * cache: if data is inlined, a tuple (index_file_content, 0), else None
1837 * cache: if data is inlined, a tuple (index_file_content, 0), else None
1476 *
1838 *
1477 * added complications are for backwards compatibility
1839 * added complications are for backwards compatibility
1478 */
1840 */
1479 static PyObject *parse_index2(PyObject *self, PyObject *args)
1841 static PyObject *parse_index2(PyObject *self, PyObject *args)
1480 {
1842 {
1481 PyObject *tuple = NULL, *cache = NULL;
1843 PyObject *tuple = NULL, *cache = NULL;
1482 indexObject *idx;
1844 indexObject *idx;
1483 int ret;
1845 int ret;
1484
1846
1485 idx = PyObject_New(indexObject, &indexType);
1847 idx = PyObject_New(indexObject, &indexType);
1486 if (idx == NULL)
1848 if (idx == NULL)
1487 goto bail;
1849 goto bail;
1488
1850
1489 ret = index_init(idx, args);
1851 ret = index_init(idx, args);
1490 if (ret == -1)
1852 if (ret == -1)
1491 goto bail;
1853 goto bail;
1492
1854
1493 if (idx->inlined) {
1855 if (idx->inlined) {
1494 cache = Py_BuildValue("iO", 0, idx->data);
1856 cache = Py_BuildValue("iO", 0, idx->data);
1495 if (cache == NULL)
1857 if (cache == NULL)
1496 goto bail;
1858 goto bail;
1497 } else {
1859 } else {
1498 cache = Py_None;
1860 cache = Py_None;
1499 Py_INCREF(cache);
1861 Py_INCREF(cache);
1500 }
1862 }
1501
1863
1502 tuple = Py_BuildValue("NN", idx, cache);
1864 tuple = Py_BuildValue("NN", idx, cache);
1503 if (!tuple)
1865 if (!tuple)
1504 goto bail;
1866 goto bail;
1505 return tuple;
1867 return tuple;
1506
1868
1507 bail:
1869 bail:
1508 Py_XDECREF(idx);
1870 Py_XDECREF(idx);
1509 Py_XDECREF(cache);
1871 Py_XDECREF(cache);
1510 Py_XDECREF(tuple);
1872 Py_XDECREF(tuple);
1511 return NULL;
1873 return NULL;
1512 }
1874 }
1513
1875
1514 static char parsers_doc[] = "Efficient content parsing.";
1876 static char parsers_doc[] = "Efficient content parsing.";
1515
1877
1516 PyObject *encodedir(PyObject *self, PyObject *args);
1878 PyObject *encodedir(PyObject *self, PyObject *args);
1517 PyObject *pathencode(PyObject *self, PyObject *args);
1879 PyObject *pathencode(PyObject *self, PyObject *args);
1518 PyObject *lowerencode(PyObject *self, PyObject *args);
1880 PyObject *lowerencode(PyObject *self, PyObject *args);
1519
1881
1520 static PyMethodDef methods[] = {
1882 static PyMethodDef methods[] = {
1521 {"pack_dirstate", pack_dirstate, METH_VARARGS, "pack a dirstate\n"},
1883 {"pack_dirstate", pack_dirstate, METH_VARARGS, "pack a dirstate\n"},
1522 {"parse_manifest", parse_manifest, METH_VARARGS, "parse a manifest\n"},
1884 {"parse_manifest", parse_manifest, METH_VARARGS, "parse a manifest\n"},
1523 {"parse_dirstate", parse_dirstate, METH_VARARGS, "parse a dirstate\n"},
1885 {"parse_dirstate", parse_dirstate, METH_VARARGS, "parse a dirstate\n"},
1524 {"parse_index2", parse_index2, METH_VARARGS, "parse a revlog index\n"},
1886 {"parse_index2", parse_index2, METH_VARARGS, "parse a revlog index\n"},
1525 {"encodedir", encodedir, METH_VARARGS, "encodedir a path\n"},
1887 {"encodedir", encodedir, METH_VARARGS, "encodedir a path\n"},
1526 {"pathencode", pathencode, METH_VARARGS, "fncache-encode a path\n"},
1888 {"pathencode", pathencode, METH_VARARGS, "fncache-encode a path\n"},
1527 {"lowerencode", lowerencode, METH_VARARGS, "lower-encode a path\n"},
1889 {"lowerencode", lowerencode, METH_VARARGS, "lower-encode a path\n"},
1528 {NULL, NULL}
1890 {NULL, NULL}
1529 };
1891 };
1530
1892
1531 void dirs_module_init(PyObject *mod);
1893 void dirs_module_init(PyObject *mod);
1532
1894
1533 static void module_init(PyObject *mod)
1895 static void module_init(PyObject *mod)
1534 {
1896 {
1535 dirs_module_init(mod);
1897 dirs_module_init(mod);
1536
1898
1537 indexType.tp_new = PyType_GenericNew;
1899 indexType.tp_new = PyType_GenericNew;
1538 if (PyType_Ready(&indexType) < 0)
1900 if (PyType_Ready(&indexType) < 0)
1539 return;
1901 return;
1540 Py_INCREF(&indexType);
1902 Py_INCREF(&indexType);
1541
1903
1542 PyModule_AddObject(mod, "index", (PyObject *)&indexType);
1904 PyModule_AddObject(mod, "index", (PyObject *)&indexType);
1543
1905
1544 nullentry = Py_BuildValue("iiiiiiis#", 0, 0, 0,
1906 nullentry = Py_BuildValue("iiiiiiis#", 0, 0, 0,
1545 -1, -1, -1, -1, nullid, 20);
1907 -1, -1, -1, -1, nullid, 20);
1546 if (nullentry)
1908 if (nullentry)
1547 PyObject_GC_UnTrack(nullentry);
1909 PyObject_GC_UnTrack(nullentry);
1548
1910
1549 dirstate_unset = Py_BuildValue("ciii", 'n', 0, -1, -1);
1911 dirstate_unset = Py_BuildValue("ciii", 'n', 0, -1, -1);
1550 }
1912 }
1551
1913
1552 #ifdef IS_PY3K
1914 #ifdef IS_PY3K
1553 static struct PyModuleDef parsers_module = {
1915 static struct PyModuleDef parsers_module = {
1554 PyModuleDef_HEAD_INIT,
1916 PyModuleDef_HEAD_INIT,
1555 "parsers",
1917 "parsers",
1556 parsers_doc,
1918 parsers_doc,
1557 -1,
1919 -1,
1558 methods
1920 methods
1559 };
1921 };
1560
1922
1561 PyMODINIT_FUNC PyInit_parsers(void)
1923 PyMODINIT_FUNC PyInit_parsers(void)
1562 {
1924 {
1563 PyObject *mod = PyModule_Create(&parsers_module);
1925 PyObject *mod = PyModule_Create(&parsers_module);
1564 module_init(mod);
1926 module_init(mod);
1565 return mod;
1927 return mod;
1566 }
1928 }
1567 #else
1929 #else
1568 PyMODINIT_FUNC initparsers(void)
1930 PyMODINIT_FUNC initparsers(void)
1569 {
1931 {
1570 PyObject *mod = Py_InitModule3("parsers", methods, parsers_doc);
1932 PyObject *mod = Py_InitModule3("parsers", methods, parsers_doc);
1571 module_init(mod);
1933 module_init(mod);
1572 }
1934 }
1573 #endif
1935 #endif
@@ -1,1337 +1,1343 b''
1 # revlog.py - storage back-end for mercurial
1 # revlog.py - storage back-end for mercurial
2 #
2 #
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 """Storage back-end for Mercurial.
8 """Storage back-end for Mercurial.
9
9
10 This provides efficient delta storage with O(1) retrieve and append
10 This provides efficient delta storage with O(1) retrieve and append
11 and O(changes) merge between branches.
11 and O(changes) merge between branches.
12 """
12 """
13
13
14 # import stuff from node for others to import from revlog
14 # import stuff from node for others to import from revlog
15 from node import bin, hex, nullid, nullrev
15 from node import bin, hex, nullid, nullrev
16 from i18n import _
16 from i18n import _
17 import ancestor, mdiff, parsers, error, util, dagutil
17 import ancestor, mdiff, parsers, error, util, dagutil
18 import struct, zlib, errno
18 import struct, zlib, errno
19
19
20 _pack = struct.pack
20 _pack = struct.pack
21 _unpack = struct.unpack
21 _unpack = struct.unpack
22 _compress = zlib.compress
22 _compress = zlib.compress
23 _decompress = zlib.decompress
23 _decompress = zlib.decompress
24 _sha = util.sha1
24 _sha = util.sha1
25
25
26 # revlog header flags
26 # revlog header flags
27 REVLOGV0 = 0
27 REVLOGV0 = 0
28 REVLOGNG = 1
28 REVLOGNG = 1
29 REVLOGNGINLINEDATA = (1 << 16)
29 REVLOGNGINLINEDATA = (1 << 16)
30 REVLOGGENERALDELTA = (1 << 17)
30 REVLOGGENERALDELTA = (1 << 17)
31 REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
31 REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
32 REVLOG_DEFAULT_FORMAT = REVLOGNG
32 REVLOG_DEFAULT_FORMAT = REVLOGNG
33 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
33 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
34 REVLOGNG_FLAGS = REVLOGNGINLINEDATA | REVLOGGENERALDELTA
34 REVLOGNG_FLAGS = REVLOGNGINLINEDATA | REVLOGGENERALDELTA
35
35
36 # revlog index flags
36 # revlog index flags
37 REVIDX_KNOWN_FLAGS = 0
37 REVIDX_KNOWN_FLAGS = 0
38
38
39 # max size of revlog with inline data
39 # max size of revlog with inline data
40 _maxinline = 131072
40 _maxinline = 131072
41 _chunksize = 1048576
41 _chunksize = 1048576
42
42
43 RevlogError = error.RevlogError
43 RevlogError = error.RevlogError
44 LookupError = error.LookupError
44 LookupError = error.LookupError
45
45
46 def getoffset(q):
46 def getoffset(q):
47 return int(q >> 16)
47 return int(q >> 16)
48
48
49 def gettype(q):
49 def gettype(q):
50 return int(q & 0xFFFF)
50 return int(q & 0xFFFF)
51
51
52 def offset_type(offset, type):
52 def offset_type(offset, type):
53 return long(long(offset) << 16 | type)
53 return long(long(offset) << 16 | type)
54
54
55 nullhash = _sha(nullid)
55 nullhash = _sha(nullid)
56
56
57 def hash(text, p1, p2):
57 def hash(text, p1, p2):
58 """generate a hash from the given text and its parent hashes
58 """generate a hash from the given text and its parent hashes
59
59
60 This hash combines both the current file contents and its history
60 This hash combines both the current file contents and its history
61 in a manner that makes it easy to distinguish nodes with the same
61 in a manner that makes it easy to distinguish nodes with the same
62 content in the revision graph.
62 content in the revision graph.
63 """
63 """
64 # As of now, if one of the parent node is null, p2 is null
64 # As of now, if one of the parent node is null, p2 is null
65 if p2 == nullid:
65 if p2 == nullid:
66 # deep copy of a hash is faster than creating one
66 # deep copy of a hash is faster than creating one
67 s = nullhash.copy()
67 s = nullhash.copy()
68 s.update(p1)
68 s.update(p1)
69 else:
69 else:
70 # none of the parent nodes are nullid
70 # none of the parent nodes are nullid
71 l = [p1, p2]
71 l = [p1, p2]
72 l.sort()
72 l.sort()
73 s = _sha(l[0])
73 s = _sha(l[0])
74 s.update(l[1])
74 s.update(l[1])
75 s.update(text)
75 s.update(text)
76 return s.digest()
76 return s.digest()
77
77
78 def decompress(bin):
78 def decompress(bin):
79 """ decompress the given input """
79 """ decompress the given input """
80 if not bin:
80 if not bin:
81 return bin
81 return bin
82 t = bin[0]
82 t = bin[0]
83 if t == '\0':
83 if t == '\0':
84 return bin
84 return bin
85 if t == 'x':
85 if t == 'x':
86 try:
86 try:
87 return _decompress(bin)
87 return _decompress(bin)
88 except zlib.error, e:
88 except zlib.error, e:
89 raise RevlogError(_("revlog decompress error: %s") % str(e))
89 raise RevlogError(_("revlog decompress error: %s") % str(e))
90 if t == 'u':
90 if t == 'u':
91 return bin[1:]
91 return bin[1:]
92 raise RevlogError(_("unknown compression type %r") % t)
92 raise RevlogError(_("unknown compression type %r") % t)
93
93
94 # index v0:
94 # index v0:
95 # 4 bytes: offset
95 # 4 bytes: offset
96 # 4 bytes: compressed length
96 # 4 bytes: compressed length
97 # 4 bytes: base rev
97 # 4 bytes: base rev
98 # 4 bytes: link rev
98 # 4 bytes: link rev
99 # 32 bytes: parent 1 nodeid
99 # 32 bytes: parent 1 nodeid
100 # 32 bytes: parent 2 nodeid
100 # 32 bytes: parent 2 nodeid
101 # 32 bytes: nodeid
101 # 32 bytes: nodeid
102 indexformatv0 = ">4l20s20s20s"
102 indexformatv0 = ">4l20s20s20s"
103 v0shaoffset = 56
103 v0shaoffset = 56
104
104
105 class revlogoldio(object):
105 class revlogoldio(object):
106 def __init__(self):
106 def __init__(self):
107 self.size = struct.calcsize(indexformatv0)
107 self.size = struct.calcsize(indexformatv0)
108
108
109 def parseindex(self, data, inline):
109 def parseindex(self, data, inline):
110 s = self.size
110 s = self.size
111 index = []
111 index = []
112 nodemap = {nullid: nullrev}
112 nodemap = {nullid: nullrev}
113 n = off = 0
113 n = off = 0
114 l = len(data)
114 l = len(data)
115 while off + s <= l:
115 while off + s <= l:
116 cur = data[off:off + s]
116 cur = data[off:off + s]
117 off += s
117 off += s
118 e = _unpack(indexformatv0, cur)
118 e = _unpack(indexformatv0, cur)
119 # transform to revlogv1 format
119 # transform to revlogv1 format
120 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
120 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
121 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
121 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
122 index.append(e2)
122 index.append(e2)
123 nodemap[e[6]] = n
123 nodemap[e[6]] = n
124 n += 1
124 n += 1
125
125
126 # add the magic null revision at -1
126 # add the magic null revision at -1
127 index.append((0, 0, 0, -1, -1, -1, -1, nullid))
127 index.append((0, 0, 0, -1, -1, -1, -1, nullid))
128
128
129 return index, nodemap, None
129 return index, nodemap, None
130
130
131 def packentry(self, entry, node, version, rev):
131 def packentry(self, entry, node, version, rev):
132 if gettype(entry[0]):
132 if gettype(entry[0]):
133 raise RevlogError(_("index entry flags need RevlogNG"))
133 raise RevlogError(_("index entry flags need RevlogNG"))
134 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
134 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
135 node(entry[5]), node(entry[6]), entry[7])
135 node(entry[5]), node(entry[6]), entry[7])
136 return _pack(indexformatv0, *e2)
136 return _pack(indexformatv0, *e2)
137
137
138 # index ng:
138 # index ng:
139 # 6 bytes: offset
139 # 6 bytes: offset
140 # 2 bytes: flags
140 # 2 bytes: flags
141 # 4 bytes: compressed length
141 # 4 bytes: compressed length
142 # 4 bytes: uncompressed length
142 # 4 bytes: uncompressed length
143 # 4 bytes: base rev
143 # 4 bytes: base rev
144 # 4 bytes: link rev
144 # 4 bytes: link rev
145 # 4 bytes: parent 1 rev
145 # 4 bytes: parent 1 rev
146 # 4 bytes: parent 2 rev
146 # 4 bytes: parent 2 rev
147 # 32 bytes: nodeid
147 # 32 bytes: nodeid
148 indexformatng = ">Qiiiiii20s12x"
148 indexformatng = ">Qiiiiii20s12x"
149 ngshaoffset = 32
149 ngshaoffset = 32
150 versionformat = ">I"
150 versionformat = ">I"
151
151
class revlogio(object):
    """Reader/writer for the RevlogNG (v1) index format."""
    def __init__(self):
        # fixed size of one on-disk index record, derived from the
        # module-level struct format
        self.size = struct.calcsize(indexformatng)

    def parseindex(self, data, inline):
        """Parse raw index data; return (index, nodemap, chunkcache).

        nodemap may be None if the C index exposes lookup via an
        attribute instead of a separate mapping.
        """
        # call the C implementation to parse the index data
        index, cache = parsers.parse_index2(data, inline)
        return index, getattr(index, 'nodemap', None), cache

    def packentry(self, entry, node, version, rev):
        """Serialize an index entry tuple into the NG on-disk format."""
        p = _pack(indexformatng, *entry)
        if rev == 0:
            # the first entry doubles as the file header: overwrite its
            # first 4 bytes (high bits of the offset, always 0 for rev 0)
            # with the version/flags word
            p = _pack(versionformat, version) + p[4:]
        return p
166
166
167 class revlog(object):
167 class revlog(object):
168 """
168 """
169 the underlying revision storage object
169 the underlying revision storage object
170
170
171 A revlog consists of two parts, an index and the revision data.
171 A revlog consists of two parts, an index and the revision data.
172
172
173 The index is a file with a fixed record size containing
173 The index is a file with a fixed record size containing
174 information on each revision, including its nodeid (hash), the
174 information on each revision, including its nodeid (hash), the
175 nodeids of its parents, the position and offset of its data within
175 nodeids of its parents, the position and offset of its data within
176 the data file, and the revision it's based on. Finally, each entry
176 the data file, and the revision it's based on. Finally, each entry
177 contains a linkrev entry that can serve as a pointer to external
177 contains a linkrev entry that can serve as a pointer to external
178 data.
178 data.
179
179
180 The revision data itself is a linear collection of data chunks.
180 The revision data itself is a linear collection of data chunks.
181 Each chunk represents a revision and is usually represented as a
181 Each chunk represents a revision and is usually represented as a
182 delta against the previous chunk. To bound lookup time, runs of
182 delta against the previous chunk. To bound lookup time, runs of
183 deltas are limited to about 2 times the length of the original
183 deltas are limited to about 2 times the length of the original
184 version data. This makes retrieval of a version proportional to
184 version data. This makes retrieval of a version proportional to
185 its size, or O(1) relative to the number of revisions.
185 its size, or O(1) relative to the number of revisions.
186
186
187 Both pieces of the revlog are written to in an append-only
187 Both pieces of the revlog are written to in an append-only
188 fashion, which means we never need to rewrite a file to insert or
188 fashion, which means we never need to rewrite a file to insert or
189 remove data, and can use some simple techniques to avoid the need
189 remove data, and can use some simple techniques to avoid the need
190 for locking while reading.
190 for locking while reading.
191 """
191 """
    def __init__(self, opener, indexfile):
        """
        create a revlog object

        opener is a function that abstracts the file opening operation
        and can be used to implement COW semantics or the like.
        """
        self.indexfile = indexfile
        # the data file shares the index file's name, with a ".d" suffix
        self.datafile = indexfile[:-2] + ".d"
        self.opener = opener
        self._cache = None        # revision cache; filled by readers (not shown here)
        self._basecache = (0, 0)  # delta-base cache — TODO confirm semantics with users
        self._chunkcache = (0, '')  # (offset, data) window into the data file
        self.index = []
        self._pcache = {}         # partial-nodeid lookup cache
        self._nodecache = {nullid: nullrev}
        self._nodepos = None      # resume point for the backward scan in rev()

        # determine the revlog version to use for a new file, honoring
        # options carried on the opener when present
        v = REVLOG_DEFAULT_VERSION
        opts = getattr(opener, 'options', None)
        if opts is not None:
            if 'revlogv1' in opts:
                if 'generaldelta' in opts:
                    v |= REVLOGGENERALDELTA
            else:
                v = 0

        i = ''
        self._initempty = True
        try:
            f = self.opener(self.indexfile)
            i = f.read()
            f.close()
            if len(i) > 0:
                # an existing index overrides the default: its first 4
                # bytes hold the version/flags word
                v = struct.unpack(versionformat, i[:4])[0]
                self._initempty = False
        except IOError, inst:
            # a missing index file simply means an empty revlog
            if inst.errno != errno.ENOENT:
                raise

        self.version = v
        self._inline = v & REVLOGNGINLINEDATA
        self._generaldelta = v & REVLOGGENERALDELTA
        # split the word into feature flags (high 16 bits) and format
        flags = v & ~0xFFFF
        fmt = v & 0xFFFF
        if fmt == REVLOGV0 and flags:
            raise RevlogError(_("index %s unknown flags %#04x for format v0")
                              % (self.indexfile, flags >> 16))
        elif fmt == REVLOGNG and flags & ~REVLOGNG_FLAGS:
            raise RevlogError(_("index %s unknown flags %#04x for revlogng")
                              % (self.indexfile, flags >> 16))
        elif fmt > REVLOGNG:
            raise RevlogError(_("index %s unknown format %d")
                              % (self.indexfile, fmt))

        # pick the parser matching the detected on-disk format
        self._io = revlogio()
        if self.version == REVLOGV0:
            self._io = revlogoldio()
        try:
            d = self._io.parseindex(i, self._inline)
        except (ValueError, IndexError):
            raise RevlogError(_("index %s is corrupted") % (self.indexfile))
        self.index, nodemap, self._chunkcache = d
        if nodemap is not None:
            # the parser supplied a ready-made node->rev mapping
            self.nodemap = self._nodecache = nodemap
        if not self._chunkcache:
            self._chunkclear()
259
259
260 def tip(self):
260 def tip(self):
261 return self.node(len(self.index) - 2)
261 return self.node(len(self.index) - 2)
262 def __len__(self):
262 def __len__(self):
263 return len(self.index) - 1
263 return len(self.index) - 1
    def __iter__(self):
        """Iterate over all revision numbers in ascending order."""
        return iter(xrange(len(self)))
266 def revs(self, start=0, stop=None):
266 def revs(self, start=0, stop=None):
267 """iterate over all rev in this revlog (from start to stop)"""
267 """iterate over all rev in this revlog (from start to stop)"""
268 step = 1
268 step = 1
269 if stop is not None:
269 if stop is not None:
270 if start > stop:
270 if start > stop:
271 step = -1
271 step = -1
272 stop += step
272 stop += step
273 else:
273 else:
274 stop = len(self)
274 stop = len(self)
275 return xrange(start, stop, step)
275 return xrange(start, stop, step)
276
276
    @util.propertycache
    def nodemap(self):
        """Lazily-built mapping of node -> revision number."""
        # looking up rev 0's node forces rev() to scan the index and
        # populate _nodecache along the way
        self.rev(self.node(0))
        return self._nodecache
281
281
282 def hasnode(self, node):
282 def hasnode(self, node):
283 try:
283 try:
284 self.rev(node)
284 self.rev(node)
285 return True
285 return True
286 except KeyError:
286 except KeyError:
287 return False
287 return False
288
288
    def clearcaches(self):
        """Reset node-lookup caches to their initial state."""
        try:
            # the C index object manages its own caches
            self._nodecache.clearcaches()
        except AttributeError:
            # pure-python dict cache: rebuild from scratch
            self._nodecache = {nullid: nullrev}
            self._nodepos = None
295
295
    def rev(self, node):
        """Return the revision number holding ``node``.

        Raises LookupError when the node is not present.
        """
        try:
            return self._nodecache[node]
        except RevlogError:
            # parsers.c radix tree lookup failed
            raise LookupError(node, self.indexfile, _('no node'))
        except KeyError:
            # pure python cache lookup failed
            n = self._nodecache
            i = self.index
            p = self._nodepos
            if p is None:
                # start from the last real revision (skip the sentinel)
                p = len(i) - 2
            # scan backwards from the last unscanned position, caching
            # every node encountered so later lookups hit the dict
            for r in xrange(p, -1, -1):
                v = i[r][7]
                n[v] = r
                if v == node:
                    # remember where to resume the next cache miss
                    self._nodepos = r - 1
                    return r
            raise LookupError(node, self.indexfile, _('no node'))
316
316
317 def node(self, rev):
317 def node(self, rev):
318 return self.index[rev][7]
318 return self.index[rev][7]
319 def linkrev(self, rev):
319 def linkrev(self, rev):
320 return self.index[rev][4]
320 return self.index[rev][4]
321 def parents(self, node):
321 def parents(self, node):
322 i = self.index
322 i = self.index
323 d = i[self.rev(node)]
323 d = i[self.rev(node)]
324 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
324 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
325 def parentrevs(self, rev):
325 def parentrevs(self, rev):
326 return self.index[rev][5:7]
326 return self.index[rev][5:7]
327 def start(self, rev):
327 def start(self, rev):
328 return int(self.index[rev][0] >> 16)
328 return int(self.index[rev][0] >> 16)
329 def end(self, rev):
329 def end(self, rev):
330 return self.start(rev) + self.length(rev)
330 return self.start(rev) + self.length(rev)
331 def length(self, rev):
331 def length(self, rev):
332 return self.index[rev][1]
332 return self.index[rev][1]
    def chainbase(self, rev):
        """Follow the delta chain of ``rev`` and return its base revision.

        The base of a full (non-delta) revision is the revision itself,
        which terminates the walk.
        """
        index = self.index
        base = index[rev][3]
        while base != rev:
            rev = base
            base = index[rev][3]
        return base
340 def flags(self, rev):
340 def flags(self, rev):
341 return self.index[rev][0] & 0xFFFF
341 return self.index[rev][0] & 0xFFFF
    def rawsize(self, rev):
        """return the length of the uncompressed text for a given revision"""
        l = self.index[rev][2]
        if l >= 0:
            return l

        # size is recorded as negative (unknown): reconstruct the text
        # and measure it directly
        t = self.revision(self.node(rev))
        return len(t)
    # alias kept for interface compatibility; subclasses may override
    size = rawsize
351
351
    def ancestors(self, revs, stoprev=0, inclusive=False):
        """Generate the ancestors of 'revs' in reverse topological order.
        Does not generate revs lower than stoprev.

        Returns a lazyancestors object, which is iterable.

        See the documentation for ancestor.lazyancestors for more details."""

        return ancestor.lazyancestors(self, revs, stoprev=stoprev,
                                      inclusive=inclusive)
360
360
    def descendants(self, revs):
        """Generate the descendants of 'revs' in revision order.

        Yield a sequence of revision numbers starting with a child of
        some rev in revs, i.e., each revision is *not* considered a
        descendant of itself.  Results are ordered by revision number (a
        topological sort)."""
        first = min(revs)
        if first == nullrev:
            # every revision descends from the null revision
            for i in self:
                yield i
            return

        seen = set(revs)
        # children always have higher revision numbers than parents, so a
        # single forward pass suffices
        for i in self.revs(start=first + 1):
            for x in self.parentrevs(i):
                if x != nullrev and x in seen:
                    seen.add(i)
                    yield i
                    break
381
381
    def findcommonmissing(self, common=None, heads=None):
        """Return a tuple of the ancestors of common and the ancestors of heads
        that are not ancestors of common. In revset terminology, we return the
        tuple:

          ::common, (::heads) - (::common)

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        # work in revision-number space from here on
        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        # we want the ancestors, but inclusive
        has = set(self.ancestors(common))
        has.add(nullrev)
        has.update(common)

        # take all ancestors from heads that aren't in has
        missing = set()
        visit = util.deque(r for r in heads if r not in has)
        while visit:
            r = visit.popleft()
            if r in missing:
                continue
            else:
                missing.add(r)
                for p in self.parentrevs(r):
                    if p not in has:
                        visit.append(p)
        # sorting revision numbers yields a topological order
        missing = list(missing)
        missing.sort()
        return has, [self.node(r) for r in missing]
423
423
424 def findmissingrevs(self, common=None, heads=None):
424 def findmissingrevs(self, common=None, heads=None):
425 """Return the revision numbers of the ancestors of heads that
425 """Return the revision numbers of the ancestors of heads that
426 are not ancestors of common.
426 are not ancestors of common.
427
427
428 More specifically, return a list of revision numbers corresponding to
428 More specifically, return a list of revision numbers corresponding to
429 nodes N such that every N satisfies the following constraints:
429 nodes N such that every N satisfies the following constraints:
430
430
431 1. N is an ancestor of some node in 'heads'
431 1. N is an ancestor of some node in 'heads'
432 2. N is not an ancestor of any node in 'common'
432 2. N is not an ancestor of any node in 'common'
433
433
434 The list is sorted by revision number, meaning it is
434 The list is sorted by revision number, meaning it is
435 topologically sorted.
435 topologically sorted.
436
436
437 'heads' and 'common' are both lists of revision numbers. If heads is
437 'heads' and 'common' are both lists of revision numbers. If heads is
438 not supplied, uses all of the revlog's heads. If common is not
438 not supplied, uses all of the revlog's heads. If common is not
439 supplied, uses nullid."""
439 supplied, uses nullid."""
440 if common is None:
440 if common is None:
441 common = [nullrev]
441 common = [nullrev]
442 if heads is None:
442 if heads is None:
443 heads = self.headrevs()
443 heads = self.headrevs()
444
444
445 return ancestor.missingancestors(heads, common, self.parentrevs)
445 return ancestor.missingancestors(heads, common, self.parentrevs)
446
446
447 def findmissing(self, common=None, heads=None):
447 def findmissing(self, common=None, heads=None):
448 """Return the ancestors of heads that are not ancestors of common.
448 """Return the ancestors of heads that are not ancestors of common.
449
449
450 More specifically, return a list of nodes N such that every N
450 More specifically, return a list of nodes N such that every N
451 satisfies the following constraints:
451 satisfies the following constraints:
452
452
453 1. N is an ancestor of some node in 'heads'
453 1. N is an ancestor of some node in 'heads'
454 2. N is not an ancestor of any node in 'common'
454 2. N is not an ancestor of any node in 'common'
455
455
456 The list is sorted by revision number, meaning it is
456 The list is sorted by revision number, meaning it is
457 topologically sorted.
457 topologically sorted.
458
458
459 'heads' and 'common' are both lists of node IDs. If heads is
459 'heads' and 'common' are both lists of node IDs. If heads is
460 not supplied, uses all of the revlog's heads. If common is not
460 not supplied, uses all of the revlog's heads. If common is not
461 supplied, uses nullid."""
461 supplied, uses nullid."""
462 if common is None:
462 if common is None:
463 common = [nullid]
463 common = [nullid]
464 if heads is None:
464 if heads is None:
465 heads = self.heads()
465 heads = self.heads()
466
466
467 common = [self.rev(n) for n in common]
467 common = [self.rev(n) for n in common]
468 heads = [self.rev(n) for n in heads]
468 heads = [self.rev(n) for n in heads]
469
469
470 return [self.node(r) for r in
470 return [self.node(r) for r in
471 ancestor.missingancestors(heads, common, self.parentrevs)]
471 ancestor.missingancestors(heads, common, self.parentrevs)]
472
472
    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

          1. N is a descendant of some node in 'roots'
          2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'. Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs. If 'roots' is
        unspecified, uses nullid as the only root. If 'heads' is
        unspecified, uses list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [nullid] # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return ([self.node(r) for r in self], [nullid], list(self.heads()))
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n) # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update([p for p in self.parents(n) if
                                           p != nullid])
                    elif n in heads: # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [n for n in roots if n in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(n) for n in roots])
                else:
                    # No more roots?  Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev:  # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # will start being marked is descendants before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendants, it's not a root.
                    if (p[0] in descendants) or (p[1] in descendants):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendant if either of its parents are
                # descendants.  (We seeded the dependents list with the roots
                # up there, remember?)
                if (p[0] in descendants) or (p[1] in descendants):
                    descendants.add(n)
                    isdescendant = True
            if isdescendant and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendants and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = True
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = True
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [n for n, flag in heads.iteritems() if flag]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)
627
627
    def headrevs(self):
        """Return the list of revision numbers that have no children."""
        try:
            # prefer the C index implementation when it provides one
            return self.index.headrevs()
        except AttributeError:
            return self._headrevs()
633
633
    def _headrevs(self):
        """Pure-python computation of the head revisions."""
        count = len(self)
        if not count:
            return [nullrev]
        # we won't iter over filtered rev so nobody is a head at start
        ishead = [0] * (count + 1)
        index = self.index
        for r in self:
            ishead[r] = 1  # I may be a head
            e = index[r]
            # a nullrev (-1) parent lands in the extra trailing slot
            ishead[e[5]] = ishead[e[6]] = 0  # my parents are not
        return [r for r, val in enumerate(ishead) if val]
646
646
647 def heads(self, start=None, stop=None):
647 def heads(self, start=None, stop=None):
648 """return the list of all nodes that have no children
648 """return the list of all nodes that have no children
649
649
650 if start is specified, only heads that are descendants of
650 if start is specified, only heads that are descendants of
651 start will be returned
651 start will be returned
652 if stop is specified, it will consider all the revs from stop
652 if stop is specified, it will consider all the revs from stop
653 as if they had no children
653 as if they had no children
654 """
654 """
655 if start is None and stop is None:
655 if start is None and stop is None:
656 if not len(self):
656 if not len(self):
657 return [nullid]
657 return [nullid]
658 return [self.node(r) for r in self.headrevs()]
658 return [self.node(r) for r in self.headrevs()]
659
659
660 if start is None:
660 if start is None:
661 start = nullid
661 start = nullid
662 if stop is None:
662 if stop is None:
663 stop = []
663 stop = []
664 stoprevs = set([self.rev(n) for n in stop])
664 stoprevs = set([self.rev(n) for n in stop])
665 startrev = self.rev(start)
665 startrev = self.rev(start)
666 reachable = set((startrev,))
666 reachable = set((startrev,))
667 heads = set((startrev,))
667 heads = set((startrev,))
668
668
669 parentrevs = self.parentrevs
669 parentrevs = self.parentrevs
670 for r in self.revs(start=startrev + 1):
670 for r in self.revs(start=startrev + 1):
671 for p in parentrevs(r):
671 for p in parentrevs(r):
672 if p in reachable:
672 if p in reachable:
673 if r not in stoprevs:
673 if r not in stoprevs:
674 reachable.add(r)
674 reachable.add(r)
675 heads.add(r)
675 heads.add(r)
676 if p in heads and p not in stoprevs:
676 if p in heads and p not in stoprevs:
677 heads.remove(p)
677 heads.remove(p)
678
678
679 return [self.node(r) for r in heads]
679 return [self.node(r) for r in heads]
680
680
681 def children(self, node):
681 def children(self, node):
682 """find the children of a given node"""
682 """find the children of a given node"""
683 c = []
683 c = []
684 p = self.rev(node)
684 p = self.rev(node)
685 for r in self.revs(start=p + 1):
685 for r in self.revs(start=p + 1):
686 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
686 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
687 if prevs:
687 if prevs:
688 for pr in prevs:
688 for pr in prevs:
689 if pr == p:
689 if pr == p:
690 c.append(self.node(r))
690 c.append(self.node(r))
691 elif p == nullrev:
691 elif p == nullrev:
692 c.append(self.node(r))
692 c.append(self.node(r))
693 return c
693 return c
694
694
695 def descendant(self, start, end):
695 def descendant(self, start, end):
696 if start == nullrev:
696 if start == nullrev:
697 return True
697 return True
698 for i in self.descendants([start]):
698 for i in self.descendants([start]):
699 if i == end:
699 if i == end:
700 return True
700 return True
701 elif i > end:
701 elif i > end:
702 break
702 break
703 return False
703 return False
704
704
705 def ancestor(self, a, b):
705 def ancestor(self, a, b):
706 """calculate the least common ancestor of nodes a and b"""
706 """calculate the least common ancestor of nodes a and b"""
707
707
708 a, b = self.rev(a), self.rev(b)
708 a, b = self.rev(a), self.rev(b)
709 ancs = ancestor.ancestors(self.parentrevs, a, b)
709 try:
710 ancs = self.index.ancestors(a, b)
711 old = ancestor.ancestors(self.parentrevs, a, b)
712 assert set(ancs) == old, ('opinions differ over ancestor(%d, %d)' %
713 (a, b))
714 except (AttributeError, OverflowError):
715 ancs = ancestor.ancestors(self.parentrevs, a, b)
710 if ancs:
716 if ancs:
711 # choose a consistent winner when there's a tie
717 # choose a consistent winner when there's a tie
712 return min(map(self.node, ancs))
718 return min(map(self.node, ancs))
713 return nullid
719 return nullid
714
720
715 def _match(self, id):
721 def _match(self, id):
716 if isinstance(id, int):
722 if isinstance(id, int):
717 # rev
723 # rev
718 return self.node(id)
724 return self.node(id)
719 if len(id) == 20:
725 if len(id) == 20:
720 # possibly a binary node
726 # possibly a binary node
721 # odds of a binary node being all hex in ASCII are 1 in 10**25
727 # odds of a binary node being all hex in ASCII are 1 in 10**25
722 try:
728 try:
723 node = id
729 node = id
724 self.rev(node) # quick search the index
730 self.rev(node) # quick search the index
725 return node
731 return node
726 except LookupError:
732 except LookupError:
727 pass # may be partial hex id
733 pass # may be partial hex id
728 try:
734 try:
729 # str(rev)
735 # str(rev)
730 rev = int(id)
736 rev = int(id)
731 if str(rev) != id:
737 if str(rev) != id:
732 raise ValueError
738 raise ValueError
733 if rev < 0:
739 if rev < 0:
734 rev = len(self) + rev
740 rev = len(self) + rev
735 if rev < 0 or rev >= len(self):
741 if rev < 0 or rev >= len(self):
736 raise ValueError
742 raise ValueError
737 return self.node(rev)
743 return self.node(rev)
738 except (ValueError, OverflowError):
744 except (ValueError, OverflowError):
739 pass
745 pass
740 if len(id) == 40:
746 if len(id) == 40:
741 try:
747 try:
742 # a full hex nodeid?
748 # a full hex nodeid?
743 node = bin(id)
749 node = bin(id)
744 self.rev(node)
750 self.rev(node)
745 return node
751 return node
746 except (TypeError, LookupError):
752 except (TypeError, LookupError):
747 pass
753 pass
748
754
749 def _partialmatch(self, id):
755 def _partialmatch(self, id):
750 try:
756 try:
751 return self.index.partialmatch(id)
757 return self.index.partialmatch(id)
752 except RevlogError:
758 except RevlogError:
753 # parsers.c radix tree lookup gave multiple matches
759 # parsers.c radix tree lookup gave multiple matches
754 raise LookupError(id, self.indexfile, _("ambiguous identifier"))
760 raise LookupError(id, self.indexfile, _("ambiguous identifier"))
755 except (AttributeError, ValueError):
761 except (AttributeError, ValueError):
756 # we are pure python, or key was too short to search radix tree
762 # we are pure python, or key was too short to search radix tree
757 pass
763 pass
758
764
759 if id in self._pcache:
765 if id in self._pcache:
760 return self._pcache[id]
766 return self._pcache[id]
761
767
762 if len(id) < 40:
768 if len(id) < 40:
763 try:
769 try:
764 # hex(node)[:...]
770 # hex(node)[:...]
765 l = len(id) // 2 # grab an even number of digits
771 l = len(id) // 2 # grab an even number of digits
766 prefix = bin(id[:l * 2])
772 prefix = bin(id[:l * 2])
767 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
773 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
768 nl = [n for n in nl if hex(n).startswith(id)]
774 nl = [n for n in nl if hex(n).startswith(id)]
769 if len(nl) > 0:
775 if len(nl) > 0:
770 if len(nl) == 1:
776 if len(nl) == 1:
771 self._pcache[id] = nl[0]
777 self._pcache[id] = nl[0]
772 return nl[0]
778 return nl[0]
773 raise LookupError(id, self.indexfile,
779 raise LookupError(id, self.indexfile,
774 _('ambiguous identifier'))
780 _('ambiguous identifier'))
775 return None
781 return None
776 except TypeError:
782 except TypeError:
777 pass
783 pass
778
784
779 def lookup(self, id):
785 def lookup(self, id):
780 """locate a node based on:
786 """locate a node based on:
781 - revision number or str(revision number)
787 - revision number or str(revision number)
782 - nodeid or subset of hex nodeid
788 - nodeid or subset of hex nodeid
783 """
789 """
784 n = self._match(id)
790 n = self._match(id)
785 if n is not None:
791 if n is not None:
786 return n
792 return n
787 n = self._partialmatch(id)
793 n = self._partialmatch(id)
788 if n:
794 if n:
789 return n
795 return n
790
796
791 raise LookupError(id, self.indexfile, _('no match found'))
797 raise LookupError(id, self.indexfile, _('no match found'))
792
798
793 def cmp(self, node, text):
799 def cmp(self, node, text):
794 """compare text with a given file revision
800 """compare text with a given file revision
795
801
796 returns True if text is different than what is stored.
802 returns True if text is different than what is stored.
797 """
803 """
798 p1, p2 = self.parents(node)
804 p1, p2 = self.parents(node)
799 return hash(text, p1, p2) != node
805 return hash(text, p1, p2) != node
800
806
801 def _addchunk(self, offset, data):
807 def _addchunk(self, offset, data):
802 o, d = self._chunkcache
808 o, d = self._chunkcache
803 # try to add to existing cache
809 # try to add to existing cache
804 if o + len(d) == offset and len(d) + len(data) < _chunksize:
810 if o + len(d) == offset and len(d) + len(data) < _chunksize:
805 self._chunkcache = o, d + data
811 self._chunkcache = o, d + data
806 else:
812 else:
807 self._chunkcache = offset, data
813 self._chunkcache = offset, data
808
814
809 def _loadchunk(self, offset, length):
815 def _loadchunk(self, offset, length):
810 if self._inline:
816 if self._inline:
811 df = self.opener(self.indexfile)
817 df = self.opener(self.indexfile)
812 else:
818 else:
813 df = self.opener(self.datafile)
819 df = self.opener(self.datafile)
814
820
815 readahead = max(65536, length)
821 readahead = max(65536, length)
816 df.seek(offset)
822 df.seek(offset)
817 d = df.read(readahead)
823 d = df.read(readahead)
818 df.close()
824 df.close()
819 self._addchunk(offset, d)
825 self._addchunk(offset, d)
820 if readahead > length:
826 if readahead > length:
821 return util.buffer(d, 0, length)
827 return util.buffer(d, 0, length)
822 return d
828 return d
823
829
824 def _getchunk(self, offset, length):
830 def _getchunk(self, offset, length):
825 o, d = self._chunkcache
831 o, d = self._chunkcache
826 l = len(d)
832 l = len(d)
827
833
828 # is it in the cache?
834 # is it in the cache?
829 cachestart = offset - o
835 cachestart = offset - o
830 cacheend = cachestart + length
836 cacheend = cachestart + length
831 if cachestart >= 0 and cacheend <= l:
837 if cachestart >= 0 and cacheend <= l:
832 if cachestart == 0 and cacheend == l:
838 if cachestart == 0 and cacheend == l:
833 return d # avoid a copy
839 return d # avoid a copy
834 return util.buffer(d, cachestart, cacheend - cachestart)
840 return util.buffer(d, cachestart, cacheend - cachestart)
835
841
836 return self._loadchunk(offset, length)
842 return self._loadchunk(offset, length)
837
843
838 def _chunkraw(self, startrev, endrev):
844 def _chunkraw(self, startrev, endrev):
839 start = self.start(startrev)
845 start = self.start(startrev)
840 length = self.end(endrev) - start
846 length = self.end(endrev) - start
841 if self._inline:
847 if self._inline:
842 start += (startrev + 1) * self._io.size
848 start += (startrev + 1) * self._io.size
843 return self._getchunk(start, length)
849 return self._getchunk(start, length)
844
850
845 def _chunk(self, rev):
851 def _chunk(self, rev):
846 return decompress(self._chunkraw(rev, rev))
852 return decompress(self._chunkraw(rev, rev))
847
853
848 def _chunkbase(self, rev):
854 def _chunkbase(self, rev):
849 return self._chunk(rev)
855 return self._chunk(rev)
850
856
851 def _chunkclear(self):
857 def _chunkclear(self):
852 self._chunkcache = (0, '')
858 self._chunkcache = (0, '')
853
859
854 def deltaparent(self, rev):
860 def deltaparent(self, rev):
855 """return deltaparent of the given revision"""
861 """return deltaparent of the given revision"""
856 base = self.index[rev][3]
862 base = self.index[rev][3]
857 if base == rev:
863 if base == rev:
858 return nullrev
864 return nullrev
859 elif self._generaldelta:
865 elif self._generaldelta:
860 return base
866 return base
861 else:
867 else:
862 return rev - 1
868 return rev - 1
863
869
864 def revdiff(self, rev1, rev2):
870 def revdiff(self, rev1, rev2):
865 """return or calculate a delta between two revisions"""
871 """return or calculate a delta between two revisions"""
866 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
872 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
867 return str(self._chunk(rev2))
873 return str(self._chunk(rev2))
868
874
869 return mdiff.textdiff(self.revision(rev1),
875 return mdiff.textdiff(self.revision(rev1),
870 self.revision(rev2))
876 self.revision(rev2))
871
877
872 def revision(self, nodeorrev):
878 def revision(self, nodeorrev):
873 """return an uncompressed revision of a given node or revision
879 """return an uncompressed revision of a given node or revision
874 number.
880 number.
875 """
881 """
876 if isinstance(nodeorrev, int):
882 if isinstance(nodeorrev, int):
877 rev = nodeorrev
883 rev = nodeorrev
878 node = self.node(rev)
884 node = self.node(rev)
879 else:
885 else:
880 node = nodeorrev
886 node = nodeorrev
881 rev = None
887 rev = None
882
888
883 cachedrev = None
889 cachedrev = None
884 if node == nullid:
890 if node == nullid:
885 return ""
891 return ""
886 if self._cache:
892 if self._cache:
887 if self._cache[0] == node:
893 if self._cache[0] == node:
888 return self._cache[2]
894 return self._cache[2]
889 cachedrev = self._cache[1]
895 cachedrev = self._cache[1]
890
896
891 # look up what we need to read
897 # look up what we need to read
892 text = None
898 text = None
893 if rev is None:
899 if rev is None:
894 rev = self.rev(node)
900 rev = self.rev(node)
895
901
896 # check rev flags
902 # check rev flags
897 if self.flags(rev) & ~REVIDX_KNOWN_FLAGS:
903 if self.flags(rev) & ~REVIDX_KNOWN_FLAGS:
898 raise RevlogError(_('incompatible revision flag %x') %
904 raise RevlogError(_('incompatible revision flag %x') %
899 (self.flags(rev) & ~REVIDX_KNOWN_FLAGS))
905 (self.flags(rev) & ~REVIDX_KNOWN_FLAGS))
900
906
901 # build delta chain
907 # build delta chain
902 chain = []
908 chain = []
903 index = self.index # for performance
909 index = self.index # for performance
904 generaldelta = self._generaldelta
910 generaldelta = self._generaldelta
905 iterrev = rev
911 iterrev = rev
906 e = index[iterrev]
912 e = index[iterrev]
907 while iterrev != e[3] and iterrev != cachedrev:
913 while iterrev != e[3] and iterrev != cachedrev:
908 chain.append(iterrev)
914 chain.append(iterrev)
909 if generaldelta:
915 if generaldelta:
910 iterrev = e[3]
916 iterrev = e[3]
911 else:
917 else:
912 iterrev -= 1
918 iterrev -= 1
913 e = index[iterrev]
919 e = index[iterrev]
914 chain.reverse()
920 chain.reverse()
915 base = iterrev
921 base = iterrev
916
922
917 if iterrev == cachedrev:
923 if iterrev == cachedrev:
918 # cache hit
924 # cache hit
919 text = self._cache[2]
925 text = self._cache[2]
920
926
921 # drop cache to save memory
927 # drop cache to save memory
922 self._cache = None
928 self._cache = None
923
929
924 self._chunkraw(base, rev)
930 self._chunkraw(base, rev)
925 if text is None:
931 if text is None:
926 text = str(self._chunkbase(base))
932 text = str(self._chunkbase(base))
927
933
928 bins = [self._chunk(r) for r in chain]
934 bins = [self._chunk(r) for r in chain]
929 text = mdiff.patches(text, bins)
935 text = mdiff.patches(text, bins)
930
936
931 text = self._checkhash(text, node, rev)
937 text = self._checkhash(text, node, rev)
932
938
933 self._cache = (node, rev, text)
939 self._cache = (node, rev, text)
934 return text
940 return text
935
941
936 def _checkhash(self, text, node, rev):
942 def _checkhash(self, text, node, rev):
937 p1, p2 = self.parents(node)
943 p1, p2 = self.parents(node)
938 if node != hash(text, p1, p2):
944 if node != hash(text, p1, p2):
939 raise RevlogError(_("integrity check failed on %s:%d")
945 raise RevlogError(_("integrity check failed on %s:%d")
940 % (self.indexfile, rev))
946 % (self.indexfile, rev))
941 return text
947 return text
942
948
943 def checkinlinesize(self, tr, fp=None):
949 def checkinlinesize(self, tr, fp=None):
944 if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
950 if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
945 return
951 return
946
952
947 trinfo = tr.find(self.indexfile)
953 trinfo = tr.find(self.indexfile)
948 if trinfo is None:
954 if trinfo is None:
949 raise RevlogError(_("%s not found in the transaction")
955 raise RevlogError(_("%s not found in the transaction")
950 % self.indexfile)
956 % self.indexfile)
951
957
952 trindex = trinfo[2]
958 trindex = trinfo[2]
953 dataoff = self.start(trindex)
959 dataoff = self.start(trindex)
954
960
955 tr.add(self.datafile, dataoff)
961 tr.add(self.datafile, dataoff)
956
962
957 if fp:
963 if fp:
958 fp.flush()
964 fp.flush()
959 fp.close()
965 fp.close()
960
966
961 df = self.opener(self.datafile, 'w')
967 df = self.opener(self.datafile, 'w')
962 try:
968 try:
963 for r in self:
969 for r in self:
964 df.write(self._chunkraw(r, r))
970 df.write(self._chunkraw(r, r))
965 finally:
971 finally:
966 df.close()
972 df.close()
967
973
968 fp = self.opener(self.indexfile, 'w', atomictemp=True)
974 fp = self.opener(self.indexfile, 'w', atomictemp=True)
969 self.version &= ~(REVLOGNGINLINEDATA)
975 self.version &= ~(REVLOGNGINLINEDATA)
970 self._inline = False
976 self._inline = False
971 for i in self:
977 for i in self:
972 e = self._io.packentry(self.index[i], self.node, self.version, i)
978 e = self._io.packentry(self.index[i], self.node, self.version, i)
973 fp.write(e)
979 fp.write(e)
974
980
975 # if we don't call close, the temp file will never replace the
981 # if we don't call close, the temp file will never replace the
976 # real index
982 # real index
977 fp.close()
983 fp.close()
978
984
979 tr.replace(self.indexfile, trindex * self._io.size)
985 tr.replace(self.indexfile, trindex * self._io.size)
980 self._chunkclear()
986 self._chunkclear()
981
987
982 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None):
988 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None):
983 """add a revision to the log
989 """add a revision to the log
984
990
985 text - the revision data to add
991 text - the revision data to add
986 transaction - the transaction object used for rollback
992 transaction - the transaction object used for rollback
987 link - the linkrev data to add
993 link - the linkrev data to add
988 p1, p2 - the parent nodeids of the revision
994 p1, p2 - the parent nodeids of the revision
989 cachedelta - an optional precomputed delta
995 cachedelta - an optional precomputed delta
990 """
996 """
991 node = hash(text, p1, p2)
997 node = hash(text, p1, p2)
992 if node in self.nodemap:
998 if node in self.nodemap:
993 return node
999 return node
994
1000
995 dfh = None
1001 dfh = None
996 if not self._inline:
1002 if not self._inline:
997 dfh = self.opener(self.datafile, "a")
1003 dfh = self.opener(self.datafile, "a")
998 ifh = self.opener(self.indexfile, "a+")
1004 ifh = self.opener(self.indexfile, "a+")
999 try:
1005 try:
1000 return self._addrevision(node, text, transaction, link, p1, p2,
1006 return self._addrevision(node, text, transaction, link, p1, p2,
1001 cachedelta, ifh, dfh)
1007 cachedelta, ifh, dfh)
1002 finally:
1008 finally:
1003 if dfh:
1009 if dfh:
1004 dfh.close()
1010 dfh.close()
1005 ifh.close()
1011 ifh.close()
1006
1012
1007 def compress(self, text):
1013 def compress(self, text):
1008 """ generate a possibly-compressed representation of text """
1014 """ generate a possibly-compressed representation of text """
1009 if not text:
1015 if not text:
1010 return ("", text)
1016 return ("", text)
1011 l = len(text)
1017 l = len(text)
1012 bin = None
1018 bin = None
1013 if l < 44:
1019 if l < 44:
1014 pass
1020 pass
1015 elif l > 1000000:
1021 elif l > 1000000:
1016 # zlib makes an internal copy, thus doubling memory usage for
1022 # zlib makes an internal copy, thus doubling memory usage for
1017 # large files, so lets do this in pieces
1023 # large files, so lets do this in pieces
1018 z = zlib.compressobj()
1024 z = zlib.compressobj()
1019 p = []
1025 p = []
1020 pos = 0
1026 pos = 0
1021 while pos < l:
1027 while pos < l:
1022 pos2 = pos + 2**20
1028 pos2 = pos + 2**20
1023 p.append(z.compress(text[pos:pos2]))
1029 p.append(z.compress(text[pos:pos2]))
1024 pos = pos2
1030 pos = pos2
1025 p.append(z.flush())
1031 p.append(z.flush())
1026 if sum(map(len, p)) < l:
1032 if sum(map(len, p)) < l:
1027 bin = "".join(p)
1033 bin = "".join(p)
1028 else:
1034 else:
1029 bin = _compress(text)
1035 bin = _compress(text)
1030 if bin is None or len(bin) > l:
1036 if bin is None or len(bin) > l:
1031 if text[0] == '\0':
1037 if text[0] == '\0':
1032 return ("", text)
1038 return ("", text)
1033 return ('u', text)
1039 return ('u', text)
1034 return ("", bin)
1040 return ("", bin)
1035
1041
1036 def _addrevision(self, node, text, transaction, link, p1, p2,
1042 def _addrevision(self, node, text, transaction, link, p1, p2,
1037 cachedelta, ifh, dfh):
1043 cachedelta, ifh, dfh):
1038 """internal function to add revisions to the log
1044 """internal function to add revisions to the log
1039
1045
1040 see addrevision for argument descriptions.
1046 see addrevision for argument descriptions.
1041 invariants:
1047 invariants:
1042 - text is optional (can be None); if not set, cachedelta must be set.
1048 - text is optional (can be None); if not set, cachedelta must be set.
1043 if both are set, they must correspond to each other.
1049 if both are set, they must correspond to each other.
1044 """
1050 """
1045 btext = [text]
1051 btext = [text]
1046 def buildtext():
1052 def buildtext():
1047 if btext[0] is not None:
1053 if btext[0] is not None:
1048 return btext[0]
1054 return btext[0]
1049 # flush any pending writes here so we can read it in revision
1055 # flush any pending writes here so we can read it in revision
1050 if dfh:
1056 if dfh:
1051 dfh.flush()
1057 dfh.flush()
1052 ifh.flush()
1058 ifh.flush()
1053 basetext = self.revision(self.node(cachedelta[0]))
1059 basetext = self.revision(self.node(cachedelta[0]))
1054 btext[0] = mdiff.patch(basetext, cachedelta[1])
1060 btext[0] = mdiff.patch(basetext, cachedelta[1])
1055 chk = hash(btext[0], p1, p2)
1061 chk = hash(btext[0], p1, p2)
1056 if chk != node:
1062 if chk != node:
1057 raise RevlogError(_("consistency error in delta"))
1063 raise RevlogError(_("consistency error in delta"))
1058 return btext[0]
1064 return btext[0]
1059
1065
1060 def builddelta(rev):
1066 def builddelta(rev):
1061 # can we use the cached delta?
1067 # can we use the cached delta?
1062 if cachedelta and cachedelta[0] == rev:
1068 if cachedelta and cachedelta[0] == rev:
1063 delta = cachedelta[1]
1069 delta = cachedelta[1]
1064 else:
1070 else:
1065 t = buildtext()
1071 t = buildtext()
1066 ptext = self.revision(self.node(rev))
1072 ptext = self.revision(self.node(rev))
1067 delta = mdiff.textdiff(ptext, t)
1073 delta = mdiff.textdiff(ptext, t)
1068 data = self.compress(delta)
1074 data = self.compress(delta)
1069 l = len(data[1]) + len(data[0])
1075 l = len(data[1]) + len(data[0])
1070 if basecache[0] == rev:
1076 if basecache[0] == rev:
1071 chainbase = basecache[1]
1077 chainbase = basecache[1]
1072 else:
1078 else:
1073 chainbase = self.chainbase(rev)
1079 chainbase = self.chainbase(rev)
1074 dist = l + offset - self.start(chainbase)
1080 dist = l + offset - self.start(chainbase)
1075 if self._generaldelta:
1081 if self._generaldelta:
1076 base = rev
1082 base = rev
1077 else:
1083 else:
1078 base = chainbase
1084 base = chainbase
1079 return dist, l, data, base, chainbase
1085 return dist, l, data, base, chainbase
1080
1086
1081 curr = len(self)
1087 curr = len(self)
1082 prev = curr - 1
1088 prev = curr - 1
1083 base = chainbase = curr
1089 base = chainbase = curr
1084 offset = self.end(prev)
1090 offset = self.end(prev)
1085 flags = 0
1091 flags = 0
1086 d = None
1092 d = None
1087 basecache = self._basecache
1093 basecache = self._basecache
1088 p1r, p2r = self.rev(p1), self.rev(p2)
1094 p1r, p2r = self.rev(p1), self.rev(p2)
1089
1095
1090 # should we try to build a delta?
1096 # should we try to build a delta?
1091 if prev != nullrev:
1097 if prev != nullrev:
1092 if self._generaldelta:
1098 if self._generaldelta:
1093 if p1r >= basecache[1]:
1099 if p1r >= basecache[1]:
1094 d = builddelta(p1r)
1100 d = builddelta(p1r)
1095 elif p2r >= basecache[1]:
1101 elif p2r >= basecache[1]:
1096 d = builddelta(p2r)
1102 d = builddelta(p2r)
1097 else:
1103 else:
1098 d = builddelta(prev)
1104 d = builddelta(prev)
1099 else:
1105 else:
1100 d = builddelta(prev)
1106 d = builddelta(prev)
1101 dist, l, data, base, chainbase = d
1107 dist, l, data, base, chainbase = d
1102
1108
1103 # full versions are inserted when the needed deltas
1109 # full versions are inserted when the needed deltas
1104 # become comparable to the uncompressed text
1110 # become comparable to the uncompressed text
1105 if text is None:
1111 if text is None:
1106 textlen = mdiff.patchedsize(self.rawsize(cachedelta[0]),
1112 textlen = mdiff.patchedsize(self.rawsize(cachedelta[0]),
1107 cachedelta[1])
1113 cachedelta[1])
1108 else:
1114 else:
1109 textlen = len(text)
1115 textlen = len(text)
1110 if d is None or dist > textlen * 2:
1116 if d is None or dist > textlen * 2:
1111 text = buildtext()
1117 text = buildtext()
1112 data = self.compress(text)
1118 data = self.compress(text)
1113 l = len(data[1]) + len(data[0])
1119 l = len(data[1]) + len(data[0])
1114 base = chainbase = curr
1120 base = chainbase = curr
1115
1121
1116 e = (offset_type(offset, flags), l, textlen,
1122 e = (offset_type(offset, flags), l, textlen,
1117 base, link, p1r, p2r, node)
1123 base, link, p1r, p2r, node)
1118 self.index.insert(-1, e)
1124 self.index.insert(-1, e)
1119 self.nodemap[node] = curr
1125 self.nodemap[node] = curr
1120
1126
1121 entry = self._io.packentry(e, self.node, self.version, curr)
1127 entry = self._io.packentry(e, self.node, self.version, curr)
1122 if not self._inline:
1128 if not self._inline:
1123 transaction.add(self.datafile, offset)
1129 transaction.add(self.datafile, offset)
1124 transaction.add(self.indexfile, curr * len(entry))
1130 transaction.add(self.indexfile, curr * len(entry))
1125 if data[0]:
1131 if data[0]:
1126 dfh.write(data[0])
1132 dfh.write(data[0])
1127 dfh.write(data[1])
1133 dfh.write(data[1])
1128 dfh.flush()
1134 dfh.flush()
1129 ifh.write(entry)
1135 ifh.write(entry)
1130 else:
1136 else:
1131 offset += curr * self._io.size
1137 offset += curr * self._io.size
1132 transaction.add(self.indexfile, offset, curr)
1138 transaction.add(self.indexfile, offset, curr)
1133 ifh.write(entry)
1139 ifh.write(entry)
1134 ifh.write(data[0])
1140 ifh.write(data[0])
1135 ifh.write(data[1])
1141 ifh.write(data[1])
1136 self.checkinlinesize(transaction, ifh)
1142 self.checkinlinesize(transaction, ifh)
1137
1143
1138 if type(text) == str: # only accept immutable objects
1144 if type(text) == str: # only accept immutable objects
1139 self._cache = (node, curr, text)
1145 self._cache = (node, curr, text)
1140 self._basecache = (curr, chainbase)
1146 self._basecache = (curr, chainbase)
1141 return node
1147 return node
1142
1148
1143 def group(self, nodelist, bundler, reorder=None):
1149 def group(self, nodelist, bundler, reorder=None):
1144 """Calculate a delta group, yielding a sequence of changegroup chunks
1150 """Calculate a delta group, yielding a sequence of changegroup chunks
1145 (strings).
1151 (strings).
1146
1152
1147 Given a list of changeset revs, return a set of deltas and
1153 Given a list of changeset revs, return a set of deltas and
1148 metadata corresponding to nodes. The first delta is
1154 metadata corresponding to nodes. The first delta is
1149 first parent(nodelist[0]) -> nodelist[0], the receiver is
1155 first parent(nodelist[0]) -> nodelist[0], the receiver is
1150 guaranteed to have this parent as it has all history before
1156 guaranteed to have this parent as it has all history before
1151 these changesets. In the case firstparent is nullrev the
1157 these changesets. In the case firstparent is nullrev the
1152 changegroup starts with a full revision.
1158 changegroup starts with a full revision.
1153 """
1159 """
1154
1160
1155 # if we don't have any revisions touched by these changesets, bail
1161 # if we don't have any revisions touched by these changesets, bail
1156 if len(nodelist) == 0:
1162 if len(nodelist) == 0:
1157 yield bundler.close()
1163 yield bundler.close()
1158 return
1164 return
1159
1165
1160 # for generaldelta revlogs, we linearize the revs; this will both be
1166 # for generaldelta revlogs, we linearize the revs; this will both be
1161 # much quicker and generate a much smaller bundle
1167 # much quicker and generate a much smaller bundle
1162 if (self._generaldelta and reorder is not False) or reorder:
1168 if (self._generaldelta and reorder is not False) or reorder:
1163 dag = dagutil.revlogdag(self)
1169 dag = dagutil.revlogdag(self)
1164 revs = set(self.rev(n) for n in nodelist)
1170 revs = set(self.rev(n) for n in nodelist)
1165 revs = dag.linearize(revs)
1171 revs = dag.linearize(revs)
1166 else:
1172 else:
1167 revs = sorted([self.rev(n) for n in nodelist])
1173 revs = sorted([self.rev(n) for n in nodelist])
1168
1174
1169 # add the parent of the first rev
1175 # add the parent of the first rev
1170 p = self.parentrevs(revs[0])[0]
1176 p = self.parentrevs(revs[0])[0]
1171 revs.insert(0, p)
1177 revs.insert(0, p)
1172
1178
1173 # build deltas
1179 # build deltas
1174 for r in xrange(len(revs) - 1):
1180 for r in xrange(len(revs) - 1):
1175 prev, curr = revs[r], revs[r + 1]
1181 prev, curr = revs[r], revs[r + 1]
1176 for c in bundler.revchunk(self, curr, prev):
1182 for c in bundler.revchunk(self, curr, prev):
1177 yield c
1183 yield c
1178
1184
1179 yield bundler.close()
1185 yield bundler.close()
1180
1186
1181 def addgroup(self, bundle, linkmapper, transaction):
1187 def addgroup(self, bundle, linkmapper, transaction):
1182 """
1188 """
1183 add a delta group
1189 add a delta group
1184
1190
1185 given a set of deltas, add them to the revision log. the
1191 given a set of deltas, add them to the revision log. the
1186 first delta is against its parent, which should be in our
1192 first delta is against its parent, which should be in our
1187 log, the rest are against the previous delta.
1193 log, the rest are against the previous delta.
1188 """
1194 """
1189
1195
1190 # track the base of the current delta log
1196 # track the base of the current delta log
1191 content = []
1197 content = []
1192 node = None
1198 node = None
1193
1199
1194 r = len(self)
1200 r = len(self)
1195 end = 0
1201 end = 0
1196 if r:
1202 if r:
1197 end = self.end(r - 1)
1203 end = self.end(r - 1)
1198 ifh = self.opener(self.indexfile, "a+")
1204 ifh = self.opener(self.indexfile, "a+")
1199 isize = r * self._io.size
1205 isize = r * self._io.size
1200 if self._inline:
1206 if self._inline:
1201 transaction.add(self.indexfile, end + isize, r)
1207 transaction.add(self.indexfile, end + isize, r)
1202 dfh = None
1208 dfh = None
1203 else:
1209 else:
1204 transaction.add(self.indexfile, isize, r)
1210 transaction.add(self.indexfile, isize, r)
1205 transaction.add(self.datafile, end)
1211 transaction.add(self.datafile, end)
1206 dfh = self.opener(self.datafile, "a")
1212 dfh = self.opener(self.datafile, "a")
1207
1213
1208 try:
1214 try:
1209 # loop through our set of deltas
1215 # loop through our set of deltas
1210 chain = None
1216 chain = None
1211 while True:
1217 while True:
1212 chunkdata = bundle.deltachunk(chain)
1218 chunkdata = bundle.deltachunk(chain)
1213 if not chunkdata:
1219 if not chunkdata:
1214 break
1220 break
1215 node = chunkdata['node']
1221 node = chunkdata['node']
1216 p1 = chunkdata['p1']
1222 p1 = chunkdata['p1']
1217 p2 = chunkdata['p2']
1223 p2 = chunkdata['p2']
1218 cs = chunkdata['cs']
1224 cs = chunkdata['cs']
1219 deltabase = chunkdata['deltabase']
1225 deltabase = chunkdata['deltabase']
1220 delta = chunkdata['delta']
1226 delta = chunkdata['delta']
1221
1227
1222 content.append(node)
1228 content.append(node)
1223
1229
1224 link = linkmapper(cs)
1230 link = linkmapper(cs)
1225 if node in self.nodemap:
1231 if node in self.nodemap:
1226 # this can happen if two branches make the same change
1232 # this can happen if two branches make the same change
1227 chain = node
1233 chain = node
1228 continue
1234 continue
1229
1235
1230 for p in (p1, p2):
1236 for p in (p1, p2):
1231 if p not in self.nodemap:
1237 if p not in self.nodemap:
1232 raise LookupError(p, self.indexfile,
1238 raise LookupError(p, self.indexfile,
1233 _('unknown parent'))
1239 _('unknown parent'))
1234
1240
1235 if deltabase not in self.nodemap:
1241 if deltabase not in self.nodemap:
1236 raise LookupError(deltabase, self.indexfile,
1242 raise LookupError(deltabase, self.indexfile,
1237 _('unknown delta base'))
1243 _('unknown delta base'))
1238
1244
1239 baserev = self.rev(deltabase)
1245 baserev = self.rev(deltabase)
1240 chain = self._addrevision(node, None, transaction, link,
1246 chain = self._addrevision(node, None, transaction, link,
1241 p1, p2, (baserev, delta), ifh, dfh)
1247 p1, p2, (baserev, delta), ifh, dfh)
1242 if not dfh and not self._inline:
1248 if not dfh and not self._inline:
1243 # addrevision switched from inline to conventional
1249 # addrevision switched from inline to conventional
1244 # reopen the index
1250 # reopen the index
1245 ifh.close()
1251 ifh.close()
1246 dfh = self.opener(self.datafile, "a")
1252 dfh = self.opener(self.datafile, "a")
1247 ifh = self.opener(self.indexfile, "a")
1253 ifh = self.opener(self.indexfile, "a")
1248 finally:
1254 finally:
1249 if dfh:
1255 if dfh:
1250 dfh.close()
1256 dfh.close()
1251 ifh.close()
1257 ifh.close()
1252
1258
1253 return content
1259 return content
1254
1260
def strip(self, minlink, transaction):
    """truncate the revlog on the first revision with a linkrev >= minlink

    This function is called when we're stripping revision minlink and
    its descendants from the repository.

    We have to remove all revisions with linkrev >= minlink, because
    the equivalent changelog revisions will be renumbered after the
    strip.

    So we truncate the revlog on the first of these revisions, and
    trust that the caller has saved the revisions that shouldn't be
    removed and that it'll re-add them after this truncation.
    """
    # locate the first revision whose linkrev is >= minlink
    # (index entry field 4 is the linkrev)
    rev = None
    for r in self:
        if self.index[r][4] >= minlink:
            rev = r
            break
    if rev is None:
        # nothing to strip (also covers the empty-revlog case)
        return

    # first truncate the files on disk
    end = self.start(rev)
    if not self._inline:
        transaction.add(self.datafile, end)
        end = rev * self._io.size
    else:
        # inline revlog: index and data are interleaved in one file
        end += rev * self._io.size

    transaction.add(self.indexfile, end)

    # then reset internal state in memory to forget those revisions
    self._cache = None
    self._chunkclear()
    for x in xrange(rev, len(self)):
        del self.nodemap[self.node(x)]

    # keep the trailing sentinel entry of the index intact
    del self.index[rev:-1]
1295
1301
1296 def checksize(self):
1302 def checksize(self):
1297 expected = 0
1303 expected = 0
1298 if len(self):
1304 if len(self):
1299 expected = max(0, self.end(len(self) - 1))
1305 expected = max(0, self.end(len(self) - 1))
1300
1306
1301 try:
1307 try:
1302 f = self.opener(self.datafile)
1308 f = self.opener(self.datafile)
1303 f.seek(0, 2)
1309 f.seek(0, 2)
1304 actual = f.tell()
1310 actual = f.tell()
1305 f.close()
1311 f.close()
1306 dd = actual - expected
1312 dd = actual - expected
1307 except IOError, inst:
1313 except IOError, inst:
1308 if inst.errno != errno.ENOENT:
1314 if inst.errno != errno.ENOENT:
1309 raise
1315 raise
1310 dd = 0
1316 dd = 0
1311
1317
1312 try:
1318 try:
1313 f = self.opener(self.indexfile)
1319 f = self.opener(self.indexfile)
1314 f.seek(0, 2)
1320 f.seek(0, 2)
1315 actual = f.tell()
1321 actual = f.tell()
1316 f.close()
1322 f.close()
1317 s = self._io.size
1323 s = self._io.size
1318 i = max(0, actual // s)
1324 i = max(0, actual // s)
1319 di = actual - (i * s)
1325 di = actual - (i * s)
1320 if self._inline:
1326 if self._inline:
1321 databytes = 0
1327 databytes = 0
1322 for r in self:
1328 for r in self:
1323 databytes += max(0, self.length(r))
1329 databytes += max(0, self.length(r))
1324 dd = 0
1330 dd = 0
1325 di = actual - len(self) * s - databytes
1331 di = actual - len(self) * s - databytes
1326 except IOError, inst:
1332 except IOError, inst:
1327 if inst.errno != errno.ENOENT:
1333 if inst.errno != errno.ENOENT:
1328 raise
1334 raise
1329 di = 0
1335 di = 0
1330
1336
1331 return (dd, di)
1337 return (dd, di)
1332
1338
def files(self):
    """return the file names that back this revlog

    The index file is always present; the data file exists only for
    non-inline revlogs.
    """
    names = [self.indexfile]
    if not self._inline:
        names.append(self.datafile)
    return names
General Comments 0
You need to be logged in to leave comments. Login now