##// END OF EJS Templates
revlog: switch to a C version of headrevs...
Bryan O'Sullivan -
r16786:2631cd5d default
parent child Browse files
Show More
@@ -1,1291 +1,1368 b''
1 /*
1 /*
2 parsers.c - efficient content parsing
2 parsers.c - efficient content parsing
3
3
4 Copyright 2008 Matt Mackall <mpm@selenic.com> and others
4 Copyright 2008 Matt Mackall <mpm@selenic.com> and others
5
5
6 This software may be used and distributed according to the terms of
6 This software may be used and distributed according to the terms of
7 the GNU General Public License, incorporated herein by reference.
7 the GNU General Public License, incorporated herein by reference.
8 */
8 */
9
9
10 #include <Python.h>
10 #include <Python.h>
11 #include <ctype.h>
11 #include <ctype.h>
12 #include <string.h>
12 #include <string.h>
13
13
14 #include "util.h"
14 #include "util.h"
15
15
16 static inline int hexdigit(const char *p, Py_ssize_t off)
16 static inline int hexdigit(const char *p, Py_ssize_t off)
17 {
17 {
18 char c = p[off];
18 char c = p[off];
19
19
20 if (c >= '0' && c <= '9')
20 if (c >= '0' && c <= '9')
21 return c - '0';
21 return c - '0';
22 if (c >= 'a' && c <= 'f')
22 if (c >= 'a' && c <= 'f')
23 return c - 'a' + 10;
23 return c - 'a' + 10;
24 if (c >= 'A' && c <= 'F')
24 if (c >= 'A' && c <= 'F')
25 return c - 'A' + 10;
25 return c - 'A' + 10;
26
26
27 PyErr_SetString(PyExc_ValueError, "input contains non-hex character");
27 PyErr_SetString(PyExc_ValueError, "input contains non-hex character");
28 return 0;
28 return 0;
29 }
29 }
30
30
31 /*
31 /*
32 * Turn a hex-encoded string into binary.
32 * Turn a hex-encoded string into binary.
33 */
33 */
34 static PyObject *unhexlify(const char *str, int len)
34 static PyObject *unhexlify(const char *str, int len)
35 {
35 {
36 PyObject *ret;
36 PyObject *ret;
37 char *d;
37 char *d;
38 int i;
38 int i;
39
39
40 ret = PyBytes_FromStringAndSize(NULL, len / 2);
40 ret = PyBytes_FromStringAndSize(NULL, len / 2);
41
41
42 if (!ret)
42 if (!ret)
43 return NULL;
43 return NULL;
44
44
45 d = PyBytes_AsString(ret);
45 d = PyBytes_AsString(ret);
46
46
47 for (i = 0; i < len;) {
47 for (i = 0; i < len;) {
48 int hi = hexdigit(str, i++);
48 int hi = hexdigit(str, i++);
49 int lo = hexdigit(str, i++);
49 int lo = hexdigit(str, i++);
50 *d++ = (hi << 4) | lo;
50 *d++ = (hi << 4) | lo;
51 }
51 }
52
52
53 return ret;
53 return ret;
54 }
54 }
55
55
56 /*
56 /*
57 * This code assumes that a manifest is stitched together with newline
57 * This code assumes that a manifest is stitched together with newline
58 * ('\n') characters.
58 * ('\n') characters.
59 */
59 */
60 static PyObject *parse_manifest(PyObject *self, PyObject *args)
60 static PyObject *parse_manifest(PyObject *self, PyObject *args)
61 {
61 {
62 PyObject *mfdict, *fdict;
62 PyObject *mfdict, *fdict;
63 char *str, *cur, *start, *zero;
63 char *str, *cur, *start, *zero;
64 int len;
64 int len;
65
65
66 if (!PyArg_ParseTuple(args, "O!O!s#:parse_manifest",
66 if (!PyArg_ParseTuple(args, "O!O!s#:parse_manifest",
67 &PyDict_Type, &mfdict,
67 &PyDict_Type, &mfdict,
68 &PyDict_Type, &fdict,
68 &PyDict_Type, &fdict,
69 &str, &len))
69 &str, &len))
70 goto quit;
70 goto quit;
71
71
72 for (start = cur = str, zero = NULL; cur < str + len; cur++) {
72 for (start = cur = str, zero = NULL; cur < str + len; cur++) {
73 PyObject *file = NULL, *node = NULL;
73 PyObject *file = NULL, *node = NULL;
74 PyObject *flags = NULL;
74 PyObject *flags = NULL;
75 int nlen;
75 int nlen;
76
76
77 if (!*cur) {
77 if (!*cur) {
78 zero = cur;
78 zero = cur;
79 continue;
79 continue;
80 }
80 }
81 else if (*cur != '\n')
81 else if (*cur != '\n')
82 continue;
82 continue;
83
83
84 if (!zero) {
84 if (!zero) {
85 PyErr_SetString(PyExc_ValueError,
85 PyErr_SetString(PyExc_ValueError,
86 "manifest entry has no separator");
86 "manifest entry has no separator");
87 goto quit;
87 goto quit;
88 }
88 }
89
89
90 file = PyBytes_FromStringAndSize(start, zero - start);
90 file = PyBytes_FromStringAndSize(start, zero - start);
91
91
92 if (!file)
92 if (!file)
93 goto bail;
93 goto bail;
94
94
95 nlen = cur - zero - 1;
95 nlen = cur - zero - 1;
96
96
97 node = unhexlify(zero + 1, nlen > 40 ? 40 : nlen);
97 node = unhexlify(zero + 1, nlen > 40 ? 40 : nlen);
98 if (!node)
98 if (!node)
99 goto bail;
99 goto bail;
100
100
101 if (nlen > 40) {
101 if (nlen > 40) {
102 flags = PyBytes_FromStringAndSize(zero + 41,
102 flags = PyBytes_FromStringAndSize(zero + 41,
103 nlen - 40);
103 nlen - 40);
104 if (!flags)
104 if (!flags)
105 goto bail;
105 goto bail;
106
106
107 if (PyDict_SetItem(fdict, file, flags) == -1)
107 if (PyDict_SetItem(fdict, file, flags) == -1)
108 goto bail;
108 goto bail;
109 }
109 }
110
110
111 if (PyDict_SetItem(mfdict, file, node) == -1)
111 if (PyDict_SetItem(mfdict, file, node) == -1)
112 goto bail;
112 goto bail;
113
113
114 start = cur + 1;
114 start = cur + 1;
115 zero = NULL;
115 zero = NULL;
116
116
117 Py_XDECREF(flags);
117 Py_XDECREF(flags);
118 Py_XDECREF(node);
118 Py_XDECREF(node);
119 Py_XDECREF(file);
119 Py_XDECREF(file);
120 continue;
120 continue;
121 bail:
121 bail:
122 Py_XDECREF(flags);
122 Py_XDECREF(flags);
123 Py_XDECREF(node);
123 Py_XDECREF(node);
124 Py_XDECREF(file);
124 Py_XDECREF(file);
125 goto quit;
125 goto quit;
126 }
126 }
127
127
128 if (len > 0 && *(cur - 1) != '\n') {
128 if (len > 0 && *(cur - 1) != '\n') {
129 PyErr_SetString(PyExc_ValueError,
129 PyErr_SetString(PyExc_ValueError,
130 "manifest contains trailing garbage");
130 "manifest contains trailing garbage");
131 goto quit;
131 goto quit;
132 }
132 }
133
133
134 Py_INCREF(Py_None);
134 Py_INCREF(Py_None);
135 return Py_None;
135 return Py_None;
136 quit:
136 quit:
137 return NULL;
137 return NULL;
138 }
138 }
139
139
140 static PyObject *parse_dirstate(PyObject *self, PyObject *args)
140 static PyObject *parse_dirstate(PyObject *self, PyObject *args)
141 {
141 {
142 PyObject *dmap, *cmap, *parents = NULL, *ret = NULL;
142 PyObject *dmap, *cmap, *parents = NULL, *ret = NULL;
143 PyObject *fname = NULL, *cname = NULL, *entry = NULL;
143 PyObject *fname = NULL, *cname = NULL, *entry = NULL;
144 char *str, *cur, *end, *cpos;
144 char *str, *cur, *end, *cpos;
145 int state, mode, size, mtime;
145 int state, mode, size, mtime;
146 unsigned int flen;
146 unsigned int flen;
147 int len;
147 int len;
148
148
149 if (!PyArg_ParseTuple(args, "O!O!s#:parse_dirstate",
149 if (!PyArg_ParseTuple(args, "O!O!s#:parse_dirstate",
150 &PyDict_Type, &dmap,
150 &PyDict_Type, &dmap,
151 &PyDict_Type, &cmap,
151 &PyDict_Type, &cmap,
152 &str, &len))
152 &str, &len))
153 goto quit;
153 goto quit;
154
154
155 /* read parents */
155 /* read parents */
156 if (len < 40)
156 if (len < 40)
157 goto quit;
157 goto quit;
158
158
159 parents = Py_BuildValue("s#s#", str, 20, str + 20, 20);
159 parents = Py_BuildValue("s#s#", str, 20, str + 20, 20);
160 if (!parents)
160 if (!parents)
161 goto quit;
161 goto quit;
162
162
163 /* read filenames */
163 /* read filenames */
164 cur = str + 40;
164 cur = str + 40;
165 end = str + len;
165 end = str + len;
166
166
167 while (cur < end - 17) {
167 while (cur < end - 17) {
168 /* unpack header */
168 /* unpack header */
169 state = *cur;
169 state = *cur;
170 mode = getbe32(cur + 1);
170 mode = getbe32(cur + 1);
171 size = getbe32(cur + 5);
171 size = getbe32(cur + 5);
172 mtime = getbe32(cur + 9);
172 mtime = getbe32(cur + 9);
173 flen = getbe32(cur + 13);
173 flen = getbe32(cur + 13);
174 cur += 17;
174 cur += 17;
175 if (cur + flen > end || cur + flen < cur) {
175 if (cur + flen > end || cur + flen < cur) {
176 PyErr_SetString(PyExc_ValueError, "overflow in dirstate");
176 PyErr_SetString(PyExc_ValueError, "overflow in dirstate");
177 goto quit;
177 goto quit;
178 }
178 }
179
179
180 entry = Py_BuildValue("ciii", state, mode, size, mtime);
180 entry = Py_BuildValue("ciii", state, mode, size, mtime);
181 if (!entry)
181 if (!entry)
182 goto quit;
182 goto quit;
183 PyObject_GC_UnTrack(entry); /* don't waste time with this */
183 PyObject_GC_UnTrack(entry); /* don't waste time with this */
184
184
185 cpos = memchr(cur, 0, flen);
185 cpos = memchr(cur, 0, flen);
186 if (cpos) {
186 if (cpos) {
187 fname = PyBytes_FromStringAndSize(cur, cpos - cur);
187 fname = PyBytes_FromStringAndSize(cur, cpos - cur);
188 cname = PyBytes_FromStringAndSize(cpos + 1,
188 cname = PyBytes_FromStringAndSize(cpos + 1,
189 flen - (cpos - cur) - 1);
189 flen - (cpos - cur) - 1);
190 if (!fname || !cname ||
190 if (!fname || !cname ||
191 PyDict_SetItem(cmap, fname, cname) == -1 ||
191 PyDict_SetItem(cmap, fname, cname) == -1 ||
192 PyDict_SetItem(dmap, fname, entry) == -1)
192 PyDict_SetItem(dmap, fname, entry) == -1)
193 goto quit;
193 goto quit;
194 Py_DECREF(cname);
194 Py_DECREF(cname);
195 } else {
195 } else {
196 fname = PyBytes_FromStringAndSize(cur, flen);
196 fname = PyBytes_FromStringAndSize(cur, flen);
197 if (!fname ||
197 if (!fname ||
198 PyDict_SetItem(dmap, fname, entry) == -1)
198 PyDict_SetItem(dmap, fname, entry) == -1)
199 goto quit;
199 goto quit;
200 }
200 }
201 cur += flen;
201 cur += flen;
202 Py_DECREF(fname);
202 Py_DECREF(fname);
203 Py_DECREF(entry);
203 Py_DECREF(entry);
204 fname = cname = entry = NULL;
204 fname = cname = entry = NULL;
205 }
205 }
206
206
207 ret = parents;
207 ret = parents;
208 Py_INCREF(ret);
208 Py_INCREF(ret);
209 quit:
209 quit:
210 Py_XDECREF(fname);
210 Py_XDECREF(fname);
211 Py_XDECREF(cname);
211 Py_XDECREF(cname);
212 Py_XDECREF(entry);
212 Py_XDECREF(entry);
213 Py_XDECREF(parents);
213 Py_XDECREF(parents);
214 return ret;
214 return ret;
215 }
215 }
216
216
/*
 * A base-16 trie for fast node->rev mapping.
 *
 * Positive value is index of the next node in the trie
 * Negative value is a leaf: -(rev + 1)
 * Zero is empty
 */
typedef struct {
	int children[16];
} nodetree;
227
227
228 /*
228 /*
229 * This class has two behaviours.
229 * This class has two behaviours.
230 *
230 *
231 * When used in a list-like way (with integer keys), we decode an
231 * When used in a list-like way (with integer keys), we decode an
232 * entry in a RevlogNG index file on demand. Our last entry is a
232 * entry in a RevlogNG index file on demand. Our last entry is a
233 * sentinel, always a nullid. We have limited support for
233 * sentinel, always a nullid. We have limited support for
234 * integer-keyed insert and delete, only at elements right before the
234 * integer-keyed insert and delete, only at elements right before the
235 * sentinel.
235 * sentinel.
236 *
236 *
237 * With string keys, we lazily perform a reverse mapping from node to
237 * With string keys, we lazily perform a reverse mapping from node to
238 * rev, using a base-16 trie.
238 * rev, using a base-16 trie.
239 */
239 */
240 typedef struct {
240 typedef struct {
241 PyObject_HEAD
241 PyObject_HEAD
242 /* Type-specific fields go here. */
242 /* Type-specific fields go here. */
243 PyObject *data; /* raw bytes of index */
243 PyObject *data; /* raw bytes of index */
244 PyObject **cache; /* cached tuples */
244 PyObject **cache; /* cached tuples */
245 const char **offsets; /* populated on demand */
245 const char **offsets; /* populated on demand */
246 Py_ssize_t raw_length; /* original number of elements */
246 Py_ssize_t raw_length; /* original number of elements */
247 Py_ssize_t length; /* current number of elements */
247 Py_ssize_t length; /* current number of elements */
248 PyObject *added; /* populated on demand */
248 PyObject *added; /* populated on demand */
249 nodetree *nt; /* base-16 trie */
249 nodetree *nt; /* base-16 trie */
250 int ntlength; /* # nodes in use */
250 int ntlength; /* # nodes in use */
251 int ntcapacity; /* # nodes allocated */
251 int ntcapacity; /* # nodes allocated */
252 int ntdepth; /* maximum depth of tree */
252 int ntdepth; /* maximum depth of tree */
253 int ntsplits; /* # splits performed */
253 int ntsplits; /* # splits performed */
254 int ntrev; /* last rev scanned */
254 int ntrev; /* last rev scanned */
255 int ntlookups; /* # lookups */
255 int ntlookups; /* # lookups */
256 int ntmisses; /* # lookups that miss the cache */
256 int ntmisses; /* # lookups that miss the cache */
257 int inlined;
257 int inlined;
258 } indexObject;
258 } indexObject;
259
259
260 static Py_ssize_t index_length(const indexObject *self)
260 static Py_ssize_t index_length(const indexObject *self)
261 {
261 {
262 if (self->added == NULL)
262 if (self->added == NULL)
263 return self->length;
263 return self->length;
264 return self->length + PyList_GET_SIZE(self->added);
264 return self->length + PyList_GET_SIZE(self->added);
265 }
265 }
266
266
267 static PyObject *nullentry;
267 static PyObject *nullentry;
268 static const char nullid[20];
268 static const char nullid[20];
269
269
270 static long inline_scan(indexObject *self, const char **offsets);
270 static long inline_scan(indexObject *self, const char **offsets);
271
271
272 #if LONG_MAX == 0x7fffffffL
272 #if LONG_MAX == 0x7fffffffL
273 static char *tuple_format = "Kiiiiiis#";
273 static char *tuple_format = "Kiiiiiis#";
274 #else
274 #else
275 static char *tuple_format = "kiiiiiis#";
275 static char *tuple_format = "kiiiiiis#";
276 #endif
276 #endif
277
277
278 /*
278 /*
279 * Return a pointer to the beginning of a RevlogNG record.
279 * Return a pointer to the beginning of a RevlogNG record.
280 */
280 */
281 static const char *index_deref(indexObject *self, Py_ssize_t pos)
281 static const char *index_deref(indexObject *self, Py_ssize_t pos)
282 {
282 {
283 if (self->inlined && pos > 0) {
283 if (self->inlined && pos > 0) {
284 if (self->offsets == NULL) {
284 if (self->offsets == NULL) {
285 self->offsets = malloc(self->raw_length *
285 self->offsets = malloc(self->raw_length *
286 sizeof(*self->offsets));
286 sizeof(*self->offsets));
287 if (self->offsets == NULL)
287 if (self->offsets == NULL)
288 return (const char *)PyErr_NoMemory();
288 return (const char *)PyErr_NoMemory();
289 inline_scan(self, self->offsets);
289 inline_scan(self, self->offsets);
290 }
290 }
291 return self->offsets[pos];
291 return self->offsets[pos];
292 }
292 }
293
293
294 return PyString_AS_STRING(self->data) + pos * 64;
294 return PyString_AS_STRING(self->data) + pos * 64;
295 }
295 }
296
296
297 /*
297 /*
298 * RevlogNG format (all in big endian, data may be inlined):
298 * RevlogNG format (all in big endian, data may be inlined):
299 * 6 bytes: offset
299 * 6 bytes: offset
300 * 2 bytes: flags
300 * 2 bytes: flags
301 * 4 bytes: compressed length
301 * 4 bytes: compressed length
302 * 4 bytes: uncompressed length
302 * 4 bytes: uncompressed length
303 * 4 bytes: base revision
303 * 4 bytes: base revision
304 * 4 bytes: link revision
304 * 4 bytes: link revision
305 * 4 bytes: parent 1 revision
305 * 4 bytes: parent 1 revision
306 * 4 bytes: parent 2 revision
306 * 4 bytes: parent 2 revision
307 * 32 bytes: nodeid (only 20 bytes used)
307 * 32 bytes: nodeid (only 20 bytes used)
308 */
308 */
309 static PyObject *index_get(indexObject *self, Py_ssize_t pos)
309 static PyObject *index_get(indexObject *self, Py_ssize_t pos)
310 {
310 {
311 uint64_t offset_flags;
311 uint64_t offset_flags;
312 int comp_len, uncomp_len, base_rev, link_rev, parent_1, parent_2;
312 int comp_len, uncomp_len, base_rev, link_rev, parent_1, parent_2;
313 const char *c_node_id;
313 const char *c_node_id;
314 const char *data;
314 const char *data;
315 Py_ssize_t length = index_length(self);
315 Py_ssize_t length = index_length(self);
316 PyObject *entry;
316 PyObject *entry;
317
317
318 if (pos < 0)
318 if (pos < 0)
319 pos += length;
319 pos += length;
320
320
321 if (pos < 0 || pos >= length) {
321 if (pos < 0 || pos >= length) {
322 PyErr_SetString(PyExc_IndexError, "revlog index out of range");
322 PyErr_SetString(PyExc_IndexError, "revlog index out of range");
323 return NULL;
323 return NULL;
324 }
324 }
325
325
326 if (pos == length - 1) {
326 if (pos == length - 1) {
327 Py_INCREF(nullentry);
327 Py_INCREF(nullentry);
328 return nullentry;
328 return nullentry;
329 }
329 }
330
330
331 if (pos >= self->length - 1) {
331 if (pos >= self->length - 1) {
332 PyObject *obj;
332 PyObject *obj;
333 obj = PyList_GET_ITEM(self->added, pos - self->length + 1);
333 obj = PyList_GET_ITEM(self->added, pos - self->length + 1);
334 Py_INCREF(obj);
334 Py_INCREF(obj);
335 return obj;
335 return obj;
336 }
336 }
337
337
338 if (self->cache) {
338 if (self->cache) {
339 if (self->cache[pos]) {
339 if (self->cache[pos]) {
340 Py_INCREF(self->cache[pos]);
340 Py_INCREF(self->cache[pos]);
341 return self->cache[pos];
341 return self->cache[pos];
342 }
342 }
343 } else {
343 } else {
344 self->cache = calloc(self->raw_length, sizeof(PyObject *));
344 self->cache = calloc(self->raw_length, sizeof(PyObject *));
345 if (self->cache == NULL)
345 if (self->cache == NULL)
346 return PyErr_NoMemory();
346 return PyErr_NoMemory();
347 }
347 }
348
348
349 data = index_deref(self, pos);
349 data = index_deref(self, pos);
350 if (data == NULL)
350 if (data == NULL)
351 return NULL;
351 return NULL;
352
352
353 offset_flags = getbe32(data + 4);
353 offset_flags = getbe32(data + 4);
354 if (pos == 0) /* mask out version number for the first entry */
354 if (pos == 0) /* mask out version number for the first entry */
355 offset_flags &= 0xFFFF;
355 offset_flags &= 0xFFFF;
356 else {
356 else {
357 uint32_t offset_high = getbe32(data);
357 uint32_t offset_high = getbe32(data);
358 offset_flags |= ((uint64_t)offset_high) << 32;
358 offset_flags |= ((uint64_t)offset_high) << 32;
359 }
359 }
360
360
361 comp_len = getbe32(data + 8);
361 comp_len = getbe32(data + 8);
362 uncomp_len = getbe32(data + 12);
362 uncomp_len = getbe32(data + 12);
363 base_rev = getbe32(data + 16);
363 base_rev = getbe32(data + 16);
364 link_rev = getbe32(data + 20);
364 link_rev = getbe32(data + 20);
365 parent_1 = getbe32(data + 24);
365 parent_1 = getbe32(data + 24);
366 parent_2 = getbe32(data + 28);
366 parent_2 = getbe32(data + 28);
367 c_node_id = data + 32;
367 c_node_id = data + 32;
368
368
369 entry = Py_BuildValue(tuple_format, offset_flags, comp_len,
369 entry = Py_BuildValue(tuple_format, offset_flags, comp_len,
370 uncomp_len, base_rev, link_rev,
370 uncomp_len, base_rev, link_rev,
371 parent_1, parent_2, c_node_id, 20);
371 parent_1, parent_2, c_node_id, 20);
372
372
373 if (entry)
373 if (entry)
374 PyObject_GC_UnTrack(entry);
374 PyObject_GC_UnTrack(entry);
375
375
376 self->cache[pos] = entry;
376 self->cache[pos] = entry;
377 Py_INCREF(entry);
377 Py_INCREF(entry);
378
378
379 return entry;
379 return entry;
380 }
380 }
381
381
382 /*
382 /*
383 * Return the 20-byte SHA of the node corresponding to the given rev.
383 * Return the 20-byte SHA of the node corresponding to the given rev.
384 */
384 */
385 static const char *index_node(indexObject *self, Py_ssize_t pos)
385 static const char *index_node(indexObject *self, Py_ssize_t pos)
386 {
386 {
387 Py_ssize_t length = index_length(self);
387 Py_ssize_t length = index_length(self);
388 const char *data;
388 const char *data;
389
389
390 if (pos == length - 1 || pos == INT_MAX)
390 if (pos == length - 1 || pos == INT_MAX)
391 return nullid;
391 return nullid;
392
392
393 if (pos >= length)
393 if (pos >= length)
394 return NULL;
394 return NULL;
395
395
396 if (pos >= self->length - 1) {
396 if (pos >= self->length - 1) {
397 PyObject *tuple, *str;
397 PyObject *tuple, *str;
398 tuple = PyList_GET_ITEM(self->added, pos - self->length + 1);
398 tuple = PyList_GET_ITEM(self->added, pos - self->length + 1);
399 str = PyTuple_GetItem(tuple, 7);
399 str = PyTuple_GetItem(tuple, 7);
400 return str ? PyString_AS_STRING(str) : NULL;
400 return str ? PyString_AS_STRING(str) : NULL;
401 }
401 }
402
402
403 data = index_deref(self, pos);
403 data = index_deref(self, pos);
404 return data ? data + 32 : NULL;
404 return data ? data + 32 : NULL;
405 }
405 }
406
406
407 static int nt_insert(indexObject *self, const char *node, int rev);
407 static int nt_insert(indexObject *self, const char *node, int rev);
408
408
409 static int node_check(PyObject *obj, char **node, Py_ssize_t *nodelen)
409 static int node_check(PyObject *obj, char **node, Py_ssize_t *nodelen)
410 {
410 {
411 if (PyString_AsStringAndSize(obj, node, nodelen) == -1)
411 if (PyString_AsStringAndSize(obj, node, nodelen) == -1)
412 return -1;
412 return -1;
413 if (*nodelen == 20)
413 if (*nodelen == 20)
414 return 0;
414 return 0;
415 PyErr_SetString(PyExc_ValueError, "20-byte hash required");
415 PyErr_SetString(PyExc_ValueError, "20-byte hash required");
416 return -1;
416 return -1;
417 }
417 }
418
418
419 static PyObject *index_insert(indexObject *self, PyObject *args)
419 static PyObject *index_insert(indexObject *self, PyObject *args)
420 {
420 {
421 PyObject *obj;
421 PyObject *obj;
422 char *node;
422 char *node;
423 long offset;
423 long offset;
424 Py_ssize_t len, nodelen;
424 Py_ssize_t len, nodelen;
425
425
426 if (!PyArg_ParseTuple(args, "lO", &offset, &obj))
426 if (!PyArg_ParseTuple(args, "lO", &offset, &obj))
427 return NULL;
427 return NULL;
428
428
429 if (!PyTuple_Check(obj) || PyTuple_GET_SIZE(obj) != 8) {
429 if (!PyTuple_Check(obj) || PyTuple_GET_SIZE(obj) != 8) {
430 PyErr_SetString(PyExc_TypeError, "8-tuple required");
430 PyErr_SetString(PyExc_TypeError, "8-tuple required");
431 return NULL;
431 return NULL;
432 }
432 }
433
433
434 if (node_check(PyTuple_GET_ITEM(obj, 7), &node, &nodelen) == -1)
434 if (node_check(PyTuple_GET_ITEM(obj, 7), &node, &nodelen) == -1)
435 return NULL;
435 return NULL;
436
436
437 len = index_length(self);
437 len = index_length(self);
438
438
439 if (offset < 0)
439 if (offset < 0)
440 offset += len;
440 offset += len;
441
441
442 if (offset != len - 1) {
442 if (offset != len - 1) {
443 PyErr_SetString(PyExc_IndexError,
443 PyErr_SetString(PyExc_IndexError,
444 "insert only supported at index -1");
444 "insert only supported at index -1");
445 return NULL;
445 return NULL;
446 }
446 }
447
447
448 if (offset > INT_MAX) {
448 if (offset > INT_MAX) {
449 PyErr_SetString(PyExc_ValueError,
449 PyErr_SetString(PyExc_ValueError,
450 "currently only 2**31 revs supported");
450 "currently only 2**31 revs supported");
451 return NULL;
451 return NULL;
452 }
452 }
453
453
454 if (self->added == NULL) {
454 if (self->added == NULL) {
455 self->added = PyList_New(0);
455 self->added = PyList_New(0);
456 if (self->added == NULL)
456 if (self->added == NULL)
457 return NULL;
457 return NULL;
458 }
458 }
459
459
460 if (PyList_Append(self->added, obj) == -1)
460 if (PyList_Append(self->added, obj) == -1)
461 return NULL;
461 return NULL;
462
462
463 if (self->nt)
463 if (self->nt)
464 nt_insert(self, node, (int)offset);
464 nt_insert(self, node, (int)offset);
465
465
466 Py_RETURN_NONE;
466 Py_RETURN_NONE;
467 }
467 }
468
468
469 static void _index_clearcaches(indexObject *self)
469 static void _index_clearcaches(indexObject *self)
470 {
470 {
471 if (self->cache) {
471 if (self->cache) {
472 Py_ssize_t i;
472 Py_ssize_t i;
473
473
474 for (i = 0; i < self->raw_length; i++)
474 for (i = 0; i < self->raw_length; i++)
475 Py_CLEAR(self->cache[i]);
475 Py_CLEAR(self->cache[i]);
476 free(self->cache);
476 free(self->cache);
477 self->cache = NULL;
477 self->cache = NULL;
478 }
478 }
479 if (self->offsets) {
479 if (self->offsets) {
480 free(self->offsets);
480 free(self->offsets);
481 self->offsets = NULL;
481 self->offsets = NULL;
482 }
482 }
483 if (self->nt) {
483 if (self->nt) {
484 free(self->nt);
484 free(self->nt);
485 self->nt = NULL;
485 self->nt = NULL;
486 }
486 }
487 }
487 }
488
488
489 static PyObject *index_clearcaches(indexObject *self)
489 static PyObject *index_clearcaches(indexObject *self)
490 {
490 {
491 _index_clearcaches(self);
491 _index_clearcaches(self);
492 self->ntlength = self->ntcapacity = 0;
492 self->ntlength = self->ntcapacity = 0;
493 self->ntdepth = self->ntsplits = 0;
493 self->ntdepth = self->ntsplits = 0;
494 self->ntrev = -1;
494 self->ntrev = -1;
495 self->ntlookups = self->ntmisses = 0;
495 self->ntlookups = self->ntmisses = 0;
496 Py_RETURN_NONE;
496 Py_RETURN_NONE;
497 }
497 }
498
498
499 static PyObject *index_stats(indexObject *self)
499 static PyObject *index_stats(indexObject *self)
500 {
500 {
501 PyObject *obj = PyDict_New();
501 PyObject *obj = PyDict_New();
502
502
503 if (obj == NULL)
503 if (obj == NULL)
504 return NULL;
504 return NULL;
505
505
506 #define istat(__n, __d) \
506 #define istat(__n, __d) \
507 if (PyDict_SetItemString(obj, __d, PyInt_FromSsize_t(self->__n)) == -1) \
507 if (PyDict_SetItemString(obj, __d, PyInt_FromSsize_t(self->__n)) == -1) \
508 goto bail;
508 goto bail;
509
509
510 if (self->added) {
510 if (self->added) {
511 Py_ssize_t len = PyList_GET_SIZE(self->added);
511 Py_ssize_t len = PyList_GET_SIZE(self->added);
512 if (PyDict_SetItemString(obj, "index entries added",
512 if (PyDict_SetItemString(obj, "index entries added",
513 PyInt_FromSsize_t(len)) == -1)
513 PyInt_FromSsize_t(len)) == -1)
514 goto bail;
514 goto bail;
515 }
515 }
516
516
517 if (self->raw_length != self->length - 1)
517 if (self->raw_length != self->length - 1)
518 istat(raw_length, "revs on disk");
518 istat(raw_length, "revs on disk");
519 istat(length, "revs in memory");
519 istat(length, "revs in memory");
520 istat(ntcapacity, "node trie capacity");
520 istat(ntcapacity, "node trie capacity");
521 istat(ntdepth, "node trie depth");
521 istat(ntdepth, "node trie depth");
522 istat(ntlength, "node trie count");
522 istat(ntlength, "node trie count");
523 istat(ntlookups, "node trie lookups");
523 istat(ntlookups, "node trie lookups");
524 istat(ntmisses, "node trie misses");
524 istat(ntmisses, "node trie misses");
525 istat(ntrev, "node trie last rev scanned");
525 istat(ntrev, "node trie last rev scanned");
526 istat(ntsplits, "node trie splits");
526 istat(ntsplits, "node trie splits");
527
527
528 #undef istat
528 #undef istat
529
529
530 return obj;
530 return obj;
531
531
532 bail:
532 bail:
533 Py_XDECREF(obj);
533 Py_XDECREF(obj);
534 return NULL;
534 return NULL;
535 }
535 }
536
536
537 static PyObject *index_headrevs(indexObject *self)
538 {
539 Py_ssize_t i, len, addlen;
540 char *nothead = NULL;
541 PyObject *heads;
542
543 len = index_length(self) - 1;
544 heads = PyList_New(0);
545 if (heads == NULL)
546 goto bail;
547 if (len == 0) {
548 PyObject *nullid = PyInt_FromLong(-1);
549 if (nullid == NULL || PyList_Append(heads, nullid) == -1) {
550 Py_XDECREF(nullid);
551 goto bail;
552 }
553 goto done;
554 }
555
556 nothead = calloc(len, 1);
557 if (nothead == NULL)
558 goto bail;
559
560 for (i = 0; i < self->raw_length; i++) {
561 const char *data = index_deref(self, i);
562 int parent_1 = getbe32(data + 24);
563 int parent_2 = getbe32(data + 28);
564 if (parent_1 >= 0)
565 nothead[parent_1] = 1;
566 if (parent_2 >= 0)
567 nothead[parent_2] = 1;
568 }
569
570 addlen = self->added ? PyList_GET_SIZE(self->added) : 0;
571
572 for (i = 0; i < addlen; i++) {
573 PyObject *rev = PyList_GET_ITEM(self->added, i);
574 PyObject *p1 = PyTuple_GET_ITEM(rev, 5);
575 PyObject *p2 = PyTuple_GET_ITEM(rev, 6);
576 long parent_1, parent_2;
577
578 if (!PyInt_Check(p1) || !PyInt_Check(p2)) {
579 PyErr_SetString(PyExc_TypeError,
580 "revlog parents are invalid");
581 goto bail;
582 }
583 parent_1 = PyInt_AS_LONG(p1);
584 parent_2 = PyInt_AS_LONG(p2);
585 if (parent_1 >= 0)
586 nothead[parent_1] = 1;
587 if (parent_2 >= 0)
588 nothead[parent_2] = 1;
589 }
590
591 for (i = 0; i < len; i++) {
592 PyObject *head;
593
594 if (nothead[i])
595 continue;
596 head = PyInt_FromLong(i);
597 if (head == NULL || PyList_Append(heads, head) == -1) {
598 Py_XDECREF(head);
599 goto bail;
600 }
601 }
602
603 done:
604 free(nothead);
605 return heads;
606 bail:
607 Py_XDECREF(heads);
608 free(nothead);
609 return NULL;
610 }
611
537 static inline int nt_level(const char *node, Py_ssize_t level)
612 static inline int nt_level(const char *node, Py_ssize_t level)
538 {
613 {
539 int v = node[level>>1];
614 int v = node[level>>1];
540 if (!(level & 1))
615 if (!(level & 1))
541 v >>= 4;
616 v >>= 4;
542 return v & 0xf;
617 return v & 0xf;
543 }
618 }
544
619
545 /*
620 /*
546 * Return values:
621 * Return values:
547 *
622 *
548 * -4: match is ambiguous (multiple candidates)
623 * -4: match is ambiguous (multiple candidates)
549 * -2: not found
624 * -2: not found
550 * rest: valid rev
625 * rest: valid rev
551 */
626 */
552 static int nt_find(indexObject *self, const char *node, Py_ssize_t nodelen,
627 static int nt_find(indexObject *self, const char *node, Py_ssize_t nodelen,
553 int hex)
628 int hex)
554 {
629 {
555 int (*getnybble)(const char *, Py_ssize_t) = hex ? hexdigit : nt_level;
630 int (*getnybble)(const char *, Py_ssize_t) = hex ? hexdigit : nt_level;
556 int level, maxlevel, off;
631 int level, maxlevel, off;
557
632
558 if (nodelen == 20 && node[0] == '\0' && memcmp(node, nullid, 20) == 0)
633 if (nodelen == 20 && node[0] == '\0' && memcmp(node, nullid, 20) == 0)
559 return -1;
634 return -1;
560
635
561 if (self->nt == NULL)
636 if (self->nt == NULL)
562 return -2;
637 return -2;
563
638
564 if (hex)
639 if (hex)
565 maxlevel = nodelen > 40 ? 40 : (int)nodelen;
640 maxlevel = nodelen > 40 ? 40 : (int)nodelen;
566 else
641 else
567 maxlevel = nodelen > 20 ? 40 : ((int)nodelen * 2);
642 maxlevel = nodelen > 20 ? 40 : ((int)nodelen * 2);
568
643
569 for (level = off = 0; level < maxlevel; level++) {
644 for (level = off = 0; level < maxlevel; level++) {
570 int k = getnybble(node, level);
645 int k = getnybble(node, level);
571 nodetree *n = &self->nt[off];
646 nodetree *n = &self->nt[off];
572 int v = n->children[k];
647 int v = n->children[k];
573
648
574 if (v < 0) {
649 if (v < 0) {
575 const char *n;
650 const char *n;
576 Py_ssize_t i;
651 Py_ssize_t i;
577
652
578 v = -v - 1;
653 v = -v - 1;
579 n = index_node(self, v);
654 n = index_node(self, v);
580 if (n == NULL)
655 if (n == NULL)
581 return -2;
656 return -2;
582 for (i = level; i < maxlevel; i++)
657 for (i = level; i < maxlevel; i++)
583 if (getnybble(node, i) != nt_level(n, i))
658 if (getnybble(node, i) != nt_level(n, i))
584 return -2;
659 return -2;
585 return v;
660 return v;
586 }
661 }
587 if (v == 0)
662 if (v == 0)
588 return -2;
663 return -2;
589 off = v;
664 off = v;
590 }
665 }
591 /* multiple matches against an ambiguous prefix */
666 /* multiple matches against an ambiguous prefix */
592 return -4;
667 return -4;
593 }
668 }
594
669
595 static int nt_new(indexObject *self)
670 static int nt_new(indexObject *self)
596 {
671 {
597 if (self->ntlength == self->ntcapacity) {
672 if (self->ntlength == self->ntcapacity) {
598 self->ntcapacity *= 2;
673 self->ntcapacity *= 2;
599 self->nt = realloc(self->nt,
674 self->nt = realloc(self->nt,
600 self->ntcapacity * sizeof(nodetree));
675 self->ntcapacity * sizeof(nodetree));
601 if (self->nt == NULL) {
676 if (self->nt == NULL) {
602 PyErr_SetString(PyExc_MemoryError, "out of memory");
677 PyErr_SetString(PyExc_MemoryError, "out of memory");
603 return -1;
678 return -1;
604 }
679 }
605 memset(&self->nt[self->ntlength], 0,
680 memset(&self->nt[self->ntlength], 0,
606 sizeof(nodetree) * (self->ntcapacity - self->ntlength));
681 sizeof(nodetree) * (self->ntcapacity - self->ntlength));
607 }
682 }
608 return self->ntlength++;
683 return self->ntlength++;
609 }
684 }
610
685
611 static int nt_insert(indexObject *self, const char *node, int rev)
686 static int nt_insert(indexObject *self, const char *node, int rev)
612 {
687 {
613 int level = 0;
688 int level = 0;
614 int off = 0;
689 int off = 0;
615
690
616 while (level < 40) {
691 while (level < 40) {
617 int k = nt_level(node, level);
692 int k = nt_level(node, level);
618 nodetree *n;
693 nodetree *n;
619 int v;
694 int v;
620
695
621 n = &self->nt[off];
696 n = &self->nt[off];
622 v = n->children[k];
697 v = n->children[k];
623
698
624 if (v == 0) {
699 if (v == 0) {
625 n->children[k] = -rev - 1;
700 n->children[k] = -rev - 1;
626 return 0;
701 return 0;
627 }
702 }
628 if (v < 0) {
703 if (v < 0) {
629 const char *oldnode = index_node(self, -v - 1);
704 const char *oldnode = index_node(self, -v - 1);
630 int noff;
705 int noff;
631
706
632 if (!oldnode || !memcmp(oldnode, node, 20)) {
707 if (!oldnode || !memcmp(oldnode, node, 20)) {
633 n->children[k] = -rev - 1;
708 n->children[k] = -rev - 1;
634 return 0;
709 return 0;
635 }
710 }
636 noff = nt_new(self);
711 noff = nt_new(self);
637 if (noff == -1)
712 if (noff == -1)
638 return -1;
713 return -1;
639 /* self->nt may have been changed by realloc */
714 /* self->nt may have been changed by realloc */
640 self->nt[off].children[k] = noff;
715 self->nt[off].children[k] = noff;
641 off = noff;
716 off = noff;
642 n = &self->nt[off];
717 n = &self->nt[off];
643 n->children[nt_level(oldnode, ++level)] = v;
718 n->children[nt_level(oldnode, ++level)] = v;
644 if (level > self->ntdepth)
719 if (level > self->ntdepth)
645 self->ntdepth = level;
720 self->ntdepth = level;
646 self->ntsplits += 1;
721 self->ntsplits += 1;
647 } else {
722 } else {
648 level += 1;
723 level += 1;
649 off = v;
724 off = v;
650 }
725 }
651 }
726 }
652
727
653 return -1;
728 return -1;
654 }
729 }
655
730
656 static int nt_init(indexObject *self)
731 static int nt_init(indexObject *self)
657 {
732 {
658 if (self->nt == NULL) {
733 if (self->nt == NULL) {
659 self->ntcapacity = self->raw_length < 4
734 self->ntcapacity = self->raw_length < 4
660 ? 4 : self->raw_length / 2;
735 ? 4 : self->raw_length / 2;
661 self->nt = calloc(self->ntcapacity, sizeof(nodetree));
736 self->nt = calloc(self->ntcapacity, sizeof(nodetree));
662 if (self->nt == NULL) {
737 if (self->nt == NULL) {
663 PyErr_NoMemory();
738 PyErr_NoMemory();
664 return -1;
739 return -1;
665 }
740 }
666 self->ntlength = 1;
741 self->ntlength = 1;
667 self->ntrev = (int)index_length(self) - 1;
742 self->ntrev = (int)index_length(self) - 1;
668 self->ntlookups = 1;
743 self->ntlookups = 1;
669 self->ntmisses = 0;
744 self->ntmisses = 0;
670 if (nt_insert(self, nullid, INT_MAX) == -1)
745 if (nt_insert(self, nullid, INT_MAX) == -1)
671 return -1;
746 return -1;
672 }
747 }
673 return 0;
748 return 0;
674 }
749 }
675
750
676 /*
751 /*
677 * Return values:
752 * Return values:
678 *
753 *
679 * -3: error (exception set)
754 * -3: error (exception set)
680 * -2: not found (no exception set)
755 * -2: not found (no exception set)
681 * rest: valid rev
756 * rest: valid rev
682 */
757 */
683 static int index_find_node(indexObject *self,
758 static int index_find_node(indexObject *self,
684 const char *node, Py_ssize_t nodelen)
759 const char *node, Py_ssize_t nodelen)
685 {
760 {
686 int rev;
761 int rev;
687
762
688 self->ntlookups++;
763 self->ntlookups++;
689 rev = nt_find(self, node, nodelen, 0);
764 rev = nt_find(self, node, nodelen, 0);
690 if (rev >= -1)
765 if (rev >= -1)
691 return rev;
766 return rev;
692
767
693 if (nt_init(self) == -1)
768 if (nt_init(self) == -1)
694 return -3;
769 return -3;
695
770
696 /*
771 /*
697 * For the first handful of lookups, we scan the entire index,
772 * For the first handful of lookups, we scan the entire index,
698 * and cache only the matching nodes. This optimizes for cases
773 * and cache only the matching nodes. This optimizes for cases
699 * like "hg tip", where only a few nodes are accessed.
774 * like "hg tip", where only a few nodes are accessed.
700 *
775 *
701 * After that, we cache every node we visit, using a single
776 * After that, we cache every node we visit, using a single
702 * scan amortized over multiple lookups. This gives the best
777 * scan amortized over multiple lookups. This gives the best
703 * bulk performance, e.g. for "hg log".
778 * bulk performance, e.g. for "hg log".
704 */
779 */
705 if (self->ntmisses++ < 4) {
780 if (self->ntmisses++ < 4) {
706 for (rev = self->ntrev - 1; rev >= 0; rev--) {
781 for (rev = self->ntrev - 1; rev >= 0; rev--) {
707 const char *n = index_node(self, rev);
782 const char *n = index_node(self, rev);
708 if (n == NULL)
783 if (n == NULL)
709 return -2;
784 return -2;
710 if (memcmp(node, n, nodelen > 20 ? 20 : nodelen) == 0) {
785 if (memcmp(node, n, nodelen > 20 ? 20 : nodelen) == 0) {
711 if (nt_insert(self, n, rev) == -1)
786 if (nt_insert(self, n, rev) == -1)
712 return -3;
787 return -3;
713 break;
788 break;
714 }
789 }
715 }
790 }
716 } else {
791 } else {
717 for (rev = self->ntrev - 1; rev >= 0; rev--) {
792 for (rev = self->ntrev - 1; rev >= 0; rev--) {
718 const char *n = index_node(self, rev);
793 const char *n = index_node(self, rev);
719 if (n == NULL) {
794 if (n == NULL) {
720 self->ntrev = rev + 1;
795 self->ntrev = rev + 1;
721 return -2;
796 return -2;
722 }
797 }
723 if (nt_insert(self, n, rev) == -1) {
798 if (nt_insert(self, n, rev) == -1) {
724 self->ntrev = rev + 1;
799 self->ntrev = rev + 1;
725 return -3;
800 return -3;
726 }
801 }
727 if (memcmp(node, n, nodelen > 20 ? 20 : nodelen) == 0) {
802 if (memcmp(node, n, nodelen > 20 ? 20 : nodelen) == 0) {
728 break;
803 break;
729 }
804 }
730 }
805 }
731 self->ntrev = rev;
806 self->ntrev = rev;
732 }
807 }
733
808
734 if (rev >= 0)
809 if (rev >= 0)
735 return rev;
810 return rev;
736 return -2;
811 return -2;
737 }
812 }
738
813
739 static PyObject *raise_revlog_error(void)
814 static PyObject *raise_revlog_error(void)
740 {
815 {
741 static PyObject *errclass;
816 static PyObject *errclass;
742 PyObject *mod = NULL, *errobj;
817 PyObject *mod = NULL, *errobj;
743
818
744 if (errclass == NULL) {
819 if (errclass == NULL) {
745 PyObject *dict;
820 PyObject *dict;
746
821
747 mod = PyImport_ImportModule("mercurial.error");
822 mod = PyImport_ImportModule("mercurial.error");
748 if (mod == NULL)
823 if (mod == NULL)
749 goto classfail;
824 goto classfail;
750
825
751 dict = PyModule_GetDict(mod);
826 dict = PyModule_GetDict(mod);
752 if (dict == NULL)
827 if (dict == NULL)
753 goto classfail;
828 goto classfail;
754
829
755 errclass = PyDict_GetItemString(dict, "RevlogError");
830 errclass = PyDict_GetItemString(dict, "RevlogError");
756 if (errclass == NULL) {
831 if (errclass == NULL) {
757 PyErr_SetString(PyExc_SystemError,
832 PyErr_SetString(PyExc_SystemError,
758 "could not find RevlogError");
833 "could not find RevlogError");
759 goto classfail;
834 goto classfail;
760 }
835 }
761 Py_INCREF(errclass);
836 Py_INCREF(errclass);
762 }
837 }
763
838
764 errobj = PyObject_CallFunction(errclass, NULL);
839 errobj = PyObject_CallFunction(errclass, NULL);
765 if (errobj == NULL)
840 if (errobj == NULL)
766 return NULL;
841 return NULL;
767 PyErr_SetObject(errclass, errobj);
842 PyErr_SetObject(errclass, errobj);
768 return errobj;
843 return errobj;
769
844
770 classfail:
845 classfail:
771 Py_XDECREF(mod);
846 Py_XDECREF(mod);
772 return NULL;
847 return NULL;
773 }
848 }
774
849
775 static PyObject *index_getitem(indexObject *self, PyObject *value)
850 static PyObject *index_getitem(indexObject *self, PyObject *value)
776 {
851 {
777 char *node;
852 char *node;
778 Py_ssize_t nodelen;
853 Py_ssize_t nodelen;
779 int rev;
854 int rev;
780
855
781 if (PyInt_Check(value))
856 if (PyInt_Check(value))
782 return index_get(self, PyInt_AS_LONG(value));
857 return index_get(self, PyInt_AS_LONG(value));
783
858
784 if (node_check(value, &node, &nodelen) == -1)
859 if (node_check(value, &node, &nodelen) == -1)
785 return NULL;
860 return NULL;
786 rev = index_find_node(self, node, nodelen);
861 rev = index_find_node(self, node, nodelen);
787 if (rev >= -1)
862 if (rev >= -1)
788 return PyInt_FromLong(rev);
863 return PyInt_FromLong(rev);
789 if (rev == -2)
864 if (rev == -2)
790 raise_revlog_error();
865 raise_revlog_error();
791 return NULL;
866 return NULL;
792 }
867 }
793
868
794 static int nt_partialmatch(indexObject *self, const char *node,
869 static int nt_partialmatch(indexObject *self, const char *node,
795 Py_ssize_t nodelen)
870 Py_ssize_t nodelen)
796 {
871 {
797 int rev;
872 int rev;
798
873
799 if (nt_init(self) == -1)
874 if (nt_init(self) == -1)
800 return -3;
875 return -3;
801
876
802 if (self->ntrev > 0) {
877 if (self->ntrev > 0) {
803 /* ensure that the radix tree is fully populated */
878 /* ensure that the radix tree is fully populated */
804 for (rev = self->ntrev - 1; rev >= 0; rev--) {
879 for (rev = self->ntrev - 1; rev >= 0; rev--) {
805 const char *n = index_node(self, rev);
880 const char *n = index_node(self, rev);
806 if (n == NULL)
881 if (n == NULL)
807 return -2;
882 return -2;
808 if (nt_insert(self, n, rev) == -1)
883 if (nt_insert(self, n, rev) == -1)
809 return -3;
884 return -3;
810 }
885 }
811 self->ntrev = rev;
886 self->ntrev = rev;
812 }
887 }
813
888
814 return nt_find(self, node, nodelen, 1);
889 return nt_find(self, node, nodelen, 1);
815 }
890 }
816
891
817 static PyObject *index_partialmatch(indexObject *self, PyObject *args)
892 static PyObject *index_partialmatch(indexObject *self, PyObject *args)
818 {
893 {
819 const char *fullnode;
894 const char *fullnode;
820 int nodelen;
895 int nodelen;
821 char *node;
896 char *node;
822 int rev, i;
897 int rev, i;
823
898
824 if (!PyArg_ParseTuple(args, "s#", &node, &nodelen))
899 if (!PyArg_ParseTuple(args, "s#", &node, &nodelen))
825 return NULL;
900 return NULL;
826
901
827 if (nodelen < 4) {
902 if (nodelen < 4) {
828 PyErr_SetString(PyExc_ValueError, "key too short");
903 PyErr_SetString(PyExc_ValueError, "key too short");
829 return NULL;
904 return NULL;
830 }
905 }
831
906
832 if (nodelen > 40)
907 if (nodelen > 40)
833 nodelen = 40;
908 nodelen = 40;
834
909
835 for (i = 0; i < nodelen; i++)
910 for (i = 0; i < nodelen; i++)
836 hexdigit(node, i);
911 hexdigit(node, i);
837 if (PyErr_Occurred()) {
912 if (PyErr_Occurred()) {
838 /* input contains non-hex characters */
913 /* input contains non-hex characters */
839 PyErr_Clear();
914 PyErr_Clear();
840 Py_RETURN_NONE;
915 Py_RETURN_NONE;
841 }
916 }
842
917
843 rev = nt_partialmatch(self, node, nodelen);
918 rev = nt_partialmatch(self, node, nodelen);
844
919
845 switch (rev) {
920 switch (rev) {
846 case -4:
921 case -4:
847 raise_revlog_error();
922 raise_revlog_error();
848 case -3:
923 case -3:
849 return NULL;
924 return NULL;
850 case -2:
925 case -2:
851 Py_RETURN_NONE;
926 Py_RETURN_NONE;
852 case -1:
927 case -1:
853 return PyString_FromStringAndSize(nullid, 20);
928 return PyString_FromStringAndSize(nullid, 20);
854 }
929 }
855
930
856 fullnode = index_node(self, rev);
931 fullnode = index_node(self, rev);
857 if (fullnode == NULL) {
932 if (fullnode == NULL) {
858 PyErr_Format(PyExc_IndexError,
933 PyErr_Format(PyExc_IndexError,
859 "could not access rev %d", rev);
934 "could not access rev %d", rev);
860 return NULL;
935 return NULL;
861 }
936 }
862 return PyString_FromStringAndSize(fullnode, 20);
937 return PyString_FromStringAndSize(fullnode, 20);
863 }
938 }
864
939
865 static PyObject *index_m_get(indexObject *self, PyObject *args)
940 static PyObject *index_m_get(indexObject *self, PyObject *args)
866 {
941 {
867 Py_ssize_t nodelen;
942 Py_ssize_t nodelen;
868 PyObject *val;
943 PyObject *val;
869 char *node;
944 char *node;
870 int rev;
945 int rev;
871
946
872 if (!PyArg_ParseTuple(args, "O", &val))
947 if (!PyArg_ParseTuple(args, "O", &val))
873 return NULL;
948 return NULL;
874 if (node_check(val, &node, &nodelen) == -1)
949 if (node_check(val, &node, &nodelen) == -1)
875 return NULL;
950 return NULL;
876 rev = index_find_node(self, node, nodelen);
951 rev = index_find_node(self, node, nodelen);
877 if (rev == -3)
952 if (rev == -3)
878 return NULL;
953 return NULL;
879 if (rev == -2)
954 if (rev == -2)
880 Py_RETURN_NONE;
955 Py_RETURN_NONE;
881 return PyInt_FromLong(rev);
956 return PyInt_FromLong(rev);
882 }
957 }
883
958
884 static int index_contains(indexObject *self, PyObject *value)
959 static int index_contains(indexObject *self, PyObject *value)
885 {
960 {
886 char *node;
961 char *node;
887 Py_ssize_t nodelen;
962 Py_ssize_t nodelen;
888
963
889 if (PyInt_Check(value)) {
964 if (PyInt_Check(value)) {
890 long rev = PyInt_AS_LONG(value);
965 long rev = PyInt_AS_LONG(value);
891 return rev >= -1 && rev < index_length(self);
966 return rev >= -1 && rev < index_length(self);
892 }
967 }
893
968
894 if (node_check(value, &node, &nodelen) == -1)
969 if (node_check(value, &node, &nodelen) == -1)
895 return -1;
970 return -1;
896
971
897 switch (index_find_node(self, node, nodelen)) {
972 switch (index_find_node(self, node, nodelen)) {
898 case -3:
973 case -3:
899 return -1;
974 return -1;
900 case -2:
975 case -2:
901 return 0;
976 return 0;
902 default:
977 default:
903 return 1;
978 return 1;
904 }
979 }
905 }
980 }
906
981
907 /*
982 /*
908 * Invalidate any trie entries introduced by added revs.
983 * Invalidate any trie entries introduced by added revs.
909 */
984 */
910 static void nt_invalidate_added(indexObject *self, Py_ssize_t start)
985 static void nt_invalidate_added(indexObject *self, Py_ssize_t start)
911 {
986 {
912 Py_ssize_t i, len = PyList_GET_SIZE(self->added);
987 Py_ssize_t i, len = PyList_GET_SIZE(self->added);
913
988
914 for (i = start; i < len; i++) {
989 for (i = start; i < len; i++) {
915 PyObject *tuple = PyList_GET_ITEM(self->added, i);
990 PyObject *tuple = PyList_GET_ITEM(self->added, i);
916 PyObject *node = PyTuple_GET_ITEM(tuple, 7);
991 PyObject *node = PyTuple_GET_ITEM(tuple, 7);
917
992
918 nt_insert(self, PyString_AS_STRING(node), -1);
993 nt_insert(self, PyString_AS_STRING(node), -1);
919 }
994 }
920
995
921 if (start == 0)
996 if (start == 0)
922 Py_CLEAR(self->added);
997 Py_CLEAR(self->added);
923 }
998 }
924
999
925 /*
1000 /*
926 * Delete a numeric range of revs, which must be at the end of the
1001 * Delete a numeric range of revs, which must be at the end of the
927 * range, but exclude the sentinel nullid entry.
1002 * range, but exclude the sentinel nullid entry.
928 */
1003 */
929 static int index_slice_del(indexObject *self, PyObject *item)
1004 static int index_slice_del(indexObject *self, PyObject *item)
930 {
1005 {
931 Py_ssize_t start, stop, step, slicelength;
1006 Py_ssize_t start, stop, step, slicelength;
932 Py_ssize_t length = index_length(self);
1007 Py_ssize_t length = index_length(self);
933 int ret = 0;
1008 int ret = 0;
934
1009
935 if (PySlice_GetIndicesEx((PySliceObject*)item, length,
1010 if (PySlice_GetIndicesEx((PySliceObject*)item, length,
936 &start, &stop, &step, &slicelength) < 0)
1011 &start, &stop, &step, &slicelength) < 0)
937 return -1;
1012 return -1;
938
1013
939 if (slicelength <= 0)
1014 if (slicelength <= 0)
940 return 0;
1015 return 0;
941
1016
942 if ((step < 0 && start < stop) || (step > 0 && start > stop))
1017 if ((step < 0 && start < stop) || (step > 0 && start > stop))
943 stop = start;
1018 stop = start;
944
1019
945 if (step < 0) {
1020 if (step < 0) {
946 stop = start + 1;
1021 stop = start + 1;
947 start = stop + step*(slicelength - 1) - 1;
1022 start = stop + step*(slicelength - 1) - 1;
948 step = -step;
1023 step = -step;
949 }
1024 }
950
1025
951 if (step != 1) {
1026 if (step != 1) {
952 PyErr_SetString(PyExc_ValueError,
1027 PyErr_SetString(PyExc_ValueError,
953 "revlog index delete requires step size of 1");
1028 "revlog index delete requires step size of 1");
954 return -1;
1029 return -1;
955 }
1030 }
956
1031
957 if (stop != length - 1) {
1032 if (stop != length - 1) {
958 PyErr_SetString(PyExc_IndexError,
1033 PyErr_SetString(PyExc_IndexError,
959 "revlog index deletion indices are invalid");
1034 "revlog index deletion indices are invalid");
960 return -1;
1035 return -1;
961 }
1036 }
962
1037
963 if (start < self->length - 1) {
1038 if (start < self->length - 1) {
964 if (self->nt) {
1039 if (self->nt) {
965 Py_ssize_t i;
1040 Py_ssize_t i;
966
1041
967 for (i = start + 1; i < self->length - 1; i++) {
1042 for (i = start + 1; i < self->length - 1; i++) {
968 const char *node = index_node(self, i);
1043 const char *node = index_node(self, i);
969
1044
970 if (node)
1045 if (node)
971 nt_insert(self, node, -1);
1046 nt_insert(self, node, -1);
972 }
1047 }
973 if (self->added)
1048 if (self->added)
974 nt_invalidate_added(self, 0);
1049 nt_invalidate_added(self, 0);
975 if (self->ntrev > start)
1050 if (self->ntrev > start)
976 self->ntrev = (int)start;
1051 self->ntrev = (int)start;
977 }
1052 }
978 self->length = start + 1;
1053 self->length = start + 1;
979 if (start < self->raw_length)
1054 if (start < self->raw_length)
980 self->raw_length = start;
1055 self->raw_length = start;
981 goto done;
1056 goto done;
982 }
1057 }
983
1058
984 if (self->nt) {
1059 if (self->nt) {
985 nt_invalidate_added(self, start - self->length + 1);
1060 nt_invalidate_added(self, start - self->length + 1);
986 if (self->ntrev > start)
1061 if (self->ntrev > start)
987 self->ntrev = (int)start;
1062 self->ntrev = (int)start;
988 }
1063 }
989 if (self->added)
1064 if (self->added)
990 ret = PyList_SetSlice(self->added, start - self->length + 1,
1065 ret = PyList_SetSlice(self->added, start - self->length + 1,
991 PyList_GET_SIZE(self->added), NULL);
1066 PyList_GET_SIZE(self->added), NULL);
992 done:
1067 done:
993 return ret;
1068 return ret;
994 }
1069 }
995
1070
996 /*
1071 /*
997 * Supported ops:
1072 * Supported ops:
998 *
1073 *
999 * slice deletion
1074 * slice deletion
1000 * string assignment (extend node->rev mapping)
1075 * string assignment (extend node->rev mapping)
1001 * string deletion (shrink node->rev mapping)
1076 * string deletion (shrink node->rev mapping)
1002 */
1077 */
1003 static int index_assign_subscript(indexObject *self, PyObject *item,
1078 static int index_assign_subscript(indexObject *self, PyObject *item,
1004 PyObject *value)
1079 PyObject *value)
1005 {
1080 {
1006 char *node;
1081 char *node;
1007 Py_ssize_t nodelen;
1082 Py_ssize_t nodelen;
1008 long rev;
1083 long rev;
1009
1084
1010 if (PySlice_Check(item) && value == NULL)
1085 if (PySlice_Check(item) && value == NULL)
1011 return index_slice_del(self, item);
1086 return index_slice_del(self, item);
1012
1087
1013 if (node_check(item, &node, &nodelen) == -1)
1088 if (node_check(item, &node, &nodelen) == -1)
1014 return -1;
1089 return -1;
1015
1090
1016 if (value == NULL)
1091 if (value == NULL)
1017 return self->nt ? nt_insert(self, node, -1) : 0;
1092 return self->nt ? nt_insert(self, node, -1) : 0;
1018 rev = PyInt_AsLong(value);
1093 rev = PyInt_AsLong(value);
1019 if (rev > INT_MAX || rev < 0) {
1094 if (rev > INT_MAX || rev < 0) {
1020 if (!PyErr_Occurred())
1095 if (!PyErr_Occurred())
1021 PyErr_SetString(PyExc_ValueError, "rev out of range");
1096 PyErr_SetString(PyExc_ValueError, "rev out of range");
1022 return -1;
1097 return -1;
1023 }
1098 }
1024 return nt_insert(self, node, (int)rev);
1099 return nt_insert(self, node, (int)rev);
1025 }
1100 }
1026
1101
1027 /*
1102 /*
1028 * Find all RevlogNG entries in an index that has inline data. Update
1103 * Find all RevlogNG entries in an index that has inline data. Update
1029 * the optional "offsets" table with those entries.
1104 * the optional "offsets" table with those entries.
1030 */
1105 */
1031 static long inline_scan(indexObject *self, const char **offsets)
1106 static long inline_scan(indexObject *self, const char **offsets)
1032 {
1107 {
1033 const char *data = PyString_AS_STRING(self->data);
1108 const char *data = PyString_AS_STRING(self->data);
1034 const char *end = data + PyString_GET_SIZE(self->data);
1109 const char *end = data + PyString_GET_SIZE(self->data);
1035 const long hdrsize = 64;
1110 const long hdrsize = 64;
1036 long incr = hdrsize;
1111 long incr = hdrsize;
1037 Py_ssize_t len = 0;
1112 Py_ssize_t len = 0;
1038
1113
1039 while (data + hdrsize <= end) {
1114 while (data + hdrsize <= end) {
1040 uint32_t comp_len;
1115 uint32_t comp_len;
1041 const char *old_data;
1116 const char *old_data;
1042 /* 3rd element of header is length of compressed inline data */
1117 /* 3rd element of header is length of compressed inline data */
1043 comp_len = getbe32(data + 8);
1118 comp_len = getbe32(data + 8);
1044 incr = hdrsize + comp_len;
1119 incr = hdrsize + comp_len;
1045 if (incr < hdrsize)
1120 if (incr < hdrsize)
1046 break;
1121 break;
1047 if (offsets)
1122 if (offsets)
1048 offsets[len] = data;
1123 offsets[len] = data;
1049 len++;
1124 len++;
1050 old_data = data;
1125 old_data = data;
1051 data += incr;
1126 data += incr;
1052 if (data <= old_data)
1127 if (data <= old_data)
1053 break;
1128 break;
1054 }
1129 }
1055
1130
1056 if (data != end && data + hdrsize != end) {
1131 if (data != end && data + hdrsize != end) {
1057 if (!PyErr_Occurred())
1132 if (!PyErr_Occurred())
1058 PyErr_SetString(PyExc_ValueError, "corrupt index file");
1133 PyErr_SetString(PyExc_ValueError, "corrupt index file");
1059 return -1;
1134 return -1;
1060 }
1135 }
1061
1136
1062 return len;
1137 return len;
1063 }
1138 }
1064
1139
1065 static int index_init(indexObject *self, PyObject *args)
1140 static int index_init(indexObject *self, PyObject *args)
1066 {
1141 {
1067 PyObject *data_obj, *inlined_obj;
1142 PyObject *data_obj, *inlined_obj;
1068 Py_ssize_t size;
1143 Py_ssize_t size;
1069
1144
1070 if (!PyArg_ParseTuple(args, "OO", &data_obj, &inlined_obj))
1145 if (!PyArg_ParseTuple(args, "OO", &data_obj, &inlined_obj))
1071 return -1;
1146 return -1;
1072 if (!PyString_Check(data_obj)) {
1147 if (!PyString_Check(data_obj)) {
1073 PyErr_SetString(PyExc_TypeError, "data is not a string");
1148 PyErr_SetString(PyExc_TypeError, "data is not a string");
1074 return -1;
1149 return -1;
1075 }
1150 }
1076 size = PyString_GET_SIZE(data_obj);
1151 size = PyString_GET_SIZE(data_obj);
1077
1152
1078 self->inlined = inlined_obj && PyObject_IsTrue(inlined_obj);
1153 self->inlined = inlined_obj && PyObject_IsTrue(inlined_obj);
1079 self->data = data_obj;
1154 self->data = data_obj;
1080 self->cache = NULL;
1155 self->cache = NULL;
1081
1156
1082 self->added = NULL;
1157 self->added = NULL;
1083 self->offsets = NULL;
1158 self->offsets = NULL;
1084 self->nt = NULL;
1159 self->nt = NULL;
1085 self->ntlength = self->ntcapacity = 0;
1160 self->ntlength = self->ntcapacity = 0;
1086 self->ntdepth = self->ntsplits = 0;
1161 self->ntdepth = self->ntsplits = 0;
1087 self->ntlookups = self->ntmisses = 0;
1162 self->ntlookups = self->ntmisses = 0;
1088 self->ntrev = -1;
1163 self->ntrev = -1;
1089 Py_INCREF(self->data);
1164 Py_INCREF(self->data);
1090
1165
1091 if (self->inlined) {
1166 if (self->inlined) {
1092 long len = inline_scan(self, NULL);
1167 long len = inline_scan(self, NULL);
1093 if (len == -1)
1168 if (len == -1)
1094 goto bail;
1169 goto bail;
1095 self->raw_length = len;
1170 self->raw_length = len;
1096 self->length = len + 1;
1171 self->length = len + 1;
1097 } else {
1172 } else {
1098 if (size % 64) {
1173 if (size % 64) {
1099 PyErr_SetString(PyExc_ValueError, "corrupt index file");
1174 PyErr_SetString(PyExc_ValueError, "corrupt index file");
1100 goto bail;
1175 goto bail;
1101 }
1176 }
1102 self->raw_length = size / 64;
1177 self->raw_length = size / 64;
1103 self->length = self->raw_length + 1;
1178 self->length = self->raw_length + 1;
1104 }
1179 }
1105
1180
1106 return 0;
1181 return 0;
1107 bail:
1182 bail:
1108 return -1;
1183 return -1;
1109 }
1184 }
1110
1185
1111 static PyObject *index_nodemap(indexObject *self)
1186 static PyObject *index_nodemap(indexObject *self)
1112 {
1187 {
1113 Py_INCREF(self);
1188 Py_INCREF(self);
1114 return (PyObject *)self;
1189 return (PyObject *)self;
1115 }
1190 }
1116
1191
1117 static void index_dealloc(indexObject *self)
1192 static void index_dealloc(indexObject *self)
1118 {
1193 {
1119 _index_clearcaches(self);
1194 _index_clearcaches(self);
1120 Py_DECREF(self->data);
1195 Py_DECREF(self->data);
1121 Py_XDECREF(self->added);
1196 Py_XDECREF(self->added);
1122 PyObject_Del(self);
1197 PyObject_Del(self);
1123 }
1198 }
1124
1199
1125 static PySequenceMethods index_sequence_methods = {
1200 static PySequenceMethods index_sequence_methods = {
1126 (lenfunc)index_length, /* sq_length */
1201 (lenfunc)index_length, /* sq_length */
1127 0, /* sq_concat */
1202 0, /* sq_concat */
1128 0, /* sq_repeat */
1203 0, /* sq_repeat */
1129 (ssizeargfunc)index_get, /* sq_item */
1204 (ssizeargfunc)index_get, /* sq_item */
1130 0, /* sq_slice */
1205 0, /* sq_slice */
1131 0, /* sq_ass_item */
1206 0, /* sq_ass_item */
1132 0, /* sq_ass_slice */
1207 0, /* sq_ass_slice */
1133 (objobjproc)index_contains, /* sq_contains */
1208 (objobjproc)index_contains, /* sq_contains */
1134 };
1209 };
1135
1210
1136 static PyMappingMethods index_mapping_methods = {
1211 static PyMappingMethods index_mapping_methods = {
1137 (lenfunc)index_length, /* mp_length */
1212 (lenfunc)index_length, /* mp_length */
1138 (binaryfunc)index_getitem, /* mp_subscript */
1213 (binaryfunc)index_getitem, /* mp_subscript */
1139 (objobjargproc)index_assign_subscript, /* mp_ass_subscript */
1214 (objobjargproc)index_assign_subscript, /* mp_ass_subscript */
1140 };
1215 };
1141
1216
1142 static PyMethodDef index_methods[] = {
1217 static PyMethodDef index_methods[] = {
1143 {"clearcaches", (PyCFunction)index_clearcaches, METH_NOARGS,
1218 {"clearcaches", (PyCFunction)index_clearcaches, METH_NOARGS,
1144 "clear the index caches"},
1219 "clear the index caches"},
1145 {"get", (PyCFunction)index_m_get, METH_VARARGS,
1220 {"get", (PyCFunction)index_m_get, METH_VARARGS,
1146 "get an index entry"},
1221 "get an index entry"},
1222 {"headrevs", (PyCFunction)index_headrevs, METH_NOARGS,
1223 "get head revisions"},
1147 {"insert", (PyCFunction)index_insert, METH_VARARGS,
1224 {"insert", (PyCFunction)index_insert, METH_VARARGS,
1148 "insert an index entry"},
1225 "insert an index entry"},
1149 {"partialmatch", (PyCFunction)index_partialmatch, METH_VARARGS,
1226 {"partialmatch", (PyCFunction)index_partialmatch, METH_VARARGS,
1150 "match a potentially ambiguous node ID"},
1227 "match a potentially ambiguous node ID"},
1151 {"stats", (PyCFunction)index_stats, METH_NOARGS,
1228 {"stats", (PyCFunction)index_stats, METH_NOARGS,
1152 "stats for the index"},
1229 "stats for the index"},
1153 {NULL} /* Sentinel */
1230 {NULL} /* Sentinel */
1154 };
1231 };
1155
1232
1156 static PyGetSetDef index_getset[] = {
1233 static PyGetSetDef index_getset[] = {
1157 {"nodemap", (getter)index_nodemap, NULL, "nodemap", NULL},
1234 {"nodemap", (getter)index_nodemap, NULL, "nodemap", NULL},
1158 {NULL} /* Sentinel */
1235 {NULL} /* Sentinel */
1159 };
1236 };
1160
1237
1161 static PyTypeObject indexType = {
1238 static PyTypeObject indexType = {
1162 PyObject_HEAD_INIT(NULL)
1239 PyObject_HEAD_INIT(NULL)
1163 0, /* ob_size */
1240 0, /* ob_size */
1164 "parsers.index", /* tp_name */
1241 "parsers.index", /* tp_name */
1165 sizeof(indexObject), /* tp_basicsize */
1242 sizeof(indexObject), /* tp_basicsize */
1166 0, /* tp_itemsize */
1243 0, /* tp_itemsize */
1167 (destructor)index_dealloc, /* tp_dealloc */
1244 (destructor)index_dealloc, /* tp_dealloc */
1168 0, /* tp_print */
1245 0, /* tp_print */
1169 0, /* tp_getattr */
1246 0, /* tp_getattr */
1170 0, /* tp_setattr */
1247 0, /* tp_setattr */
1171 0, /* tp_compare */
1248 0, /* tp_compare */
1172 0, /* tp_repr */
1249 0, /* tp_repr */
1173 0, /* tp_as_number */
1250 0, /* tp_as_number */
1174 &index_sequence_methods, /* tp_as_sequence */
1251 &index_sequence_methods, /* tp_as_sequence */
1175 &index_mapping_methods, /* tp_as_mapping */
1252 &index_mapping_methods, /* tp_as_mapping */
1176 0, /* tp_hash */
1253 0, /* tp_hash */
1177 0, /* tp_call */
1254 0, /* tp_call */
1178 0, /* tp_str */
1255 0, /* tp_str */
1179 0, /* tp_getattro */
1256 0, /* tp_getattro */
1180 0, /* tp_setattro */
1257 0, /* tp_setattro */
1181 0, /* tp_as_buffer */
1258 0, /* tp_as_buffer */
1182 Py_TPFLAGS_DEFAULT, /* tp_flags */
1259 Py_TPFLAGS_DEFAULT, /* tp_flags */
1183 "revlog index", /* tp_doc */
1260 "revlog index", /* tp_doc */
1184 0, /* tp_traverse */
1261 0, /* tp_traverse */
1185 0, /* tp_clear */
1262 0, /* tp_clear */
1186 0, /* tp_richcompare */
1263 0, /* tp_richcompare */
1187 0, /* tp_weaklistoffset */
1264 0, /* tp_weaklistoffset */
1188 0, /* tp_iter */
1265 0, /* tp_iter */
1189 0, /* tp_iternext */
1266 0, /* tp_iternext */
1190 index_methods, /* tp_methods */
1267 index_methods, /* tp_methods */
1191 0, /* tp_members */
1268 0, /* tp_members */
1192 index_getset, /* tp_getset */
1269 index_getset, /* tp_getset */
1193 0, /* tp_base */
1270 0, /* tp_base */
1194 0, /* tp_dict */
1271 0, /* tp_dict */
1195 0, /* tp_descr_get */
1272 0, /* tp_descr_get */
1196 0, /* tp_descr_set */
1273 0, /* tp_descr_set */
1197 0, /* tp_dictoffset */
1274 0, /* tp_dictoffset */
1198 (initproc)index_init, /* tp_init */
1275 (initproc)index_init, /* tp_init */
1199 0, /* tp_alloc */
1276 0, /* tp_alloc */
1200 };
1277 };
1201
1278
1202 /*
1279 /*
1203 * returns a tuple of the form (index, index, cache) with elements as
1280 * returns a tuple of the form (index, index, cache) with elements as
1204 * follows:
1281 * follows:
1205 *
1282 *
1206 * index: an index object that lazily parses RevlogNG records
1283 * index: an index object that lazily parses RevlogNG records
1207 * cache: if data is inlined, a tuple (index_file_content, 0), else None
1284 * cache: if data is inlined, a tuple (index_file_content, 0), else None
1208 *
1285 *
1209 * added complications are for backwards compatibility
1286 * added complications are for backwards compatibility
1210 */
1287 */
1211 static PyObject *parse_index2(PyObject *self, PyObject *args)
1288 static PyObject *parse_index2(PyObject *self, PyObject *args)
1212 {
1289 {
1213 PyObject *tuple = NULL, *cache = NULL;
1290 PyObject *tuple = NULL, *cache = NULL;
1214 indexObject *idx;
1291 indexObject *idx;
1215 int ret;
1292 int ret;
1216
1293
1217 idx = PyObject_New(indexObject, &indexType);
1294 idx = PyObject_New(indexObject, &indexType);
1218 if (idx == NULL)
1295 if (idx == NULL)
1219 goto bail;
1296 goto bail;
1220
1297
1221 ret = index_init(idx, args);
1298 ret = index_init(idx, args);
1222 if (ret == -1)
1299 if (ret == -1)
1223 goto bail;
1300 goto bail;
1224
1301
1225 if (idx->inlined) {
1302 if (idx->inlined) {
1226 cache = Py_BuildValue("iO", 0, idx->data);
1303 cache = Py_BuildValue("iO", 0, idx->data);
1227 if (cache == NULL)
1304 if (cache == NULL)
1228 goto bail;
1305 goto bail;
1229 } else {
1306 } else {
1230 cache = Py_None;
1307 cache = Py_None;
1231 Py_INCREF(cache);
1308 Py_INCREF(cache);
1232 }
1309 }
1233
1310
1234 tuple = Py_BuildValue("NN", idx, cache);
1311 tuple = Py_BuildValue("NN", idx, cache);
1235 if (!tuple)
1312 if (!tuple)
1236 goto bail;
1313 goto bail;
1237 return tuple;
1314 return tuple;
1238
1315
1239 bail:
1316 bail:
1240 Py_XDECREF(idx);
1317 Py_XDECREF(idx);
1241 Py_XDECREF(cache);
1318 Py_XDECREF(cache);
1242 Py_XDECREF(tuple);
1319 Py_XDECREF(tuple);
1243 return NULL;
1320 return NULL;
1244 }
1321 }
1245
1322
1246 static char parsers_doc[] = "Efficient content parsing.";
1323 static char parsers_doc[] = "Efficient content parsing.";
1247
1324
1248 static PyMethodDef methods[] = {
1325 static PyMethodDef methods[] = {
1249 {"parse_manifest", parse_manifest, METH_VARARGS, "parse a manifest\n"},
1326 {"parse_manifest", parse_manifest, METH_VARARGS, "parse a manifest\n"},
1250 {"parse_dirstate", parse_dirstate, METH_VARARGS, "parse a dirstate\n"},
1327 {"parse_dirstate", parse_dirstate, METH_VARARGS, "parse a dirstate\n"},
1251 {"parse_index2", parse_index2, METH_VARARGS, "parse a revlog index\n"},
1328 {"parse_index2", parse_index2, METH_VARARGS, "parse a revlog index\n"},
1252 {NULL, NULL}
1329 {NULL, NULL}
1253 };
1330 };
1254
1331
1255 static void module_init(PyObject *mod)
1332 static void module_init(PyObject *mod)
1256 {
1333 {
1257 indexType.tp_new = PyType_GenericNew;
1334 indexType.tp_new = PyType_GenericNew;
1258 if (PyType_Ready(&indexType) < 0)
1335 if (PyType_Ready(&indexType) < 0)
1259 return;
1336 return;
1260 Py_INCREF(&indexType);
1337 Py_INCREF(&indexType);
1261
1338
1262 PyModule_AddObject(mod, "index", (PyObject *)&indexType);
1339 PyModule_AddObject(mod, "index", (PyObject *)&indexType);
1263
1340
1264 nullentry = Py_BuildValue("iiiiiiis#", 0, 0, 0,
1341 nullentry = Py_BuildValue("iiiiiiis#", 0, 0, 0,
1265 -1, -1, -1, -1, nullid, 20);
1342 -1, -1, -1, -1, nullid, 20);
1266 if (nullentry)
1343 if (nullentry)
1267 PyObject_GC_UnTrack(nullentry);
1344 PyObject_GC_UnTrack(nullentry);
1268 }
1345 }
1269
1346
1270 #ifdef IS_PY3K
1347 #ifdef IS_PY3K
1271 static struct PyModuleDef parsers_module = {
1348 static struct PyModuleDef parsers_module = {
1272 PyModuleDef_HEAD_INIT,
1349 PyModuleDef_HEAD_INIT,
1273 "parsers",
1350 "parsers",
1274 parsers_doc,
1351 parsers_doc,
1275 -1,
1352 -1,
1276 methods
1353 methods
1277 };
1354 };
1278
1355
1279 PyMODINIT_FUNC PyInit_parsers(void)
1356 PyMODINIT_FUNC PyInit_parsers(void)
1280 {
1357 {
1281 PyObject *mod = PyModule_Create(&parsers_module);
1358 PyObject *mod = PyModule_Create(&parsers_module);
1282 module_init(mod);
1359 module_init(mod);
1283 return mod;
1360 return mod;
1284 }
1361 }
1285 #else
1362 #else
1286 PyMODINIT_FUNC initparsers(void)
1363 PyMODINIT_FUNC initparsers(void)
1287 {
1364 {
1288 PyObject *mod = Py_InitModule3("parsers", methods, parsers_doc);
1365 PyObject *mod = Py_InitModule3("parsers", methods, parsers_doc);
1289 module_init(mod);
1366 module_init(mod);
1290 }
1367 }
1291 #endif
1368 #endif
@@ -1,1317 +1,1321 b''
1 # revlog.py - storage back-end for mercurial
1 # revlog.py - storage back-end for mercurial
2 #
2 #
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 """Storage back-end for Mercurial.
8 """Storage back-end for Mercurial.
9
9
10 This provides efficient delta storage with O(1) retrieve and append
10 This provides efficient delta storage with O(1) retrieve and append
11 and O(changes) merge between branches.
11 and O(changes) merge between branches.
12 """
12 """
13
13
14 # import stuff from node for others to import from revlog
14 # import stuff from node for others to import from revlog
15 from node import bin, hex, nullid, nullrev
15 from node import bin, hex, nullid, nullrev
16 from i18n import _
16 from i18n import _
17 import ancestor, mdiff, parsers, error, util, dagutil
17 import ancestor, mdiff, parsers, error, util, dagutil
18 import struct, zlib, errno
18 import struct, zlib, errno
19
19
20 _pack = struct.pack
20 _pack = struct.pack
21 _unpack = struct.unpack
21 _unpack = struct.unpack
22 _compress = zlib.compress
22 _compress = zlib.compress
23 _decompress = zlib.decompress
23 _decompress = zlib.decompress
24 _sha = util.sha1
24 _sha = util.sha1
25
25
26 # revlog header flags
26 # revlog header flags
27 REVLOGV0 = 0
27 REVLOGV0 = 0
28 REVLOGNG = 1
28 REVLOGNG = 1
29 REVLOGNGINLINEDATA = (1 << 16)
29 REVLOGNGINLINEDATA = (1 << 16)
30 REVLOGGENERALDELTA = (1 << 17)
30 REVLOGGENERALDELTA = (1 << 17)
31 REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
31 REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
32 REVLOG_DEFAULT_FORMAT = REVLOGNG
32 REVLOG_DEFAULT_FORMAT = REVLOGNG
33 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
33 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
34 REVLOGNG_FLAGS = REVLOGNGINLINEDATA | REVLOGGENERALDELTA
34 REVLOGNG_FLAGS = REVLOGNGINLINEDATA | REVLOGGENERALDELTA
35
35
36 # revlog index flags
36 # revlog index flags
37 REVIDX_KNOWN_FLAGS = 0
37 REVIDX_KNOWN_FLAGS = 0
38
38
39 # max size of revlog with inline data
39 # max size of revlog with inline data
40 _maxinline = 131072
40 _maxinline = 131072
41 _chunksize = 1048576
41 _chunksize = 1048576
42
42
43 RevlogError = error.RevlogError
43 RevlogError = error.RevlogError
44 LookupError = error.LookupError
44 LookupError = error.LookupError
45
45
46 def getoffset(q):
46 def getoffset(q):
47 return int(q >> 16)
47 return int(q >> 16)
48
48
49 def gettype(q):
49 def gettype(q):
50 return int(q & 0xFFFF)
50 return int(q & 0xFFFF)
51
51
52 def offset_type(offset, type):
52 def offset_type(offset, type):
53 return long(long(offset) << 16 | type)
53 return long(long(offset) << 16 | type)
54
54
55 nullhash = _sha(nullid)
55 nullhash = _sha(nullid)
56
56
57 def hash(text, p1, p2):
57 def hash(text, p1, p2):
58 """generate a hash from the given text and its parent hashes
58 """generate a hash from the given text and its parent hashes
59
59
60 This hash combines both the current file contents and its history
60 This hash combines both the current file contents and its history
61 in a manner that makes it easy to distinguish nodes with the same
61 in a manner that makes it easy to distinguish nodes with the same
62 content in the revision graph.
62 content in the revision graph.
63 """
63 """
64 # As of now, if one of the parent node is null, p2 is null
64 # As of now, if one of the parent node is null, p2 is null
65 if p2 == nullid:
65 if p2 == nullid:
66 # deep copy of a hash is faster than creating one
66 # deep copy of a hash is faster than creating one
67 s = nullhash.copy()
67 s = nullhash.copy()
68 s.update(p1)
68 s.update(p1)
69 else:
69 else:
70 # none of the parent nodes are nullid
70 # none of the parent nodes are nullid
71 l = [p1, p2]
71 l = [p1, p2]
72 l.sort()
72 l.sort()
73 s = _sha(l[0])
73 s = _sha(l[0])
74 s.update(l[1])
74 s.update(l[1])
75 s.update(text)
75 s.update(text)
76 return s.digest()
76 return s.digest()
77
77
78 def compress(text):
78 def compress(text):
79 """ generate a possibly-compressed representation of text """
79 """ generate a possibly-compressed representation of text """
80 if not text:
80 if not text:
81 return ("", text)
81 return ("", text)
82 l = len(text)
82 l = len(text)
83 bin = None
83 bin = None
84 if l < 44:
84 if l < 44:
85 pass
85 pass
86 elif l > 1000000:
86 elif l > 1000000:
87 # zlib makes an internal copy, thus doubling memory usage for
87 # zlib makes an internal copy, thus doubling memory usage for
88 # large files, so lets do this in pieces
88 # large files, so lets do this in pieces
89 z = zlib.compressobj()
89 z = zlib.compressobj()
90 p = []
90 p = []
91 pos = 0
91 pos = 0
92 while pos < l:
92 while pos < l:
93 pos2 = pos + 2**20
93 pos2 = pos + 2**20
94 p.append(z.compress(text[pos:pos2]))
94 p.append(z.compress(text[pos:pos2]))
95 pos = pos2
95 pos = pos2
96 p.append(z.flush())
96 p.append(z.flush())
97 if sum(map(len, p)) < l:
97 if sum(map(len, p)) < l:
98 bin = "".join(p)
98 bin = "".join(p)
99 else:
99 else:
100 bin = _compress(text)
100 bin = _compress(text)
101 if bin is None or len(bin) > l:
101 if bin is None or len(bin) > l:
102 if text[0] == '\0':
102 if text[0] == '\0':
103 return ("", text)
103 return ("", text)
104 return ('u', text)
104 return ('u', text)
105 return ("", bin)
105 return ("", bin)
106
106
107 def decompress(bin):
107 def decompress(bin):
108 """ decompress the given input """
108 """ decompress the given input """
109 if not bin:
109 if not bin:
110 return bin
110 return bin
111 t = bin[0]
111 t = bin[0]
112 if t == '\0':
112 if t == '\0':
113 return bin
113 return bin
114 if t == 'x':
114 if t == 'x':
115 return _decompress(bin)
115 return _decompress(bin)
116 if t == 'u':
116 if t == 'u':
117 return bin[1:]
117 return bin[1:]
118 raise RevlogError(_("unknown compression type %r") % t)
118 raise RevlogError(_("unknown compression type %r") % t)
119
119
120 indexformatv0 = ">4l20s20s20s"
120 indexformatv0 = ">4l20s20s20s"
121 v0shaoffset = 56
121 v0shaoffset = 56
122
122
123 class revlogoldio(object):
123 class revlogoldio(object):
124 def __init__(self):
124 def __init__(self):
125 self.size = struct.calcsize(indexformatv0)
125 self.size = struct.calcsize(indexformatv0)
126
126
127 def parseindex(self, data, inline):
127 def parseindex(self, data, inline):
128 s = self.size
128 s = self.size
129 index = []
129 index = []
130 nodemap = {nullid: nullrev}
130 nodemap = {nullid: nullrev}
131 n = off = 0
131 n = off = 0
132 l = len(data)
132 l = len(data)
133 while off + s <= l:
133 while off + s <= l:
134 cur = data[off:off + s]
134 cur = data[off:off + s]
135 off += s
135 off += s
136 e = _unpack(indexformatv0, cur)
136 e = _unpack(indexformatv0, cur)
137 # transform to revlogv1 format
137 # transform to revlogv1 format
138 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
138 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
139 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
139 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
140 index.append(e2)
140 index.append(e2)
141 nodemap[e[6]] = n
141 nodemap[e[6]] = n
142 n += 1
142 n += 1
143
143
144 # add the magic null revision at -1
144 # add the magic null revision at -1
145 index.append((0, 0, 0, -1, -1, -1, -1, nullid))
145 index.append((0, 0, 0, -1, -1, -1, -1, nullid))
146
146
147 return index, nodemap, None
147 return index, nodemap, None
148
148
149 def packentry(self, entry, node, version, rev):
149 def packentry(self, entry, node, version, rev):
150 if gettype(entry[0]):
150 if gettype(entry[0]):
151 raise RevlogError(_("index entry flags need RevlogNG"))
151 raise RevlogError(_("index entry flags need RevlogNG"))
152 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
152 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
153 node(entry[5]), node(entry[6]), entry[7])
153 node(entry[5]), node(entry[6]), entry[7])
154 return _pack(indexformatv0, *e2)
154 return _pack(indexformatv0, *e2)
155
155
156 # index ng:
156 # index ng:
157 # 6 bytes: offset
157 # 6 bytes: offset
158 # 2 bytes: flags
158 # 2 bytes: flags
159 # 4 bytes: compressed length
159 # 4 bytes: compressed length
160 # 4 bytes: uncompressed length
160 # 4 bytes: uncompressed length
161 # 4 bytes: base rev
161 # 4 bytes: base rev
162 # 4 bytes: link rev
162 # 4 bytes: link rev
163 # 4 bytes: parent 1 rev
163 # 4 bytes: parent 1 rev
164 # 4 bytes: parent 2 rev
164 # 4 bytes: parent 2 rev
165 # 32 bytes: nodeid
165 # 32 bytes: nodeid
166 indexformatng = ">Qiiiiii20s12x"
166 indexformatng = ">Qiiiiii20s12x"
167 ngshaoffset = 32
167 ngshaoffset = 32
168 versionformat = ">I"
168 versionformat = ">I"
169
169
170 class revlogio(object):
170 class revlogio(object):
171 def __init__(self):
171 def __init__(self):
172 self.size = struct.calcsize(indexformatng)
172 self.size = struct.calcsize(indexformatng)
173
173
174 def parseindex(self, data, inline):
174 def parseindex(self, data, inline):
175 # call the C implementation to parse the index data
175 # call the C implementation to parse the index data
176 index, cache = parsers.parse_index2(data, inline)
176 index, cache = parsers.parse_index2(data, inline)
177 return index, getattr(index, 'nodemap', None), cache
177 return index, getattr(index, 'nodemap', None), cache
178
178
179 def packentry(self, entry, node, version, rev):
179 def packentry(self, entry, node, version, rev):
180 p = _pack(indexformatng, *entry)
180 p = _pack(indexformatng, *entry)
181 if rev == 0:
181 if rev == 0:
182 p = _pack(versionformat, version) + p[4:]
182 p = _pack(versionformat, version) + p[4:]
183 return p
183 return p
184
184
185 class revlog(object):
185 class revlog(object):
186 """
186 """
187 the underlying revision storage object
187 the underlying revision storage object
188
188
189 A revlog consists of two parts, an index and the revision data.
189 A revlog consists of two parts, an index and the revision data.
190
190
191 The index is a file with a fixed record size containing
191 The index is a file with a fixed record size containing
192 information on each revision, including its nodeid (hash), the
192 information on each revision, including its nodeid (hash), the
193 nodeids of its parents, the position and offset of its data within
193 nodeids of its parents, the position and offset of its data within
194 the data file, and the revision it's based on. Finally, each entry
194 the data file, and the revision it's based on. Finally, each entry
195 contains a linkrev entry that can serve as a pointer to external
195 contains a linkrev entry that can serve as a pointer to external
196 data.
196 data.
197
197
198 The revision data itself is a linear collection of data chunks.
198 The revision data itself is a linear collection of data chunks.
199 Each chunk represents a revision and is usually represented as a
199 Each chunk represents a revision and is usually represented as a
200 delta against the previous chunk. To bound lookup time, runs of
200 delta against the previous chunk. To bound lookup time, runs of
201 deltas are limited to about 2 times the length of the original
201 deltas are limited to about 2 times the length of the original
202 version data. This makes retrieval of a version proportional to
202 version data. This makes retrieval of a version proportional to
203 its size, or O(1) relative to the number of revisions.
203 its size, or O(1) relative to the number of revisions.
204
204
205 Both pieces of the revlog are written to in an append-only
205 Both pieces of the revlog are written to in an append-only
206 fashion, which means we never need to rewrite a file to insert or
206 fashion, which means we never need to rewrite a file to insert or
207 remove data, and can use some simple techniques to avoid the need
207 remove data, and can use some simple techniques to avoid the need
208 for locking while reading.
208 for locking while reading.
209 """
209 """
210 def __init__(self, opener, indexfile):
210 def __init__(self, opener, indexfile):
211 """
211 """
212 create a revlog object
212 create a revlog object
213
213
214 opener is a function that abstracts the file opening operation
214 opener is a function that abstracts the file opening operation
215 and can be used to implement COW semantics or the like.
215 and can be used to implement COW semantics or the like.
216 """
216 """
217 self.indexfile = indexfile
217 self.indexfile = indexfile
218 self.datafile = indexfile[:-2] + ".d"
218 self.datafile = indexfile[:-2] + ".d"
219 self.opener = opener
219 self.opener = opener
220 self._cache = None
220 self._cache = None
221 self._basecache = (0, 0)
221 self._basecache = (0, 0)
222 self._chunkcache = (0, '')
222 self._chunkcache = (0, '')
223 self.index = []
223 self.index = []
224 self._pcache = {}
224 self._pcache = {}
225 self._nodecache = {nullid: nullrev}
225 self._nodecache = {nullid: nullrev}
226 self._nodepos = None
226 self._nodepos = None
227
227
228 v = REVLOG_DEFAULT_VERSION
228 v = REVLOG_DEFAULT_VERSION
229 opts = getattr(opener, 'options', None)
229 opts = getattr(opener, 'options', None)
230 if opts is not None:
230 if opts is not None:
231 if 'revlogv1' in opts:
231 if 'revlogv1' in opts:
232 if 'generaldelta' in opts:
232 if 'generaldelta' in opts:
233 v |= REVLOGGENERALDELTA
233 v |= REVLOGGENERALDELTA
234 else:
234 else:
235 v = 0
235 v = 0
236
236
237 i = ''
237 i = ''
238 self._initempty = True
238 self._initempty = True
239 try:
239 try:
240 f = self.opener(self.indexfile)
240 f = self.opener(self.indexfile)
241 i = f.read()
241 i = f.read()
242 f.close()
242 f.close()
243 if len(i) > 0:
243 if len(i) > 0:
244 v = struct.unpack(versionformat, i[:4])[0]
244 v = struct.unpack(versionformat, i[:4])[0]
245 self._initempty = False
245 self._initempty = False
246 except IOError, inst:
246 except IOError, inst:
247 if inst.errno != errno.ENOENT:
247 if inst.errno != errno.ENOENT:
248 raise
248 raise
249
249
250 self.version = v
250 self.version = v
251 self._inline = v & REVLOGNGINLINEDATA
251 self._inline = v & REVLOGNGINLINEDATA
252 self._generaldelta = v & REVLOGGENERALDELTA
252 self._generaldelta = v & REVLOGGENERALDELTA
253 flags = v & ~0xFFFF
253 flags = v & ~0xFFFF
254 fmt = v & 0xFFFF
254 fmt = v & 0xFFFF
255 if fmt == REVLOGV0 and flags:
255 if fmt == REVLOGV0 and flags:
256 raise RevlogError(_("index %s unknown flags %#04x for format v0")
256 raise RevlogError(_("index %s unknown flags %#04x for format v0")
257 % (self.indexfile, flags >> 16))
257 % (self.indexfile, flags >> 16))
258 elif fmt == REVLOGNG and flags & ~REVLOGNG_FLAGS:
258 elif fmt == REVLOGNG and flags & ~REVLOGNG_FLAGS:
259 raise RevlogError(_("index %s unknown flags %#04x for revlogng")
259 raise RevlogError(_("index %s unknown flags %#04x for revlogng")
260 % (self.indexfile, flags >> 16))
260 % (self.indexfile, flags >> 16))
261 elif fmt > REVLOGNG:
261 elif fmt > REVLOGNG:
262 raise RevlogError(_("index %s unknown format %d")
262 raise RevlogError(_("index %s unknown format %d")
263 % (self.indexfile, fmt))
263 % (self.indexfile, fmt))
264
264
265 self._io = revlogio()
265 self._io = revlogio()
266 if self.version == REVLOGV0:
266 if self.version == REVLOGV0:
267 self._io = revlogoldio()
267 self._io = revlogoldio()
268 try:
268 try:
269 d = self._io.parseindex(i, self._inline)
269 d = self._io.parseindex(i, self._inline)
270 except (ValueError, IndexError):
270 except (ValueError, IndexError):
271 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
271 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
272 self.index, nodemap, self._chunkcache = d
272 self.index, nodemap, self._chunkcache = d
273 if nodemap is not None:
273 if nodemap is not None:
274 self.nodemap = self._nodecache = nodemap
274 self.nodemap = self._nodecache = nodemap
275 if not self._chunkcache:
275 if not self._chunkcache:
276 self._chunkclear()
276 self._chunkclear()
277
277
278 def tip(self):
278 def tip(self):
279 return self.node(len(self.index) - 2)
279 return self.node(len(self.index) - 2)
280 def __len__(self):
280 def __len__(self):
281 return len(self.index) - 1
281 return len(self.index) - 1
282 def __iter__(self):
282 def __iter__(self):
283 for i in xrange(len(self)):
283 for i in xrange(len(self)):
284 yield i
284 yield i
285
285
286 @util.propertycache
286 @util.propertycache
287 def nodemap(self):
287 def nodemap(self):
288 self.rev(self.node(0))
288 self.rev(self.node(0))
289 return self._nodecache
289 return self._nodecache
290
290
291 def hasnode(self, node):
291 def hasnode(self, node):
292 try:
292 try:
293 self.rev(node)
293 self.rev(node)
294 return True
294 return True
295 except KeyError:
295 except KeyError:
296 return False
296 return False
297
297
298 def clearcaches(self):
298 def clearcaches(self):
299 try:
299 try:
300 self._nodecache.clearcaches()
300 self._nodecache.clearcaches()
301 except AttributeError:
301 except AttributeError:
302 self._nodecache = {nullid: nullrev}
302 self._nodecache = {nullid: nullrev}
303 self._nodepos = None
303 self._nodepos = None
304
304
305 def rev(self, node):
305 def rev(self, node):
306 try:
306 try:
307 return self._nodecache[node]
307 return self._nodecache[node]
308 except RevlogError:
308 except RevlogError:
309 # parsers.c radix tree lookup failed
309 # parsers.c radix tree lookup failed
310 raise LookupError(node, self.indexfile, _('no node'))
310 raise LookupError(node, self.indexfile, _('no node'))
311 except KeyError:
311 except KeyError:
312 # pure python cache lookup failed
312 # pure python cache lookup failed
313 n = self._nodecache
313 n = self._nodecache
314 i = self.index
314 i = self.index
315 p = self._nodepos
315 p = self._nodepos
316 if p is None:
316 if p is None:
317 p = len(i) - 2
317 p = len(i) - 2
318 for r in xrange(p, -1, -1):
318 for r in xrange(p, -1, -1):
319 v = i[r][7]
319 v = i[r][7]
320 n[v] = r
320 n[v] = r
321 if v == node:
321 if v == node:
322 self._nodepos = r - 1
322 self._nodepos = r - 1
323 return r
323 return r
324 raise LookupError(node, self.indexfile, _('no node'))
324 raise LookupError(node, self.indexfile, _('no node'))
325
325
326 def node(self, rev):
326 def node(self, rev):
327 return self.index[rev][7]
327 return self.index[rev][7]
328 def linkrev(self, rev):
328 def linkrev(self, rev):
329 return self.index[rev][4]
329 return self.index[rev][4]
330 def parents(self, node):
330 def parents(self, node):
331 i = self.index
331 i = self.index
332 d = i[self.rev(node)]
332 d = i[self.rev(node)]
333 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
333 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
334 def parentrevs(self, rev):
334 def parentrevs(self, rev):
335 return self.index[rev][5:7]
335 return self.index[rev][5:7]
336 def start(self, rev):
336 def start(self, rev):
337 return int(self.index[rev][0] >> 16)
337 return int(self.index[rev][0] >> 16)
338 def end(self, rev):
338 def end(self, rev):
339 return self.start(rev) + self.length(rev)
339 return self.start(rev) + self.length(rev)
340 def length(self, rev):
340 def length(self, rev):
341 return self.index[rev][1]
341 return self.index[rev][1]
342 def chainbase(self, rev):
342 def chainbase(self, rev):
343 index = self.index
343 index = self.index
344 base = index[rev][3]
344 base = index[rev][3]
345 while base != rev:
345 while base != rev:
346 rev = base
346 rev = base
347 base = index[rev][3]
347 base = index[rev][3]
348 return base
348 return base
349 def flags(self, rev):
349 def flags(self, rev):
350 return self.index[rev][0] & 0xFFFF
350 return self.index[rev][0] & 0xFFFF
351 def rawsize(self, rev):
351 def rawsize(self, rev):
352 """return the length of the uncompressed text for a given revision"""
352 """return the length of the uncompressed text for a given revision"""
353 l = self.index[rev][2]
353 l = self.index[rev][2]
354 if l >= 0:
354 if l >= 0:
355 return l
355 return l
356
356
357 t = self.revision(self.node(rev))
357 t = self.revision(self.node(rev))
358 return len(t)
358 return len(t)
359 size = rawsize
359 size = rawsize
360
360
361 def reachable(self, node, stop=None):
361 def reachable(self, node, stop=None):
362 """return the set of all nodes ancestral to a given node, including
362 """return the set of all nodes ancestral to a given node, including
363 the node itself, stopping when stop is matched"""
363 the node itself, stopping when stop is matched"""
364 reachable = set((node,))
364 reachable = set((node,))
365 visit = [node]
365 visit = [node]
366 if stop:
366 if stop:
367 stopn = self.rev(stop)
367 stopn = self.rev(stop)
368 else:
368 else:
369 stopn = 0
369 stopn = 0
370 while visit:
370 while visit:
371 n = visit.pop(0)
371 n = visit.pop(0)
372 if n == stop:
372 if n == stop:
373 continue
373 continue
374 if n == nullid:
374 if n == nullid:
375 continue
375 continue
376 for p in self.parents(n):
376 for p in self.parents(n):
377 if self.rev(p) < stopn:
377 if self.rev(p) < stopn:
378 continue
378 continue
379 if p not in reachable:
379 if p not in reachable:
380 reachable.add(p)
380 reachable.add(p)
381 visit.append(p)
381 visit.append(p)
382 return reachable
382 return reachable
383
383
384 def ancestors(self, *revs):
384 def ancestors(self, *revs):
385 """Generate the ancestors of 'revs' in reverse topological order.
385 """Generate the ancestors of 'revs' in reverse topological order.
386
386
387 Yield a sequence of revision numbers starting with the parents
387 Yield a sequence of revision numbers starting with the parents
388 of each revision in revs, i.e., each revision is *not* considered
388 of each revision in revs, i.e., each revision is *not* considered
389 an ancestor of itself. Results are in breadth-first order:
389 an ancestor of itself. Results are in breadth-first order:
390 parents of each rev in revs, then parents of those, etc. Result
390 parents of each rev in revs, then parents of those, etc. Result
391 does not include the null revision."""
391 does not include the null revision."""
392 visit = list(revs)
392 visit = list(revs)
393 seen = set([nullrev])
393 seen = set([nullrev])
394 while visit:
394 while visit:
395 for parent in self.parentrevs(visit.pop(0)):
395 for parent in self.parentrevs(visit.pop(0)):
396 if parent not in seen:
396 if parent not in seen:
397 visit.append(parent)
397 visit.append(parent)
398 seen.add(parent)
398 seen.add(parent)
399 yield parent
399 yield parent
400
400
401 def descendants(self, *revs):
401 def descendants(self, *revs):
402 """Generate the descendants of 'revs' in revision order.
402 """Generate the descendants of 'revs' in revision order.
403
403
404 Yield a sequence of revision numbers starting with a child of
404 Yield a sequence of revision numbers starting with a child of
405 some rev in revs, i.e., each revision is *not* considered a
405 some rev in revs, i.e., each revision is *not* considered a
406 descendant of itself. Results are ordered by revision number (a
406 descendant of itself. Results are ordered by revision number (a
407 topological sort)."""
407 topological sort)."""
408 first = min(revs)
408 first = min(revs)
409 if first == nullrev:
409 if first == nullrev:
410 for i in self:
410 for i in self:
411 yield i
411 yield i
412 return
412 return
413
413
414 seen = set(revs)
414 seen = set(revs)
415 for i in xrange(first + 1, len(self)):
415 for i in xrange(first + 1, len(self)):
416 for x in self.parentrevs(i):
416 for x in self.parentrevs(i):
417 if x != nullrev and x in seen:
417 if x != nullrev and x in seen:
418 seen.add(i)
418 seen.add(i)
419 yield i
419 yield i
420 break
420 break
421
421
422 def findcommonmissing(self, common=None, heads=None):
422 def findcommonmissing(self, common=None, heads=None):
423 """Return a tuple of the ancestors of common and the ancestors of heads
423 """Return a tuple of the ancestors of common and the ancestors of heads
424 that are not ancestors of common. In revset terminology, we return the
424 that are not ancestors of common. In revset terminology, we return the
425 tuple:
425 tuple:
426
426
427 ::common, (::heads) - (::common)
427 ::common, (::heads) - (::common)
428
428
429 The list is sorted by revision number, meaning it is
429 The list is sorted by revision number, meaning it is
430 topologically sorted.
430 topologically sorted.
431
431
432 'heads' and 'common' are both lists of node IDs. If heads is
432 'heads' and 'common' are both lists of node IDs. If heads is
433 not supplied, uses all of the revlog's heads. If common is not
433 not supplied, uses all of the revlog's heads. If common is not
434 supplied, uses nullid."""
434 supplied, uses nullid."""
435 if common is None:
435 if common is None:
436 common = [nullid]
436 common = [nullid]
437 if heads is None:
437 if heads is None:
438 heads = self.heads()
438 heads = self.heads()
439
439
440 common = [self.rev(n) for n in common]
440 common = [self.rev(n) for n in common]
441 heads = [self.rev(n) for n in heads]
441 heads = [self.rev(n) for n in heads]
442
442
443 # we want the ancestors, but inclusive
443 # we want the ancestors, but inclusive
444 has = set(self.ancestors(*common))
444 has = set(self.ancestors(*common))
445 has.add(nullrev)
445 has.add(nullrev)
446 has.update(common)
446 has.update(common)
447
447
448 # take all ancestors from heads that aren't in has
448 # take all ancestors from heads that aren't in has
449 missing = set()
449 missing = set()
450 visit = [r for r in heads if r not in has]
450 visit = [r for r in heads if r not in has]
451 while visit:
451 while visit:
452 r = visit.pop(0)
452 r = visit.pop(0)
453 if r in missing:
453 if r in missing:
454 continue
454 continue
455 else:
455 else:
456 missing.add(r)
456 missing.add(r)
457 for p in self.parentrevs(r):
457 for p in self.parentrevs(r):
458 if p not in has:
458 if p not in has:
459 visit.append(p)
459 visit.append(p)
460 missing = list(missing)
460 missing = list(missing)
461 missing.sort()
461 missing.sort()
462 return has, [self.node(r) for r in missing]
462 return has, [self.node(r) for r in missing]
463
463
464 def findmissing(self, common=None, heads=None):
464 def findmissing(self, common=None, heads=None):
465 """Return the ancestors of heads that are not ancestors of common.
465 """Return the ancestors of heads that are not ancestors of common.
466
466
467 More specifically, return a list of nodes N such that every N
467 More specifically, return a list of nodes N such that every N
468 satisfies the following constraints:
468 satisfies the following constraints:
469
469
470 1. N is an ancestor of some node in 'heads'
470 1. N is an ancestor of some node in 'heads'
471 2. N is not an ancestor of any node in 'common'
471 2. N is not an ancestor of any node in 'common'
472
472
473 The list is sorted by revision number, meaning it is
473 The list is sorted by revision number, meaning it is
474 topologically sorted.
474 topologically sorted.
475
475
476 'heads' and 'common' are both lists of node IDs. If heads is
476 'heads' and 'common' are both lists of node IDs. If heads is
477 not supplied, uses all of the revlog's heads. If common is not
477 not supplied, uses all of the revlog's heads. If common is not
478 supplied, uses nullid."""
478 supplied, uses nullid."""
479 _common, missing = self.findcommonmissing(common, heads)
479 _common, missing = self.findcommonmissing(common, heads)
480 return missing
480 return missing
481
481
482 def nodesbetween(self, roots=None, heads=None):
482 def nodesbetween(self, roots=None, heads=None):
483 """Return a topological path from 'roots' to 'heads'.
483 """Return a topological path from 'roots' to 'heads'.
484
484
485 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
485 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
486 topologically sorted list of all nodes N that satisfy both of
486 topologically sorted list of all nodes N that satisfy both of
487 these constraints:
487 these constraints:
488
488
489 1. N is a descendant of some node in 'roots'
489 1. N is a descendant of some node in 'roots'
490 2. N is an ancestor of some node in 'heads'
490 2. N is an ancestor of some node in 'heads'
491
491
492 Every node is considered to be both a descendant and an ancestor
492 Every node is considered to be both a descendant and an ancestor
493 of itself, so every reachable node in 'roots' and 'heads' will be
493 of itself, so every reachable node in 'roots' and 'heads' will be
494 included in 'nodes'.
494 included in 'nodes'.
495
495
496 'outroots' is the list of reachable nodes in 'roots', i.e., the
496 'outroots' is the list of reachable nodes in 'roots', i.e., the
497 subset of 'roots' that is returned in 'nodes'. Likewise,
497 subset of 'roots' that is returned in 'nodes'. Likewise,
498 'outheads' is the subset of 'heads' that is also in 'nodes'.
498 'outheads' is the subset of 'heads' that is also in 'nodes'.
499
499
500 'roots' and 'heads' are both lists of node IDs. If 'roots' is
500 'roots' and 'heads' are both lists of node IDs. If 'roots' is
501 unspecified, uses nullid as the only root. If 'heads' is
501 unspecified, uses nullid as the only root. If 'heads' is
502 unspecified, uses list of all of the revlog's heads."""
502 unspecified, uses list of all of the revlog's heads."""
503 nonodes = ([], [], [])
503 nonodes = ([], [], [])
504 if roots is not None:
504 if roots is not None:
505 roots = list(roots)
505 roots = list(roots)
506 if not roots:
506 if not roots:
507 return nonodes
507 return nonodes
508 lowestrev = min([self.rev(n) for n in roots])
508 lowestrev = min([self.rev(n) for n in roots])
509 else:
509 else:
510 roots = [nullid] # Everybody's a descendant of nullid
510 roots = [nullid] # Everybody's a descendant of nullid
511 lowestrev = nullrev
511 lowestrev = nullrev
512 if (lowestrev == nullrev) and (heads is None):
512 if (lowestrev == nullrev) and (heads is None):
513 # We want _all_ the nodes!
513 # We want _all_ the nodes!
514 return ([self.node(r) for r in self], [nullid], list(self.heads()))
514 return ([self.node(r) for r in self], [nullid], list(self.heads()))
515 if heads is None:
515 if heads is None:
516 # All nodes are ancestors, so the latest ancestor is the last
516 # All nodes are ancestors, so the latest ancestor is the last
517 # node.
517 # node.
518 highestrev = len(self) - 1
518 highestrev = len(self) - 1
519 # Set ancestors to None to signal that every node is an ancestor.
519 # Set ancestors to None to signal that every node is an ancestor.
520 ancestors = None
520 ancestors = None
521 # Set heads to an empty dictionary for later discovery of heads
521 # Set heads to an empty dictionary for later discovery of heads
522 heads = {}
522 heads = {}
523 else:
523 else:
524 heads = list(heads)
524 heads = list(heads)
525 if not heads:
525 if not heads:
526 return nonodes
526 return nonodes
527 ancestors = set()
527 ancestors = set()
528 # Turn heads into a dictionary so we can remove 'fake' heads.
528 # Turn heads into a dictionary so we can remove 'fake' heads.
529 # Also, later we will be using it to filter out the heads we can't
529 # Also, later we will be using it to filter out the heads we can't
530 # find from roots.
530 # find from roots.
531 heads = dict.fromkeys(heads, False)
531 heads = dict.fromkeys(heads, False)
532 # Start at the top and keep marking parents until we're done.
532 # Start at the top and keep marking parents until we're done.
533 nodestotag = set(heads)
533 nodestotag = set(heads)
534 # Remember where the top was so we can use it as a limit later.
534 # Remember where the top was so we can use it as a limit later.
535 highestrev = max([self.rev(n) for n in nodestotag])
535 highestrev = max([self.rev(n) for n in nodestotag])
536 while nodestotag:
536 while nodestotag:
537 # grab a node to tag
537 # grab a node to tag
538 n = nodestotag.pop()
538 n = nodestotag.pop()
539 # Never tag nullid
539 # Never tag nullid
540 if n == nullid:
540 if n == nullid:
541 continue
541 continue
542 # A node's revision number represents its place in a
542 # A node's revision number represents its place in a
543 # topologically sorted list of nodes.
543 # topologically sorted list of nodes.
544 r = self.rev(n)
544 r = self.rev(n)
545 if r >= lowestrev:
545 if r >= lowestrev:
546 if n not in ancestors:
546 if n not in ancestors:
547 # If we are possibly a descendant of one of the roots
547 # If we are possibly a descendant of one of the roots
548 # and we haven't already been marked as an ancestor
548 # and we haven't already been marked as an ancestor
549 ancestors.add(n) # Mark as ancestor
549 ancestors.add(n) # Mark as ancestor
550 # Add non-nullid parents to list of nodes to tag.
550 # Add non-nullid parents to list of nodes to tag.
551 nodestotag.update([p for p in self.parents(n) if
551 nodestotag.update([p for p in self.parents(n) if
552 p != nullid])
552 p != nullid])
553 elif n in heads: # We've seen it before, is it a fake head?
553 elif n in heads: # We've seen it before, is it a fake head?
554 # So it is, real heads should not be the ancestors of
554 # So it is, real heads should not be the ancestors of
555 # any other heads.
555 # any other heads.
556 heads.pop(n)
556 heads.pop(n)
557 if not ancestors:
557 if not ancestors:
558 return nonodes
558 return nonodes
559 # Now that we have our set of ancestors, we want to remove any
559 # Now that we have our set of ancestors, we want to remove any
560 # roots that are not ancestors.
560 # roots that are not ancestors.
561
561
562 # If one of the roots was nullid, everything is included anyway.
562 # If one of the roots was nullid, everything is included anyway.
563 if lowestrev > nullrev:
563 if lowestrev > nullrev:
564 # But, since we weren't, let's recompute the lowest rev to not
564 # But, since we weren't, let's recompute the lowest rev to not
565 # include roots that aren't ancestors.
565 # include roots that aren't ancestors.
566
566
567 # Filter out roots that aren't ancestors of heads
567 # Filter out roots that aren't ancestors of heads
568 roots = [n for n in roots if n in ancestors]
568 roots = [n for n in roots if n in ancestors]
569 # Recompute the lowest revision
569 # Recompute the lowest revision
570 if roots:
570 if roots:
571 lowestrev = min([self.rev(n) for n in roots])
571 lowestrev = min([self.rev(n) for n in roots])
572 else:
572 else:
573 # No more roots? Return empty list
573 # No more roots? Return empty list
574 return nonodes
574 return nonodes
575 else:
575 else:
576 # We are descending from nullid, and don't need to care about
576 # We are descending from nullid, and don't need to care about
577 # any other roots.
577 # any other roots.
578 lowestrev = nullrev
578 lowestrev = nullrev
579 roots = [nullid]
579 roots = [nullid]
580 # Transform our roots list into a set.
580 # Transform our roots list into a set.
581 descendants = set(roots)
581 descendants = set(roots)
582 # Also, keep the original roots so we can filter out roots that aren't
582 # Also, keep the original roots so we can filter out roots that aren't
583 # 'real' roots (i.e. are descended from other roots).
583 # 'real' roots (i.e. are descended from other roots).
584 roots = descendants.copy()
584 roots = descendants.copy()
585 # Our topologically sorted list of output nodes.
585 # Our topologically sorted list of output nodes.
586 orderedout = []
586 orderedout = []
587 # Don't start at nullid since we don't want nullid in our output list,
587 # Don't start at nullid since we don't want nullid in our output list,
588 # and if nullid shows up in descedents, empty parents will look like
588 # and if nullid shows up in descedents, empty parents will look like
589 # they're descendants.
589 # they're descendants.
590 for r in xrange(max(lowestrev, 0), highestrev + 1):
590 for r in xrange(max(lowestrev, 0), highestrev + 1):
591 n = self.node(r)
591 n = self.node(r)
592 isdescendant = False
592 isdescendant = False
593 if lowestrev == nullrev: # Everybody is a descendant of nullid
593 if lowestrev == nullrev: # Everybody is a descendant of nullid
594 isdescendant = True
594 isdescendant = True
595 elif n in descendants:
595 elif n in descendants:
596 # n is already a descendant
596 # n is already a descendant
597 isdescendant = True
597 isdescendant = True
598 # This check only needs to be done here because all the roots
598 # This check only needs to be done here because all the roots
599 # will start being marked is descendants before the loop.
599 # will start being marked is descendants before the loop.
600 if n in roots:
600 if n in roots:
601 # If n was a root, check if it's a 'real' root.
601 # If n was a root, check if it's a 'real' root.
602 p = tuple(self.parents(n))
602 p = tuple(self.parents(n))
603 # If any of its parents are descendants, it's not a root.
603 # If any of its parents are descendants, it's not a root.
604 if (p[0] in descendants) or (p[1] in descendants):
604 if (p[0] in descendants) or (p[1] in descendants):
605 roots.remove(n)
605 roots.remove(n)
606 else:
606 else:
607 p = tuple(self.parents(n))
607 p = tuple(self.parents(n))
608 # A node is a descendant if either of its parents are
608 # A node is a descendant if either of its parents are
609 # descendants. (We seeded the dependents list with the roots
609 # descendants. (We seeded the dependents list with the roots
610 # up there, remember?)
610 # up there, remember?)
611 if (p[0] in descendants) or (p[1] in descendants):
611 if (p[0] in descendants) or (p[1] in descendants):
612 descendants.add(n)
612 descendants.add(n)
613 isdescendant = True
613 isdescendant = True
614 if isdescendant and ((ancestors is None) or (n in ancestors)):
614 if isdescendant and ((ancestors is None) or (n in ancestors)):
615 # Only include nodes that are both descendants and ancestors.
615 # Only include nodes that are both descendants and ancestors.
616 orderedout.append(n)
616 orderedout.append(n)
617 if (ancestors is not None) and (n in heads):
617 if (ancestors is not None) and (n in heads):
618 # We're trying to figure out which heads are reachable
618 # We're trying to figure out which heads are reachable
619 # from roots.
619 # from roots.
620 # Mark this head as having been reached
620 # Mark this head as having been reached
621 heads[n] = True
621 heads[n] = True
622 elif ancestors is None:
622 elif ancestors is None:
623 # Otherwise, we're trying to discover the heads.
623 # Otherwise, we're trying to discover the heads.
624 # Assume this is a head because if it isn't, the next step
624 # Assume this is a head because if it isn't, the next step
625 # will eventually remove it.
625 # will eventually remove it.
626 heads[n] = True
626 heads[n] = True
627 # But, obviously its parents aren't.
627 # But, obviously its parents aren't.
628 for p in self.parents(n):
628 for p in self.parents(n):
629 heads.pop(p, None)
629 heads.pop(p, None)
630 heads = [n for n, flag in heads.iteritems() if flag]
630 heads = [n for n, flag in heads.iteritems() if flag]
631 roots = list(roots)
631 roots = list(roots)
632 assert orderedout
632 assert orderedout
633 assert roots
633 assert roots
634 assert heads
634 assert heads
635 return (orderedout, roots, heads)
635 return (orderedout, roots, heads)
636
636
637 def headrevs(self):
637 def headrevs(self):
638 try:
639 return self.index.headrevs()
640 except AttributeError:
641 pass
638 count = len(self)
642 count = len(self)
639 if not count:
643 if not count:
640 return [nullrev]
644 return [nullrev]
641 ishead = [1] * (count + 1)
645 ishead = [1] * (count + 1)
642 index = self.index
646 index = self.index
643 for r in xrange(count):
647 for r in xrange(count):
644 e = index[r]
648 e = index[r]
645 ishead[e[5]] = ishead[e[6]] = 0
649 ishead[e[5]] = ishead[e[6]] = 0
646 return [r for r in xrange(count) if ishead[r]]
650 return [r for r in xrange(count) if ishead[r]]
647
651
648 def heads(self, start=None, stop=None):
652 def heads(self, start=None, stop=None):
649 """return the list of all nodes that have no children
653 """return the list of all nodes that have no children
650
654
651 if start is specified, only heads that are descendants of
655 if start is specified, only heads that are descendants of
652 start will be returned
656 start will be returned
653 if stop is specified, it will consider all the revs from stop
657 if stop is specified, it will consider all the revs from stop
654 as if they had no children
658 as if they had no children
655 """
659 """
656 if start is None and stop is None:
660 if start is None and stop is None:
657 if not len(self):
661 if not len(self):
658 return [nullid]
662 return [nullid]
659 return [self.node(r) for r in self.headrevs()]
663 return [self.node(r) for r in self.headrevs()]
660
664
661 if start is None:
665 if start is None:
662 start = nullid
666 start = nullid
663 if stop is None:
667 if stop is None:
664 stop = []
668 stop = []
665 stoprevs = set([self.rev(n) for n in stop])
669 stoprevs = set([self.rev(n) for n in stop])
666 startrev = self.rev(start)
670 startrev = self.rev(start)
667 reachable = set((startrev,))
671 reachable = set((startrev,))
668 heads = set((startrev,))
672 heads = set((startrev,))
669
673
670 parentrevs = self.parentrevs
674 parentrevs = self.parentrevs
671 for r in xrange(startrev + 1, len(self)):
675 for r in xrange(startrev + 1, len(self)):
672 for p in parentrevs(r):
676 for p in parentrevs(r):
673 if p in reachable:
677 if p in reachable:
674 if r not in stoprevs:
678 if r not in stoprevs:
675 reachable.add(r)
679 reachable.add(r)
676 heads.add(r)
680 heads.add(r)
677 if p in heads and p not in stoprevs:
681 if p in heads and p not in stoprevs:
678 heads.remove(p)
682 heads.remove(p)
679
683
680 return [self.node(r) for r in heads]
684 return [self.node(r) for r in heads]
681
685
682 def children(self, node):
686 def children(self, node):
683 """find the children of a given node"""
687 """find the children of a given node"""
684 c = []
688 c = []
685 p = self.rev(node)
689 p = self.rev(node)
686 for r in range(p + 1, len(self)):
690 for r in range(p + 1, len(self)):
687 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
691 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
688 if prevs:
692 if prevs:
689 for pr in prevs:
693 for pr in prevs:
690 if pr == p:
694 if pr == p:
691 c.append(self.node(r))
695 c.append(self.node(r))
692 elif p == nullrev:
696 elif p == nullrev:
693 c.append(self.node(r))
697 c.append(self.node(r))
694 return c
698 return c
695
699
696 def descendant(self, start, end):
700 def descendant(self, start, end):
697 if start == nullrev:
701 if start == nullrev:
698 return True
702 return True
699 for i in self.descendants(start):
703 for i in self.descendants(start):
700 if i == end:
704 if i == end:
701 return True
705 return True
702 elif i > end:
706 elif i > end:
703 break
707 break
704 return False
708 return False
705
709
706 def ancestor(self, a, b):
710 def ancestor(self, a, b):
707 """calculate the least common ancestor of nodes a and b"""
711 """calculate the least common ancestor of nodes a and b"""
708
712
709 # fast path, check if it is a descendant
713 # fast path, check if it is a descendant
710 a, b = self.rev(a), self.rev(b)
714 a, b = self.rev(a), self.rev(b)
711 start, end = sorted((a, b))
715 start, end = sorted((a, b))
712 if self.descendant(start, end):
716 if self.descendant(start, end):
713 return self.node(start)
717 return self.node(start)
714
718
715 def parents(rev):
719 def parents(rev):
716 return [p for p in self.parentrevs(rev) if p != nullrev]
720 return [p for p in self.parentrevs(rev) if p != nullrev]
717
721
718 c = ancestor.ancestor(a, b, parents)
722 c = ancestor.ancestor(a, b, parents)
719 if c is None:
723 if c is None:
720 return nullid
724 return nullid
721
725
722 return self.node(c)
726 return self.node(c)
723
727
724 def _match(self, id):
728 def _match(self, id):
725 if isinstance(id, int):
729 if isinstance(id, int):
726 # rev
730 # rev
727 return self.node(id)
731 return self.node(id)
728 if len(id) == 20:
732 if len(id) == 20:
729 # possibly a binary node
733 # possibly a binary node
730 # odds of a binary node being all hex in ASCII are 1 in 10**25
734 # odds of a binary node being all hex in ASCII are 1 in 10**25
731 try:
735 try:
732 node = id
736 node = id
733 self.rev(node) # quick search the index
737 self.rev(node) # quick search the index
734 return node
738 return node
735 except LookupError:
739 except LookupError:
736 pass # may be partial hex id
740 pass # may be partial hex id
737 try:
741 try:
738 # str(rev)
742 # str(rev)
739 rev = int(id)
743 rev = int(id)
740 if str(rev) != id:
744 if str(rev) != id:
741 raise ValueError
745 raise ValueError
742 if rev < 0:
746 if rev < 0:
743 rev = len(self) + rev
747 rev = len(self) + rev
744 if rev < 0 or rev >= len(self):
748 if rev < 0 or rev >= len(self):
745 raise ValueError
749 raise ValueError
746 return self.node(rev)
750 return self.node(rev)
747 except (ValueError, OverflowError):
751 except (ValueError, OverflowError):
748 pass
752 pass
749 if len(id) == 40:
753 if len(id) == 40:
750 try:
754 try:
751 # a full hex nodeid?
755 # a full hex nodeid?
752 node = bin(id)
756 node = bin(id)
753 self.rev(node)
757 self.rev(node)
754 return node
758 return node
755 except (TypeError, LookupError):
759 except (TypeError, LookupError):
756 pass
760 pass
757
761
758 def _partialmatch(self, id):
762 def _partialmatch(self, id):
759 try:
763 try:
760 return self.index.partialmatch(id)
764 return self.index.partialmatch(id)
761 except RevlogError:
765 except RevlogError:
762 # parsers.c radix tree lookup gave multiple matches
766 # parsers.c radix tree lookup gave multiple matches
763 raise LookupError(id, self.indexfile, _("ambiguous identifier"))
767 raise LookupError(id, self.indexfile, _("ambiguous identifier"))
764 except (AttributeError, ValueError):
768 except (AttributeError, ValueError):
765 # we are pure python, or key was too short to search radix tree
769 # we are pure python, or key was too short to search radix tree
766 pass
770 pass
767
771
768 if id in self._pcache:
772 if id in self._pcache:
769 return self._pcache[id]
773 return self._pcache[id]
770
774
771 if len(id) < 40:
775 if len(id) < 40:
772 try:
776 try:
773 # hex(node)[:...]
777 # hex(node)[:...]
774 l = len(id) // 2 # grab an even number of digits
778 l = len(id) // 2 # grab an even number of digits
775 prefix = bin(id[:l * 2])
779 prefix = bin(id[:l * 2])
776 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
780 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
777 nl = [n for n in nl if hex(n).startswith(id)]
781 nl = [n for n in nl if hex(n).startswith(id)]
778 if len(nl) > 0:
782 if len(nl) > 0:
779 if len(nl) == 1:
783 if len(nl) == 1:
780 self._pcache[id] = nl[0]
784 self._pcache[id] = nl[0]
781 return nl[0]
785 return nl[0]
782 raise LookupError(id, self.indexfile,
786 raise LookupError(id, self.indexfile,
783 _('ambiguous identifier'))
787 _('ambiguous identifier'))
784 return None
788 return None
785 except TypeError:
789 except TypeError:
786 pass
790 pass
787
791
788 def lookup(self, id):
792 def lookup(self, id):
789 """locate a node based on:
793 """locate a node based on:
790 - revision number or str(revision number)
794 - revision number or str(revision number)
791 - nodeid or subset of hex nodeid
795 - nodeid or subset of hex nodeid
792 """
796 """
793 n = self._match(id)
797 n = self._match(id)
794 if n is not None:
798 if n is not None:
795 return n
799 return n
796 n = self._partialmatch(id)
800 n = self._partialmatch(id)
797 if n:
801 if n:
798 return n
802 return n
799
803
800 raise LookupError(id, self.indexfile, _('no match found'))
804 raise LookupError(id, self.indexfile, _('no match found'))
801
805
802 def cmp(self, node, text):
806 def cmp(self, node, text):
803 """compare text with a given file revision
807 """compare text with a given file revision
804
808
805 returns True if text is different than what is stored.
809 returns True if text is different than what is stored.
806 """
810 """
807 p1, p2 = self.parents(node)
811 p1, p2 = self.parents(node)
808 return hash(text, p1, p2) != node
812 return hash(text, p1, p2) != node
809
813
810 def _addchunk(self, offset, data):
814 def _addchunk(self, offset, data):
811 o, d = self._chunkcache
815 o, d = self._chunkcache
812 # try to add to existing cache
816 # try to add to existing cache
813 if o + len(d) == offset and len(d) + len(data) < _chunksize:
817 if o + len(d) == offset and len(d) + len(data) < _chunksize:
814 self._chunkcache = o, d + data
818 self._chunkcache = o, d + data
815 else:
819 else:
816 self._chunkcache = offset, data
820 self._chunkcache = offset, data
817
821
818 def _loadchunk(self, offset, length):
822 def _loadchunk(self, offset, length):
819 if self._inline:
823 if self._inline:
820 df = self.opener(self.indexfile)
824 df = self.opener(self.indexfile)
821 else:
825 else:
822 df = self.opener(self.datafile)
826 df = self.opener(self.datafile)
823
827
824 readahead = max(65536, length)
828 readahead = max(65536, length)
825 df.seek(offset)
829 df.seek(offset)
826 d = df.read(readahead)
830 d = df.read(readahead)
827 df.close()
831 df.close()
828 self._addchunk(offset, d)
832 self._addchunk(offset, d)
829 if readahead > length:
833 if readahead > length:
830 return util.buffer(d, 0, length)
834 return util.buffer(d, 0, length)
831 return d
835 return d
832
836
833 def _getchunk(self, offset, length):
837 def _getchunk(self, offset, length):
834 o, d = self._chunkcache
838 o, d = self._chunkcache
835 l = len(d)
839 l = len(d)
836
840
837 # is it in the cache?
841 # is it in the cache?
838 cachestart = offset - o
842 cachestart = offset - o
839 cacheend = cachestart + length
843 cacheend = cachestart + length
840 if cachestart >= 0 and cacheend <= l:
844 if cachestart >= 0 and cacheend <= l:
841 if cachestart == 0 and cacheend == l:
845 if cachestart == 0 and cacheend == l:
842 return d # avoid a copy
846 return d # avoid a copy
843 return util.buffer(d, cachestart, cacheend - cachestart)
847 return util.buffer(d, cachestart, cacheend - cachestart)
844
848
845 return self._loadchunk(offset, length)
849 return self._loadchunk(offset, length)
846
850
847 def _chunkraw(self, startrev, endrev):
851 def _chunkraw(self, startrev, endrev):
848 start = self.start(startrev)
852 start = self.start(startrev)
849 length = self.end(endrev) - start
853 length = self.end(endrev) - start
850 if self._inline:
854 if self._inline:
851 start += (startrev + 1) * self._io.size
855 start += (startrev + 1) * self._io.size
852 return self._getchunk(start, length)
856 return self._getchunk(start, length)
853
857
854 def _chunk(self, rev):
858 def _chunk(self, rev):
855 return decompress(self._chunkraw(rev, rev))
859 return decompress(self._chunkraw(rev, rev))
856
860
857 def _chunkbase(self, rev):
861 def _chunkbase(self, rev):
858 return self._chunk(rev)
862 return self._chunk(rev)
859
863
860 def _chunkclear(self):
864 def _chunkclear(self):
861 self._chunkcache = (0, '')
865 self._chunkcache = (0, '')
862
866
863 def deltaparent(self, rev):
867 def deltaparent(self, rev):
864 """return deltaparent of the given revision"""
868 """return deltaparent of the given revision"""
865 base = self.index[rev][3]
869 base = self.index[rev][3]
866 if base == rev:
870 if base == rev:
867 return nullrev
871 return nullrev
868 elif self._generaldelta:
872 elif self._generaldelta:
869 return base
873 return base
870 else:
874 else:
871 return rev - 1
875 return rev - 1
872
876
873 def revdiff(self, rev1, rev2):
877 def revdiff(self, rev1, rev2):
874 """return or calculate a delta between two revisions"""
878 """return or calculate a delta between two revisions"""
875 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
879 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
876 return str(self._chunk(rev2))
880 return str(self._chunk(rev2))
877
881
878 return mdiff.textdiff(self.revision(rev1),
882 return mdiff.textdiff(self.revision(rev1),
879 self.revision(rev2))
883 self.revision(rev2))
880
884
881 def revision(self, nodeorrev):
885 def revision(self, nodeorrev):
882 """return an uncompressed revision of a given node or revision
886 """return an uncompressed revision of a given node or revision
883 number.
887 number.
884 """
888 """
885 if isinstance(nodeorrev, int):
889 if isinstance(nodeorrev, int):
886 rev = nodeorrev
890 rev = nodeorrev
887 node = self.node(rev)
891 node = self.node(rev)
888 else:
892 else:
889 node = nodeorrev
893 node = nodeorrev
890 rev = None
894 rev = None
891
895
892 cachedrev = None
896 cachedrev = None
893 if node == nullid:
897 if node == nullid:
894 return ""
898 return ""
895 if self._cache:
899 if self._cache:
896 if self._cache[0] == node:
900 if self._cache[0] == node:
897 return self._cache[2]
901 return self._cache[2]
898 cachedrev = self._cache[1]
902 cachedrev = self._cache[1]
899
903
900 # look up what we need to read
904 # look up what we need to read
901 text = None
905 text = None
902 if rev is None:
906 if rev is None:
903 rev = self.rev(node)
907 rev = self.rev(node)
904
908
905 # check rev flags
909 # check rev flags
906 if self.flags(rev) & ~REVIDX_KNOWN_FLAGS:
910 if self.flags(rev) & ~REVIDX_KNOWN_FLAGS:
907 raise RevlogError(_('incompatible revision flag %x') %
911 raise RevlogError(_('incompatible revision flag %x') %
908 (self.flags(rev) & ~REVIDX_KNOWN_FLAGS))
912 (self.flags(rev) & ~REVIDX_KNOWN_FLAGS))
909
913
910 # build delta chain
914 # build delta chain
911 chain = []
915 chain = []
912 index = self.index # for performance
916 index = self.index # for performance
913 generaldelta = self._generaldelta
917 generaldelta = self._generaldelta
914 iterrev = rev
918 iterrev = rev
915 e = index[iterrev]
919 e = index[iterrev]
916 while iterrev != e[3] and iterrev != cachedrev:
920 while iterrev != e[3] and iterrev != cachedrev:
917 chain.append(iterrev)
921 chain.append(iterrev)
918 if generaldelta:
922 if generaldelta:
919 iterrev = e[3]
923 iterrev = e[3]
920 else:
924 else:
921 iterrev -= 1
925 iterrev -= 1
922 e = index[iterrev]
926 e = index[iterrev]
923 chain.reverse()
927 chain.reverse()
924 base = iterrev
928 base = iterrev
925
929
926 if iterrev == cachedrev:
930 if iterrev == cachedrev:
927 # cache hit
931 # cache hit
928 text = self._cache[2]
932 text = self._cache[2]
929
933
930 # drop cache to save memory
934 # drop cache to save memory
931 self._cache = None
935 self._cache = None
932
936
933 self._chunkraw(base, rev)
937 self._chunkraw(base, rev)
934 if text is None:
938 if text is None:
935 text = str(self._chunkbase(base))
939 text = str(self._chunkbase(base))
936
940
937 bins = [self._chunk(r) for r in chain]
941 bins = [self._chunk(r) for r in chain]
938 text = mdiff.patches(text, bins)
942 text = mdiff.patches(text, bins)
939
943
940 text = self._checkhash(text, node, rev)
944 text = self._checkhash(text, node, rev)
941
945
942 self._cache = (node, rev, text)
946 self._cache = (node, rev, text)
943 return text
947 return text
944
948
945 def _checkhash(self, text, node, rev):
949 def _checkhash(self, text, node, rev):
946 p1, p2 = self.parents(node)
950 p1, p2 = self.parents(node)
947 if node != hash(text, p1, p2):
951 if node != hash(text, p1, p2):
948 raise RevlogError(_("integrity check failed on %s:%d")
952 raise RevlogError(_("integrity check failed on %s:%d")
949 % (self.indexfile, rev))
953 % (self.indexfile, rev))
950 return text
954 return text
951
955
952 def checkinlinesize(self, tr, fp=None):
956 def checkinlinesize(self, tr, fp=None):
953 if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
957 if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
954 return
958 return
955
959
956 trinfo = tr.find(self.indexfile)
960 trinfo = tr.find(self.indexfile)
957 if trinfo is None:
961 if trinfo is None:
958 raise RevlogError(_("%s not found in the transaction")
962 raise RevlogError(_("%s not found in the transaction")
959 % self.indexfile)
963 % self.indexfile)
960
964
961 trindex = trinfo[2]
965 trindex = trinfo[2]
962 dataoff = self.start(trindex)
966 dataoff = self.start(trindex)
963
967
964 tr.add(self.datafile, dataoff)
968 tr.add(self.datafile, dataoff)
965
969
966 if fp:
970 if fp:
967 fp.flush()
971 fp.flush()
968 fp.close()
972 fp.close()
969
973
970 df = self.opener(self.datafile, 'w')
974 df = self.opener(self.datafile, 'w')
971 try:
975 try:
972 for r in self:
976 for r in self:
973 df.write(self._chunkraw(r, r))
977 df.write(self._chunkraw(r, r))
974 finally:
978 finally:
975 df.close()
979 df.close()
976
980
977 fp = self.opener(self.indexfile, 'w', atomictemp=True)
981 fp = self.opener(self.indexfile, 'w', atomictemp=True)
978 self.version &= ~(REVLOGNGINLINEDATA)
982 self.version &= ~(REVLOGNGINLINEDATA)
979 self._inline = False
983 self._inline = False
980 for i in self:
984 for i in self:
981 e = self._io.packentry(self.index[i], self.node, self.version, i)
985 e = self._io.packentry(self.index[i], self.node, self.version, i)
982 fp.write(e)
986 fp.write(e)
983
987
984 # if we don't call close, the temp file will never replace the
988 # if we don't call close, the temp file will never replace the
985 # real index
989 # real index
986 fp.close()
990 fp.close()
987
991
988 tr.replace(self.indexfile, trindex * self._io.size)
992 tr.replace(self.indexfile, trindex * self._io.size)
989 self._chunkclear()
993 self._chunkclear()
990
994
991 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None):
995 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None):
992 """add a revision to the log
996 """add a revision to the log
993
997
994 text - the revision data to add
998 text - the revision data to add
995 transaction - the transaction object used for rollback
999 transaction - the transaction object used for rollback
996 link - the linkrev data to add
1000 link - the linkrev data to add
997 p1, p2 - the parent nodeids of the revision
1001 p1, p2 - the parent nodeids of the revision
998 cachedelta - an optional precomputed delta
1002 cachedelta - an optional precomputed delta
999 """
1003 """
1000 node = hash(text, p1, p2)
1004 node = hash(text, p1, p2)
1001 if node in self.nodemap:
1005 if node in self.nodemap:
1002 return node
1006 return node
1003
1007
1004 dfh = None
1008 dfh = None
1005 if not self._inline:
1009 if not self._inline:
1006 dfh = self.opener(self.datafile, "a")
1010 dfh = self.opener(self.datafile, "a")
1007 ifh = self.opener(self.indexfile, "a+")
1011 ifh = self.opener(self.indexfile, "a+")
1008 try:
1012 try:
1009 return self._addrevision(node, text, transaction, link, p1, p2,
1013 return self._addrevision(node, text, transaction, link, p1, p2,
1010 cachedelta, ifh, dfh)
1014 cachedelta, ifh, dfh)
1011 finally:
1015 finally:
1012 if dfh:
1016 if dfh:
1013 dfh.close()
1017 dfh.close()
1014 ifh.close()
1018 ifh.close()
1015
1019
1016 def _addrevision(self, node, text, transaction, link, p1, p2,
1020 def _addrevision(self, node, text, transaction, link, p1, p2,
1017 cachedelta, ifh, dfh):
1021 cachedelta, ifh, dfh):
1018 """internal function to add revisions to the log
1022 """internal function to add revisions to the log
1019
1023
1020 see addrevision for argument descriptions.
1024 see addrevision for argument descriptions.
1021 invariants:
1025 invariants:
1022 - text is optional (can be None); if not set, cachedelta must be set.
1026 - text is optional (can be None); if not set, cachedelta must be set.
1023 if both are set, they must correspond to eachother.
1027 if both are set, they must correspond to eachother.
1024 """
1028 """
1025 btext = [text]
1029 btext = [text]
1026 def buildtext():
1030 def buildtext():
1027 if btext[0] is not None:
1031 if btext[0] is not None:
1028 return btext[0]
1032 return btext[0]
1029 # flush any pending writes here so we can read it in revision
1033 # flush any pending writes here so we can read it in revision
1030 if dfh:
1034 if dfh:
1031 dfh.flush()
1035 dfh.flush()
1032 ifh.flush()
1036 ifh.flush()
1033 basetext = self.revision(self.node(cachedelta[0]))
1037 basetext = self.revision(self.node(cachedelta[0]))
1034 btext[0] = mdiff.patch(basetext, cachedelta[1])
1038 btext[0] = mdiff.patch(basetext, cachedelta[1])
1035 chk = hash(btext[0], p1, p2)
1039 chk = hash(btext[0], p1, p2)
1036 if chk != node:
1040 if chk != node:
1037 raise RevlogError(_("consistency error in delta"))
1041 raise RevlogError(_("consistency error in delta"))
1038 return btext[0]
1042 return btext[0]
1039
1043
1040 def builddelta(rev):
1044 def builddelta(rev):
1041 # can we use the cached delta?
1045 # can we use the cached delta?
1042 if cachedelta and cachedelta[0] == rev:
1046 if cachedelta and cachedelta[0] == rev:
1043 delta = cachedelta[1]
1047 delta = cachedelta[1]
1044 else:
1048 else:
1045 t = buildtext()
1049 t = buildtext()
1046 ptext = self.revision(self.node(rev))
1050 ptext = self.revision(self.node(rev))
1047 delta = mdiff.textdiff(ptext, t)
1051 delta = mdiff.textdiff(ptext, t)
1048 data = compress(delta)
1052 data = compress(delta)
1049 l = len(data[1]) + len(data[0])
1053 l = len(data[1]) + len(data[0])
1050 if basecache[0] == rev:
1054 if basecache[0] == rev:
1051 chainbase = basecache[1]
1055 chainbase = basecache[1]
1052 else:
1056 else:
1053 chainbase = self.chainbase(rev)
1057 chainbase = self.chainbase(rev)
1054 dist = l + offset - self.start(chainbase)
1058 dist = l + offset - self.start(chainbase)
1055 if self._generaldelta:
1059 if self._generaldelta:
1056 base = rev
1060 base = rev
1057 else:
1061 else:
1058 base = chainbase
1062 base = chainbase
1059 return dist, l, data, base, chainbase
1063 return dist, l, data, base, chainbase
1060
1064
1061 curr = len(self)
1065 curr = len(self)
1062 prev = curr - 1
1066 prev = curr - 1
1063 base = chainbase = curr
1067 base = chainbase = curr
1064 offset = self.end(prev)
1068 offset = self.end(prev)
1065 flags = 0
1069 flags = 0
1066 d = None
1070 d = None
1067 basecache = self._basecache
1071 basecache = self._basecache
1068 p1r, p2r = self.rev(p1), self.rev(p2)
1072 p1r, p2r = self.rev(p1), self.rev(p2)
1069
1073
1070 # should we try to build a delta?
1074 # should we try to build a delta?
1071 if prev != nullrev:
1075 if prev != nullrev:
1072 if self._generaldelta:
1076 if self._generaldelta:
1073 if p1r >= basecache[1]:
1077 if p1r >= basecache[1]:
1074 d = builddelta(p1r)
1078 d = builddelta(p1r)
1075 elif p2r >= basecache[1]:
1079 elif p2r >= basecache[1]:
1076 d = builddelta(p2r)
1080 d = builddelta(p2r)
1077 else:
1081 else:
1078 d = builddelta(prev)
1082 d = builddelta(prev)
1079 else:
1083 else:
1080 d = builddelta(prev)
1084 d = builddelta(prev)
1081 dist, l, data, base, chainbase = d
1085 dist, l, data, base, chainbase = d
1082
1086
1083 # full versions are inserted when the needed deltas
1087 # full versions are inserted when the needed deltas
1084 # become comparable to the uncompressed text
1088 # become comparable to the uncompressed text
1085 if text is None:
1089 if text is None:
1086 textlen = mdiff.patchedsize(self.rawsize(cachedelta[0]),
1090 textlen = mdiff.patchedsize(self.rawsize(cachedelta[0]),
1087 cachedelta[1])
1091 cachedelta[1])
1088 else:
1092 else:
1089 textlen = len(text)
1093 textlen = len(text)
1090 if d is None or dist > textlen * 2:
1094 if d is None or dist > textlen * 2:
1091 text = buildtext()
1095 text = buildtext()
1092 data = compress(text)
1096 data = compress(text)
1093 l = len(data[1]) + len(data[0])
1097 l = len(data[1]) + len(data[0])
1094 base = chainbase = curr
1098 base = chainbase = curr
1095
1099
1096 e = (offset_type(offset, flags), l, textlen,
1100 e = (offset_type(offset, flags), l, textlen,
1097 base, link, p1r, p2r, node)
1101 base, link, p1r, p2r, node)
1098 self.index.insert(-1, e)
1102 self.index.insert(-1, e)
1099 self.nodemap[node] = curr
1103 self.nodemap[node] = curr
1100
1104
1101 entry = self._io.packentry(e, self.node, self.version, curr)
1105 entry = self._io.packentry(e, self.node, self.version, curr)
1102 if not self._inline:
1106 if not self._inline:
1103 transaction.add(self.datafile, offset)
1107 transaction.add(self.datafile, offset)
1104 transaction.add(self.indexfile, curr * len(entry))
1108 transaction.add(self.indexfile, curr * len(entry))
1105 if data[0]:
1109 if data[0]:
1106 dfh.write(data[0])
1110 dfh.write(data[0])
1107 dfh.write(data[1])
1111 dfh.write(data[1])
1108 dfh.flush()
1112 dfh.flush()
1109 ifh.write(entry)
1113 ifh.write(entry)
1110 else:
1114 else:
1111 offset += curr * self._io.size
1115 offset += curr * self._io.size
1112 transaction.add(self.indexfile, offset, curr)
1116 transaction.add(self.indexfile, offset, curr)
1113 ifh.write(entry)
1117 ifh.write(entry)
1114 ifh.write(data[0])
1118 ifh.write(data[0])
1115 ifh.write(data[1])
1119 ifh.write(data[1])
1116 self.checkinlinesize(transaction, ifh)
1120 self.checkinlinesize(transaction, ifh)
1117
1121
1118 if type(text) == str: # only accept immutable objects
1122 if type(text) == str: # only accept immutable objects
1119 self._cache = (node, curr, text)
1123 self._cache = (node, curr, text)
1120 self._basecache = (curr, chainbase)
1124 self._basecache = (curr, chainbase)
1121 return node
1125 return node
1122
1126
1123 def group(self, nodelist, bundler, reorder=None):
1127 def group(self, nodelist, bundler, reorder=None):
1124 """Calculate a delta group, yielding a sequence of changegroup chunks
1128 """Calculate a delta group, yielding a sequence of changegroup chunks
1125 (strings).
1129 (strings).
1126
1130
1127 Given a list of changeset revs, return a set of deltas and
1131 Given a list of changeset revs, return a set of deltas and
1128 metadata corresponding to nodes. The first delta is
1132 metadata corresponding to nodes. The first delta is
1129 first parent(nodelist[0]) -> nodelist[0], the receiver is
1133 first parent(nodelist[0]) -> nodelist[0], the receiver is
1130 guaranteed to have this parent as it has all history before
1134 guaranteed to have this parent as it has all history before
1131 these changesets. In the case firstparent is nullrev the
1135 these changesets. In the case firstparent is nullrev the
1132 changegroup starts with a full revision.
1136 changegroup starts with a full revision.
1133 """
1137 """
1134
1138
1135 # if we don't have any revisions touched by these changesets, bail
1139 # if we don't have any revisions touched by these changesets, bail
1136 if len(nodelist) == 0:
1140 if len(nodelist) == 0:
1137 yield bundler.close()
1141 yield bundler.close()
1138 return
1142 return
1139
1143
1140 # for generaldelta revlogs, we linearize the revs; this will both be
1144 # for generaldelta revlogs, we linearize the revs; this will both be
1141 # much quicker and generate a much smaller bundle
1145 # much quicker and generate a much smaller bundle
1142 if (self._generaldelta and reorder is not False) or reorder:
1146 if (self._generaldelta and reorder is not False) or reorder:
1143 dag = dagutil.revlogdag(self)
1147 dag = dagutil.revlogdag(self)
1144 revs = set(self.rev(n) for n in nodelist)
1148 revs = set(self.rev(n) for n in nodelist)
1145 revs = dag.linearize(revs)
1149 revs = dag.linearize(revs)
1146 else:
1150 else:
1147 revs = sorted([self.rev(n) for n in nodelist])
1151 revs = sorted([self.rev(n) for n in nodelist])
1148
1152
1149 # add the parent of the first rev
1153 # add the parent of the first rev
1150 p = self.parentrevs(revs[0])[0]
1154 p = self.parentrevs(revs[0])[0]
1151 revs.insert(0, p)
1155 revs.insert(0, p)
1152
1156
1153 # build deltas
1157 # build deltas
1154 for r in xrange(len(revs) - 1):
1158 for r in xrange(len(revs) - 1):
1155 prev, curr = revs[r], revs[r + 1]
1159 prev, curr = revs[r], revs[r + 1]
1156 for c in bundler.revchunk(self, curr, prev):
1160 for c in bundler.revchunk(self, curr, prev):
1157 yield c
1161 yield c
1158
1162
1159 yield bundler.close()
1163 yield bundler.close()
1160
1164
    def addgroup(self, bundle, linkmapper, transaction):
        """
        add a delta group

        given a set of deltas, add them to the revision log. the
        first delta is against its parent, which should be in our
        log, the rest are against the previous delta.

        Returns the list of nodes appearing in the bundle, including
        those that were already present in this revlog.
        """

        # track the base of the current delta log
        content = []
        node = None

        # r is the current number of revisions; end is the byte offset just
        # past the last revision's data, used to journal truncation points
        r = len(self)
        end = 0
        if r:
            end = self.end(r - 1)
        ifh = self.opener(self.indexfile, "a+")
        isize = r * self._io.size
        if self._inline:
            # inline revlog: index and data share one file, so only the
            # index file needs journaling and there is no data handle
            transaction.add(self.indexfile, end + isize, r)
            dfh = None
        else:
            transaction.add(self.indexfile, isize, r)
            transaction.add(self.datafile, end)
            dfh = self.opener(self.datafile, "a")

        try:
            # loop through our set of deltas
            chain = None
            while True:
                # 'chain' is the node the next delta is expected to be
                # based on; deltachunk returns a falsy value when exhausted
                chunkdata = bundle.deltachunk(chain)
                if not chunkdata:
                    break
                node = chunkdata['node']
                p1 = chunkdata['p1']
                p2 = chunkdata['p2']
                cs = chunkdata['cs']
                deltabase = chunkdata['deltabase']
                delta = chunkdata['delta']

                content.append(node)

                link = linkmapper(cs)
                if node in self.nodemap:
                    # this can happen if two branches make the same change
                    chain = node
                    continue

                # both parents must already be known to this revlog
                for p in (p1, p2):
                    if p not in self.nodemap:
                        raise LookupError(p, self.indexfile,
                                          _('unknown parent'))

                if deltabase not in self.nodemap:
                    raise LookupError(deltabase, self.indexfile,
                                      _('unknown delta base'))

                baserev = self.rev(deltabase)
                chain = self._addrevision(node, None, transaction, link,
                                          p1, p2, (baserev, delta), ifh, dfh)
                if not dfh and not self._inline:
                    # addrevision switched from inline to conventional
                    # reopen the index
                    ifh.close()
                    dfh = self.opener(self.datafile, "a")
                    ifh = self.opener(self.indexfile, "a")
        finally:
            # always release file handles, even when a delta was rejected
            if dfh:
                dfh.close()
            ifh.close()

        return content
1234
1238
1235 def strip(self, minlink, transaction):
1239 def strip(self, minlink, transaction):
1236 """truncate the revlog on the first revision with a linkrev >= minlink
1240 """truncate the revlog on the first revision with a linkrev >= minlink
1237
1241
1238 This function is called when we're stripping revision minlink and
1242 This function is called when we're stripping revision minlink and
1239 its descendants from the repository.
1243 its descendants from the repository.
1240
1244
1241 We have to remove all revisions with linkrev >= minlink, because
1245 We have to remove all revisions with linkrev >= minlink, because
1242 the equivalent changelog revisions will be renumbered after the
1246 the equivalent changelog revisions will be renumbered after the
1243 strip.
1247 strip.
1244
1248
1245 So we truncate the revlog on the first of these revisions, and
1249 So we truncate the revlog on the first of these revisions, and
1246 trust that the caller has saved the revisions that shouldn't be
1250 trust that the caller has saved the revisions that shouldn't be
1247 removed and that it'll re-add them after this truncation.
1251 removed and that it'll re-add them after this truncation.
1248 """
1252 """
1249 if len(self) == 0:
1253 if len(self) == 0:
1250 return
1254 return
1251
1255
1252 for rev in self:
1256 for rev in self:
1253 if self.index[rev][4] >= minlink:
1257 if self.index[rev][4] >= minlink:
1254 break
1258 break
1255 else:
1259 else:
1256 return
1260 return
1257
1261
1258 # first truncate the files on disk
1262 # first truncate the files on disk
1259 end = self.start(rev)
1263 end = self.start(rev)
1260 if not self._inline:
1264 if not self._inline:
1261 transaction.add(self.datafile, end)
1265 transaction.add(self.datafile, end)
1262 end = rev * self._io.size
1266 end = rev * self._io.size
1263 else:
1267 else:
1264 end += rev * self._io.size
1268 end += rev * self._io.size
1265
1269
1266 transaction.add(self.indexfile, end)
1270 transaction.add(self.indexfile, end)
1267
1271
1268 # then reset internal state in memory to forget those revisions
1272 # then reset internal state in memory to forget those revisions
1269 self._cache = None
1273 self._cache = None
1270 self._chunkclear()
1274 self._chunkclear()
1271 for x in xrange(rev, len(self)):
1275 for x in xrange(rev, len(self)):
1272 del self.nodemap[self.node(x)]
1276 del self.nodemap[self.node(x)]
1273
1277
1274 del self.index[rev:-1]
1278 del self.index[rev:-1]
1275
1279
    def checksize(self):
        """Check the data and index files for excess or missing bytes.

        Returns a pair (dd, di): dd is the difference between the actual
        and expected size of the data file, di the same for the index
        file. Both are 0 when the revlog is internally consistent; a
        missing file counts as a difference of 0 bytes.
        """
        expected = 0
        if len(self):
            # expected data size is the end offset of the last revision
            expected = max(0, self.end(len(self) - 1))

        try:
            f = self.opener(self.datafile)
            f.seek(0, 2)
            actual = f.tell()
            f.close()
            dd = actual - expected
        except IOError, inst:
            # a missing data file is normal for inline revlogs
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self.indexfile)
            f.seek(0, 2)
            actual = f.tell()
            f.close()
            s = self._io.size
            # di is the number of trailing bytes not forming a full entry
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                # inline revlogs interleave entry and data; recompute the
                # excess after accounting for every revision's data bytes
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except IOError, inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)
1312
1316
1313 def files(self):
1317 def files(self):
1314 res = [self.indexfile]
1318 res = [self.indexfile]
1315 if not self._inline:
1319 if not self._inline:
1316 res.append(self.datafile)
1320 res.append(self.datafile)
1317 return res
1321 return res
General Comments 0
You need to be logged in to leave comments. Login now