##// END OF EJS Templates
delta: have a native implementation of _findsnapshot...
Boris Feld -
r41141:38e88450 default
parent child Browse files
Show More
@@ -1,2969 +1,3032
1 /*
1 /*
2 parsers.c - efficient content parsing
2 parsers.c - efficient content parsing
3
3
4 Copyright 2008 Matt Mackall <mpm@selenic.com> and others
4 Copyright 2008 Matt Mackall <mpm@selenic.com> and others
5
5
6 This software may be used and distributed according to the terms of
6 This software may be used and distributed according to the terms of
7 the GNU General Public License, incorporated herein by reference.
7 the GNU General Public License, incorporated herein by reference.
8 */
8 */
9
9
10 #include <Python.h>
10 #include <Python.h>
11 #include <assert.h>
11 #include <assert.h>
12 #include <ctype.h>
12 #include <ctype.h>
13 #include <limits.h>
13 #include <limits.h>
14 #include <stddef.h>
14 #include <stddef.h>
15 #include <stdlib.h>
15 #include <stdlib.h>
16 #include <string.h>
16 #include <string.h>
17
17
18 #include "bitmanipulation.h"
18 #include "bitmanipulation.h"
19 #include "charencode.h"
19 #include "charencode.h"
20 #include "revlog.h"
20 #include "revlog.h"
21 #include "util.h"
21 #include "util.h"
22
22
23 #ifdef IS_PY3K
23 #ifdef IS_PY3K
24 /* The mapping of Python types is meant to be temporary to get Python
24 /* The mapping of Python types is meant to be temporary to get Python
25 * 3 to compile. We should remove this once Python 3 support is fully
25 * 3 to compile. We should remove this once Python 3 support is fully
26 * supported and proper types are used in the extensions themselves. */
26 * supported and proper types are used in the extensions themselves. */
27 #define PyInt_Check PyLong_Check
27 #define PyInt_Check PyLong_Check
28 #define PyInt_FromLong PyLong_FromLong
28 #define PyInt_FromLong PyLong_FromLong
29 #define PyInt_FromSsize_t PyLong_FromSsize_t
29 #define PyInt_FromSsize_t PyLong_FromSsize_t
30 #define PyInt_AsLong PyLong_AsLong
30 #define PyInt_AsLong PyLong_AsLong
31 #endif
31 #endif
32
32
33 typedef struct indexObjectStruct indexObject;
33 typedef struct indexObjectStruct indexObject;
34
34
35 typedef struct {
35 typedef struct {
36 int children[16];
36 int children[16];
37 } nodetreenode;
37 } nodetreenode;
38
38
39 /*
39 /*
40 * A base-16 trie for fast node->rev mapping.
40 * A base-16 trie for fast node->rev mapping.
41 *
41 *
42 * Positive value is index of the next node in the trie
42 * Positive value is index of the next node in the trie
43 * Negative value is a leaf: -(rev + 2)
43 * Negative value is a leaf: -(rev + 2)
44 * Zero is empty
44 * Zero is empty
45 */
45 */
46 typedef struct {
46 typedef struct {
47 indexObject *index;
47 indexObject *index;
48 nodetreenode *nodes;
48 nodetreenode *nodes;
49 unsigned length; /* # nodes in use */
49 unsigned length; /* # nodes in use */
50 unsigned capacity; /* # nodes allocated */
50 unsigned capacity; /* # nodes allocated */
51 int depth; /* maximum depth of tree */
51 int depth; /* maximum depth of tree */
52 int splits; /* # splits performed */
52 int splits; /* # splits performed */
53 } nodetree;
53 } nodetree;
54
54
55 typedef struct {
55 typedef struct {
56 PyObject_HEAD /* ; */
56 PyObject_HEAD /* ; */
57 nodetree nt;
57 nodetree nt;
58 } nodetreeObject;
58 } nodetreeObject;
59
59
60 /*
60 /*
61 * This class has two behaviors.
61 * This class has two behaviors.
62 *
62 *
63 * When used in a list-like way (with integer keys), we decode an
63 * When used in a list-like way (with integer keys), we decode an
64 * entry in a RevlogNG index file on demand. Our last entry is a
64 * entry in a RevlogNG index file on demand. Our last entry is a
65 * sentinel, always a nullid. We have limited support for
65 * sentinel, always a nullid. We have limited support for
66 * integer-keyed insert and delete, only at elements right before the
66 * integer-keyed insert and delete, only at elements right before the
67 * sentinel.
67 * sentinel.
68 *
68 *
69 * With string keys, we lazily perform a reverse mapping from node to
69 * With string keys, we lazily perform a reverse mapping from node to
70 * rev, using a base-16 trie.
70 * rev, using a base-16 trie.
71 */
71 */
72 struct indexObjectStruct {
72 struct indexObjectStruct {
73 PyObject_HEAD
73 PyObject_HEAD
74 /* Type-specific fields go here. */
74 /* Type-specific fields go here. */
75 PyObject *data; /* raw bytes of index */
75 PyObject *data; /* raw bytes of index */
76 Py_buffer buf; /* buffer of data */
76 Py_buffer buf; /* buffer of data */
77 PyObject **cache; /* cached tuples */
77 PyObject **cache; /* cached tuples */
78 const char **offsets; /* populated on demand */
78 const char **offsets; /* populated on demand */
79 Py_ssize_t raw_length; /* original number of elements */
79 Py_ssize_t raw_length; /* original number of elements */
80 Py_ssize_t length; /* current number of elements */
80 Py_ssize_t length; /* current number of elements */
81 PyObject *added; /* populated on demand */
81 PyObject *added; /* populated on demand */
82 PyObject *headrevs; /* cache, invalidated on changes */
82 PyObject *headrevs; /* cache, invalidated on changes */
83 PyObject *filteredrevs; /* filtered revs set */
83 PyObject *filteredrevs; /* filtered revs set */
84 nodetree nt; /* base-16 trie */
84 nodetree nt; /* base-16 trie */
85 int ntinitialized; /* 0 or 1 */
85 int ntinitialized; /* 0 or 1 */
86 int ntrev; /* last rev scanned */
86 int ntrev; /* last rev scanned */
87 int ntlookups; /* # lookups */
87 int ntlookups; /* # lookups */
88 int ntmisses; /* # lookups that miss the cache */
88 int ntmisses; /* # lookups that miss the cache */
89 int inlined;
89 int inlined;
90 };
90 };
91
91
92 static Py_ssize_t index_length(const indexObject *self)
92 static Py_ssize_t index_length(const indexObject *self)
93 {
93 {
94 if (self->added == NULL)
94 if (self->added == NULL)
95 return self->length;
95 return self->length;
96 return self->length + PyList_GET_SIZE(self->added);
96 return self->length + PyList_GET_SIZE(self->added);
97 }
97 }
98
98
99 static PyObject *nullentry = NULL;
99 static PyObject *nullentry = NULL;
100 static const char nullid[20] = {0};
100 static const char nullid[20] = {0};
101 static const Py_ssize_t nullrev = -1;
101 static const Py_ssize_t nullrev = -1;
102
102
103 static Py_ssize_t inline_scan(indexObject *self, const char **offsets);
103 static Py_ssize_t inline_scan(indexObject *self, const char **offsets);
104
104
105 #if LONG_MAX == 0x7fffffffL
105 #if LONG_MAX == 0x7fffffffL
106 static const char *const tuple_format = PY23("Kiiiiiis#", "Kiiiiiiy#");
106 static const char *const tuple_format = PY23("Kiiiiiis#", "Kiiiiiiy#");
107 #else
107 #else
108 static const char *const tuple_format = PY23("kiiiiiis#", "kiiiiiiy#");
108 static const char *const tuple_format = PY23("kiiiiiis#", "kiiiiiiy#");
109 #endif
109 #endif
110
110
111 /* A RevlogNG v1 index entry is 64 bytes long. */
111 /* A RevlogNG v1 index entry is 64 bytes long. */
112 static const long v1_hdrsize = 64;
112 static const long v1_hdrsize = 64;
113
113
114 static void raise_revlog_error(void)
114 static void raise_revlog_error(void)
115 {
115 {
116 PyObject *mod = NULL, *dict = NULL, *errclass = NULL;
116 PyObject *mod = NULL, *dict = NULL, *errclass = NULL;
117
117
118 mod = PyImport_ImportModule("mercurial.error");
118 mod = PyImport_ImportModule("mercurial.error");
119 if (mod == NULL) {
119 if (mod == NULL) {
120 goto cleanup;
120 goto cleanup;
121 }
121 }
122
122
123 dict = PyModule_GetDict(mod);
123 dict = PyModule_GetDict(mod);
124 if (dict == NULL) {
124 if (dict == NULL) {
125 goto cleanup;
125 goto cleanup;
126 }
126 }
127 Py_INCREF(dict);
127 Py_INCREF(dict);
128
128
129 errclass = PyDict_GetItemString(dict, "RevlogError");
129 errclass = PyDict_GetItemString(dict, "RevlogError");
130 if (errclass == NULL) {
130 if (errclass == NULL) {
131 PyErr_SetString(PyExc_SystemError,
131 PyErr_SetString(PyExc_SystemError,
132 "could not find RevlogError");
132 "could not find RevlogError");
133 goto cleanup;
133 goto cleanup;
134 }
134 }
135
135
136 /* value of exception is ignored by callers */
136 /* value of exception is ignored by callers */
137 PyErr_SetString(errclass, "RevlogError");
137 PyErr_SetString(errclass, "RevlogError");
138
138
139 cleanup:
139 cleanup:
140 Py_XDECREF(dict);
140 Py_XDECREF(dict);
141 Py_XDECREF(mod);
141 Py_XDECREF(mod);
142 }
142 }
143
143
144 /*
144 /*
145 * Return a pointer to the beginning of a RevlogNG record.
145 * Return a pointer to the beginning of a RevlogNG record.
146 */
146 */
147 static const char *index_deref(indexObject *self, Py_ssize_t pos)
147 static const char *index_deref(indexObject *self, Py_ssize_t pos)
148 {
148 {
149 if (self->inlined && pos > 0) {
149 if (self->inlined && pos > 0) {
150 if (self->offsets == NULL) {
150 if (self->offsets == NULL) {
151 self->offsets = PyMem_Malloc(self->raw_length *
151 self->offsets = PyMem_Malloc(self->raw_length *
152 sizeof(*self->offsets));
152 sizeof(*self->offsets));
153 if (self->offsets == NULL)
153 if (self->offsets == NULL)
154 return (const char *)PyErr_NoMemory();
154 return (const char *)PyErr_NoMemory();
155 inline_scan(self, self->offsets);
155 inline_scan(self, self->offsets);
156 }
156 }
157 return self->offsets[pos];
157 return self->offsets[pos];
158 }
158 }
159
159
160 return (const char *)(self->buf.buf) + pos * v1_hdrsize;
160 return (const char *)(self->buf.buf) + pos * v1_hdrsize;
161 }
161 }
162
162
163 /*
163 /*
164 * Get parents of the given rev.
164 * Get parents of the given rev.
165 *
165 *
166 * The specified rev must be valid and must not be nullrev. A returned
166 * The specified rev must be valid and must not be nullrev. A returned
167 * parent revision may be nullrev, but is guaranteed to be in valid range.
167 * parent revision may be nullrev, but is guaranteed to be in valid range.
168 */
168 */
169 static inline int index_get_parents(indexObject *self, Py_ssize_t rev, int *ps,
169 static inline int index_get_parents(indexObject *self, Py_ssize_t rev, int *ps,
170 int maxrev)
170 int maxrev)
171 {
171 {
172 if (rev >= self->length) {
172 if (rev >= self->length) {
173 long tmp;
173 long tmp;
174 PyObject *tuple =
174 PyObject *tuple =
175 PyList_GET_ITEM(self->added, rev - self->length);
175 PyList_GET_ITEM(self->added, rev - self->length);
176 if (!pylong_to_long(PyTuple_GET_ITEM(tuple, 5), &tmp)) {
176 if (!pylong_to_long(PyTuple_GET_ITEM(tuple, 5), &tmp)) {
177 return -1;
177 return -1;
178 }
178 }
179 ps[0] = (int)tmp;
179 ps[0] = (int)tmp;
180 if (!pylong_to_long(PyTuple_GET_ITEM(tuple, 6), &tmp)) {
180 if (!pylong_to_long(PyTuple_GET_ITEM(tuple, 6), &tmp)) {
181 return -1;
181 return -1;
182 }
182 }
183 ps[1] = (int)tmp;
183 ps[1] = (int)tmp;
184 } else {
184 } else {
185 const char *data = index_deref(self, rev);
185 const char *data = index_deref(self, rev);
186 ps[0] = getbe32(data + 24);
186 ps[0] = getbe32(data + 24);
187 ps[1] = getbe32(data + 28);
187 ps[1] = getbe32(data + 28);
188 }
188 }
189 /* If index file is corrupted, ps[] may point to invalid revisions. So
189 /* If index file is corrupted, ps[] may point to invalid revisions. So
190 * there is a risk of buffer overflow to trust them unconditionally. */
190 * there is a risk of buffer overflow to trust them unconditionally. */
191 if (ps[0] < -1 || ps[0] > maxrev || ps[1] < -1 || ps[1] > maxrev) {
191 if (ps[0] < -1 || ps[0] > maxrev || ps[1] < -1 || ps[1] > maxrev) {
192 PyErr_SetString(PyExc_ValueError, "parent out of range");
192 PyErr_SetString(PyExc_ValueError, "parent out of range");
193 return -1;
193 return -1;
194 }
194 }
195 return 0;
195 return 0;
196 }
196 }
197
197
198 /*
198 /*
199 * Get parents of the given rev.
199 * Get parents of the given rev.
200 *
200 *
201 * If the specified rev is out of range, IndexError will be raised. If the
201 * If the specified rev is out of range, IndexError will be raised. If the
202 * revlog entry is corrupted, ValueError may be raised.
202 * revlog entry is corrupted, ValueError may be raised.
203 *
203 *
204 * Returns 0 on success or -1 on failure.
204 * Returns 0 on success or -1 on failure.
205 */
205 */
206 int HgRevlogIndex_GetParents(PyObject *op, int rev, int *ps)
206 int HgRevlogIndex_GetParents(PyObject *op, int rev, int *ps)
207 {
207 {
208 int tiprev;
208 int tiprev;
209 if (!op || !HgRevlogIndex_Check(op) || !ps) {
209 if (!op || !HgRevlogIndex_Check(op) || !ps) {
210 PyErr_BadInternalCall();
210 PyErr_BadInternalCall();
211 return -1;
211 return -1;
212 }
212 }
213 tiprev = (int)index_length((indexObject *)op) - 1;
213 tiprev = (int)index_length((indexObject *)op) - 1;
214 if (rev < -1 || rev > tiprev) {
214 if (rev < -1 || rev > tiprev) {
215 PyErr_Format(PyExc_IndexError, "rev out of range: %d", rev);
215 PyErr_Format(PyExc_IndexError, "rev out of range: %d", rev);
216 return -1;
216 return -1;
217 } else if (rev == -1) {
217 } else if (rev == -1) {
218 ps[0] = ps[1] = -1;
218 ps[0] = ps[1] = -1;
219 return 0;
219 return 0;
220 } else {
220 } else {
221 return index_get_parents((indexObject *)op, rev, ps, tiprev);
221 return index_get_parents((indexObject *)op, rev, ps, tiprev);
222 }
222 }
223 }
223 }
224
224
225 static inline int64_t index_get_start(indexObject *self, Py_ssize_t rev)
225 static inline int64_t index_get_start(indexObject *self, Py_ssize_t rev)
226 {
226 {
227 uint64_t offset;
227 uint64_t offset;
228 if (rev == nullrev) {
228 if (rev == nullrev) {
229 return 0;
229 return 0;
230 }
230 }
231 if (rev >= self->length) {
231 if (rev >= self->length) {
232 PyObject *tuple;
232 PyObject *tuple;
233 PyObject *pylong;
233 PyObject *pylong;
234 PY_LONG_LONG tmp;
234 PY_LONG_LONG tmp;
235 tuple = PyList_GET_ITEM(self->added, rev - self->length);
235 tuple = PyList_GET_ITEM(self->added, rev - self->length);
236 pylong = PyTuple_GET_ITEM(tuple, 0);
236 pylong = PyTuple_GET_ITEM(tuple, 0);
237 tmp = PyLong_AsLongLong(pylong);
237 tmp = PyLong_AsLongLong(pylong);
238 if (tmp == -1 && PyErr_Occurred()) {
238 if (tmp == -1 && PyErr_Occurred()) {
239 return -1;
239 return -1;
240 }
240 }
241 if (tmp < 0) {
241 if (tmp < 0) {
242 PyErr_Format(PyExc_OverflowError,
242 PyErr_Format(PyExc_OverflowError,
243 "revlog entry size out of bound (%lld)",
243 "revlog entry size out of bound (%lld)",
244 (long long)tmp);
244 (long long)tmp);
245 return -1;
245 return -1;
246 }
246 }
247 offset = (uint64_t)tmp;
247 offset = (uint64_t)tmp;
248 } else {
248 } else {
249 const char *data = index_deref(self, rev);
249 const char *data = index_deref(self, rev);
250 offset = getbe32(data + 4);
250 offset = getbe32(data + 4);
251 if (rev == 0) {
251 if (rev == 0) {
252 /* mask out version number for the first entry */
252 /* mask out version number for the first entry */
253 offset &= 0xFFFF;
253 offset &= 0xFFFF;
254 } else {
254 } else {
255 uint32_t offset_high = getbe32(data);
255 uint32_t offset_high = getbe32(data);
256 offset |= ((uint64_t)offset_high) << 32;
256 offset |= ((uint64_t)offset_high) << 32;
257 }
257 }
258 }
258 }
259 return (int64_t)(offset >> 16);
259 return (int64_t)(offset >> 16);
260 }
260 }
261
261
262 static inline int index_get_length(indexObject *self, Py_ssize_t rev)
262 static inline int index_get_length(indexObject *self, Py_ssize_t rev)
263 {
263 {
264 if (rev == nullrev) {
264 if (rev == nullrev) {
265 return 0;
265 return 0;
266 }
266 }
267 if (rev >= self->length) {
267 if (rev >= self->length) {
268 PyObject *tuple;
268 PyObject *tuple;
269 PyObject *pylong;
269 PyObject *pylong;
270 long ret;
270 long ret;
271 tuple = PyList_GET_ITEM(self->added, rev - self->length);
271 tuple = PyList_GET_ITEM(self->added, rev - self->length);
272 pylong = PyTuple_GET_ITEM(tuple, 1);
272 pylong = PyTuple_GET_ITEM(tuple, 1);
273 ret = PyInt_AsLong(pylong);
273 ret = PyInt_AsLong(pylong);
274 if (ret == -1 && PyErr_Occurred()) {
274 if (ret == -1 && PyErr_Occurred()) {
275 return -1;
275 return -1;
276 }
276 }
277 if (ret < 0 || ret > (long)INT_MAX) {
277 if (ret < 0 || ret > (long)INT_MAX) {
278 PyErr_Format(PyExc_OverflowError,
278 PyErr_Format(PyExc_OverflowError,
279 "revlog entry size out of bound (%ld)",
279 "revlog entry size out of bound (%ld)",
280 ret);
280 ret);
281 return -1;
281 return -1;
282 }
282 }
283 return (int)ret;
283 return (int)ret;
284 } else {
284 } else {
285 const char *data = index_deref(self, rev);
285 const char *data = index_deref(self, rev);
286 int tmp = (int)getbe32(data + 8);
286 int tmp = (int)getbe32(data + 8);
287 if (tmp < 0) {
287 if (tmp < 0) {
288 PyErr_Format(PyExc_OverflowError,
288 PyErr_Format(PyExc_OverflowError,
289 "revlog entry size out of bound (%d)",
289 "revlog entry size out of bound (%d)",
290 tmp);
290 tmp);
291 return -1;
291 return -1;
292 }
292 }
293 return tmp;
293 return tmp;
294 }
294 }
295 }
295 }
296
296
297 /*
297 /*
298 * RevlogNG format (all in big endian, data may be inlined):
298 * RevlogNG format (all in big endian, data may be inlined):
299 * 6 bytes: offset
299 * 6 bytes: offset
300 * 2 bytes: flags
300 * 2 bytes: flags
301 * 4 bytes: compressed length
301 * 4 bytes: compressed length
302 * 4 bytes: uncompressed length
302 * 4 bytes: uncompressed length
303 * 4 bytes: base revision
303 * 4 bytes: base revision
304 * 4 bytes: link revision
304 * 4 bytes: link revision
305 * 4 bytes: parent 1 revision
305 * 4 bytes: parent 1 revision
306 * 4 bytes: parent 2 revision
306 * 4 bytes: parent 2 revision
307 * 32 bytes: nodeid (only 20 bytes used)
307 * 32 bytes: nodeid (only 20 bytes used)
308 */
308 */
309 static PyObject *index_get(indexObject *self, Py_ssize_t pos)
309 static PyObject *index_get(indexObject *self, Py_ssize_t pos)
310 {
310 {
311 uint64_t offset_flags;
311 uint64_t offset_flags;
312 int comp_len, uncomp_len, base_rev, link_rev, parent_1, parent_2;
312 int comp_len, uncomp_len, base_rev, link_rev, parent_1, parent_2;
313 const char *c_node_id;
313 const char *c_node_id;
314 const char *data;
314 const char *data;
315 Py_ssize_t length = index_length(self);
315 Py_ssize_t length = index_length(self);
316 PyObject *entry;
316 PyObject *entry;
317
317
318 if (pos == nullrev) {
318 if (pos == nullrev) {
319 Py_INCREF(nullentry);
319 Py_INCREF(nullentry);
320 return nullentry;
320 return nullentry;
321 }
321 }
322
322
323 if (pos < 0 || pos >= length) {
323 if (pos < 0 || pos >= length) {
324 PyErr_SetString(PyExc_IndexError, "revlog index out of range");
324 PyErr_SetString(PyExc_IndexError, "revlog index out of range");
325 return NULL;
325 return NULL;
326 }
326 }
327
327
328 if (pos >= self->length) {
328 if (pos >= self->length) {
329 PyObject *obj;
329 PyObject *obj;
330 obj = PyList_GET_ITEM(self->added, pos - self->length);
330 obj = PyList_GET_ITEM(self->added, pos - self->length);
331 Py_INCREF(obj);
331 Py_INCREF(obj);
332 return obj;
332 return obj;
333 }
333 }
334
334
335 if (self->cache) {
335 if (self->cache) {
336 if (self->cache[pos]) {
336 if (self->cache[pos]) {
337 Py_INCREF(self->cache[pos]);
337 Py_INCREF(self->cache[pos]);
338 return self->cache[pos];
338 return self->cache[pos];
339 }
339 }
340 } else {
340 } else {
341 self->cache = calloc(self->raw_length, sizeof(PyObject *));
341 self->cache = calloc(self->raw_length, sizeof(PyObject *));
342 if (self->cache == NULL)
342 if (self->cache == NULL)
343 return PyErr_NoMemory();
343 return PyErr_NoMemory();
344 }
344 }
345
345
346 data = index_deref(self, pos);
346 data = index_deref(self, pos);
347 if (data == NULL)
347 if (data == NULL)
348 return NULL;
348 return NULL;
349
349
350 offset_flags = getbe32(data + 4);
350 offset_flags = getbe32(data + 4);
351 if (pos == 0) /* mask out version number for the first entry */
351 if (pos == 0) /* mask out version number for the first entry */
352 offset_flags &= 0xFFFF;
352 offset_flags &= 0xFFFF;
353 else {
353 else {
354 uint32_t offset_high = getbe32(data);
354 uint32_t offset_high = getbe32(data);
355 offset_flags |= ((uint64_t)offset_high) << 32;
355 offset_flags |= ((uint64_t)offset_high) << 32;
356 }
356 }
357
357
358 comp_len = getbe32(data + 8);
358 comp_len = getbe32(data + 8);
359 uncomp_len = getbe32(data + 12);
359 uncomp_len = getbe32(data + 12);
360 base_rev = getbe32(data + 16);
360 base_rev = getbe32(data + 16);
361 link_rev = getbe32(data + 20);
361 link_rev = getbe32(data + 20);
362 parent_1 = getbe32(data + 24);
362 parent_1 = getbe32(data + 24);
363 parent_2 = getbe32(data + 28);
363 parent_2 = getbe32(data + 28);
364 c_node_id = data + 32;
364 c_node_id = data + 32;
365
365
366 entry = Py_BuildValue(tuple_format, offset_flags, comp_len, uncomp_len,
366 entry = Py_BuildValue(tuple_format, offset_flags, comp_len, uncomp_len,
367 base_rev, link_rev, parent_1, parent_2, c_node_id,
367 base_rev, link_rev, parent_1, parent_2, c_node_id,
368 20);
368 20);
369
369
370 if (entry) {
370 if (entry) {
371 PyObject_GC_UnTrack(entry);
371 PyObject_GC_UnTrack(entry);
372 Py_INCREF(entry);
372 Py_INCREF(entry);
373 }
373 }
374
374
375 self->cache[pos] = entry;
375 self->cache[pos] = entry;
376
376
377 return entry;
377 return entry;
378 }
378 }
379
379
380 /*
380 /*
381 * Return the 20-byte SHA of the node corresponding to the given rev.
381 * Return the 20-byte SHA of the node corresponding to the given rev.
382 */
382 */
383 static const char *index_node(indexObject *self, Py_ssize_t pos)
383 static const char *index_node(indexObject *self, Py_ssize_t pos)
384 {
384 {
385 Py_ssize_t length = index_length(self);
385 Py_ssize_t length = index_length(self);
386 const char *data;
386 const char *data;
387
387
388 if (pos == nullrev)
388 if (pos == nullrev)
389 return nullid;
389 return nullid;
390
390
391 if (pos >= length)
391 if (pos >= length)
392 return NULL;
392 return NULL;
393
393
394 if (pos >= self->length) {
394 if (pos >= self->length) {
395 PyObject *tuple, *str;
395 PyObject *tuple, *str;
396 tuple = PyList_GET_ITEM(self->added, pos - self->length);
396 tuple = PyList_GET_ITEM(self->added, pos - self->length);
397 str = PyTuple_GetItem(tuple, 7);
397 str = PyTuple_GetItem(tuple, 7);
398 return str ? PyBytes_AS_STRING(str) : NULL;
398 return str ? PyBytes_AS_STRING(str) : NULL;
399 }
399 }
400
400
401 data = index_deref(self, pos);
401 data = index_deref(self, pos);
402 return data ? data + 32 : NULL;
402 return data ? data + 32 : NULL;
403 }
403 }
404
404
405 /*
405 /*
406 * Return the 20-byte SHA of the node corresponding to the given rev. The
406 * Return the 20-byte SHA of the node corresponding to the given rev. The
407 * rev is assumed to be existing. If not, an exception is set.
407 * rev is assumed to be existing. If not, an exception is set.
408 */
408 */
409 static const char *index_node_existing(indexObject *self, Py_ssize_t pos)
409 static const char *index_node_existing(indexObject *self, Py_ssize_t pos)
410 {
410 {
411 const char *node = index_node(self, pos);
411 const char *node = index_node(self, pos);
412 if (node == NULL) {
412 if (node == NULL) {
413 PyErr_Format(PyExc_IndexError, "could not access rev %d",
413 PyErr_Format(PyExc_IndexError, "could not access rev %d",
414 (int)pos);
414 (int)pos);
415 }
415 }
416 return node;
416 return node;
417 }
417 }
418
418
419 static int nt_insert(nodetree *self, const char *node, int rev);
419 static int nt_insert(nodetree *self, const char *node, int rev);
420
420
421 static int node_check(PyObject *obj, char **node)
421 static int node_check(PyObject *obj, char **node)
422 {
422 {
423 Py_ssize_t nodelen;
423 Py_ssize_t nodelen;
424 if (PyBytes_AsStringAndSize(obj, node, &nodelen) == -1)
424 if (PyBytes_AsStringAndSize(obj, node, &nodelen) == -1)
425 return -1;
425 return -1;
426 if (nodelen == 20)
426 if (nodelen == 20)
427 return 0;
427 return 0;
428 PyErr_SetString(PyExc_ValueError, "20-byte hash required");
428 PyErr_SetString(PyExc_ValueError, "20-byte hash required");
429 return -1;
429 return -1;
430 }
430 }
431
431
432 static PyObject *index_append(indexObject *self, PyObject *obj)
432 static PyObject *index_append(indexObject *self, PyObject *obj)
433 {
433 {
434 char *node;
434 char *node;
435 Py_ssize_t len;
435 Py_ssize_t len;
436
436
437 if (!PyTuple_Check(obj) || PyTuple_GET_SIZE(obj) != 8) {
437 if (!PyTuple_Check(obj) || PyTuple_GET_SIZE(obj) != 8) {
438 PyErr_SetString(PyExc_TypeError, "8-tuple required");
438 PyErr_SetString(PyExc_TypeError, "8-tuple required");
439 return NULL;
439 return NULL;
440 }
440 }
441
441
442 if (node_check(PyTuple_GET_ITEM(obj, 7), &node) == -1)
442 if (node_check(PyTuple_GET_ITEM(obj, 7), &node) == -1)
443 return NULL;
443 return NULL;
444
444
445 len = index_length(self);
445 len = index_length(self);
446
446
447 if (self->added == NULL) {
447 if (self->added == NULL) {
448 self->added = PyList_New(0);
448 self->added = PyList_New(0);
449 if (self->added == NULL)
449 if (self->added == NULL)
450 return NULL;
450 return NULL;
451 }
451 }
452
452
453 if (PyList_Append(self->added, obj) == -1)
453 if (PyList_Append(self->added, obj) == -1)
454 return NULL;
454 return NULL;
455
455
456 if (self->ntinitialized)
456 if (self->ntinitialized)
457 nt_insert(&self->nt, node, (int)len);
457 nt_insert(&self->nt, node, (int)len);
458
458
459 Py_CLEAR(self->headrevs);
459 Py_CLEAR(self->headrevs);
460 Py_RETURN_NONE;
460 Py_RETURN_NONE;
461 }
461 }
462
462
463 static PyObject *index_stats(indexObject *self)
463 static PyObject *index_stats(indexObject *self)
464 {
464 {
465 PyObject *obj = PyDict_New();
465 PyObject *obj = PyDict_New();
466 PyObject *s = NULL;
466 PyObject *s = NULL;
467 PyObject *t = NULL;
467 PyObject *t = NULL;
468
468
469 if (obj == NULL)
469 if (obj == NULL)
470 return NULL;
470 return NULL;
471
471
472 #define istat(__n, __d) \
472 #define istat(__n, __d) \
473 do { \
473 do { \
474 s = PyBytes_FromString(__d); \
474 s = PyBytes_FromString(__d); \
475 t = PyInt_FromSsize_t(self->__n); \
475 t = PyInt_FromSsize_t(self->__n); \
476 if (!s || !t) \
476 if (!s || !t) \
477 goto bail; \
477 goto bail; \
478 if (PyDict_SetItem(obj, s, t) == -1) \
478 if (PyDict_SetItem(obj, s, t) == -1) \
479 goto bail; \
479 goto bail; \
480 Py_CLEAR(s); \
480 Py_CLEAR(s); \
481 Py_CLEAR(t); \
481 Py_CLEAR(t); \
482 } while (0)
482 } while (0)
483
483
484 if (self->added) {
484 if (self->added) {
485 Py_ssize_t len = PyList_GET_SIZE(self->added);
485 Py_ssize_t len = PyList_GET_SIZE(self->added);
486 s = PyBytes_FromString("index entries added");
486 s = PyBytes_FromString("index entries added");
487 t = PyInt_FromSsize_t(len);
487 t = PyInt_FromSsize_t(len);
488 if (!s || !t)
488 if (!s || !t)
489 goto bail;
489 goto bail;
490 if (PyDict_SetItem(obj, s, t) == -1)
490 if (PyDict_SetItem(obj, s, t) == -1)
491 goto bail;
491 goto bail;
492 Py_CLEAR(s);
492 Py_CLEAR(s);
493 Py_CLEAR(t);
493 Py_CLEAR(t);
494 }
494 }
495
495
496 if (self->raw_length != self->length)
496 if (self->raw_length != self->length)
497 istat(raw_length, "revs on disk");
497 istat(raw_length, "revs on disk");
498 istat(length, "revs in memory");
498 istat(length, "revs in memory");
499 istat(ntlookups, "node trie lookups");
499 istat(ntlookups, "node trie lookups");
500 istat(ntmisses, "node trie misses");
500 istat(ntmisses, "node trie misses");
501 istat(ntrev, "node trie last rev scanned");
501 istat(ntrev, "node trie last rev scanned");
502 if (self->ntinitialized) {
502 if (self->ntinitialized) {
503 istat(nt.capacity, "node trie capacity");
503 istat(nt.capacity, "node trie capacity");
504 istat(nt.depth, "node trie depth");
504 istat(nt.depth, "node trie depth");
505 istat(nt.length, "node trie count");
505 istat(nt.length, "node trie count");
506 istat(nt.splits, "node trie splits");
506 istat(nt.splits, "node trie splits");
507 }
507 }
508
508
509 #undef istat
509 #undef istat
510
510
511 return obj;
511 return obj;
512
512
513 bail:
513 bail:
514 Py_XDECREF(obj);
514 Py_XDECREF(obj);
515 Py_XDECREF(s);
515 Py_XDECREF(s);
516 Py_XDECREF(t);
516 Py_XDECREF(t);
517 return NULL;
517 return NULL;
518 }
518 }
519
519
520 /*
520 /*
521 * When we cache a list, we want to be sure the caller can't mutate
521 * When we cache a list, we want to be sure the caller can't mutate
522 * the cached copy.
522 * the cached copy.
523 */
523 */
524 static PyObject *list_copy(PyObject *list)
524 static PyObject *list_copy(PyObject *list)
525 {
525 {
526 Py_ssize_t len = PyList_GET_SIZE(list);
526 Py_ssize_t len = PyList_GET_SIZE(list);
527 PyObject *newlist = PyList_New(len);
527 PyObject *newlist = PyList_New(len);
528 Py_ssize_t i;
528 Py_ssize_t i;
529
529
530 if (newlist == NULL)
530 if (newlist == NULL)
531 return NULL;
531 return NULL;
532
532
533 for (i = 0; i < len; i++) {
533 for (i = 0; i < len; i++) {
534 PyObject *obj = PyList_GET_ITEM(list, i);
534 PyObject *obj = PyList_GET_ITEM(list, i);
535 Py_INCREF(obj);
535 Py_INCREF(obj);
536 PyList_SET_ITEM(newlist, i, obj);
536 PyList_SET_ITEM(newlist, i, obj);
537 }
537 }
538
538
539 return newlist;
539 return newlist;
540 }
540 }
541
541
542 static int check_filter(PyObject *filter, Py_ssize_t arg)
542 static int check_filter(PyObject *filter, Py_ssize_t arg)
543 {
543 {
544 if (filter) {
544 if (filter) {
545 PyObject *arglist, *result;
545 PyObject *arglist, *result;
546 int isfiltered;
546 int isfiltered;
547
547
548 arglist = Py_BuildValue("(n)", arg);
548 arglist = Py_BuildValue("(n)", arg);
549 if (!arglist) {
549 if (!arglist) {
550 return -1;
550 return -1;
551 }
551 }
552
552
553 result = PyEval_CallObject(filter, arglist);
553 result = PyEval_CallObject(filter, arglist);
554 Py_DECREF(arglist);
554 Py_DECREF(arglist);
555 if (!result) {
555 if (!result) {
556 return -1;
556 return -1;
557 }
557 }
558
558
559 /* PyObject_IsTrue returns 1 if true, 0 if false, -1 if error,
559 /* PyObject_IsTrue returns 1 if true, 0 if false, -1 if error,
560 * same as this function, so we can just return it directly.*/
560 * same as this function, so we can just return it directly.*/
561 isfiltered = PyObject_IsTrue(result);
561 isfiltered = PyObject_IsTrue(result);
562 Py_DECREF(result);
562 Py_DECREF(result);
563 return isfiltered;
563 return isfiltered;
564 } else {
564 } else {
565 return 0;
565 return 0;
566 }
566 }
567 }
567 }
568
568
569 static Py_ssize_t add_roots_get_min(indexObject *self, PyObject *list,
569 static Py_ssize_t add_roots_get_min(indexObject *self, PyObject *list,
570 Py_ssize_t marker, char *phases)
570 Py_ssize_t marker, char *phases)
571 {
571 {
572 PyObject *iter = NULL;
572 PyObject *iter = NULL;
573 PyObject *iter_item = NULL;
573 PyObject *iter_item = NULL;
574 Py_ssize_t min_idx = index_length(self) + 2;
574 Py_ssize_t min_idx = index_length(self) + 2;
575 long iter_item_long;
575 long iter_item_long;
576
576
577 if (PyList_GET_SIZE(list) != 0) {
577 if (PyList_GET_SIZE(list) != 0) {
578 iter = PyObject_GetIter(list);
578 iter = PyObject_GetIter(list);
579 if (iter == NULL)
579 if (iter == NULL)
580 return -2;
580 return -2;
581 while ((iter_item = PyIter_Next(iter))) {
581 while ((iter_item = PyIter_Next(iter))) {
582 if (!pylong_to_long(iter_item, &iter_item_long)) {
582 if (!pylong_to_long(iter_item, &iter_item_long)) {
583 Py_DECREF(iter_item);
583 Py_DECREF(iter_item);
584 return -2;
584 return -2;
585 }
585 }
586 Py_DECREF(iter_item);
586 Py_DECREF(iter_item);
587 if (iter_item_long < min_idx)
587 if (iter_item_long < min_idx)
588 min_idx = iter_item_long;
588 min_idx = iter_item_long;
589 phases[iter_item_long] = (char)marker;
589 phases[iter_item_long] = (char)marker;
590 }
590 }
591 Py_DECREF(iter);
591 Py_DECREF(iter);
592 }
592 }
593
593
594 return min_idx;
594 return min_idx;
595 }
595 }
596
596
597 static inline void set_phase_from_parents(char *phases, int parent_1,
597 static inline void set_phase_from_parents(char *phases, int parent_1,
598 int parent_2, Py_ssize_t i)
598 int parent_2, Py_ssize_t i)
599 {
599 {
600 if (parent_1 >= 0 && phases[parent_1] > phases[i])
600 if (parent_1 >= 0 && phases[parent_1] > phases[i])
601 phases[i] = phases[parent_1];
601 phases[i] = phases[parent_1];
602 if (parent_2 >= 0 && phases[parent_2] > phases[i])
602 if (parent_2 >= 0 && phases[parent_2] > phases[i])
603 phases[i] = phases[parent_2];
603 phases[i] = phases[parent_2];
604 }
604 }
605
605
606 static PyObject *reachableroots2(indexObject *self, PyObject *args)
606 static PyObject *reachableroots2(indexObject *self, PyObject *args)
607 {
607 {
608
608
609 /* Input */
609 /* Input */
610 long minroot;
610 long minroot;
611 PyObject *includepatharg = NULL;
611 PyObject *includepatharg = NULL;
612 int includepath = 0;
612 int includepath = 0;
613 /* heads and roots are lists */
613 /* heads and roots are lists */
614 PyObject *heads = NULL;
614 PyObject *heads = NULL;
615 PyObject *roots = NULL;
615 PyObject *roots = NULL;
616 PyObject *reachable = NULL;
616 PyObject *reachable = NULL;
617
617
618 PyObject *val;
618 PyObject *val;
619 Py_ssize_t len = index_length(self);
619 Py_ssize_t len = index_length(self);
620 long revnum;
620 long revnum;
621 Py_ssize_t k;
621 Py_ssize_t k;
622 Py_ssize_t i;
622 Py_ssize_t i;
623 Py_ssize_t l;
623 Py_ssize_t l;
624 int r;
624 int r;
625 int parents[2];
625 int parents[2];
626
626
627 /* Internal data structure:
627 /* Internal data structure:
628 * tovisit: array of length len+1 (all revs + nullrev), filled upto
628 * tovisit: array of length len+1 (all revs + nullrev), filled upto
629 * lentovisit
629 * lentovisit
630 *
630 *
631 * revstates: array of length len+1 (all revs + nullrev) */
631 * revstates: array of length len+1 (all revs + nullrev) */
632 int *tovisit = NULL;
632 int *tovisit = NULL;
633 long lentovisit = 0;
633 long lentovisit = 0;
634 enum { RS_SEEN = 1, RS_ROOT = 2, RS_REACHABLE = 4 };
634 enum { RS_SEEN = 1, RS_ROOT = 2, RS_REACHABLE = 4 };
635 char *revstates = NULL;
635 char *revstates = NULL;
636
636
637 /* Get arguments */
637 /* Get arguments */
638 if (!PyArg_ParseTuple(args, "lO!O!O!", &minroot, &PyList_Type, &heads,
638 if (!PyArg_ParseTuple(args, "lO!O!O!", &minroot, &PyList_Type, &heads,
639 &PyList_Type, &roots, &PyBool_Type,
639 &PyList_Type, &roots, &PyBool_Type,
640 &includepatharg))
640 &includepatharg))
641 goto bail;
641 goto bail;
642
642
643 if (includepatharg == Py_True)
643 if (includepatharg == Py_True)
644 includepath = 1;
644 includepath = 1;
645
645
646 /* Initialize return set */
646 /* Initialize return set */
647 reachable = PyList_New(0);
647 reachable = PyList_New(0);
648 if (reachable == NULL)
648 if (reachable == NULL)
649 goto bail;
649 goto bail;
650
650
651 /* Initialize internal datastructures */
651 /* Initialize internal datastructures */
652 tovisit = (int *)malloc((len + 1) * sizeof(int));
652 tovisit = (int *)malloc((len + 1) * sizeof(int));
653 if (tovisit == NULL) {
653 if (tovisit == NULL) {
654 PyErr_NoMemory();
654 PyErr_NoMemory();
655 goto bail;
655 goto bail;
656 }
656 }
657
657
658 revstates = (char *)calloc(len + 1, 1);
658 revstates = (char *)calloc(len + 1, 1);
659 if (revstates == NULL) {
659 if (revstates == NULL) {
660 PyErr_NoMemory();
660 PyErr_NoMemory();
661 goto bail;
661 goto bail;
662 }
662 }
663
663
664 l = PyList_GET_SIZE(roots);
664 l = PyList_GET_SIZE(roots);
665 for (i = 0; i < l; i++) {
665 for (i = 0; i < l; i++) {
666 revnum = PyInt_AsLong(PyList_GET_ITEM(roots, i));
666 revnum = PyInt_AsLong(PyList_GET_ITEM(roots, i));
667 if (revnum == -1 && PyErr_Occurred())
667 if (revnum == -1 && PyErr_Occurred())
668 goto bail;
668 goto bail;
669 /* If root is out of range, e.g. wdir(), it must be unreachable
669 /* If root is out of range, e.g. wdir(), it must be unreachable
670 * from heads. So we can just ignore it. */
670 * from heads. So we can just ignore it. */
671 if (revnum + 1 < 0 || revnum + 1 >= len + 1)
671 if (revnum + 1 < 0 || revnum + 1 >= len + 1)
672 continue;
672 continue;
673 revstates[revnum + 1] |= RS_ROOT;
673 revstates[revnum + 1] |= RS_ROOT;
674 }
674 }
675
675
676 /* Populate tovisit with all the heads */
676 /* Populate tovisit with all the heads */
677 l = PyList_GET_SIZE(heads);
677 l = PyList_GET_SIZE(heads);
678 for (i = 0; i < l; i++) {
678 for (i = 0; i < l; i++) {
679 revnum = PyInt_AsLong(PyList_GET_ITEM(heads, i));
679 revnum = PyInt_AsLong(PyList_GET_ITEM(heads, i));
680 if (revnum == -1 && PyErr_Occurred())
680 if (revnum == -1 && PyErr_Occurred())
681 goto bail;
681 goto bail;
682 if (revnum + 1 < 0 || revnum + 1 >= len + 1) {
682 if (revnum + 1 < 0 || revnum + 1 >= len + 1) {
683 PyErr_SetString(PyExc_IndexError, "head out of range");
683 PyErr_SetString(PyExc_IndexError, "head out of range");
684 goto bail;
684 goto bail;
685 }
685 }
686 if (!(revstates[revnum + 1] & RS_SEEN)) {
686 if (!(revstates[revnum + 1] & RS_SEEN)) {
687 tovisit[lentovisit++] = (int)revnum;
687 tovisit[lentovisit++] = (int)revnum;
688 revstates[revnum + 1] |= RS_SEEN;
688 revstates[revnum + 1] |= RS_SEEN;
689 }
689 }
690 }
690 }
691
691
692 /* Visit the tovisit list and find the reachable roots */
692 /* Visit the tovisit list and find the reachable roots */
693 k = 0;
693 k = 0;
694 while (k < lentovisit) {
694 while (k < lentovisit) {
695 /* Add the node to reachable if it is a root*/
695 /* Add the node to reachable if it is a root*/
696 revnum = tovisit[k++];
696 revnum = tovisit[k++];
697 if (revstates[revnum + 1] & RS_ROOT) {
697 if (revstates[revnum + 1] & RS_ROOT) {
698 revstates[revnum + 1] |= RS_REACHABLE;
698 revstates[revnum + 1] |= RS_REACHABLE;
699 val = PyInt_FromLong(revnum);
699 val = PyInt_FromLong(revnum);
700 if (val == NULL)
700 if (val == NULL)
701 goto bail;
701 goto bail;
702 r = PyList_Append(reachable, val);
702 r = PyList_Append(reachable, val);
703 Py_DECREF(val);
703 Py_DECREF(val);
704 if (r < 0)
704 if (r < 0)
705 goto bail;
705 goto bail;
706 if (includepath == 0)
706 if (includepath == 0)
707 continue;
707 continue;
708 }
708 }
709
709
710 /* Add its parents to the list of nodes to visit */
710 /* Add its parents to the list of nodes to visit */
711 if (revnum == nullrev)
711 if (revnum == nullrev)
712 continue;
712 continue;
713 r = index_get_parents(self, revnum, parents, (int)len - 1);
713 r = index_get_parents(self, revnum, parents, (int)len - 1);
714 if (r < 0)
714 if (r < 0)
715 goto bail;
715 goto bail;
716 for (i = 0; i < 2; i++) {
716 for (i = 0; i < 2; i++) {
717 if (!(revstates[parents[i] + 1] & RS_SEEN) &&
717 if (!(revstates[parents[i] + 1] & RS_SEEN) &&
718 parents[i] >= minroot) {
718 parents[i] >= minroot) {
719 tovisit[lentovisit++] = parents[i];
719 tovisit[lentovisit++] = parents[i];
720 revstates[parents[i] + 1] |= RS_SEEN;
720 revstates[parents[i] + 1] |= RS_SEEN;
721 }
721 }
722 }
722 }
723 }
723 }
724
724
725 /* Find all the nodes in between the roots we found and the heads
725 /* Find all the nodes in between the roots we found and the heads
726 * and add them to the reachable set */
726 * and add them to the reachable set */
727 if (includepath == 1) {
727 if (includepath == 1) {
728 long minidx = minroot;
728 long minidx = minroot;
729 if (minidx < 0)
729 if (minidx < 0)
730 minidx = 0;
730 minidx = 0;
731 for (i = minidx; i < len; i++) {
731 for (i = minidx; i < len; i++) {
732 if (!(revstates[i + 1] & RS_SEEN))
732 if (!(revstates[i + 1] & RS_SEEN))
733 continue;
733 continue;
734 r = index_get_parents(self, i, parents, (int)len - 1);
734 r = index_get_parents(self, i, parents, (int)len - 1);
735 /* Corrupted index file, error is set from
735 /* Corrupted index file, error is set from
736 * index_get_parents */
736 * index_get_parents */
737 if (r < 0)
737 if (r < 0)
738 goto bail;
738 goto bail;
739 if (((revstates[parents[0] + 1] |
739 if (((revstates[parents[0] + 1] |
740 revstates[parents[1] + 1]) &
740 revstates[parents[1] + 1]) &
741 RS_REACHABLE) &&
741 RS_REACHABLE) &&
742 !(revstates[i + 1] & RS_REACHABLE)) {
742 !(revstates[i + 1] & RS_REACHABLE)) {
743 revstates[i + 1] |= RS_REACHABLE;
743 revstates[i + 1] |= RS_REACHABLE;
744 val = PyInt_FromSsize_t(i);
744 val = PyInt_FromSsize_t(i);
745 if (val == NULL)
745 if (val == NULL)
746 goto bail;
746 goto bail;
747 r = PyList_Append(reachable, val);
747 r = PyList_Append(reachable, val);
748 Py_DECREF(val);
748 Py_DECREF(val);
749 if (r < 0)
749 if (r < 0)
750 goto bail;
750 goto bail;
751 }
751 }
752 }
752 }
753 }
753 }
754
754
755 free(revstates);
755 free(revstates);
756 free(tovisit);
756 free(tovisit);
757 return reachable;
757 return reachable;
758 bail:
758 bail:
759 Py_XDECREF(reachable);
759 Py_XDECREF(reachable);
760 free(revstates);
760 free(revstates);
761 free(tovisit);
761 free(tovisit);
762 return NULL;
762 return NULL;
763 }
763 }
764
764
765 static PyObject *compute_phases_map_sets(indexObject *self, PyObject *args)
765 static PyObject *compute_phases_map_sets(indexObject *self, PyObject *args)
766 {
766 {
767 PyObject *roots = Py_None;
767 PyObject *roots = Py_None;
768 PyObject *ret = NULL;
768 PyObject *ret = NULL;
769 PyObject *phasessize = NULL;
769 PyObject *phasessize = NULL;
770 PyObject *phaseroots = NULL;
770 PyObject *phaseroots = NULL;
771 PyObject *phaseset = NULL;
771 PyObject *phaseset = NULL;
772 PyObject *phasessetlist = NULL;
772 PyObject *phasessetlist = NULL;
773 PyObject *rev = NULL;
773 PyObject *rev = NULL;
774 Py_ssize_t len = index_length(self);
774 Py_ssize_t len = index_length(self);
775 Py_ssize_t numphase = 0;
775 Py_ssize_t numphase = 0;
776 Py_ssize_t minrevallphases = 0;
776 Py_ssize_t minrevallphases = 0;
777 Py_ssize_t minrevphase = 0;
777 Py_ssize_t minrevphase = 0;
778 Py_ssize_t i = 0;
778 Py_ssize_t i = 0;
779 char *phases = NULL;
779 char *phases = NULL;
780 long phase;
780 long phase;
781
781
782 if (!PyArg_ParseTuple(args, "O", &roots))
782 if (!PyArg_ParseTuple(args, "O", &roots))
783 goto done;
783 goto done;
784 if (roots == NULL || !PyList_Check(roots)) {
784 if (roots == NULL || !PyList_Check(roots)) {
785 PyErr_SetString(PyExc_TypeError, "roots must be a list");
785 PyErr_SetString(PyExc_TypeError, "roots must be a list");
786 goto done;
786 goto done;
787 }
787 }
788
788
789 phases = calloc(
789 phases = calloc(
790 len, 1); /* phase per rev: {0: public, 1: draft, 2: secret} */
790 len, 1); /* phase per rev: {0: public, 1: draft, 2: secret} */
791 if (phases == NULL) {
791 if (phases == NULL) {
792 PyErr_NoMemory();
792 PyErr_NoMemory();
793 goto done;
793 goto done;
794 }
794 }
795 /* Put the phase information of all the roots in phases */
795 /* Put the phase information of all the roots in phases */
796 numphase = PyList_GET_SIZE(roots) + 1;
796 numphase = PyList_GET_SIZE(roots) + 1;
797 minrevallphases = len + 1;
797 minrevallphases = len + 1;
798 phasessetlist = PyList_New(numphase);
798 phasessetlist = PyList_New(numphase);
799 if (phasessetlist == NULL)
799 if (phasessetlist == NULL)
800 goto done;
800 goto done;
801
801
802 PyList_SET_ITEM(phasessetlist, 0, Py_None);
802 PyList_SET_ITEM(phasessetlist, 0, Py_None);
803 Py_INCREF(Py_None);
803 Py_INCREF(Py_None);
804
804
805 for (i = 0; i < numphase - 1; i++) {
805 for (i = 0; i < numphase - 1; i++) {
806 phaseroots = PyList_GET_ITEM(roots, i);
806 phaseroots = PyList_GET_ITEM(roots, i);
807 phaseset = PySet_New(NULL);
807 phaseset = PySet_New(NULL);
808 if (phaseset == NULL)
808 if (phaseset == NULL)
809 goto release;
809 goto release;
810 PyList_SET_ITEM(phasessetlist, i + 1, phaseset);
810 PyList_SET_ITEM(phasessetlist, i + 1, phaseset);
811 if (!PyList_Check(phaseroots)) {
811 if (!PyList_Check(phaseroots)) {
812 PyErr_SetString(PyExc_TypeError,
812 PyErr_SetString(PyExc_TypeError,
813 "roots item must be a list");
813 "roots item must be a list");
814 goto release;
814 goto release;
815 }
815 }
816 minrevphase =
816 minrevphase =
817 add_roots_get_min(self, phaseroots, i + 1, phases);
817 add_roots_get_min(self, phaseroots, i + 1, phases);
818 if (minrevphase == -2) /* Error from add_roots_get_min */
818 if (minrevphase == -2) /* Error from add_roots_get_min */
819 goto release;
819 goto release;
820 minrevallphases = MIN(minrevallphases, minrevphase);
820 minrevallphases = MIN(minrevallphases, minrevphase);
821 }
821 }
822 /* Propagate the phase information from the roots to the revs */
822 /* Propagate the phase information from the roots to the revs */
823 if (minrevallphases != -1) {
823 if (minrevallphases != -1) {
824 int parents[2];
824 int parents[2];
825 for (i = minrevallphases; i < len; i++) {
825 for (i = minrevallphases; i < len; i++) {
826 if (index_get_parents(self, i, parents, (int)len - 1) <
826 if (index_get_parents(self, i, parents, (int)len - 1) <
827 0)
827 0)
828 goto release;
828 goto release;
829 set_phase_from_parents(phases, parents[0], parents[1],
829 set_phase_from_parents(phases, parents[0], parents[1],
830 i);
830 i);
831 }
831 }
832 }
832 }
833 /* Transform phase list to a python list */
833 /* Transform phase list to a python list */
834 phasessize = PyInt_FromSsize_t(len);
834 phasessize = PyInt_FromSsize_t(len);
835 if (phasessize == NULL)
835 if (phasessize == NULL)
836 goto release;
836 goto release;
837 for (i = 0; i < len; i++) {
837 for (i = 0; i < len; i++) {
838 phase = phases[i];
838 phase = phases[i];
839 /* We only store the sets of phase for non public phase, the
839 /* We only store the sets of phase for non public phase, the
840 * public phase is computed as a difference */
840 * public phase is computed as a difference */
841 if (phase != 0) {
841 if (phase != 0) {
842 phaseset = PyList_GET_ITEM(phasessetlist, phase);
842 phaseset = PyList_GET_ITEM(phasessetlist, phase);
843 rev = PyInt_FromSsize_t(i);
843 rev = PyInt_FromSsize_t(i);
844 if (rev == NULL)
844 if (rev == NULL)
845 goto release;
845 goto release;
846 PySet_Add(phaseset, rev);
846 PySet_Add(phaseset, rev);
847 Py_XDECREF(rev);
847 Py_XDECREF(rev);
848 }
848 }
849 }
849 }
850 ret = PyTuple_Pack(2, phasessize, phasessetlist);
850 ret = PyTuple_Pack(2, phasessize, phasessetlist);
851
851
852 release:
852 release:
853 Py_XDECREF(phasessize);
853 Py_XDECREF(phasessize);
854 Py_XDECREF(phasessetlist);
854 Py_XDECREF(phasessetlist);
855 done:
855 done:
856 free(phases);
856 free(phases);
857 return ret;
857 return ret;
858 }
858 }
859
859
860 static PyObject *index_headrevs(indexObject *self, PyObject *args)
860 static PyObject *index_headrevs(indexObject *self, PyObject *args)
861 {
861 {
862 Py_ssize_t i, j, len;
862 Py_ssize_t i, j, len;
863 char *nothead = NULL;
863 char *nothead = NULL;
864 PyObject *heads = NULL;
864 PyObject *heads = NULL;
865 PyObject *filter = NULL;
865 PyObject *filter = NULL;
866 PyObject *filteredrevs = Py_None;
866 PyObject *filteredrevs = Py_None;
867
867
868 if (!PyArg_ParseTuple(args, "|O", &filteredrevs)) {
868 if (!PyArg_ParseTuple(args, "|O", &filteredrevs)) {
869 return NULL;
869 return NULL;
870 }
870 }
871
871
872 if (self->headrevs && filteredrevs == self->filteredrevs)
872 if (self->headrevs && filteredrevs == self->filteredrevs)
873 return list_copy(self->headrevs);
873 return list_copy(self->headrevs);
874
874
875 Py_DECREF(self->filteredrevs);
875 Py_DECREF(self->filteredrevs);
876 self->filteredrevs = filteredrevs;
876 self->filteredrevs = filteredrevs;
877 Py_INCREF(filteredrevs);
877 Py_INCREF(filteredrevs);
878
878
879 if (filteredrevs != Py_None) {
879 if (filteredrevs != Py_None) {
880 filter = PyObject_GetAttrString(filteredrevs, "__contains__");
880 filter = PyObject_GetAttrString(filteredrevs, "__contains__");
881 if (!filter) {
881 if (!filter) {
882 PyErr_SetString(
882 PyErr_SetString(
883 PyExc_TypeError,
883 PyExc_TypeError,
884 "filteredrevs has no attribute __contains__");
884 "filteredrevs has no attribute __contains__");
885 goto bail;
885 goto bail;
886 }
886 }
887 }
887 }
888
888
889 len = index_length(self);
889 len = index_length(self);
890 heads = PyList_New(0);
890 heads = PyList_New(0);
891 if (heads == NULL)
891 if (heads == NULL)
892 goto bail;
892 goto bail;
893 if (len == 0) {
893 if (len == 0) {
894 PyObject *nullid = PyInt_FromLong(-1);
894 PyObject *nullid = PyInt_FromLong(-1);
895 if (nullid == NULL || PyList_Append(heads, nullid) == -1) {
895 if (nullid == NULL || PyList_Append(heads, nullid) == -1) {
896 Py_XDECREF(nullid);
896 Py_XDECREF(nullid);
897 goto bail;
897 goto bail;
898 }
898 }
899 goto done;
899 goto done;
900 }
900 }
901
901
902 nothead = calloc(len, 1);
902 nothead = calloc(len, 1);
903 if (nothead == NULL) {
903 if (nothead == NULL) {
904 PyErr_NoMemory();
904 PyErr_NoMemory();
905 goto bail;
905 goto bail;
906 }
906 }
907
907
908 for (i = len - 1; i >= 0; i--) {
908 for (i = len - 1; i >= 0; i--) {
909 int isfiltered;
909 int isfiltered;
910 int parents[2];
910 int parents[2];
911
911
912 /* If nothead[i] == 1, it means we've seen an unfiltered child
912 /* If nothead[i] == 1, it means we've seen an unfiltered child
913 * of this node already, and therefore this node is not
913 * of this node already, and therefore this node is not
914 * filtered. So we can skip the expensive check_filter step.
914 * filtered. So we can skip the expensive check_filter step.
915 */
915 */
916 if (nothead[i] != 1) {
916 if (nothead[i] != 1) {
917 isfiltered = check_filter(filter, i);
917 isfiltered = check_filter(filter, i);
918 if (isfiltered == -1) {
918 if (isfiltered == -1) {
919 PyErr_SetString(PyExc_TypeError,
919 PyErr_SetString(PyExc_TypeError,
920 "unable to check filter");
920 "unable to check filter");
921 goto bail;
921 goto bail;
922 }
922 }
923
923
924 if (isfiltered) {
924 if (isfiltered) {
925 nothead[i] = 1;
925 nothead[i] = 1;
926 continue;
926 continue;
927 }
927 }
928 }
928 }
929
929
930 if (index_get_parents(self, i, parents, (int)len - 1) < 0)
930 if (index_get_parents(self, i, parents, (int)len - 1) < 0)
931 goto bail;
931 goto bail;
932 for (j = 0; j < 2; j++) {
932 for (j = 0; j < 2; j++) {
933 if (parents[j] >= 0)
933 if (parents[j] >= 0)
934 nothead[parents[j]] = 1;
934 nothead[parents[j]] = 1;
935 }
935 }
936 }
936 }
937
937
938 for (i = 0; i < len; i++) {
938 for (i = 0; i < len; i++) {
939 PyObject *head;
939 PyObject *head;
940
940
941 if (nothead[i])
941 if (nothead[i])
942 continue;
942 continue;
943 head = PyInt_FromSsize_t(i);
943 head = PyInt_FromSsize_t(i);
944 if (head == NULL || PyList_Append(heads, head) == -1) {
944 if (head == NULL || PyList_Append(heads, head) == -1) {
945 Py_XDECREF(head);
945 Py_XDECREF(head);
946 goto bail;
946 goto bail;
947 }
947 }
948 }
948 }
949
949
950 done:
950 done:
951 self->headrevs = heads;
951 self->headrevs = heads;
952 Py_XDECREF(filter);
952 Py_XDECREF(filter);
953 free(nothead);
953 free(nothead);
954 return list_copy(self->headrevs);
954 return list_copy(self->headrevs);
955 bail:
955 bail:
956 Py_XDECREF(filter);
956 Py_XDECREF(filter);
957 Py_XDECREF(heads);
957 Py_XDECREF(heads);
958 free(nothead);
958 free(nothead);
959 return NULL;
959 return NULL;
960 }
960 }
961
961
962 /**
962 /**
963 * Obtain the base revision index entry.
963 * Obtain the base revision index entry.
964 *
964 *
965 * Callers must ensure that rev >= 0 or illegal memory access may occur.
965 * Callers must ensure that rev >= 0 or illegal memory access may occur.
966 */
966 */
967 static inline int index_baserev(indexObject *self, int rev)
967 static inline int index_baserev(indexObject *self, int rev)
968 {
968 {
969 const char *data;
969 const char *data;
970 int result;
970 int result;
971
971
972 if (rev >= self->length) {
972 if (rev >= self->length) {
973 PyObject *tuple =
973 PyObject *tuple =
974 PyList_GET_ITEM(self->added, rev - self->length);
974 PyList_GET_ITEM(self->added, rev - self->length);
975 long ret;
975 long ret;
976 if (!pylong_to_long(PyTuple_GET_ITEM(tuple, 3), &ret)) {
976 if (!pylong_to_long(PyTuple_GET_ITEM(tuple, 3), &ret)) {
977 return -2;
977 return -2;
978 }
978 }
979 result = (int)ret;
979 result = (int)ret;
980 } else {
980 } else {
981 data = index_deref(self, rev);
981 data = index_deref(self, rev);
982 if (data == NULL) {
982 if (data == NULL) {
983 return -2;
983 return -2;
984 }
984 }
985
985
986 result = getbe32(data + 16);
986 result = getbe32(data + 16);
987 }
987 }
988 if (result > rev) {
988 if (result > rev) {
989 PyErr_Format(
989 PyErr_Format(
990 PyExc_ValueError,
990 PyExc_ValueError,
991 "corrupted revlog, revision base above revision: %d, %d",
991 "corrupted revlog, revision base above revision: %d, %d",
992 rev, result);
992 rev, result);
993 return -2;
993 return -2;
994 }
994 }
995 return result;
995 return result;
996 }
996 }
997
997
998 /**
998 /**
999 * Find if a revision is a snapshot or not
999 * Find if a revision is a snapshot or not
1000 *
1000 *
1001 * Only relevant for sparse-revlog case.
1001 * Only relevant for sparse-revlog case.
1002 * Callers must ensure that rev is in a valid range.
1002 * Callers must ensure that rev is in a valid range.
1003 */
1003 */
1004 static int index_issnapshotrev(indexObject *self, Py_ssize_t rev)
1004 static int index_issnapshotrev(indexObject *self, Py_ssize_t rev)
1005 {
1005 {
1006 int ps[2];
1006 int ps[2];
1007 Py_ssize_t base;
1007 Py_ssize_t base;
1008 while (rev >= 0) {
1008 while (rev >= 0) {
1009 base = (Py_ssize_t)index_baserev(self, rev);
1009 base = (Py_ssize_t)index_baserev(self, rev);
1010 if (base == rev) {
1010 if (base == rev) {
1011 base = -1;
1011 base = -1;
1012 }
1012 }
1013 if (base == -2) {
1013 if (base == -2) {
1014 assert(PyErr_Occurred());
1014 assert(PyErr_Occurred());
1015 return -1;
1015 return -1;
1016 }
1016 }
1017 if (base == -1) {
1017 if (base == -1) {
1018 return 1;
1018 return 1;
1019 }
1019 }
1020 if (index_get_parents(self, rev, ps, (int)rev) < 0) {
1020 if (index_get_parents(self, rev, ps, (int)rev) < 0) {
1021 assert(PyErr_Occurred());
1021 assert(PyErr_Occurred());
1022 return -1;
1022 return -1;
1023 };
1023 };
1024 if (base == ps[0] || base == ps[1]) {
1024 if (base == ps[0] || base == ps[1]) {
1025 return 0;
1025 return 0;
1026 }
1026 }
1027 rev = base;
1027 rev = base;
1028 }
1028 }
1029 return rev == -1;
1029 return rev == -1;
1030 }
1030 }
1031
1031
1032 static PyObject *index_issnapshot(indexObject *self, PyObject *value)
1032 static PyObject *index_issnapshot(indexObject *self, PyObject *value)
1033 {
1033 {
1034 long rev;
1034 long rev;
1035 int issnap;
1035 int issnap;
1036 Py_ssize_t length = index_length(self);
1036 Py_ssize_t length = index_length(self);
1037
1037
1038 if (!pylong_to_long(value, &rev)) {
1038 if (!pylong_to_long(value, &rev)) {
1039 return NULL;
1039 return NULL;
1040 }
1040 }
1041 if (rev < -1 || rev >= length) {
1041 if (rev < -1 || rev >= length) {
1042 PyErr_Format(PyExc_ValueError, "revlog index out of range: %ld",
1042 PyErr_Format(PyExc_ValueError, "revlog index out of range: %ld",
1043 rev);
1043 rev);
1044 return NULL;
1044 return NULL;
1045 };
1045 };
1046 issnap = index_issnapshotrev(self, (Py_ssize_t)rev);
1046 issnap = index_issnapshotrev(self, (Py_ssize_t)rev);
1047 if (issnap < 0) {
1047 if (issnap < 0) {
1048 return NULL;
1048 return NULL;
1049 };
1049 };
1050 return PyBool_FromLong((long)issnap);
1050 return PyBool_FromLong((long)issnap);
1051 }
1051 }
1052
1052
1053 static PyObject *index_findsnapshots(indexObject *self, PyObject *args)
1054 {
1055 Py_ssize_t start_rev;
1056 PyObject *cache;
1057 Py_ssize_t base;
1058 Py_ssize_t rev;
1059 PyObject *key = NULL;
1060 PyObject *value = NULL;
1061 const Py_ssize_t length = index_length(self);
1062 if (!PyArg_ParseTuple(args, "O!n", &PyDict_Type, &cache, &start_rev)) {
1063 return NULL;
1064 }
1065 for (rev = start_rev; rev < length; rev++) {
1066 int issnap;
1067 PyObject *allvalues = NULL;
1068 issnap = index_issnapshotrev(self, rev);
1069 if (issnap < 0) {
1070 goto bail;
1071 }
1072 if (issnap == 0) {
1073 continue;
1074 }
1075 base = (Py_ssize_t)index_baserev(self, rev);
1076 if (base == rev) {
1077 base = -1;
1078 }
1079 if (base == -2) {
1080 assert(PyErr_Occurred());
1081 goto bail;
1082 }
1083 key = PyInt_FromSsize_t(base);
1084 allvalues = PyDict_GetItem(cache, key);
1085 if (allvalues == NULL && PyErr_Occurred()) {
1086 goto bail;
1087 }
1088 if (allvalues == NULL) {
1089 int r;
1090 allvalues = PyList_New(0);
1091 if (!allvalues) {
1092 goto bail;
1093 }
1094 r = PyDict_SetItem(cache, key, allvalues);
1095 Py_DECREF(allvalues);
1096 if (r < 0) {
1097 goto bail;
1098 }
1099 }
1100 value = PyInt_FromSsize_t(rev);
1101 if (PyList_Append(allvalues, value)) {
1102 goto bail;
1103 }
1104 Py_CLEAR(key);
1105 Py_CLEAR(value);
1106 }
1107 Py_RETURN_NONE;
1108 bail:
1109 Py_XDECREF(key);
1110 Py_XDECREF(value);
1111 return NULL;
1112 }
1113
1053 static PyObject *index_deltachain(indexObject *self, PyObject *args)
1114 static PyObject *index_deltachain(indexObject *self, PyObject *args)
1054 {
1115 {
1055 int rev, generaldelta;
1116 int rev, generaldelta;
1056 PyObject *stoparg;
1117 PyObject *stoparg;
1057 int stoprev, iterrev, baserev = -1;
1118 int stoprev, iterrev, baserev = -1;
1058 int stopped;
1119 int stopped;
1059 PyObject *chain = NULL, *result = NULL;
1120 PyObject *chain = NULL, *result = NULL;
1060 const Py_ssize_t length = index_length(self);
1121 const Py_ssize_t length = index_length(self);
1061
1122
1062 if (!PyArg_ParseTuple(args, "iOi", &rev, &stoparg, &generaldelta)) {
1123 if (!PyArg_ParseTuple(args, "iOi", &rev, &stoparg, &generaldelta)) {
1063 return NULL;
1124 return NULL;
1064 }
1125 }
1065
1126
1066 if (PyInt_Check(stoparg)) {
1127 if (PyInt_Check(stoparg)) {
1067 stoprev = (int)PyInt_AsLong(stoparg);
1128 stoprev = (int)PyInt_AsLong(stoparg);
1068 if (stoprev == -1 && PyErr_Occurred()) {
1129 if (stoprev == -1 && PyErr_Occurred()) {
1069 return NULL;
1130 return NULL;
1070 }
1131 }
1071 } else if (stoparg == Py_None) {
1132 } else if (stoparg == Py_None) {
1072 stoprev = -2;
1133 stoprev = -2;
1073 } else {
1134 } else {
1074 PyErr_SetString(PyExc_ValueError,
1135 PyErr_SetString(PyExc_ValueError,
1075 "stoprev must be integer or None");
1136 "stoprev must be integer or None");
1076 return NULL;
1137 return NULL;
1077 }
1138 }
1078
1139
1079 if (rev < 0 || rev >= length) {
1140 if (rev < 0 || rev >= length) {
1080 PyErr_SetString(PyExc_ValueError, "revlog index out of range");
1141 PyErr_SetString(PyExc_ValueError, "revlog index out of range");
1081 return NULL;
1142 return NULL;
1082 }
1143 }
1083
1144
1084 chain = PyList_New(0);
1145 chain = PyList_New(0);
1085 if (chain == NULL) {
1146 if (chain == NULL) {
1086 return NULL;
1147 return NULL;
1087 }
1148 }
1088
1149
1089 baserev = index_baserev(self, rev);
1150 baserev = index_baserev(self, rev);
1090
1151
1091 /* This should never happen. */
1152 /* This should never happen. */
1092 if (baserev <= -2) {
1153 if (baserev <= -2) {
1093 /* Error should be set by index_deref() */
1154 /* Error should be set by index_deref() */
1094 assert(PyErr_Occurred());
1155 assert(PyErr_Occurred());
1095 goto bail;
1156 goto bail;
1096 }
1157 }
1097
1158
1098 iterrev = rev;
1159 iterrev = rev;
1099
1160
1100 while (iterrev != baserev && iterrev != stoprev) {
1161 while (iterrev != baserev && iterrev != stoprev) {
1101 PyObject *value = PyInt_FromLong(iterrev);
1162 PyObject *value = PyInt_FromLong(iterrev);
1102 if (value == NULL) {
1163 if (value == NULL) {
1103 goto bail;
1164 goto bail;
1104 }
1165 }
1105 if (PyList_Append(chain, value)) {
1166 if (PyList_Append(chain, value)) {
1106 Py_DECREF(value);
1167 Py_DECREF(value);
1107 goto bail;
1168 goto bail;
1108 }
1169 }
1109 Py_DECREF(value);
1170 Py_DECREF(value);
1110
1171
1111 if (generaldelta) {
1172 if (generaldelta) {
1112 iterrev = baserev;
1173 iterrev = baserev;
1113 } else {
1174 } else {
1114 iterrev--;
1175 iterrev--;
1115 }
1176 }
1116
1177
1117 if (iterrev < 0) {
1178 if (iterrev < 0) {
1118 break;
1179 break;
1119 }
1180 }
1120
1181
1121 if (iterrev >= length) {
1182 if (iterrev >= length) {
1122 PyErr_SetString(PyExc_IndexError,
1183 PyErr_SetString(PyExc_IndexError,
1123 "revision outside index");
1184 "revision outside index");
1124 return NULL;
1185 return NULL;
1125 }
1186 }
1126
1187
1127 baserev = index_baserev(self, iterrev);
1188 baserev = index_baserev(self, iterrev);
1128
1189
1129 /* This should never happen. */
1190 /* This should never happen. */
1130 if (baserev <= -2) {
1191 if (baserev <= -2) {
1131 /* Error should be set by index_deref() */
1192 /* Error should be set by index_deref() */
1132 assert(PyErr_Occurred());
1193 assert(PyErr_Occurred());
1133 goto bail;
1194 goto bail;
1134 }
1195 }
1135 }
1196 }
1136
1197
1137 if (iterrev == stoprev) {
1198 if (iterrev == stoprev) {
1138 stopped = 1;
1199 stopped = 1;
1139 } else {
1200 } else {
1140 PyObject *value = PyInt_FromLong(iterrev);
1201 PyObject *value = PyInt_FromLong(iterrev);
1141 if (value == NULL) {
1202 if (value == NULL) {
1142 goto bail;
1203 goto bail;
1143 }
1204 }
1144 if (PyList_Append(chain, value)) {
1205 if (PyList_Append(chain, value)) {
1145 Py_DECREF(value);
1206 Py_DECREF(value);
1146 goto bail;
1207 goto bail;
1147 }
1208 }
1148 Py_DECREF(value);
1209 Py_DECREF(value);
1149
1210
1150 stopped = 0;
1211 stopped = 0;
1151 }
1212 }
1152
1213
1153 if (PyList_Reverse(chain)) {
1214 if (PyList_Reverse(chain)) {
1154 goto bail;
1215 goto bail;
1155 }
1216 }
1156
1217
1157 result = Py_BuildValue("OO", chain, stopped ? Py_True : Py_False);
1218 result = Py_BuildValue("OO", chain, stopped ? Py_True : Py_False);
1158 Py_DECREF(chain);
1219 Py_DECREF(chain);
1159 return result;
1220 return result;
1160
1221
1161 bail:
1222 bail:
1162 Py_DECREF(chain);
1223 Py_DECREF(chain);
1163 return NULL;
1224 return NULL;
1164 }
1225 }
1165
1226
1166 static inline int64_t
1227 static inline int64_t
1167 index_segment_span(indexObject *self, Py_ssize_t start_rev, Py_ssize_t end_rev)
1228 index_segment_span(indexObject *self, Py_ssize_t start_rev, Py_ssize_t end_rev)
1168 {
1229 {
1169 int64_t start_offset;
1230 int64_t start_offset;
1170 int64_t end_offset;
1231 int64_t end_offset;
1171 int end_size;
1232 int end_size;
1172 start_offset = index_get_start(self, start_rev);
1233 start_offset = index_get_start(self, start_rev);
1173 if (start_offset < 0) {
1234 if (start_offset < 0) {
1174 return -1;
1235 return -1;
1175 }
1236 }
1176 end_offset = index_get_start(self, end_rev);
1237 end_offset = index_get_start(self, end_rev);
1177 if (end_offset < 0) {
1238 if (end_offset < 0) {
1178 return -1;
1239 return -1;
1179 }
1240 }
1180 end_size = index_get_length(self, end_rev);
1241 end_size = index_get_length(self, end_rev);
1181 if (end_size < 0) {
1242 if (end_size < 0) {
1182 return -1;
1243 return -1;
1183 }
1244 }
1184 if (end_offset < start_offset) {
1245 if (end_offset < start_offset) {
1185 PyErr_Format(PyExc_ValueError,
1246 PyErr_Format(PyExc_ValueError,
1186 "corrupted revlog index: inconsistent offset "
1247 "corrupted revlog index: inconsistent offset "
1187 "between revisions (%zd) and (%zd)",
1248 "between revisions (%zd) and (%zd)",
1188 start_rev, end_rev);
1249 start_rev, end_rev);
1189 return -1;
1250 return -1;
1190 }
1251 }
1191 return (end_offset - start_offset) + (int64_t)end_size;
1252 return (end_offset - start_offset) + (int64_t)end_size;
1192 }
1253 }
1193
1254
1194 /* returns endidx so that revs[startidx:endidx] has no empty trailing revs */
1255 /* returns endidx so that revs[startidx:endidx] has no empty trailing revs */
1195 static Py_ssize_t trim_endidx(indexObject *self, const Py_ssize_t *revs,
1256 static Py_ssize_t trim_endidx(indexObject *self, const Py_ssize_t *revs,
1196 Py_ssize_t startidx, Py_ssize_t endidx)
1257 Py_ssize_t startidx, Py_ssize_t endidx)
1197 {
1258 {
1198 int length;
1259 int length;
1199 while (endidx > 1 && endidx > startidx) {
1260 while (endidx > 1 && endidx > startidx) {
1200 length = index_get_length(self, revs[endidx - 1]);
1261 length = index_get_length(self, revs[endidx - 1]);
1201 if (length < 0) {
1262 if (length < 0) {
1202 return -1;
1263 return -1;
1203 }
1264 }
1204 if (length != 0) {
1265 if (length != 0) {
1205 break;
1266 break;
1206 }
1267 }
1207 endidx -= 1;
1268 endidx -= 1;
1208 }
1269 }
1209 return endidx;
1270 return endidx;
1210 }
1271 }
1211
1272
1212 struct Gap {
1273 struct Gap {
1213 int64_t size;
1274 int64_t size;
1214 Py_ssize_t idx;
1275 Py_ssize_t idx;
1215 };
1276 };
1216
1277
1217 static int gap_compare(const void *left, const void *right)
1278 static int gap_compare(const void *left, const void *right)
1218 {
1279 {
1219 const struct Gap *l_left = ((const struct Gap *)left);
1280 const struct Gap *l_left = ((const struct Gap *)left);
1220 const struct Gap *l_right = ((const struct Gap *)right);
1281 const struct Gap *l_right = ((const struct Gap *)right);
1221 if (l_left->size < l_right->size) {
1282 if (l_left->size < l_right->size) {
1222 return -1;
1283 return -1;
1223 } else if (l_left->size > l_right->size) {
1284 } else if (l_left->size > l_right->size) {
1224 return 1;
1285 return 1;
1225 }
1286 }
1226 return 0;
1287 return 0;
1227 }
1288 }
1228 static int Py_ssize_t_compare(const void *left, const void *right)
1289 static int Py_ssize_t_compare(const void *left, const void *right)
1229 {
1290 {
1230 const Py_ssize_t l_left = *(const Py_ssize_t *)left;
1291 const Py_ssize_t l_left = *(const Py_ssize_t *)left;
1231 const Py_ssize_t l_right = *(const Py_ssize_t *)right;
1292 const Py_ssize_t l_right = *(const Py_ssize_t *)right;
1232 if (l_left < l_right) {
1293 if (l_left < l_right) {
1233 return -1;
1294 return -1;
1234 } else if (l_left > l_right) {
1295 } else if (l_left > l_right) {
1235 return 1;
1296 return 1;
1236 }
1297 }
1237 return 0;
1298 return 0;
1238 }
1299 }
1239
1300
1240 static PyObject *index_slicechunktodensity(indexObject *self, PyObject *args)
1301 static PyObject *index_slicechunktodensity(indexObject *self, PyObject *args)
1241 {
1302 {
1242 /* method arguments */
1303 /* method arguments */
1243 PyObject *list_revs = NULL; /* revisions in the chain */
1304 PyObject *list_revs = NULL; /* revisions in the chain */
1244 double targetdensity = 0; /* min density to achieve */
1305 double targetdensity = 0; /* min density to achieve */
1245 Py_ssize_t mingapsize = 0; /* threshold to ignore gaps */
1306 Py_ssize_t mingapsize = 0; /* threshold to ignore gaps */
1246
1307
1247 /* other core variables */
1308 /* other core variables */
1248 Py_ssize_t idxlen = index_length(self);
1309 Py_ssize_t idxlen = index_length(self);
1249 Py_ssize_t i; /* used for various iteration */
1310 Py_ssize_t i; /* used for various iteration */
1250 PyObject *result = NULL; /* the final return of the function */
1311 PyObject *result = NULL; /* the final return of the function */
1251
1312
1252 /* generic information about the delta chain being slice */
1313 /* generic information about the delta chain being slice */
1253 Py_ssize_t num_revs = 0; /* size of the full delta chain */
1314 Py_ssize_t num_revs = 0; /* size of the full delta chain */
1254 Py_ssize_t *revs = NULL; /* native array of revision in the chain */
1315 Py_ssize_t *revs = NULL; /* native array of revision in the chain */
1255 int64_t chainpayload = 0; /* sum of all delta in the chain */
1316 int64_t chainpayload = 0; /* sum of all delta in the chain */
1256 int64_t deltachainspan = 0; /* distance from first byte to last byte */
1317 int64_t deltachainspan = 0; /* distance from first byte to last byte */
1257
1318
1258 /* variable used for slicing the delta chain */
1319 /* variable used for slicing the delta chain */
1259 int64_t readdata = 0; /* amount of data currently planned to be read */
1320 int64_t readdata = 0; /* amount of data currently planned to be read */
1260 double density = 0; /* ration of payload data compared to read ones */
1321 double density = 0; /* ration of payload data compared to read ones */
1261 int64_t previous_end;
1322 int64_t previous_end;
1262 struct Gap *gaps = NULL; /* array of notable gap in the chain */
1323 struct Gap *gaps = NULL; /* array of notable gap in the chain */
1263 Py_ssize_t num_gaps =
1324 Py_ssize_t num_gaps =
1264 0; /* total number of notable gap recorded so far */
1325 0; /* total number of notable gap recorded so far */
1265 Py_ssize_t *selected_indices = NULL; /* indices of gap skipped over */
1326 Py_ssize_t *selected_indices = NULL; /* indices of gap skipped over */
1266 Py_ssize_t num_selected = 0; /* number of gaps skipped */
1327 Py_ssize_t num_selected = 0; /* number of gaps skipped */
1267 PyObject *chunk = NULL; /* individual slice */
1328 PyObject *chunk = NULL; /* individual slice */
1268 PyObject *allchunks = NULL; /* all slices */
1329 PyObject *allchunks = NULL; /* all slices */
1269 Py_ssize_t previdx;
1330 Py_ssize_t previdx;
1270
1331
1271 /* parsing argument */
1332 /* parsing argument */
1272 if (!PyArg_ParseTuple(args, "O!dn", &PyList_Type, &list_revs,
1333 if (!PyArg_ParseTuple(args, "O!dn", &PyList_Type, &list_revs,
1273 &targetdensity, &mingapsize)) {
1334 &targetdensity, &mingapsize)) {
1274 goto bail;
1335 goto bail;
1275 }
1336 }
1276
1337
1277 /* If the delta chain contains a single element, we do not need slicing
1338 /* If the delta chain contains a single element, we do not need slicing
1278 */
1339 */
1279 num_revs = PyList_GET_SIZE(list_revs);
1340 num_revs = PyList_GET_SIZE(list_revs);
1280 if (num_revs <= 1) {
1341 if (num_revs <= 1) {
1281 result = PyTuple_Pack(1, list_revs);
1342 result = PyTuple_Pack(1, list_revs);
1282 goto done;
1343 goto done;
1283 }
1344 }
1284
1345
1285 /* Turn the python list into a native integer array (for efficiency) */
1346 /* Turn the python list into a native integer array (for efficiency) */
1286 revs = (Py_ssize_t *)calloc(num_revs, sizeof(Py_ssize_t));
1347 revs = (Py_ssize_t *)calloc(num_revs, sizeof(Py_ssize_t));
1287 if (revs == NULL) {
1348 if (revs == NULL) {
1288 PyErr_NoMemory();
1349 PyErr_NoMemory();
1289 goto bail;
1350 goto bail;
1290 }
1351 }
1291 for (i = 0; i < num_revs; i++) {
1352 for (i = 0; i < num_revs; i++) {
1292 Py_ssize_t revnum = PyInt_AsLong(PyList_GET_ITEM(list_revs, i));
1353 Py_ssize_t revnum = PyInt_AsLong(PyList_GET_ITEM(list_revs, i));
1293 if (revnum == -1 && PyErr_Occurred()) {
1354 if (revnum == -1 && PyErr_Occurred()) {
1294 goto bail;
1355 goto bail;
1295 }
1356 }
1296 if (revnum < nullrev || revnum >= idxlen) {
1357 if (revnum < nullrev || revnum >= idxlen) {
1297 PyErr_Format(PyExc_IndexError,
1358 PyErr_Format(PyExc_IndexError,
1298 "index out of range: %zd", revnum);
1359 "index out of range: %zd", revnum);
1299 goto bail;
1360 goto bail;
1300 }
1361 }
1301 revs[i] = revnum;
1362 revs[i] = revnum;
1302 }
1363 }
1303
1364
1304 /* Compute and check various property of the unsliced delta chain */
1365 /* Compute and check various property of the unsliced delta chain */
1305 deltachainspan = index_segment_span(self, revs[0], revs[num_revs - 1]);
1366 deltachainspan = index_segment_span(self, revs[0], revs[num_revs - 1]);
1306 if (deltachainspan < 0) {
1367 if (deltachainspan < 0) {
1307 goto bail;
1368 goto bail;
1308 }
1369 }
1309
1370
1310 if (deltachainspan <= mingapsize) {
1371 if (deltachainspan <= mingapsize) {
1311 result = PyTuple_Pack(1, list_revs);
1372 result = PyTuple_Pack(1, list_revs);
1312 goto done;
1373 goto done;
1313 }
1374 }
1314 chainpayload = 0;
1375 chainpayload = 0;
1315 for (i = 0; i < num_revs; i++) {
1376 for (i = 0; i < num_revs; i++) {
1316 int tmp = index_get_length(self, revs[i]);
1377 int tmp = index_get_length(self, revs[i]);
1317 if (tmp < 0) {
1378 if (tmp < 0) {
1318 goto bail;
1379 goto bail;
1319 }
1380 }
1320 chainpayload += tmp;
1381 chainpayload += tmp;
1321 }
1382 }
1322
1383
1323 readdata = deltachainspan;
1384 readdata = deltachainspan;
1324 density = 1.0;
1385 density = 1.0;
1325
1386
1326 if (0 < deltachainspan) {
1387 if (0 < deltachainspan) {
1327 density = (double)chainpayload / (double)deltachainspan;
1388 density = (double)chainpayload / (double)deltachainspan;
1328 }
1389 }
1329
1390
1330 if (density >= targetdensity) {
1391 if (density >= targetdensity) {
1331 result = PyTuple_Pack(1, list_revs);
1392 result = PyTuple_Pack(1, list_revs);
1332 goto done;
1393 goto done;
1333 }
1394 }
1334
1395
1335 /* if chain is too sparse, look for relevant gaps */
1396 /* if chain is too sparse, look for relevant gaps */
1336 gaps = (struct Gap *)calloc(num_revs, sizeof(struct Gap));
1397 gaps = (struct Gap *)calloc(num_revs, sizeof(struct Gap));
1337 if (gaps == NULL) {
1398 if (gaps == NULL) {
1338 PyErr_NoMemory();
1399 PyErr_NoMemory();
1339 goto bail;
1400 goto bail;
1340 }
1401 }
1341
1402
1342 previous_end = -1;
1403 previous_end = -1;
1343 for (i = 0; i < num_revs; i++) {
1404 for (i = 0; i < num_revs; i++) {
1344 int64_t revstart;
1405 int64_t revstart;
1345 int revsize;
1406 int revsize;
1346 revstart = index_get_start(self, revs[i]);
1407 revstart = index_get_start(self, revs[i]);
1347 if (revstart < 0) {
1408 if (revstart < 0) {
1348 goto bail;
1409 goto bail;
1349 };
1410 };
1350 revsize = index_get_length(self, revs[i]);
1411 revsize = index_get_length(self, revs[i]);
1351 if (revsize < 0) {
1412 if (revsize < 0) {
1352 goto bail;
1413 goto bail;
1353 };
1414 };
1354 if (revsize == 0) {
1415 if (revsize == 0) {
1355 continue;
1416 continue;
1356 }
1417 }
1357 if (previous_end >= 0) {
1418 if (previous_end >= 0) {
1358 int64_t gapsize = revstart - previous_end;
1419 int64_t gapsize = revstart - previous_end;
1359 if (gapsize > mingapsize) {
1420 if (gapsize > mingapsize) {
1360 gaps[num_gaps].size = gapsize;
1421 gaps[num_gaps].size = gapsize;
1361 gaps[num_gaps].idx = i;
1422 gaps[num_gaps].idx = i;
1362 num_gaps += 1;
1423 num_gaps += 1;
1363 }
1424 }
1364 }
1425 }
1365 previous_end = revstart + revsize;
1426 previous_end = revstart + revsize;
1366 }
1427 }
1367 if (num_gaps == 0) {
1428 if (num_gaps == 0) {
1368 result = PyTuple_Pack(1, list_revs);
1429 result = PyTuple_Pack(1, list_revs);
1369 goto done;
1430 goto done;
1370 }
1431 }
1371 qsort(gaps, num_gaps, sizeof(struct Gap), &gap_compare);
1432 qsort(gaps, num_gaps, sizeof(struct Gap), &gap_compare);
1372
1433
1373 /* Slice the largest gap first, they improve the density the most */
1434 /* Slice the largest gap first, they improve the density the most */
1374 selected_indices =
1435 selected_indices =
1375 (Py_ssize_t *)malloc((num_gaps + 1) * sizeof(Py_ssize_t));
1436 (Py_ssize_t *)malloc((num_gaps + 1) * sizeof(Py_ssize_t));
1376 if (selected_indices == NULL) {
1437 if (selected_indices == NULL) {
1377 PyErr_NoMemory();
1438 PyErr_NoMemory();
1378 goto bail;
1439 goto bail;
1379 }
1440 }
1380
1441
1381 for (i = num_gaps - 1; i >= 0; i--) {
1442 for (i = num_gaps - 1; i >= 0; i--) {
1382 selected_indices[num_selected] = gaps[i].idx;
1443 selected_indices[num_selected] = gaps[i].idx;
1383 readdata -= gaps[i].size;
1444 readdata -= gaps[i].size;
1384 num_selected += 1;
1445 num_selected += 1;
1385 if (readdata <= 0) {
1446 if (readdata <= 0) {
1386 density = 1.0;
1447 density = 1.0;
1387 } else {
1448 } else {
1388 density = (double)chainpayload / (double)readdata;
1449 density = (double)chainpayload / (double)readdata;
1389 }
1450 }
1390 if (density >= targetdensity) {
1451 if (density >= targetdensity) {
1391 break;
1452 break;
1392 }
1453 }
1393 }
1454 }
1394 qsort(selected_indices, num_selected, sizeof(Py_ssize_t),
1455 qsort(selected_indices, num_selected, sizeof(Py_ssize_t),
1395 &Py_ssize_t_compare);
1456 &Py_ssize_t_compare);
1396
1457
1397 /* create the resulting slice */
1458 /* create the resulting slice */
1398 allchunks = PyList_New(0);
1459 allchunks = PyList_New(0);
1399 if (allchunks == NULL) {
1460 if (allchunks == NULL) {
1400 goto bail;
1461 goto bail;
1401 }
1462 }
1402 previdx = 0;
1463 previdx = 0;
1403 selected_indices[num_selected] = num_revs;
1464 selected_indices[num_selected] = num_revs;
1404 for (i = 0; i <= num_selected; i++) {
1465 for (i = 0; i <= num_selected; i++) {
1405 Py_ssize_t idx = selected_indices[i];
1466 Py_ssize_t idx = selected_indices[i];
1406 Py_ssize_t endidx = trim_endidx(self, revs, previdx, idx);
1467 Py_ssize_t endidx = trim_endidx(self, revs, previdx, idx);
1407 if (endidx < 0) {
1468 if (endidx < 0) {
1408 goto bail;
1469 goto bail;
1409 }
1470 }
1410 if (previdx < endidx) {
1471 if (previdx < endidx) {
1411 chunk = PyList_GetSlice(list_revs, previdx, endidx);
1472 chunk = PyList_GetSlice(list_revs, previdx, endidx);
1412 if (chunk == NULL) {
1473 if (chunk == NULL) {
1413 goto bail;
1474 goto bail;
1414 }
1475 }
1415 if (PyList_Append(allchunks, chunk) == -1) {
1476 if (PyList_Append(allchunks, chunk) == -1) {
1416 goto bail;
1477 goto bail;
1417 }
1478 }
1418 Py_DECREF(chunk);
1479 Py_DECREF(chunk);
1419 chunk = NULL;
1480 chunk = NULL;
1420 }
1481 }
1421 previdx = idx;
1482 previdx = idx;
1422 }
1483 }
1423 result = allchunks;
1484 result = allchunks;
1424 goto done;
1485 goto done;
1425
1486
1426 bail:
1487 bail:
1427 Py_XDECREF(allchunks);
1488 Py_XDECREF(allchunks);
1428 Py_XDECREF(chunk);
1489 Py_XDECREF(chunk);
1429 done:
1490 done:
1430 free(revs);
1491 free(revs);
1431 free(gaps);
1492 free(gaps);
1432 free(selected_indices);
1493 free(selected_indices);
1433 return result;
1494 return result;
1434 }
1495 }
1435
1496
1436 static inline int nt_level(const char *node, Py_ssize_t level)
1497 static inline int nt_level(const char *node, Py_ssize_t level)
1437 {
1498 {
1438 int v = node[level >> 1];
1499 int v = node[level >> 1];
1439 if (!(level & 1))
1500 if (!(level & 1))
1440 v >>= 4;
1501 v >>= 4;
1441 return v & 0xf;
1502 return v & 0xf;
1442 }
1503 }
1443
1504
1444 /*
1505 /*
1445 * Return values:
1506 * Return values:
1446 *
1507 *
1447 * -4: match is ambiguous (multiple candidates)
1508 * -4: match is ambiguous (multiple candidates)
1448 * -2: not found
1509 * -2: not found
1449 * rest: valid rev
1510 * rest: valid rev
1450 */
1511 */
1451 static int nt_find(nodetree *self, const char *node, Py_ssize_t nodelen,
1512 static int nt_find(nodetree *self, const char *node, Py_ssize_t nodelen,
1452 int hex)
1513 int hex)
1453 {
1514 {
1454 int (*getnybble)(const char *, Py_ssize_t) = hex ? hexdigit : nt_level;
1515 int (*getnybble)(const char *, Py_ssize_t) = hex ? hexdigit : nt_level;
1455 int level, maxlevel, off;
1516 int level, maxlevel, off;
1456
1517
1457 if (nodelen == 20 && node[0] == '\0' && memcmp(node, nullid, 20) == 0)
1518 if (nodelen == 20 && node[0] == '\0' && memcmp(node, nullid, 20) == 0)
1458 return -1;
1519 return -1;
1459
1520
1460 if (hex)
1521 if (hex)
1461 maxlevel = nodelen > 40 ? 40 : (int)nodelen;
1522 maxlevel = nodelen > 40 ? 40 : (int)nodelen;
1462 else
1523 else
1463 maxlevel = nodelen > 20 ? 40 : ((int)nodelen * 2);
1524 maxlevel = nodelen > 20 ? 40 : ((int)nodelen * 2);
1464
1525
1465 for (level = off = 0; level < maxlevel; level++) {
1526 for (level = off = 0; level < maxlevel; level++) {
1466 int k = getnybble(node, level);
1527 int k = getnybble(node, level);
1467 nodetreenode *n = &self->nodes[off];
1528 nodetreenode *n = &self->nodes[off];
1468 int v = n->children[k];
1529 int v = n->children[k];
1469
1530
1470 if (v < 0) {
1531 if (v < 0) {
1471 const char *n;
1532 const char *n;
1472 Py_ssize_t i;
1533 Py_ssize_t i;
1473
1534
1474 v = -(v + 2);
1535 v = -(v + 2);
1475 n = index_node(self->index, v);
1536 n = index_node(self->index, v);
1476 if (n == NULL)
1537 if (n == NULL)
1477 return -2;
1538 return -2;
1478 for (i = level; i < maxlevel; i++)
1539 for (i = level; i < maxlevel; i++)
1479 if (getnybble(node, i) != nt_level(n, i))
1540 if (getnybble(node, i) != nt_level(n, i))
1480 return -2;
1541 return -2;
1481 return v;
1542 return v;
1482 }
1543 }
1483 if (v == 0)
1544 if (v == 0)
1484 return -2;
1545 return -2;
1485 off = v;
1546 off = v;
1486 }
1547 }
1487 /* multiple matches against an ambiguous prefix */
1548 /* multiple matches against an ambiguous prefix */
1488 return -4;
1549 return -4;
1489 }
1550 }
1490
1551
1491 static int nt_new(nodetree *self)
1552 static int nt_new(nodetree *self)
1492 {
1553 {
1493 if (self->length == self->capacity) {
1554 if (self->length == self->capacity) {
1494 unsigned newcapacity;
1555 unsigned newcapacity;
1495 nodetreenode *newnodes;
1556 nodetreenode *newnodes;
1496 newcapacity = self->capacity * 2;
1557 newcapacity = self->capacity * 2;
1497 if (newcapacity >= INT_MAX / sizeof(nodetreenode)) {
1558 if (newcapacity >= INT_MAX / sizeof(nodetreenode)) {
1498 PyErr_SetString(PyExc_MemoryError,
1559 PyErr_SetString(PyExc_MemoryError,
1499 "overflow in nt_new");
1560 "overflow in nt_new");
1500 return -1;
1561 return -1;
1501 }
1562 }
1502 newnodes =
1563 newnodes =
1503 realloc(self->nodes, newcapacity * sizeof(nodetreenode));
1564 realloc(self->nodes, newcapacity * sizeof(nodetreenode));
1504 if (newnodes == NULL) {
1565 if (newnodes == NULL) {
1505 PyErr_SetString(PyExc_MemoryError, "out of memory");
1566 PyErr_SetString(PyExc_MemoryError, "out of memory");
1506 return -1;
1567 return -1;
1507 }
1568 }
1508 self->capacity = newcapacity;
1569 self->capacity = newcapacity;
1509 self->nodes = newnodes;
1570 self->nodes = newnodes;
1510 memset(&self->nodes[self->length], 0,
1571 memset(&self->nodes[self->length], 0,
1511 sizeof(nodetreenode) * (self->capacity - self->length));
1572 sizeof(nodetreenode) * (self->capacity - self->length));
1512 }
1573 }
1513 return self->length++;
1574 return self->length++;
1514 }
1575 }
1515
1576
1516 static int nt_insert(nodetree *self, const char *node, int rev)
1577 static int nt_insert(nodetree *self, const char *node, int rev)
1517 {
1578 {
1518 int level = 0;
1579 int level = 0;
1519 int off = 0;
1580 int off = 0;
1520
1581
1521 while (level < 40) {
1582 while (level < 40) {
1522 int k = nt_level(node, level);
1583 int k = nt_level(node, level);
1523 nodetreenode *n;
1584 nodetreenode *n;
1524 int v;
1585 int v;
1525
1586
1526 n = &self->nodes[off];
1587 n = &self->nodes[off];
1527 v = n->children[k];
1588 v = n->children[k];
1528
1589
1529 if (v == 0) {
1590 if (v == 0) {
1530 n->children[k] = -rev - 2;
1591 n->children[k] = -rev - 2;
1531 return 0;
1592 return 0;
1532 }
1593 }
1533 if (v < 0) {
1594 if (v < 0) {
1534 const char *oldnode =
1595 const char *oldnode =
1535 index_node_existing(self->index, -(v + 2));
1596 index_node_existing(self->index, -(v + 2));
1536 int noff;
1597 int noff;
1537
1598
1538 if (oldnode == NULL)
1599 if (oldnode == NULL)
1539 return -1;
1600 return -1;
1540 if (!memcmp(oldnode, node, 20)) {
1601 if (!memcmp(oldnode, node, 20)) {
1541 n->children[k] = -rev - 2;
1602 n->children[k] = -rev - 2;
1542 return 0;
1603 return 0;
1543 }
1604 }
1544 noff = nt_new(self);
1605 noff = nt_new(self);
1545 if (noff == -1)
1606 if (noff == -1)
1546 return -1;
1607 return -1;
1547 /* self->nodes may have been changed by realloc */
1608 /* self->nodes may have been changed by realloc */
1548 self->nodes[off].children[k] = noff;
1609 self->nodes[off].children[k] = noff;
1549 off = noff;
1610 off = noff;
1550 n = &self->nodes[off];
1611 n = &self->nodes[off];
1551 n->children[nt_level(oldnode, ++level)] = v;
1612 n->children[nt_level(oldnode, ++level)] = v;
1552 if (level > self->depth)
1613 if (level > self->depth)
1553 self->depth = level;
1614 self->depth = level;
1554 self->splits += 1;
1615 self->splits += 1;
1555 } else {
1616 } else {
1556 level += 1;
1617 level += 1;
1557 off = v;
1618 off = v;
1558 }
1619 }
1559 }
1620 }
1560
1621
1561 return -1;
1622 return -1;
1562 }
1623 }
1563
1624
1564 static PyObject *ntobj_insert(nodetreeObject *self, PyObject *args)
1625 static PyObject *ntobj_insert(nodetreeObject *self, PyObject *args)
1565 {
1626 {
1566 Py_ssize_t rev;
1627 Py_ssize_t rev;
1567 const char *node;
1628 const char *node;
1568 Py_ssize_t length;
1629 Py_ssize_t length;
1569 if (!PyArg_ParseTuple(args, "n", &rev))
1630 if (!PyArg_ParseTuple(args, "n", &rev))
1570 return NULL;
1631 return NULL;
1571 length = index_length(self->nt.index);
1632 length = index_length(self->nt.index);
1572 if (rev < 0 || rev >= length) {
1633 if (rev < 0 || rev >= length) {
1573 PyErr_SetString(PyExc_ValueError, "revlog index out of range");
1634 PyErr_SetString(PyExc_ValueError, "revlog index out of range");
1574 return NULL;
1635 return NULL;
1575 }
1636 }
1576 node = index_node_existing(self->nt.index, rev);
1637 node = index_node_existing(self->nt.index, rev);
1577 if (nt_insert(&self->nt, node, (int)rev) == -1)
1638 if (nt_insert(&self->nt, node, (int)rev) == -1)
1578 return NULL;
1639 return NULL;
1579 Py_RETURN_NONE;
1640 Py_RETURN_NONE;
1580 }
1641 }
1581
1642
1582 static int nt_delete_node(nodetree *self, const char *node)
1643 static int nt_delete_node(nodetree *self, const char *node)
1583 {
1644 {
1584 /* rev==-2 happens to get encoded as 0, which is interpreted as not set
1645 /* rev==-2 happens to get encoded as 0, which is interpreted as not set
1585 */
1646 */
1586 return nt_insert(self, node, -2);
1647 return nt_insert(self, node, -2);
1587 }
1648 }
1588
1649
1589 static int nt_init(nodetree *self, indexObject *index, unsigned capacity)
1650 static int nt_init(nodetree *self, indexObject *index, unsigned capacity)
1590 {
1651 {
1591 /* Initialize before overflow-checking to avoid nt_dealloc() crash. */
1652 /* Initialize before overflow-checking to avoid nt_dealloc() crash. */
1592 self->nodes = NULL;
1653 self->nodes = NULL;
1593
1654
1594 self->index = index;
1655 self->index = index;
1595 /* The input capacity is in terms of revisions, while the field is in
1656 /* The input capacity is in terms of revisions, while the field is in
1596 * terms of nodetree nodes. */
1657 * terms of nodetree nodes. */
1597 self->capacity = (capacity < 4 ? 4 : capacity / 2);
1658 self->capacity = (capacity < 4 ? 4 : capacity / 2);
1598 self->depth = 0;
1659 self->depth = 0;
1599 self->splits = 0;
1660 self->splits = 0;
1600 if ((size_t)self->capacity > INT_MAX / sizeof(nodetreenode)) {
1661 if ((size_t)self->capacity > INT_MAX / sizeof(nodetreenode)) {
1601 PyErr_SetString(PyExc_ValueError, "overflow in init_nt");
1662 PyErr_SetString(PyExc_ValueError, "overflow in init_nt");
1602 return -1;
1663 return -1;
1603 }
1664 }
1604 self->nodes = calloc(self->capacity, sizeof(nodetreenode));
1665 self->nodes = calloc(self->capacity, sizeof(nodetreenode));
1605 if (self->nodes == NULL) {
1666 if (self->nodes == NULL) {
1606 PyErr_NoMemory();
1667 PyErr_NoMemory();
1607 return -1;
1668 return -1;
1608 }
1669 }
1609 self->length = 1;
1670 self->length = 1;
1610 return 0;
1671 return 0;
1611 }
1672 }
1612
1673
1613 static int ntobj_init(nodetreeObject *self, PyObject *args)
1674 static int ntobj_init(nodetreeObject *self, PyObject *args)
1614 {
1675 {
1615 PyObject *index;
1676 PyObject *index;
1616 unsigned capacity;
1677 unsigned capacity;
1617 if (!PyArg_ParseTuple(args, "O!I", &HgRevlogIndex_Type, &index,
1678 if (!PyArg_ParseTuple(args, "O!I", &HgRevlogIndex_Type, &index,
1618 &capacity))
1679 &capacity))
1619 return -1;
1680 return -1;
1620 Py_INCREF(index);
1681 Py_INCREF(index);
1621 return nt_init(&self->nt, (indexObject *)index, capacity);
1682 return nt_init(&self->nt, (indexObject *)index, capacity);
1622 }
1683 }
1623
1684
1624 static int nt_partialmatch(nodetree *self, const char *node, Py_ssize_t nodelen)
1685 static int nt_partialmatch(nodetree *self, const char *node, Py_ssize_t nodelen)
1625 {
1686 {
1626 return nt_find(self, node, nodelen, 1);
1687 return nt_find(self, node, nodelen, 1);
1627 }
1688 }
1628
1689
1629 /*
1690 /*
1630 * Find the length of the shortest unique prefix of node.
1691 * Find the length of the shortest unique prefix of node.
1631 *
1692 *
1632 * Return values:
1693 * Return values:
1633 *
1694 *
1634 * -3: error (exception set)
1695 * -3: error (exception set)
1635 * -2: not found (no exception set)
1696 * -2: not found (no exception set)
1636 * rest: length of shortest prefix
1697 * rest: length of shortest prefix
1637 */
1698 */
1638 static int nt_shortest(nodetree *self, const char *node)
1699 static int nt_shortest(nodetree *self, const char *node)
1639 {
1700 {
1640 int level, off;
1701 int level, off;
1641
1702
1642 for (level = off = 0; level < 40; level++) {
1703 for (level = off = 0; level < 40; level++) {
1643 int k, v;
1704 int k, v;
1644 nodetreenode *n = &self->nodes[off];
1705 nodetreenode *n = &self->nodes[off];
1645 k = nt_level(node, level);
1706 k = nt_level(node, level);
1646 v = n->children[k];
1707 v = n->children[k];
1647 if (v < 0) {
1708 if (v < 0) {
1648 const char *n;
1709 const char *n;
1649 v = -(v + 2);
1710 v = -(v + 2);
1650 n = index_node_existing(self->index, v);
1711 n = index_node_existing(self->index, v);
1651 if (n == NULL)
1712 if (n == NULL)
1652 return -3;
1713 return -3;
1653 if (memcmp(node, n, 20) != 0)
1714 if (memcmp(node, n, 20) != 0)
1654 /*
1715 /*
1655 * Found a unique prefix, but it wasn't for the
1716 * Found a unique prefix, but it wasn't for the
1656 * requested node (i.e the requested node does
1717 * requested node (i.e the requested node does
1657 * not exist).
1718 * not exist).
1658 */
1719 */
1659 return -2;
1720 return -2;
1660 return level + 1;
1721 return level + 1;
1661 }
1722 }
1662 if (v == 0)
1723 if (v == 0)
1663 return -2;
1724 return -2;
1664 off = v;
1725 off = v;
1665 }
1726 }
1666 /*
1727 /*
1667 * The node was still not unique after 40 hex digits, so this won't
1728 * The node was still not unique after 40 hex digits, so this won't
1668 * happen. Also, if we get here, then there's a programming error in
1729 * happen. Also, if we get here, then there's a programming error in
1669 * this file that made us insert a node longer than 40 hex digits.
1730 * this file that made us insert a node longer than 40 hex digits.
1670 */
1731 */
1671 PyErr_SetString(PyExc_Exception, "broken node tree");
1732 PyErr_SetString(PyExc_Exception, "broken node tree");
1672 return -3;
1733 return -3;
1673 }
1734 }
1674
1735
1675 static PyObject *ntobj_shortest(nodetreeObject *self, PyObject *args)
1736 static PyObject *ntobj_shortest(nodetreeObject *self, PyObject *args)
1676 {
1737 {
1677 PyObject *val;
1738 PyObject *val;
1678 char *node;
1739 char *node;
1679 int length;
1740 int length;
1680
1741
1681 if (!PyArg_ParseTuple(args, "O", &val))
1742 if (!PyArg_ParseTuple(args, "O", &val))
1682 return NULL;
1743 return NULL;
1683 if (node_check(val, &node) == -1)
1744 if (node_check(val, &node) == -1)
1684 return NULL;
1745 return NULL;
1685
1746
1686 length = nt_shortest(&self->nt, node);
1747 length = nt_shortest(&self->nt, node);
1687 if (length == -3)
1748 if (length == -3)
1688 return NULL;
1749 return NULL;
1689 if (length == -2) {
1750 if (length == -2) {
1690 raise_revlog_error();
1751 raise_revlog_error();
1691 return NULL;
1752 return NULL;
1692 }
1753 }
1693 return PyInt_FromLong(length);
1754 return PyInt_FromLong(length);
1694 }
1755 }
1695
1756
1696 static void nt_dealloc(nodetree *self)
1757 static void nt_dealloc(nodetree *self)
1697 {
1758 {
1698 free(self->nodes);
1759 free(self->nodes);
1699 self->nodes = NULL;
1760 self->nodes = NULL;
1700 }
1761 }
1701
1762
1702 static void ntobj_dealloc(nodetreeObject *self)
1763 static void ntobj_dealloc(nodetreeObject *self)
1703 {
1764 {
1704 Py_XDECREF(self->nt.index);
1765 Py_XDECREF(self->nt.index);
1705 nt_dealloc(&self->nt);
1766 nt_dealloc(&self->nt);
1706 PyObject_Del(self);
1767 PyObject_Del(self);
1707 }
1768 }
1708
1769
1709 static PyMethodDef ntobj_methods[] = {
1770 static PyMethodDef ntobj_methods[] = {
1710 {"insert", (PyCFunction)ntobj_insert, METH_VARARGS,
1771 {"insert", (PyCFunction)ntobj_insert, METH_VARARGS,
1711 "insert an index entry"},
1772 "insert an index entry"},
1712 {"shortest", (PyCFunction)ntobj_shortest, METH_VARARGS,
1773 {"shortest", (PyCFunction)ntobj_shortest, METH_VARARGS,
1713 "find length of shortest hex nodeid of a binary ID"},
1774 "find length of shortest hex nodeid of a binary ID"},
1714 {NULL} /* Sentinel */
1775 {NULL} /* Sentinel */
1715 };
1776 };
1716
1777
1717 static PyTypeObject nodetreeType = {
1778 static PyTypeObject nodetreeType = {
1718 PyVarObject_HEAD_INIT(NULL, 0) /* header */
1779 PyVarObject_HEAD_INIT(NULL, 0) /* header */
1719 "parsers.nodetree", /* tp_name */
1780 "parsers.nodetree", /* tp_name */
1720 sizeof(nodetreeObject), /* tp_basicsize */
1781 sizeof(nodetreeObject), /* tp_basicsize */
1721 0, /* tp_itemsize */
1782 0, /* tp_itemsize */
1722 (destructor)ntobj_dealloc, /* tp_dealloc */
1783 (destructor)ntobj_dealloc, /* tp_dealloc */
1723 0, /* tp_print */
1784 0, /* tp_print */
1724 0, /* tp_getattr */
1785 0, /* tp_getattr */
1725 0, /* tp_setattr */
1786 0, /* tp_setattr */
1726 0, /* tp_compare */
1787 0, /* tp_compare */
1727 0, /* tp_repr */
1788 0, /* tp_repr */
1728 0, /* tp_as_number */
1789 0, /* tp_as_number */
1729 0, /* tp_as_sequence */
1790 0, /* tp_as_sequence */
1730 0, /* tp_as_mapping */
1791 0, /* tp_as_mapping */
1731 0, /* tp_hash */
1792 0, /* tp_hash */
1732 0, /* tp_call */
1793 0, /* tp_call */
1733 0, /* tp_str */
1794 0, /* tp_str */
1734 0, /* tp_getattro */
1795 0, /* tp_getattro */
1735 0, /* tp_setattro */
1796 0, /* tp_setattro */
1736 0, /* tp_as_buffer */
1797 0, /* tp_as_buffer */
1737 Py_TPFLAGS_DEFAULT, /* tp_flags */
1798 Py_TPFLAGS_DEFAULT, /* tp_flags */
1738 "nodetree", /* tp_doc */
1799 "nodetree", /* tp_doc */
1739 0, /* tp_traverse */
1800 0, /* tp_traverse */
1740 0, /* tp_clear */
1801 0, /* tp_clear */
1741 0, /* tp_richcompare */
1802 0, /* tp_richcompare */
1742 0, /* tp_weaklistoffset */
1803 0, /* tp_weaklistoffset */
1743 0, /* tp_iter */
1804 0, /* tp_iter */
1744 0, /* tp_iternext */
1805 0, /* tp_iternext */
1745 ntobj_methods, /* tp_methods */
1806 ntobj_methods, /* tp_methods */
1746 0, /* tp_members */
1807 0, /* tp_members */
1747 0, /* tp_getset */
1808 0, /* tp_getset */
1748 0, /* tp_base */
1809 0, /* tp_base */
1749 0, /* tp_dict */
1810 0, /* tp_dict */
1750 0, /* tp_descr_get */
1811 0, /* tp_descr_get */
1751 0, /* tp_descr_set */
1812 0, /* tp_descr_set */
1752 0, /* tp_dictoffset */
1813 0, /* tp_dictoffset */
1753 (initproc)ntobj_init, /* tp_init */
1814 (initproc)ntobj_init, /* tp_init */
1754 0, /* tp_alloc */
1815 0, /* tp_alloc */
1755 };
1816 };
1756
1817
1757 static int index_init_nt(indexObject *self)
1818 static int index_init_nt(indexObject *self)
1758 {
1819 {
1759 if (!self->ntinitialized) {
1820 if (!self->ntinitialized) {
1760 if (nt_init(&self->nt, self, (int)self->raw_length) == -1) {
1821 if (nt_init(&self->nt, self, (int)self->raw_length) == -1) {
1761 nt_dealloc(&self->nt);
1822 nt_dealloc(&self->nt);
1762 return -1;
1823 return -1;
1763 }
1824 }
1764 if (nt_insert(&self->nt, nullid, -1) == -1) {
1825 if (nt_insert(&self->nt, nullid, -1) == -1) {
1765 nt_dealloc(&self->nt);
1826 nt_dealloc(&self->nt);
1766 return -1;
1827 return -1;
1767 }
1828 }
1768 self->ntinitialized = 1;
1829 self->ntinitialized = 1;
1769 self->ntrev = (int)index_length(self);
1830 self->ntrev = (int)index_length(self);
1770 self->ntlookups = 1;
1831 self->ntlookups = 1;
1771 self->ntmisses = 0;
1832 self->ntmisses = 0;
1772 }
1833 }
1773 return 0;
1834 return 0;
1774 }
1835 }
1775
1836
1776 /*
1837 /*
1777 * Return values:
1838 * Return values:
1778 *
1839 *
1779 * -3: error (exception set)
1840 * -3: error (exception set)
1780 * -2: not found (no exception set)
1841 * -2: not found (no exception set)
1781 * rest: valid rev
1842 * rest: valid rev
1782 */
1843 */
1783 static int index_find_node(indexObject *self, const char *node,
1844 static int index_find_node(indexObject *self, const char *node,
1784 Py_ssize_t nodelen)
1845 Py_ssize_t nodelen)
1785 {
1846 {
1786 int rev;
1847 int rev;
1787
1848
1788 if (index_init_nt(self) == -1)
1849 if (index_init_nt(self) == -1)
1789 return -3;
1850 return -3;
1790
1851
1791 self->ntlookups++;
1852 self->ntlookups++;
1792 rev = nt_find(&self->nt, node, nodelen, 0);
1853 rev = nt_find(&self->nt, node, nodelen, 0);
1793 if (rev >= -1)
1854 if (rev >= -1)
1794 return rev;
1855 return rev;
1795
1856
1796 /*
1857 /*
1797 * For the first handful of lookups, we scan the entire index,
1858 * For the first handful of lookups, we scan the entire index,
1798 * and cache only the matching nodes. This optimizes for cases
1859 * and cache only the matching nodes. This optimizes for cases
1799 * like "hg tip", where only a few nodes are accessed.
1860 * like "hg tip", where only a few nodes are accessed.
1800 *
1861 *
1801 * After that, we cache every node we visit, using a single
1862 * After that, we cache every node we visit, using a single
1802 * scan amortized over multiple lookups. This gives the best
1863 * scan amortized over multiple lookups. This gives the best
1803 * bulk performance, e.g. for "hg log".
1864 * bulk performance, e.g. for "hg log".
1804 */
1865 */
1805 if (self->ntmisses++ < 4) {
1866 if (self->ntmisses++ < 4) {
1806 for (rev = self->ntrev - 1; rev >= 0; rev--) {
1867 for (rev = self->ntrev - 1; rev >= 0; rev--) {
1807 const char *n = index_node_existing(self, rev);
1868 const char *n = index_node_existing(self, rev);
1808 if (n == NULL)
1869 if (n == NULL)
1809 return -3;
1870 return -3;
1810 if (memcmp(node, n, nodelen > 20 ? 20 : nodelen) == 0) {
1871 if (memcmp(node, n, nodelen > 20 ? 20 : nodelen) == 0) {
1811 if (nt_insert(&self->nt, n, rev) == -1)
1872 if (nt_insert(&self->nt, n, rev) == -1)
1812 return -3;
1873 return -3;
1813 break;
1874 break;
1814 }
1875 }
1815 }
1876 }
1816 } else {
1877 } else {
1817 for (rev = self->ntrev - 1; rev >= 0; rev--) {
1878 for (rev = self->ntrev - 1; rev >= 0; rev--) {
1818 const char *n = index_node_existing(self, rev);
1879 const char *n = index_node_existing(self, rev);
1819 if (n == NULL)
1880 if (n == NULL)
1820 return -3;
1881 return -3;
1821 if (nt_insert(&self->nt, n, rev) == -1) {
1882 if (nt_insert(&self->nt, n, rev) == -1) {
1822 self->ntrev = rev + 1;
1883 self->ntrev = rev + 1;
1823 return -3;
1884 return -3;
1824 }
1885 }
1825 if (memcmp(node, n, nodelen > 20 ? 20 : nodelen) == 0) {
1886 if (memcmp(node, n, nodelen > 20 ? 20 : nodelen) == 0) {
1826 break;
1887 break;
1827 }
1888 }
1828 }
1889 }
1829 self->ntrev = rev;
1890 self->ntrev = rev;
1830 }
1891 }
1831
1892
1832 if (rev >= 0)
1893 if (rev >= 0)
1833 return rev;
1894 return rev;
1834 return -2;
1895 return -2;
1835 }
1896 }
1836
1897
1837 static PyObject *index_getitem(indexObject *self, PyObject *value)
1898 static PyObject *index_getitem(indexObject *self, PyObject *value)
1838 {
1899 {
1839 char *node;
1900 char *node;
1840 int rev;
1901 int rev;
1841
1902
1842 if (PyInt_Check(value)) {
1903 if (PyInt_Check(value)) {
1843 long idx;
1904 long idx;
1844 if (!pylong_to_long(value, &idx)) {
1905 if (!pylong_to_long(value, &idx)) {
1845 return NULL;
1906 return NULL;
1846 }
1907 }
1847 return index_get(self, idx);
1908 return index_get(self, idx);
1848 }
1909 }
1849
1910
1850 if (node_check(value, &node) == -1)
1911 if (node_check(value, &node) == -1)
1851 return NULL;
1912 return NULL;
1852 rev = index_find_node(self, node, 20);
1913 rev = index_find_node(self, node, 20);
1853 if (rev >= -1)
1914 if (rev >= -1)
1854 return PyInt_FromLong(rev);
1915 return PyInt_FromLong(rev);
1855 if (rev == -2)
1916 if (rev == -2)
1856 raise_revlog_error();
1917 raise_revlog_error();
1857 return NULL;
1918 return NULL;
1858 }
1919 }
1859
1920
1860 /*
1921 /*
1861 * Fully populate the radix tree.
1922 * Fully populate the radix tree.
1862 */
1923 */
1863 static int index_populate_nt(indexObject *self)
1924 static int index_populate_nt(indexObject *self)
1864 {
1925 {
1865 int rev;
1926 int rev;
1866 if (self->ntrev > 0) {
1927 if (self->ntrev > 0) {
1867 for (rev = self->ntrev - 1; rev >= 0; rev--) {
1928 for (rev = self->ntrev - 1; rev >= 0; rev--) {
1868 const char *n = index_node_existing(self, rev);
1929 const char *n = index_node_existing(self, rev);
1869 if (n == NULL)
1930 if (n == NULL)
1870 return -1;
1931 return -1;
1871 if (nt_insert(&self->nt, n, rev) == -1)
1932 if (nt_insert(&self->nt, n, rev) == -1)
1872 return -1;
1933 return -1;
1873 }
1934 }
1874 self->ntrev = -1;
1935 self->ntrev = -1;
1875 }
1936 }
1876 return 0;
1937 return 0;
1877 }
1938 }
1878
1939
1879 static PyObject *index_partialmatch(indexObject *self, PyObject *args)
1940 static PyObject *index_partialmatch(indexObject *self, PyObject *args)
1880 {
1941 {
1881 const char *fullnode;
1942 const char *fullnode;
1882 int nodelen;
1943 int nodelen;
1883 char *node;
1944 char *node;
1884 int rev, i;
1945 int rev, i;
1885
1946
1886 if (!PyArg_ParseTuple(args, PY23("s#", "y#"), &node, &nodelen))
1947 if (!PyArg_ParseTuple(args, PY23("s#", "y#"), &node, &nodelen))
1887 return NULL;
1948 return NULL;
1888
1949
1889 if (nodelen < 1) {
1950 if (nodelen < 1) {
1890 PyErr_SetString(PyExc_ValueError, "key too short");
1951 PyErr_SetString(PyExc_ValueError, "key too short");
1891 return NULL;
1952 return NULL;
1892 }
1953 }
1893
1954
1894 if (nodelen > 40) {
1955 if (nodelen > 40) {
1895 PyErr_SetString(PyExc_ValueError, "key too long");
1956 PyErr_SetString(PyExc_ValueError, "key too long");
1896 return NULL;
1957 return NULL;
1897 }
1958 }
1898
1959
1899 for (i = 0; i < nodelen; i++)
1960 for (i = 0; i < nodelen; i++)
1900 hexdigit(node, i);
1961 hexdigit(node, i);
1901 if (PyErr_Occurred()) {
1962 if (PyErr_Occurred()) {
1902 /* input contains non-hex characters */
1963 /* input contains non-hex characters */
1903 PyErr_Clear();
1964 PyErr_Clear();
1904 Py_RETURN_NONE;
1965 Py_RETURN_NONE;
1905 }
1966 }
1906
1967
1907 if (index_init_nt(self) == -1)
1968 if (index_init_nt(self) == -1)
1908 return NULL;
1969 return NULL;
1909 if (index_populate_nt(self) == -1)
1970 if (index_populate_nt(self) == -1)
1910 return NULL;
1971 return NULL;
1911 rev = nt_partialmatch(&self->nt, node, nodelen);
1972 rev = nt_partialmatch(&self->nt, node, nodelen);
1912
1973
1913 switch (rev) {
1974 switch (rev) {
1914 case -4:
1975 case -4:
1915 raise_revlog_error();
1976 raise_revlog_error();
1916 return NULL;
1977 return NULL;
1917 case -2:
1978 case -2:
1918 Py_RETURN_NONE;
1979 Py_RETURN_NONE;
1919 case -1:
1980 case -1:
1920 return PyBytes_FromStringAndSize(nullid, 20);
1981 return PyBytes_FromStringAndSize(nullid, 20);
1921 }
1982 }
1922
1983
1923 fullnode = index_node_existing(self, rev);
1984 fullnode = index_node_existing(self, rev);
1924 if (fullnode == NULL) {
1985 if (fullnode == NULL) {
1925 return NULL;
1986 return NULL;
1926 }
1987 }
1927 return PyBytes_FromStringAndSize(fullnode, 20);
1988 return PyBytes_FromStringAndSize(fullnode, 20);
1928 }
1989 }
1929
1990
1930 static PyObject *index_shortest(indexObject *self, PyObject *args)
1991 static PyObject *index_shortest(indexObject *self, PyObject *args)
1931 {
1992 {
1932 PyObject *val;
1993 PyObject *val;
1933 char *node;
1994 char *node;
1934 int length;
1995 int length;
1935
1996
1936 if (!PyArg_ParseTuple(args, "O", &val))
1997 if (!PyArg_ParseTuple(args, "O", &val))
1937 return NULL;
1998 return NULL;
1938 if (node_check(val, &node) == -1)
1999 if (node_check(val, &node) == -1)
1939 return NULL;
2000 return NULL;
1940
2001
1941 self->ntlookups++;
2002 self->ntlookups++;
1942 if (index_init_nt(self) == -1)
2003 if (index_init_nt(self) == -1)
1943 return NULL;
2004 return NULL;
1944 if (index_populate_nt(self) == -1)
2005 if (index_populate_nt(self) == -1)
1945 return NULL;
2006 return NULL;
1946 length = nt_shortest(&self->nt, node);
2007 length = nt_shortest(&self->nt, node);
1947 if (length == -3)
2008 if (length == -3)
1948 return NULL;
2009 return NULL;
1949 if (length == -2) {
2010 if (length == -2) {
1950 raise_revlog_error();
2011 raise_revlog_error();
1951 return NULL;
2012 return NULL;
1952 }
2013 }
1953 return PyInt_FromLong(length);
2014 return PyInt_FromLong(length);
1954 }
2015 }
1955
2016
1956 static PyObject *index_m_get(indexObject *self, PyObject *args)
2017 static PyObject *index_m_get(indexObject *self, PyObject *args)
1957 {
2018 {
1958 PyObject *val;
2019 PyObject *val;
1959 char *node;
2020 char *node;
1960 int rev;
2021 int rev;
1961
2022
1962 if (!PyArg_ParseTuple(args, "O", &val))
2023 if (!PyArg_ParseTuple(args, "O", &val))
1963 return NULL;
2024 return NULL;
1964 if (node_check(val, &node) == -1)
2025 if (node_check(val, &node) == -1)
1965 return NULL;
2026 return NULL;
1966 rev = index_find_node(self, node, 20);
2027 rev = index_find_node(self, node, 20);
1967 if (rev == -3)
2028 if (rev == -3)
1968 return NULL;
2029 return NULL;
1969 if (rev == -2)
2030 if (rev == -2)
1970 Py_RETURN_NONE;
2031 Py_RETURN_NONE;
1971 return PyInt_FromLong(rev);
2032 return PyInt_FromLong(rev);
1972 }
2033 }
1973
2034
1974 static int index_contains(indexObject *self, PyObject *value)
2035 static int index_contains(indexObject *self, PyObject *value)
1975 {
2036 {
1976 char *node;
2037 char *node;
1977
2038
1978 if (PyInt_Check(value)) {
2039 if (PyInt_Check(value)) {
1979 long rev;
2040 long rev;
1980 if (!pylong_to_long(value, &rev)) {
2041 if (!pylong_to_long(value, &rev)) {
1981 return -1;
2042 return -1;
1982 }
2043 }
1983 return rev >= -1 && rev < index_length(self);
2044 return rev >= -1 && rev < index_length(self);
1984 }
2045 }
1985
2046
1986 if (node_check(value, &node) == -1)
2047 if (node_check(value, &node) == -1)
1987 return -1;
2048 return -1;
1988
2049
1989 switch (index_find_node(self, node, 20)) {
2050 switch (index_find_node(self, node, 20)) {
1990 case -3:
2051 case -3:
1991 return -1;
2052 return -1;
1992 case -2:
2053 case -2:
1993 return 0;
2054 return 0;
1994 default:
2055 default:
1995 return 1;
2056 return 1;
1996 }
2057 }
1997 }
2058 }
1998
2059
1999 typedef uint64_t bitmask;
2060 typedef uint64_t bitmask;
2000
2061
2001 /*
2062 /*
2002 * Given a disjoint set of revs, return all candidates for the
2063 * Given a disjoint set of revs, return all candidates for the
2003 * greatest common ancestor. In revset notation, this is the set
2064 * greatest common ancestor. In revset notation, this is the set
2004 * "heads(::a and ::b and ...)"
2065 * "heads(::a and ::b and ...)"
2005 */
2066 */
2006 static PyObject *find_gca_candidates(indexObject *self, const int *revs,
2067 static PyObject *find_gca_candidates(indexObject *self, const int *revs,
2007 int revcount)
2068 int revcount)
2008 {
2069 {
2009 const bitmask allseen = (1ull << revcount) - 1;
2070 const bitmask allseen = (1ull << revcount) - 1;
2010 const bitmask poison = 1ull << revcount;
2071 const bitmask poison = 1ull << revcount;
2011 PyObject *gca = PyList_New(0);
2072 PyObject *gca = PyList_New(0);
2012 int i, v, interesting;
2073 int i, v, interesting;
2013 int maxrev = -1;
2074 int maxrev = -1;
2014 bitmask sp;
2075 bitmask sp;
2015 bitmask *seen;
2076 bitmask *seen;
2016
2077
2017 if (gca == NULL)
2078 if (gca == NULL)
2018 return PyErr_NoMemory();
2079 return PyErr_NoMemory();
2019
2080
2020 for (i = 0; i < revcount; i++) {
2081 for (i = 0; i < revcount; i++) {
2021 if (revs[i] > maxrev)
2082 if (revs[i] > maxrev)
2022 maxrev = revs[i];
2083 maxrev = revs[i];
2023 }
2084 }
2024
2085
2025 seen = calloc(sizeof(*seen), maxrev + 1);
2086 seen = calloc(sizeof(*seen), maxrev + 1);
2026 if (seen == NULL) {
2087 if (seen == NULL) {
2027 Py_DECREF(gca);
2088 Py_DECREF(gca);
2028 return PyErr_NoMemory();
2089 return PyErr_NoMemory();
2029 }
2090 }
2030
2091
2031 for (i = 0; i < revcount; i++)
2092 for (i = 0; i < revcount; i++)
2032 seen[revs[i]] = 1ull << i;
2093 seen[revs[i]] = 1ull << i;
2033
2094
2034 interesting = revcount;
2095 interesting = revcount;
2035
2096
2036 for (v = maxrev; v >= 0 && interesting; v--) {
2097 for (v = maxrev; v >= 0 && interesting; v--) {
2037 bitmask sv = seen[v];
2098 bitmask sv = seen[v];
2038 int parents[2];
2099 int parents[2];
2039
2100
2040 if (!sv)
2101 if (!sv)
2041 continue;
2102 continue;
2042
2103
2043 if (sv < poison) {
2104 if (sv < poison) {
2044 interesting -= 1;
2105 interesting -= 1;
2045 if (sv == allseen) {
2106 if (sv == allseen) {
2046 PyObject *obj = PyInt_FromLong(v);
2107 PyObject *obj = PyInt_FromLong(v);
2047 if (obj == NULL)
2108 if (obj == NULL)
2048 goto bail;
2109 goto bail;
2049 if (PyList_Append(gca, obj) == -1) {
2110 if (PyList_Append(gca, obj) == -1) {
2050 Py_DECREF(obj);
2111 Py_DECREF(obj);
2051 goto bail;
2112 goto bail;
2052 }
2113 }
2053 sv |= poison;
2114 sv |= poison;
2054 for (i = 0; i < revcount; i++) {
2115 for (i = 0; i < revcount; i++) {
2055 if (revs[i] == v)
2116 if (revs[i] == v)
2056 goto done;
2117 goto done;
2057 }
2118 }
2058 }
2119 }
2059 }
2120 }
2060 if (index_get_parents(self, v, parents, maxrev) < 0)
2121 if (index_get_parents(self, v, parents, maxrev) < 0)
2061 goto bail;
2122 goto bail;
2062
2123
2063 for (i = 0; i < 2; i++) {
2124 for (i = 0; i < 2; i++) {
2064 int p = parents[i];
2125 int p = parents[i];
2065 if (p == -1)
2126 if (p == -1)
2066 continue;
2127 continue;
2067 sp = seen[p];
2128 sp = seen[p];
2068 if (sv < poison) {
2129 if (sv < poison) {
2069 if (sp == 0) {
2130 if (sp == 0) {
2070 seen[p] = sv;
2131 seen[p] = sv;
2071 interesting++;
2132 interesting++;
2072 } else if (sp != sv)
2133 } else if (sp != sv)
2073 seen[p] |= sv;
2134 seen[p] |= sv;
2074 } else {
2135 } else {
2075 if (sp && sp < poison)
2136 if (sp && sp < poison)
2076 interesting--;
2137 interesting--;
2077 seen[p] = sv;
2138 seen[p] = sv;
2078 }
2139 }
2079 }
2140 }
2080 }
2141 }
2081
2142
2082 done:
2143 done:
2083 free(seen);
2144 free(seen);
2084 return gca;
2145 return gca;
2085 bail:
2146 bail:
2086 free(seen);
2147 free(seen);
2087 Py_XDECREF(gca);
2148 Py_XDECREF(gca);
2088 return NULL;
2149 return NULL;
2089 }
2150 }
2090
2151
2091 /*
2152 /*
2092 * Given a disjoint set of revs, return the subset with the longest
2153 * Given a disjoint set of revs, return the subset with the longest
2093 * path to the root.
2154 * path to the root.
2094 */
2155 */
2095 static PyObject *find_deepest(indexObject *self, PyObject *revs)
2156 static PyObject *find_deepest(indexObject *self, PyObject *revs)
2096 {
2157 {
2097 const Py_ssize_t revcount = PyList_GET_SIZE(revs);
2158 const Py_ssize_t revcount = PyList_GET_SIZE(revs);
2098 static const Py_ssize_t capacity = 24;
2159 static const Py_ssize_t capacity = 24;
2099 int *depth, *interesting = NULL;
2160 int *depth, *interesting = NULL;
2100 int i, j, v, ninteresting;
2161 int i, j, v, ninteresting;
2101 PyObject *dict = NULL, *keys = NULL;
2162 PyObject *dict = NULL, *keys = NULL;
2102 long *seen = NULL;
2163 long *seen = NULL;
2103 int maxrev = -1;
2164 int maxrev = -1;
2104 long final;
2165 long final;
2105
2166
2106 if (revcount > capacity) {
2167 if (revcount > capacity) {
2107 PyErr_Format(PyExc_OverflowError,
2168 PyErr_Format(PyExc_OverflowError,
2108 "bitset size (%ld) > capacity (%ld)",
2169 "bitset size (%ld) > capacity (%ld)",
2109 (long)revcount, (long)capacity);
2170 (long)revcount, (long)capacity);
2110 return NULL;
2171 return NULL;
2111 }
2172 }
2112
2173
2113 for (i = 0; i < revcount; i++) {
2174 for (i = 0; i < revcount; i++) {
2114 int n = (int)PyInt_AsLong(PyList_GET_ITEM(revs, i));
2175 int n = (int)PyInt_AsLong(PyList_GET_ITEM(revs, i));
2115 if (n > maxrev)
2176 if (n > maxrev)
2116 maxrev = n;
2177 maxrev = n;
2117 }
2178 }
2118
2179
2119 depth = calloc(sizeof(*depth), maxrev + 1);
2180 depth = calloc(sizeof(*depth), maxrev + 1);
2120 if (depth == NULL)
2181 if (depth == NULL)
2121 return PyErr_NoMemory();
2182 return PyErr_NoMemory();
2122
2183
2123 seen = calloc(sizeof(*seen), maxrev + 1);
2184 seen = calloc(sizeof(*seen), maxrev + 1);
2124 if (seen == NULL) {
2185 if (seen == NULL) {
2125 PyErr_NoMemory();
2186 PyErr_NoMemory();
2126 goto bail;
2187 goto bail;
2127 }
2188 }
2128
2189
2129 interesting = calloc(sizeof(*interesting), ((size_t)1) << revcount);
2190 interesting = calloc(sizeof(*interesting), ((size_t)1) << revcount);
2130 if (interesting == NULL) {
2191 if (interesting == NULL) {
2131 PyErr_NoMemory();
2192 PyErr_NoMemory();
2132 goto bail;
2193 goto bail;
2133 }
2194 }
2134
2195
2135 if (PyList_Sort(revs) == -1)
2196 if (PyList_Sort(revs) == -1)
2136 goto bail;
2197 goto bail;
2137
2198
2138 for (i = 0; i < revcount; i++) {
2199 for (i = 0; i < revcount; i++) {
2139 int n = (int)PyInt_AsLong(PyList_GET_ITEM(revs, i));
2200 int n = (int)PyInt_AsLong(PyList_GET_ITEM(revs, i));
2140 long b = 1l << i;
2201 long b = 1l << i;
2141 depth[n] = 1;
2202 depth[n] = 1;
2142 seen[n] = b;
2203 seen[n] = b;
2143 interesting[b] = 1;
2204 interesting[b] = 1;
2144 }
2205 }
2145
2206
2146 /* invariant: ninteresting is the number of non-zero entries in
2207 /* invariant: ninteresting is the number of non-zero entries in
2147 * interesting. */
2208 * interesting. */
2148 ninteresting = (int)revcount;
2209 ninteresting = (int)revcount;
2149
2210
2150 for (v = maxrev; v >= 0 && ninteresting > 1; v--) {
2211 for (v = maxrev; v >= 0 && ninteresting > 1; v--) {
2151 int dv = depth[v];
2212 int dv = depth[v];
2152 int parents[2];
2213 int parents[2];
2153 long sv;
2214 long sv;
2154
2215
2155 if (dv == 0)
2216 if (dv == 0)
2156 continue;
2217 continue;
2157
2218
2158 sv = seen[v];
2219 sv = seen[v];
2159 if (index_get_parents(self, v, parents, maxrev) < 0)
2220 if (index_get_parents(self, v, parents, maxrev) < 0)
2160 goto bail;
2221 goto bail;
2161
2222
2162 for (i = 0; i < 2; i++) {
2223 for (i = 0; i < 2; i++) {
2163 int p = parents[i];
2224 int p = parents[i];
2164 long sp;
2225 long sp;
2165 int dp;
2226 int dp;
2166
2227
2167 if (p == -1)
2228 if (p == -1)
2168 continue;
2229 continue;
2169
2230
2170 dp = depth[p];
2231 dp = depth[p];
2171 sp = seen[p];
2232 sp = seen[p];
2172 if (dp <= dv) {
2233 if (dp <= dv) {
2173 depth[p] = dv + 1;
2234 depth[p] = dv + 1;
2174 if (sp != sv) {
2235 if (sp != sv) {
2175 interesting[sv] += 1;
2236 interesting[sv] += 1;
2176 seen[p] = sv;
2237 seen[p] = sv;
2177 if (sp) {
2238 if (sp) {
2178 interesting[sp] -= 1;
2239 interesting[sp] -= 1;
2179 if (interesting[sp] == 0)
2240 if (interesting[sp] == 0)
2180 ninteresting -= 1;
2241 ninteresting -= 1;
2181 }
2242 }
2182 }
2243 }
2183 } else if (dv == dp - 1) {
2244 } else if (dv == dp - 1) {
2184 long nsp = sp | sv;
2245 long nsp = sp | sv;
2185 if (nsp == sp)
2246 if (nsp == sp)
2186 continue;
2247 continue;
2187 seen[p] = nsp;
2248 seen[p] = nsp;
2188 interesting[sp] -= 1;
2249 interesting[sp] -= 1;
2189 if (interesting[sp] == 0)
2250 if (interesting[sp] == 0)
2190 ninteresting -= 1;
2251 ninteresting -= 1;
2191 if (interesting[nsp] == 0)
2252 if (interesting[nsp] == 0)
2192 ninteresting += 1;
2253 ninteresting += 1;
2193 interesting[nsp] += 1;
2254 interesting[nsp] += 1;
2194 }
2255 }
2195 }
2256 }
2196 interesting[sv] -= 1;
2257 interesting[sv] -= 1;
2197 if (interesting[sv] == 0)
2258 if (interesting[sv] == 0)
2198 ninteresting -= 1;
2259 ninteresting -= 1;
2199 }
2260 }
2200
2261
2201 final = 0;
2262 final = 0;
2202 j = ninteresting;
2263 j = ninteresting;
2203 for (i = 0; i < (int)(2 << revcount) && j > 0; i++) {
2264 for (i = 0; i < (int)(2 << revcount) && j > 0; i++) {
2204 if (interesting[i] == 0)
2265 if (interesting[i] == 0)
2205 continue;
2266 continue;
2206 final |= i;
2267 final |= i;
2207 j -= 1;
2268 j -= 1;
2208 }
2269 }
2209 if (final == 0) {
2270 if (final == 0) {
2210 keys = PyList_New(0);
2271 keys = PyList_New(0);
2211 goto bail;
2272 goto bail;
2212 }
2273 }
2213
2274
2214 dict = PyDict_New();
2275 dict = PyDict_New();
2215 if (dict == NULL)
2276 if (dict == NULL)
2216 goto bail;
2277 goto bail;
2217
2278
2218 for (i = 0; i < revcount; i++) {
2279 for (i = 0; i < revcount; i++) {
2219 PyObject *key;
2280 PyObject *key;
2220
2281
2221 if ((final & (1 << i)) == 0)
2282 if ((final & (1 << i)) == 0)
2222 continue;
2283 continue;
2223
2284
2224 key = PyList_GET_ITEM(revs, i);
2285 key = PyList_GET_ITEM(revs, i);
2225 Py_INCREF(key);
2286 Py_INCREF(key);
2226 Py_INCREF(Py_None);
2287 Py_INCREF(Py_None);
2227 if (PyDict_SetItem(dict, key, Py_None) == -1) {
2288 if (PyDict_SetItem(dict, key, Py_None) == -1) {
2228 Py_DECREF(key);
2289 Py_DECREF(key);
2229 Py_DECREF(Py_None);
2290 Py_DECREF(Py_None);
2230 goto bail;
2291 goto bail;
2231 }
2292 }
2232 }
2293 }
2233
2294
2234 keys = PyDict_Keys(dict);
2295 keys = PyDict_Keys(dict);
2235
2296
2236 bail:
2297 bail:
2237 free(depth);
2298 free(depth);
2238 free(seen);
2299 free(seen);
2239 free(interesting);
2300 free(interesting);
2240 Py_XDECREF(dict);
2301 Py_XDECREF(dict);
2241
2302
2242 return keys;
2303 return keys;
2243 }
2304 }
2244
2305
2245 /*
2306 /*
2246 * Given a (possibly overlapping) set of revs, return all the
2307 * Given a (possibly overlapping) set of revs, return all the
2247 * common ancestors heads: heads(::args[0] and ::a[1] and ...)
2308 * common ancestors heads: heads(::args[0] and ::a[1] and ...)
2248 */
2309 */
2249 static PyObject *index_commonancestorsheads(indexObject *self, PyObject *args)
2310 static PyObject *index_commonancestorsheads(indexObject *self, PyObject *args)
2250 {
2311 {
2251 PyObject *ret = NULL;
2312 PyObject *ret = NULL;
2252 Py_ssize_t argcount, i, len;
2313 Py_ssize_t argcount, i, len;
2253 bitmask repeat = 0;
2314 bitmask repeat = 0;
2254 int revcount = 0;
2315 int revcount = 0;
2255 int *revs;
2316 int *revs;
2256
2317
2257 argcount = PySequence_Length(args);
2318 argcount = PySequence_Length(args);
2258 revs = PyMem_Malloc(argcount * sizeof(*revs));
2319 revs = PyMem_Malloc(argcount * sizeof(*revs));
2259 if (argcount > 0 && revs == NULL)
2320 if (argcount > 0 && revs == NULL)
2260 return PyErr_NoMemory();
2321 return PyErr_NoMemory();
2261 len = index_length(self);
2322 len = index_length(self);
2262
2323
2263 for (i = 0; i < argcount; i++) {
2324 for (i = 0; i < argcount; i++) {
2264 static const int capacity = 24;
2325 static const int capacity = 24;
2265 PyObject *obj = PySequence_GetItem(args, i);
2326 PyObject *obj = PySequence_GetItem(args, i);
2266 bitmask x;
2327 bitmask x;
2267 long val;
2328 long val;
2268
2329
2269 if (!PyInt_Check(obj)) {
2330 if (!PyInt_Check(obj)) {
2270 PyErr_SetString(PyExc_TypeError,
2331 PyErr_SetString(PyExc_TypeError,
2271 "arguments must all be ints");
2332 "arguments must all be ints");
2272 Py_DECREF(obj);
2333 Py_DECREF(obj);
2273 goto bail;
2334 goto bail;
2274 }
2335 }
2275 val = PyInt_AsLong(obj);
2336 val = PyInt_AsLong(obj);
2276 Py_DECREF(obj);
2337 Py_DECREF(obj);
2277 if (val == -1) {
2338 if (val == -1) {
2278 ret = PyList_New(0);
2339 ret = PyList_New(0);
2279 goto done;
2340 goto done;
2280 }
2341 }
2281 if (val < 0 || val >= len) {
2342 if (val < 0 || val >= len) {
2282 PyErr_SetString(PyExc_IndexError, "index out of range");
2343 PyErr_SetString(PyExc_IndexError, "index out of range");
2283 goto bail;
2344 goto bail;
2284 }
2345 }
2285 /* this cheesy bloom filter lets us avoid some more
2346 /* this cheesy bloom filter lets us avoid some more
2286 * expensive duplicate checks in the common set-is-disjoint
2347 * expensive duplicate checks in the common set-is-disjoint
2287 * case */
2348 * case */
2288 x = 1ull << (val & 0x3f);
2349 x = 1ull << (val & 0x3f);
2289 if (repeat & x) {
2350 if (repeat & x) {
2290 int k;
2351 int k;
2291 for (k = 0; k < revcount; k++) {
2352 for (k = 0; k < revcount; k++) {
2292 if (val == revs[k])
2353 if (val == revs[k])
2293 goto duplicate;
2354 goto duplicate;
2294 }
2355 }
2295 } else
2356 } else
2296 repeat |= x;
2357 repeat |= x;
2297 if (revcount >= capacity) {
2358 if (revcount >= capacity) {
2298 PyErr_Format(PyExc_OverflowError,
2359 PyErr_Format(PyExc_OverflowError,
2299 "bitset size (%d) > capacity (%d)",
2360 "bitset size (%d) > capacity (%d)",
2300 revcount, capacity);
2361 revcount, capacity);
2301 goto bail;
2362 goto bail;
2302 }
2363 }
2303 revs[revcount++] = (int)val;
2364 revs[revcount++] = (int)val;
2304 duplicate:;
2365 duplicate:;
2305 }
2366 }
2306
2367
2307 if (revcount == 0) {
2368 if (revcount == 0) {
2308 ret = PyList_New(0);
2369 ret = PyList_New(0);
2309 goto done;
2370 goto done;
2310 }
2371 }
2311 if (revcount == 1) {
2372 if (revcount == 1) {
2312 PyObject *obj;
2373 PyObject *obj;
2313 ret = PyList_New(1);
2374 ret = PyList_New(1);
2314 if (ret == NULL)
2375 if (ret == NULL)
2315 goto bail;
2376 goto bail;
2316 obj = PyInt_FromLong(revs[0]);
2377 obj = PyInt_FromLong(revs[0]);
2317 if (obj == NULL)
2378 if (obj == NULL)
2318 goto bail;
2379 goto bail;
2319 PyList_SET_ITEM(ret, 0, obj);
2380 PyList_SET_ITEM(ret, 0, obj);
2320 goto done;
2381 goto done;
2321 }
2382 }
2322
2383
2323 ret = find_gca_candidates(self, revs, revcount);
2384 ret = find_gca_candidates(self, revs, revcount);
2324 if (ret == NULL)
2385 if (ret == NULL)
2325 goto bail;
2386 goto bail;
2326
2387
2327 done:
2388 done:
2328 PyMem_Free(revs);
2389 PyMem_Free(revs);
2329 return ret;
2390 return ret;
2330
2391
2331 bail:
2392 bail:
2332 PyMem_Free(revs);
2393 PyMem_Free(revs);
2333 Py_XDECREF(ret);
2394 Py_XDECREF(ret);
2334 return NULL;
2395 return NULL;
2335 }
2396 }
2336
2397
2337 /*
2398 /*
2338 * Given a (possibly overlapping) set of revs, return the greatest
2399 * Given a (possibly overlapping) set of revs, return the greatest
2339 * common ancestors: those with the longest path to the root.
2400 * common ancestors: those with the longest path to the root.
2340 */
2401 */
2341 static PyObject *index_ancestors(indexObject *self, PyObject *args)
2402 static PyObject *index_ancestors(indexObject *self, PyObject *args)
2342 {
2403 {
2343 PyObject *ret;
2404 PyObject *ret;
2344 PyObject *gca = index_commonancestorsheads(self, args);
2405 PyObject *gca = index_commonancestorsheads(self, args);
2345 if (gca == NULL)
2406 if (gca == NULL)
2346 return NULL;
2407 return NULL;
2347
2408
2348 if (PyList_GET_SIZE(gca) <= 1) {
2409 if (PyList_GET_SIZE(gca) <= 1) {
2349 return gca;
2410 return gca;
2350 }
2411 }
2351
2412
2352 ret = find_deepest(self, gca);
2413 ret = find_deepest(self, gca);
2353 Py_DECREF(gca);
2414 Py_DECREF(gca);
2354 return ret;
2415 return ret;
2355 }
2416 }
2356
2417
2357 /*
2418 /*
2358 * Invalidate any trie entries introduced by added revs.
2419 * Invalidate any trie entries introduced by added revs.
2359 */
2420 */
2360 static void index_invalidate_added(indexObject *self, Py_ssize_t start)
2421 static void index_invalidate_added(indexObject *self, Py_ssize_t start)
2361 {
2422 {
2362 Py_ssize_t i, len = PyList_GET_SIZE(self->added);
2423 Py_ssize_t i, len = PyList_GET_SIZE(self->added);
2363
2424
2364 for (i = start; i < len; i++) {
2425 for (i = start; i < len; i++) {
2365 PyObject *tuple = PyList_GET_ITEM(self->added, i);
2426 PyObject *tuple = PyList_GET_ITEM(self->added, i);
2366 PyObject *node = PyTuple_GET_ITEM(tuple, 7);
2427 PyObject *node = PyTuple_GET_ITEM(tuple, 7);
2367
2428
2368 nt_delete_node(&self->nt, PyBytes_AS_STRING(node));
2429 nt_delete_node(&self->nt, PyBytes_AS_STRING(node));
2369 }
2430 }
2370
2431
2371 if (start == 0)
2432 if (start == 0)
2372 Py_CLEAR(self->added);
2433 Py_CLEAR(self->added);
2373 }
2434 }
2374
2435
2375 /*
2436 /*
2376 * Delete a numeric range of revs, which must be at the end of the
2437 * Delete a numeric range of revs, which must be at the end of the
2377 * range, but exclude the sentinel nullid entry.
2438 * range, but exclude the sentinel nullid entry.
2378 */
2439 */
2379 static int index_slice_del(indexObject *self, PyObject *item)
2440 static int index_slice_del(indexObject *self, PyObject *item)
2380 {
2441 {
2381 Py_ssize_t start, stop, step, slicelength;
2442 Py_ssize_t start, stop, step, slicelength;
2382 Py_ssize_t length = index_length(self) + 1;
2443 Py_ssize_t length = index_length(self) + 1;
2383 int ret = 0;
2444 int ret = 0;
2384
2445
2385 /* Argument changed from PySliceObject* to PyObject* in Python 3. */
2446 /* Argument changed from PySliceObject* to PyObject* in Python 3. */
2386 #ifdef IS_PY3K
2447 #ifdef IS_PY3K
2387 if (PySlice_GetIndicesEx(item, length, &start, &stop, &step,
2448 if (PySlice_GetIndicesEx(item, length, &start, &stop, &step,
2388 &slicelength) < 0)
2449 &slicelength) < 0)
2389 #else
2450 #else
2390 if (PySlice_GetIndicesEx((PySliceObject *)item, length, &start, &stop,
2451 if (PySlice_GetIndicesEx((PySliceObject *)item, length, &start, &stop,
2391 &step, &slicelength) < 0)
2452 &step, &slicelength) < 0)
2392 #endif
2453 #endif
2393 return -1;
2454 return -1;
2394
2455
2395 if (slicelength <= 0)
2456 if (slicelength <= 0)
2396 return 0;
2457 return 0;
2397
2458
2398 if ((step < 0 && start < stop) || (step > 0 && start > stop))
2459 if ((step < 0 && start < stop) || (step > 0 && start > stop))
2399 stop = start;
2460 stop = start;
2400
2461
2401 if (step < 0) {
2462 if (step < 0) {
2402 stop = start + 1;
2463 stop = start + 1;
2403 start = stop + step * (slicelength - 1) - 1;
2464 start = stop + step * (slicelength - 1) - 1;
2404 step = -step;
2465 step = -step;
2405 }
2466 }
2406
2467
2407 if (step != 1) {
2468 if (step != 1) {
2408 PyErr_SetString(PyExc_ValueError,
2469 PyErr_SetString(PyExc_ValueError,
2409 "revlog index delete requires step size of 1");
2470 "revlog index delete requires step size of 1");
2410 return -1;
2471 return -1;
2411 }
2472 }
2412
2473
2413 if (stop != length - 1) {
2474 if (stop != length - 1) {
2414 PyErr_SetString(PyExc_IndexError,
2475 PyErr_SetString(PyExc_IndexError,
2415 "revlog index deletion indices are invalid");
2476 "revlog index deletion indices are invalid");
2416 return -1;
2477 return -1;
2417 }
2478 }
2418
2479
2419 if (start < self->length) {
2480 if (start < self->length) {
2420 if (self->ntinitialized) {
2481 if (self->ntinitialized) {
2421 Py_ssize_t i;
2482 Py_ssize_t i;
2422
2483
2423 for (i = start + 1; i < self->length; i++) {
2484 for (i = start + 1; i < self->length; i++) {
2424 const char *node = index_node_existing(self, i);
2485 const char *node = index_node_existing(self, i);
2425 if (node == NULL)
2486 if (node == NULL)
2426 return -1;
2487 return -1;
2427
2488
2428 nt_delete_node(&self->nt, node);
2489 nt_delete_node(&self->nt, node);
2429 }
2490 }
2430 if (self->added)
2491 if (self->added)
2431 index_invalidate_added(self, 0);
2492 index_invalidate_added(self, 0);
2432 if (self->ntrev > start)
2493 if (self->ntrev > start)
2433 self->ntrev = (int)start;
2494 self->ntrev = (int)start;
2434 }
2495 }
2435 self->length = start;
2496 self->length = start;
2436 if (start < self->raw_length) {
2497 if (start < self->raw_length) {
2437 if (self->cache) {
2498 if (self->cache) {
2438 Py_ssize_t i;
2499 Py_ssize_t i;
2439 for (i = start; i < self->raw_length; i++)
2500 for (i = start; i < self->raw_length; i++)
2440 Py_CLEAR(self->cache[i]);
2501 Py_CLEAR(self->cache[i]);
2441 }
2502 }
2442 self->raw_length = start;
2503 self->raw_length = start;
2443 }
2504 }
2444 goto done;
2505 goto done;
2445 }
2506 }
2446
2507
2447 if (self->ntinitialized) {
2508 if (self->ntinitialized) {
2448 index_invalidate_added(self, start - self->length);
2509 index_invalidate_added(self, start - self->length);
2449 if (self->ntrev > start)
2510 if (self->ntrev > start)
2450 self->ntrev = (int)start;
2511 self->ntrev = (int)start;
2451 }
2512 }
2452 if (self->added)
2513 if (self->added)
2453 ret = PyList_SetSlice(self->added, start - self->length,
2514 ret = PyList_SetSlice(self->added, start - self->length,
2454 PyList_GET_SIZE(self->added), NULL);
2515 PyList_GET_SIZE(self->added), NULL);
2455 done:
2516 done:
2456 Py_CLEAR(self->headrevs);
2517 Py_CLEAR(self->headrevs);
2457 return ret;
2518 return ret;
2458 }
2519 }
2459
2520
2460 /*
2521 /*
2461 * Supported ops:
2522 * Supported ops:
2462 *
2523 *
2463 * slice deletion
2524 * slice deletion
2464 * string assignment (extend node->rev mapping)
2525 * string assignment (extend node->rev mapping)
2465 * string deletion (shrink node->rev mapping)
2526 * string deletion (shrink node->rev mapping)
2466 */
2527 */
2467 static int index_assign_subscript(indexObject *self, PyObject *item,
2528 static int index_assign_subscript(indexObject *self, PyObject *item,
2468 PyObject *value)
2529 PyObject *value)
2469 {
2530 {
2470 char *node;
2531 char *node;
2471 long rev;
2532 long rev;
2472
2533
2473 if (PySlice_Check(item) && value == NULL)
2534 if (PySlice_Check(item) && value == NULL)
2474 return index_slice_del(self, item);
2535 return index_slice_del(self, item);
2475
2536
2476 if (node_check(item, &node) == -1)
2537 if (node_check(item, &node) == -1)
2477 return -1;
2538 return -1;
2478
2539
2479 if (value == NULL)
2540 if (value == NULL)
2480 return self->ntinitialized ? nt_delete_node(&self->nt, node)
2541 return self->ntinitialized ? nt_delete_node(&self->nt, node)
2481 : 0;
2542 : 0;
2482 rev = PyInt_AsLong(value);
2543 rev = PyInt_AsLong(value);
2483 if (rev > INT_MAX || rev < 0) {
2544 if (rev > INT_MAX || rev < 0) {
2484 if (!PyErr_Occurred())
2545 if (!PyErr_Occurred())
2485 PyErr_SetString(PyExc_ValueError, "rev out of range");
2546 PyErr_SetString(PyExc_ValueError, "rev out of range");
2486 return -1;
2547 return -1;
2487 }
2548 }
2488
2549
2489 if (index_init_nt(self) == -1)
2550 if (index_init_nt(self) == -1)
2490 return -1;
2551 return -1;
2491 return nt_insert(&self->nt, node, (int)rev);
2552 return nt_insert(&self->nt, node, (int)rev);
2492 }
2553 }
2493
2554
2494 /*
2555 /*
2495 * Find all RevlogNG entries in an index that has inline data. Update
2556 * Find all RevlogNG entries in an index that has inline data. Update
2496 * the optional "offsets" table with those entries.
2557 * the optional "offsets" table with those entries.
2497 */
2558 */
2498 static Py_ssize_t inline_scan(indexObject *self, const char **offsets)
2559 static Py_ssize_t inline_scan(indexObject *self, const char **offsets)
2499 {
2560 {
2500 const char *data = (const char *)self->buf.buf;
2561 const char *data = (const char *)self->buf.buf;
2501 Py_ssize_t pos = 0;
2562 Py_ssize_t pos = 0;
2502 Py_ssize_t end = self->buf.len;
2563 Py_ssize_t end = self->buf.len;
2503 long incr = v1_hdrsize;
2564 long incr = v1_hdrsize;
2504 Py_ssize_t len = 0;
2565 Py_ssize_t len = 0;
2505
2566
2506 while (pos + v1_hdrsize <= end && pos >= 0) {
2567 while (pos + v1_hdrsize <= end && pos >= 0) {
2507 uint32_t comp_len;
2568 uint32_t comp_len;
2508 /* 3rd element of header is length of compressed inline data */
2569 /* 3rd element of header is length of compressed inline data */
2509 comp_len = getbe32(data + pos + 8);
2570 comp_len = getbe32(data + pos + 8);
2510 incr = v1_hdrsize + comp_len;
2571 incr = v1_hdrsize + comp_len;
2511 if (offsets)
2572 if (offsets)
2512 offsets[len] = data + pos;
2573 offsets[len] = data + pos;
2513 len++;
2574 len++;
2514 pos += incr;
2575 pos += incr;
2515 }
2576 }
2516
2577
2517 if (pos != end) {
2578 if (pos != end) {
2518 if (!PyErr_Occurred())
2579 if (!PyErr_Occurred())
2519 PyErr_SetString(PyExc_ValueError, "corrupt index file");
2580 PyErr_SetString(PyExc_ValueError, "corrupt index file");
2520 return -1;
2581 return -1;
2521 }
2582 }
2522
2583
2523 return len;
2584 return len;
2524 }
2585 }
2525
2586
2526 static int index_init(indexObject *self, PyObject *args)
2587 static int index_init(indexObject *self, PyObject *args)
2527 {
2588 {
2528 PyObject *data_obj, *inlined_obj;
2589 PyObject *data_obj, *inlined_obj;
2529 Py_ssize_t size;
2590 Py_ssize_t size;
2530
2591
2531 /* Initialize before argument-checking to avoid index_dealloc() crash.
2592 /* Initialize before argument-checking to avoid index_dealloc() crash.
2532 */
2593 */
2533 self->raw_length = 0;
2594 self->raw_length = 0;
2534 self->added = NULL;
2595 self->added = NULL;
2535 self->cache = NULL;
2596 self->cache = NULL;
2536 self->data = NULL;
2597 self->data = NULL;
2537 memset(&self->buf, 0, sizeof(self->buf));
2598 memset(&self->buf, 0, sizeof(self->buf));
2538 self->headrevs = NULL;
2599 self->headrevs = NULL;
2539 self->filteredrevs = Py_None;
2600 self->filteredrevs = Py_None;
2540 Py_INCREF(Py_None);
2601 Py_INCREF(Py_None);
2541 self->ntinitialized = 0;
2602 self->ntinitialized = 0;
2542 self->offsets = NULL;
2603 self->offsets = NULL;
2543
2604
2544 if (!PyArg_ParseTuple(args, "OO", &data_obj, &inlined_obj))
2605 if (!PyArg_ParseTuple(args, "OO", &data_obj, &inlined_obj))
2545 return -1;
2606 return -1;
2546 if (!PyObject_CheckBuffer(data_obj)) {
2607 if (!PyObject_CheckBuffer(data_obj)) {
2547 PyErr_SetString(PyExc_TypeError,
2608 PyErr_SetString(PyExc_TypeError,
2548 "data does not support buffer interface");
2609 "data does not support buffer interface");
2549 return -1;
2610 return -1;
2550 }
2611 }
2551
2612
2552 if (PyObject_GetBuffer(data_obj, &self->buf, PyBUF_SIMPLE) == -1)
2613 if (PyObject_GetBuffer(data_obj, &self->buf, PyBUF_SIMPLE) == -1)
2553 return -1;
2614 return -1;
2554 size = self->buf.len;
2615 size = self->buf.len;
2555
2616
2556 self->inlined = inlined_obj && PyObject_IsTrue(inlined_obj);
2617 self->inlined = inlined_obj && PyObject_IsTrue(inlined_obj);
2557 self->data = data_obj;
2618 self->data = data_obj;
2558
2619
2559 self->ntlookups = self->ntmisses = 0;
2620 self->ntlookups = self->ntmisses = 0;
2560 self->ntrev = -1;
2621 self->ntrev = -1;
2561 Py_INCREF(self->data);
2622 Py_INCREF(self->data);
2562
2623
2563 if (self->inlined) {
2624 if (self->inlined) {
2564 Py_ssize_t len = inline_scan(self, NULL);
2625 Py_ssize_t len = inline_scan(self, NULL);
2565 if (len == -1)
2626 if (len == -1)
2566 goto bail;
2627 goto bail;
2567 self->raw_length = len;
2628 self->raw_length = len;
2568 self->length = len;
2629 self->length = len;
2569 } else {
2630 } else {
2570 if (size % v1_hdrsize) {
2631 if (size % v1_hdrsize) {
2571 PyErr_SetString(PyExc_ValueError, "corrupt index file");
2632 PyErr_SetString(PyExc_ValueError, "corrupt index file");
2572 goto bail;
2633 goto bail;
2573 }
2634 }
2574 self->raw_length = size / v1_hdrsize;
2635 self->raw_length = size / v1_hdrsize;
2575 self->length = self->raw_length;
2636 self->length = self->raw_length;
2576 }
2637 }
2577
2638
2578 return 0;
2639 return 0;
2579 bail:
2640 bail:
2580 return -1;
2641 return -1;
2581 }
2642 }
2582
2643
2583 static PyObject *index_nodemap(indexObject *self)
2644 static PyObject *index_nodemap(indexObject *self)
2584 {
2645 {
2585 Py_INCREF(self);
2646 Py_INCREF(self);
2586 return (PyObject *)self;
2647 return (PyObject *)self;
2587 }
2648 }
2588
2649
2589 static void _index_clearcaches(indexObject *self)
2650 static void _index_clearcaches(indexObject *self)
2590 {
2651 {
2591 if (self->cache) {
2652 if (self->cache) {
2592 Py_ssize_t i;
2653 Py_ssize_t i;
2593
2654
2594 for (i = 0; i < self->raw_length; i++)
2655 for (i = 0; i < self->raw_length; i++)
2595 Py_CLEAR(self->cache[i]);
2656 Py_CLEAR(self->cache[i]);
2596 free(self->cache);
2657 free(self->cache);
2597 self->cache = NULL;
2658 self->cache = NULL;
2598 }
2659 }
2599 if (self->offsets) {
2660 if (self->offsets) {
2600 PyMem_Free((void *)self->offsets);
2661 PyMem_Free((void *)self->offsets);
2601 self->offsets = NULL;
2662 self->offsets = NULL;
2602 }
2663 }
2603 if (self->ntinitialized) {
2664 if (self->ntinitialized) {
2604 nt_dealloc(&self->nt);
2665 nt_dealloc(&self->nt);
2605 }
2666 }
2606 self->ntinitialized = 0;
2667 self->ntinitialized = 0;
2607 Py_CLEAR(self->headrevs);
2668 Py_CLEAR(self->headrevs);
2608 }
2669 }
2609
2670
2610 static PyObject *index_clearcaches(indexObject *self)
2671 static PyObject *index_clearcaches(indexObject *self)
2611 {
2672 {
2612 _index_clearcaches(self);
2673 _index_clearcaches(self);
2613 self->ntrev = -1;
2674 self->ntrev = -1;
2614 self->ntlookups = self->ntmisses = 0;
2675 self->ntlookups = self->ntmisses = 0;
2615 Py_RETURN_NONE;
2676 Py_RETURN_NONE;
2616 }
2677 }
2617
2678
2618 static void index_dealloc(indexObject *self)
2679 static void index_dealloc(indexObject *self)
2619 {
2680 {
2620 _index_clearcaches(self);
2681 _index_clearcaches(self);
2621 Py_XDECREF(self->filteredrevs);
2682 Py_XDECREF(self->filteredrevs);
2622 if (self->buf.buf) {
2683 if (self->buf.buf) {
2623 PyBuffer_Release(&self->buf);
2684 PyBuffer_Release(&self->buf);
2624 memset(&self->buf, 0, sizeof(self->buf));
2685 memset(&self->buf, 0, sizeof(self->buf));
2625 }
2686 }
2626 Py_XDECREF(self->data);
2687 Py_XDECREF(self->data);
2627 Py_XDECREF(self->added);
2688 Py_XDECREF(self->added);
2628 PyObject_Del(self);
2689 PyObject_Del(self);
2629 }
2690 }
2630
2691
2631 static PySequenceMethods index_sequence_methods = {
2692 static PySequenceMethods index_sequence_methods = {
2632 (lenfunc)index_length, /* sq_length */
2693 (lenfunc)index_length, /* sq_length */
2633 0, /* sq_concat */
2694 0, /* sq_concat */
2634 0, /* sq_repeat */
2695 0, /* sq_repeat */
2635 (ssizeargfunc)index_get, /* sq_item */
2696 (ssizeargfunc)index_get, /* sq_item */
2636 0, /* sq_slice */
2697 0, /* sq_slice */
2637 0, /* sq_ass_item */
2698 0, /* sq_ass_item */
2638 0, /* sq_ass_slice */
2699 0, /* sq_ass_slice */
2639 (objobjproc)index_contains, /* sq_contains */
2700 (objobjproc)index_contains, /* sq_contains */
2640 };
2701 };
2641
2702
2642 static PyMappingMethods index_mapping_methods = {
2703 static PyMappingMethods index_mapping_methods = {
2643 (lenfunc)index_length, /* mp_length */
2704 (lenfunc)index_length, /* mp_length */
2644 (binaryfunc)index_getitem, /* mp_subscript */
2705 (binaryfunc)index_getitem, /* mp_subscript */
2645 (objobjargproc)index_assign_subscript, /* mp_ass_subscript */
2706 (objobjargproc)index_assign_subscript, /* mp_ass_subscript */
2646 };
2707 };
2647
2708
2648 static PyMethodDef index_methods[] = {
2709 static PyMethodDef index_methods[] = {
2649 {"ancestors", (PyCFunction)index_ancestors, METH_VARARGS,
2710 {"ancestors", (PyCFunction)index_ancestors, METH_VARARGS,
2650 "return the gca set of the given revs"},
2711 "return the gca set of the given revs"},
2651 {"commonancestorsheads", (PyCFunction)index_commonancestorsheads,
2712 {"commonancestorsheads", (PyCFunction)index_commonancestorsheads,
2652 METH_VARARGS,
2713 METH_VARARGS,
2653 "return the heads of the common ancestors of the given revs"},
2714 "return the heads of the common ancestors of the given revs"},
2654 {"clearcaches", (PyCFunction)index_clearcaches, METH_NOARGS,
2715 {"clearcaches", (PyCFunction)index_clearcaches, METH_NOARGS,
2655 "clear the index caches"},
2716 "clear the index caches"},
2656 {"get", (PyCFunction)index_m_get, METH_VARARGS, "get an index entry"},
2717 {"get", (PyCFunction)index_m_get, METH_VARARGS, "get an index entry"},
2657 {"computephasesmapsets", (PyCFunction)compute_phases_map_sets, METH_VARARGS,
2718 {"computephasesmapsets", (PyCFunction)compute_phases_map_sets, METH_VARARGS,
2658 "compute phases"},
2719 "compute phases"},
2659 {"reachableroots2", (PyCFunction)reachableroots2, METH_VARARGS,
2720 {"reachableroots2", (PyCFunction)reachableroots2, METH_VARARGS,
2660 "reachableroots"},
2721 "reachableroots"},
2661 {"headrevs", (PyCFunction)index_headrevs, METH_VARARGS,
2722 {"headrevs", (PyCFunction)index_headrevs, METH_VARARGS,
2662 "get head revisions"}, /* Can do filtering since 3.2 */
2723 "get head revisions"}, /* Can do filtering since 3.2 */
2663 {"headrevsfiltered", (PyCFunction)index_headrevs, METH_VARARGS,
2724 {"headrevsfiltered", (PyCFunction)index_headrevs, METH_VARARGS,
2664 "get filtered head revisions"}, /* Can always do filtering */
2725 "get filtered head revisions"}, /* Can always do filtering */
2665 {"issnapshot", (PyCFunction)index_issnapshot, METH_O,
2726 {"issnapshot", (PyCFunction)index_issnapshot, METH_O,
2666 "True if the object is a snapshot"},
2727 "True if the object is a snapshot"},
2728 {"findsnapshots", (PyCFunction)index_findsnapshots, METH_VARARGS,
2729 "Gather snapshot data in a cache dict"},
2667 {"deltachain", (PyCFunction)index_deltachain, METH_VARARGS,
2730 {"deltachain", (PyCFunction)index_deltachain, METH_VARARGS,
2668 "determine revisions with deltas to reconstruct fulltext"},
2731 "determine revisions with deltas to reconstruct fulltext"},
2669 {"slicechunktodensity", (PyCFunction)index_slicechunktodensity,
2732 {"slicechunktodensity", (PyCFunction)index_slicechunktodensity,
2670 METH_VARARGS, "determine revisions with deltas to reconstruct fulltext"},
2733 METH_VARARGS, "determine revisions with deltas to reconstruct fulltext"},
2671 {"append", (PyCFunction)index_append, METH_O, "append an index entry"},
2734 {"append", (PyCFunction)index_append, METH_O, "append an index entry"},
2672 {"partialmatch", (PyCFunction)index_partialmatch, METH_VARARGS,
2735 {"partialmatch", (PyCFunction)index_partialmatch, METH_VARARGS,
2673 "match a potentially ambiguous node ID"},
2736 "match a potentially ambiguous node ID"},
2674 {"shortest", (PyCFunction)index_shortest, METH_VARARGS,
2737 {"shortest", (PyCFunction)index_shortest, METH_VARARGS,
2675 "find length of shortest hex nodeid of a binary ID"},
2738 "find length of shortest hex nodeid of a binary ID"},
2676 {"stats", (PyCFunction)index_stats, METH_NOARGS, "stats for the index"},
2739 {"stats", (PyCFunction)index_stats, METH_NOARGS, "stats for the index"},
2677 {NULL} /* Sentinel */
2740 {NULL} /* Sentinel */
2678 };
2741 };
2679
2742
2680 static PyGetSetDef index_getset[] = {
2743 static PyGetSetDef index_getset[] = {
2681 {"nodemap", (getter)index_nodemap, NULL, "nodemap", NULL},
2744 {"nodemap", (getter)index_nodemap, NULL, "nodemap", NULL},
2682 {NULL} /* Sentinel */
2745 {NULL} /* Sentinel */
2683 };
2746 };
2684
2747
2685 PyTypeObject HgRevlogIndex_Type = {
2748 PyTypeObject HgRevlogIndex_Type = {
2686 PyVarObject_HEAD_INIT(NULL, 0) /* header */
2749 PyVarObject_HEAD_INIT(NULL, 0) /* header */
2687 "parsers.index", /* tp_name */
2750 "parsers.index", /* tp_name */
2688 sizeof(indexObject), /* tp_basicsize */
2751 sizeof(indexObject), /* tp_basicsize */
2689 0, /* tp_itemsize */
2752 0, /* tp_itemsize */
2690 (destructor)index_dealloc, /* tp_dealloc */
2753 (destructor)index_dealloc, /* tp_dealloc */
2691 0, /* tp_print */
2754 0, /* tp_print */
2692 0, /* tp_getattr */
2755 0, /* tp_getattr */
2693 0, /* tp_setattr */
2756 0, /* tp_setattr */
2694 0, /* tp_compare */
2757 0, /* tp_compare */
2695 0, /* tp_repr */
2758 0, /* tp_repr */
2696 0, /* tp_as_number */
2759 0, /* tp_as_number */
2697 &index_sequence_methods, /* tp_as_sequence */
2760 &index_sequence_methods, /* tp_as_sequence */
2698 &index_mapping_methods, /* tp_as_mapping */
2761 &index_mapping_methods, /* tp_as_mapping */
2699 0, /* tp_hash */
2762 0, /* tp_hash */
2700 0, /* tp_call */
2763 0, /* tp_call */
2701 0, /* tp_str */
2764 0, /* tp_str */
2702 0, /* tp_getattro */
2765 0, /* tp_getattro */
2703 0, /* tp_setattro */
2766 0, /* tp_setattro */
2704 0, /* tp_as_buffer */
2767 0, /* tp_as_buffer */
2705 Py_TPFLAGS_DEFAULT, /* tp_flags */
2768 Py_TPFLAGS_DEFAULT, /* tp_flags */
2706 "revlog index", /* tp_doc */
2769 "revlog index", /* tp_doc */
2707 0, /* tp_traverse */
2770 0, /* tp_traverse */
2708 0, /* tp_clear */
2771 0, /* tp_clear */
2709 0, /* tp_richcompare */
2772 0, /* tp_richcompare */
2710 0, /* tp_weaklistoffset */
2773 0, /* tp_weaklistoffset */
2711 0, /* tp_iter */
2774 0, /* tp_iter */
2712 0, /* tp_iternext */
2775 0, /* tp_iternext */
2713 index_methods, /* tp_methods */
2776 index_methods, /* tp_methods */
2714 0, /* tp_members */
2777 0, /* tp_members */
2715 index_getset, /* tp_getset */
2778 index_getset, /* tp_getset */
2716 0, /* tp_base */
2779 0, /* tp_base */
2717 0, /* tp_dict */
2780 0, /* tp_dict */
2718 0, /* tp_descr_get */
2781 0, /* tp_descr_get */
2719 0, /* tp_descr_set */
2782 0, /* tp_descr_set */
2720 0, /* tp_dictoffset */
2783 0, /* tp_dictoffset */
2721 (initproc)index_init, /* tp_init */
2784 (initproc)index_init, /* tp_init */
2722 0, /* tp_alloc */
2785 0, /* tp_alloc */
2723 };
2786 };
2724
2787
2725 /*
2788 /*
2726 * returns a tuple of the form (index, index, cache) with elements as
2789 * returns a tuple of the form (index, index, cache) with elements as
2727 * follows:
2790 * follows:
2728 *
2791 *
2729 * index: an index object that lazily parses RevlogNG records
2792 * index: an index object that lazily parses RevlogNG records
2730 * cache: if data is inlined, a tuple (0, index_file_content), else None
2793 * cache: if data is inlined, a tuple (0, index_file_content), else None
2731 * index_file_content could be a string, or a buffer
2794 * index_file_content could be a string, or a buffer
2732 *
2795 *
2733 * added complications are for backwards compatibility
2796 * added complications are for backwards compatibility
2734 */
2797 */
2735 PyObject *parse_index2(PyObject *self, PyObject *args)
2798 PyObject *parse_index2(PyObject *self, PyObject *args)
2736 {
2799 {
2737 PyObject *tuple = NULL, *cache = NULL;
2800 PyObject *tuple = NULL, *cache = NULL;
2738 indexObject *idx;
2801 indexObject *idx;
2739 int ret;
2802 int ret;
2740
2803
2741 idx = PyObject_New(indexObject, &HgRevlogIndex_Type);
2804 idx = PyObject_New(indexObject, &HgRevlogIndex_Type);
2742 if (idx == NULL)
2805 if (idx == NULL)
2743 goto bail;
2806 goto bail;
2744
2807
2745 ret = index_init(idx, args);
2808 ret = index_init(idx, args);
2746 if (ret == -1)
2809 if (ret == -1)
2747 goto bail;
2810 goto bail;
2748
2811
2749 if (idx->inlined) {
2812 if (idx->inlined) {
2750 cache = Py_BuildValue("iO", 0, idx->data);
2813 cache = Py_BuildValue("iO", 0, idx->data);
2751 if (cache == NULL)
2814 if (cache == NULL)
2752 goto bail;
2815 goto bail;
2753 } else {
2816 } else {
2754 cache = Py_None;
2817 cache = Py_None;
2755 Py_INCREF(cache);
2818 Py_INCREF(cache);
2756 }
2819 }
2757
2820
2758 tuple = Py_BuildValue("NN", idx, cache);
2821 tuple = Py_BuildValue("NN", idx, cache);
2759 if (!tuple)
2822 if (!tuple)
2760 goto bail;
2823 goto bail;
2761 return tuple;
2824 return tuple;
2762
2825
2763 bail:
2826 bail:
2764 Py_XDECREF(idx);
2827 Py_XDECREF(idx);
2765 Py_XDECREF(cache);
2828 Py_XDECREF(cache);
2766 Py_XDECREF(tuple);
2829 Py_XDECREF(tuple);
2767 return NULL;
2830 return NULL;
2768 }
2831 }
2769
2832
2770 #ifdef WITH_RUST
2833 #ifdef WITH_RUST
2771
2834
2772 /* rustlazyancestors: iteration over ancestors implemented in Rust
2835 /* rustlazyancestors: iteration over ancestors implemented in Rust
2773 *
2836 *
2774 * This class holds a reference to an index and to the Rust iterator.
2837 * This class holds a reference to an index and to the Rust iterator.
2775 */
2838 */
2776 typedef struct rustlazyancestorsObjectStruct rustlazyancestorsObject;
2839 typedef struct rustlazyancestorsObjectStruct rustlazyancestorsObject;
2777
2840
2778 struct rustlazyancestorsObjectStruct {
2841 struct rustlazyancestorsObjectStruct {
2779 PyObject_HEAD
2842 PyObject_HEAD
2780 /* Type-specific fields go here. */
2843 /* Type-specific fields go here. */
2781 indexObject *index; /* Ref kept to avoid GC'ing the index */
2844 indexObject *index; /* Ref kept to avoid GC'ing the index */
2782 void *iter; /* Rust iterator */
2845 void *iter; /* Rust iterator */
2783 };
2846 };
2784
2847
2785 /* FFI exposed from Rust code */
2848 /* FFI exposed from Rust code */
2786 rustlazyancestorsObject *rustlazyancestors_init(indexObject *index,
2849 rustlazyancestorsObject *rustlazyancestors_init(indexObject *index,
2787 /* intrevs vector */
2850 /* intrevs vector */
2788 Py_ssize_t initrevslen,
2851 Py_ssize_t initrevslen,
2789 long *initrevs, long stoprev,
2852 long *initrevs, long stoprev,
2790 int inclusive);
2853 int inclusive);
2791 void rustlazyancestors_drop(rustlazyancestorsObject *self);
2854 void rustlazyancestors_drop(rustlazyancestorsObject *self);
2792 int rustlazyancestors_next(rustlazyancestorsObject *self);
2855 int rustlazyancestors_next(rustlazyancestorsObject *self);
2793 int rustlazyancestors_contains(rustlazyancestorsObject *self, long rev);
2856 int rustlazyancestors_contains(rustlazyancestorsObject *self, long rev);
2794
2857
2795 /* CPython instance methods */
2858 /* CPython instance methods */
2796 static int rustla_init(rustlazyancestorsObject *self, PyObject *args)
2859 static int rustla_init(rustlazyancestorsObject *self, PyObject *args)
2797 {
2860 {
2798 PyObject *initrevsarg = NULL;
2861 PyObject *initrevsarg = NULL;
2799 PyObject *inclusivearg = NULL;
2862 PyObject *inclusivearg = NULL;
2800 long stoprev = 0;
2863 long stoprev = 0;
2801 long *initrevs = NULL;
2864 long *initrevs = NULL;
2802 int inclusive = 0;
2865 int inclusive = 0;
2803 Py_ssize_t i;
2866 Py_ssize_t i;
2804
2867
2805 indexObject *index;
2868 indexObject *index;
2806 if (!PyArg_ParseTuple(args, "O!O!lO!", &HgRevlogIndex_Type, &index,
2869 if (!PyArg_ParseTuple(args, "O!O!lO!", &HgRevlogIndex_Type, &index,
2807 &PyList_Type, &initrevsarg, &stoprev,
2870 &PyList_Type, &initrevsarg, &stoprev,
2808 &PyBool_Type, &inclusivearg))
2871 &PyBool_Type, &inclusivearg))
2809 return -1;
2872 return -1;
2810
2873
2811 Py_INCREF(index);
2874 Py_INCREF(index);
2812 self->index = index;
2875 self->index = index;
2813
2876
2814 if (inclusivearg == Py_True)
2877 if (inclusivearg == Py_True)
2815 inclusive = 1;
2878 inclusive = 1;
2816
2879
2817 Py_ssize_t linit = PyList_GET_SIZE(initrevsarg);
2880 Py_ssize_t linit = PyList_GET_SIZE(initrevsarg);
2818
2881
2819 initrevs = (long *)calloc(linit, sizeof(long));
2882 initrevs = (long *)calloc(linit, sizeof(long));
2820
2883
2821 if (initrevs == NULL) {
2884 if (initrevs == NULL) {
2822 PyErr_NoMemory();
2885 PyErr_NoMemory();
2823 goto bail;
2886 goto bail;
2824 }
2887 }
2825
2888
2826 for (i = 0; i < linit; i++) {
2889 for (i = 0; i < linit; i++) {
2827 initrevs[i] = PyInt_AsLong(PyList_GET_ITEM(initrevsarg, i));
2890 initrevs[i] = PyInt_AsLong(PyList_GET_ITEM(initrevsarg, i));
2828 }
2891 }
2829 if (PyErr_Occurred())
2892 if (PyErr_Occurred())
2830 goto bail;
2893 goto bail;
2831
2894
2832 self->iter =
2895 self->iter =
2833 rustlazyancestors_init(index, linit, initrevs, stoprev, inclusive);
2896 rustlazyancestors_init(index, linit, initrevs, stoprev, inclusive);
2834 if (self->iter == NULL) {
2897 if (self->iter == NULL) {
2835 /* if this is because of GraphError::ParentOutOfRange
2898 /* if this is because of GraphError::ParentOutOfRange
2836 * HgRevlogIndex_GetParents() has already set the proper
2899 * HgRevlogIndex_GetParents() has already set the proper
2837 * exception */
2900 * exception */
2838 goto bail;
2901 goto bail;
2839 }
2902 }
2840
2903
2841 free(initrevs);
2904 free(initrevs);
2842 return 0;
2905 return 0;
2843
2906
2844 bail:
2907 bail:
2845 free(initrevs);
2908 free(initrevs);
2846 return -1;
2909 return -1;
2847 };
2910 };
2848
2911
2849 static void rustla_dealloc(rustlazyancestorsObject *self)
2912 static void rustla_dealloc(rustlazyancestorsObject *self)
2850 {
2913 {
2851 Py_XDECREF(self->index);
2914 Py_XDECREF(self->index);
2852 if (self->iter != NULL) { /* can happen if rustla_init failed */
2915 if (self->iter != NULL) { /* can happen if rustla_init failed */
2853 rustlazyancestors_drop(self->iter);
2916 rustlazyancestors_drop(self->iter);
2854 }
2917 }
2855 PyObject_Del(self);
2918 PyObject_Del(self);
2856 }
2919 }
2857
2920
2858 static PyObject *rustla_next(rustlazyancestorsObject *self)
2921 static PyObject *rustla_next(rustlazyancestorsObject *self)
2859 {
2922 {
2860 int res = rustlazyancestors_next(self->iter);
2923 int res = rustlazyancestors_next(self->iter);
2861 if (res == -1) {
2924 if (res == -1) {
2862 /* Setting an explicit exception seems unnecessary
2925 /* Setting an explicit exception seems unnecessary
2863 * as examples from Python source code (Objects/rangeobjets.c
2926 * as examples from Python source code (Objects/rangeobjets.c
2864 * and Modules/_io/stringio.c) seem to demonstrate.
2927 * and Modules/_io/stringio.c) seem to demonstrate.
2865 */
2928 */
2866 return NULL;
2929 return NULL;
2867 }
2930 }
2868 return PyInt_FromLong(res);
2931 return PyInt_FromLong(res);
2869 }
2932 }
2870
2933
2871 static int rustla_contains(rustlazyancestorsObject *self, PyObject *rev)
2934 static int rustla_contains(rustlazyancestorsObject *self, PyObject *rev)
2872 {
2935 {
2873 long lrev;
2936 long lrev;
2874 if (!pylong_to_long(rev, &lrev)) {
2937 if (!pylong_to_long(rev, &lrev)) {
2875 PyErr_Clear();
2938 PyErr_Clear();
2876 return 0;
2939 return 0;
2877 }
2940 }
2878 return rustlazyancestors_contains(self->iter, lrev);
2941 return rustlazyancestors_contains(self->iter, lrev);
2879 }
2942 }
2880
2943
2881 static PySequenceMethods rustla_sequence_methods = {
2944 static PySequenceMethods rustla_sequence_methods = {
2882 0, /* sq_length */
2945 0, /* sq_length */
2883 0, /* sq_concat */
2946 0, /* sq_concat */
2884 0, /* sq_repeat */
2947 0, /* sq_repeat */
2885 0, /* sq_item */
2948 0, /* sq_item */
2886 0, /* sq_slice */
2949 0, /* sq_slice */
2887 0, /* sq_ass_item */
2950 0, /* sq_ass_item */
2888 0, /* sq_ass_slice */
2951 0, /* sq_ass_slice */
2889 (objobjproc)rustla_contains, /* sq_contains */
2952 (objobjproc)rustla_contains, /* sq_contains */
2890 };
2953 };
2891
2954
2892 static PyTypeObject rustlazyancestorsType = {
2955 static PyTypeObject rustlazyancestorsType = {
2893 PyVarObject_HEAD_INIT(NULL, 0) /* header */
2956 PyVarObject_HEAD_INIT(NULL, 0) /* header */
2894 "parsers.rustlazyancestors", /* tp_name */
2957 "parsers.rustlazyancestors", /* tp_name */
2895 sizeof(rustlazyancestorsObject), /* tp_basicsize */
2958 sizeof(rustlazyancestorsObject), /* tp_basicsize */
2896 0, /* tp_itemsize */
2959 0, /* tp_itemsize */
2897 (destructor)rustla_dealloc, /* tp_dealloc */
2960 (destructor)rustla_dealloc, /* tp_dealloc */
2898 0, /* tp_print */
2961 0, /* tp_print */
2899 0, /* tp_getattr */
2962 0, /* tp_getattr */
2900 0, /* tp_setattr */
2963 0, /* tp_setattr */
2901 0, /* tp_compare */
2964 0, /* tp_compare */
2902 0, /* tp_repr */
2965 0, /* tp_repr */
2903 0, /* tp_as_number */
2966 0, /* tp_as_number */
2904 &rustla_sequence_methods, /* tp_as_sequence */
2967 &rustla_sequence_methods, /* tp_as_sequence */
2905 0, /* tp_as_mapping */
2968 0, /* tp_as_mapping */
2906 0, /* tp_hash */
2969 0, /* tp_hash */
2907 0, /* tp_call */
2970 0, /* tp_call */
2908 0, /* tp_str */
2971 0, /* tp_str */
2909 0, /* tp_getattro */
2972 0, /* tp_getattro */
2910 0, /* tp_setattro */
2973 0, /* tp_setattro */
2911 0, /* tp_as_buffer */
2974 0, /* tp_as_buffer */
2912 Py_TPFLAGS_DEFAULT, /* tp_flags */
2975 Py_TPFLAGS_DEFAULT, /* tp_flags */
2913 "Iterator over ancestors, implemented in Rust", /* tp_doc */
2976 "Iterator over ancestors, implemented in Rust", /* tp_doc */
2914 0, /* tp_traverse */
2977 0, /* tp_traverse */
2915 0, /* tp_clear */
2978 0, /* tp_clear */
2916 0, /* tp_richcompare */
2979 0, /* tp_richcompare */
2917 0, /* tp_weaklistoffset */
2980 0, /* tp_weaklistoffset */
2918 0, /* tp_iter */
2981 0, /* tp_iter */
2919 (iternextfunc)rustla_next, /* tp_iternext */
2982 (iternextfunc)rustla_next, /* tp_iternext */
2920 0, /* tp_methods */
2983 0, /* tp_methods */
2921 0, /* tp_members */
2984 0, /* tp_members */
2922 0, /* tp_getset */
2985 0, /* tp_getset */
2923 0, /* tp_base */
2986 0, /* tp_base */
2924 0, /* tp_dict */
2987 0, /* tp_dict */
2925 0, /* tp_descr_get */
2988 0, /* tp_descr_get */
2926 0, /* tp_descr_set */
2989 0, /* tp_descr_set */
2927 0, /* tp_dictoffset */
2990 0, /* tp_dictoffset */
2928 (initproc)rustla_init, /* tp_init */
2991 (initproc)rustla_init, /* tp_init */
2929 0, /* tp_alloc */
2992 0, /* tp_alloc */
2930 };
2993 };
2931 #endif /* WITH_RUST */
2994 #endif /* WITH_RUST */
2932
2995
2933 void revlog_module_init(PyObject *mod)
2996 void revlog_module_init(PyObject *mod)
2934 {
2997 {
2935 PyObject *caps = NULL;
2998 PyObject *caps = NULL;
2936 HgRevlogIndex_Type.tp_new = PyType_GenericNew;
2999 HgRevlogIndex_Type.tp_new = PyType_GenericNew;
2937 if (PyType_Ready(&HgRevlogIndex_Type) < 0)
3000 if (PyType_Ready(&HgRevlogIndex_Type) < 0)
2938 return;
3001 return;
2939 Py_INCREF(&HgRevlogIndex_Type);
3002 Py_INCREF(&HgRevlogIndex_Type);
2940 PyModule_AddObject(mod, "index", (PyObject *)&HgRevlogIndex_Type);
3003 PyModule_AddObject(mod, "index", (PyObject *)&HgRevlogIndex_Type);
2941
3004
2942 nodetreeType.tp_new = PyType_GenericNew;
3005 nodetreeType.tp_new = PyType_GenericNew;
2943 if (PyType_Ready(&nodetreeType) < 0)
3006 if (PyType_Ready(&nodetreeType) < 0)
2944 return;
3007 return;
2945 Py_INCREF(&nodetreeType);
3008 Py_INCREF(&nodetreeType);
2946 PyModule_AddObject(mod, "nodetree", (PyObject *)&nodetreeType);
3009 PyModule_AddObject(mod, "nodetree", (PyObject *)&nodetreeType);
2947
3010
2948 if (!nullentry) {
3011 if (!nullentry) {
2949 nullentry = Py_BuildValue(PY23("iiiiiiis#", "iiiiiiiy#"), 0, 0,
3012 nullentry = Py_BuildValue(PY23("iiiiiiis#", "iiiiiiiy#"), 0, 0,
2950 0, -1, -1, -1, -1, nullid, 20);
3013 0, -1, -1, -1, -1, nullid, 20);
2951 }
3014 }
2952 if (nullentry)
3015 if (nullentry)
2953 PyObject_GC_UnTrack(nullentry);
3016 PyObject_GC_UnTrack(nullentry);
2954
3017
2955 caps = PyCapsule_New(HgRevlogIndex_GetParents,
3018 caps = PyCapsule_New(HgRevlogIndex_GetParents,
2956 "mercurial.cext.parsers.index_get_parents_CAPI",
3019 "mercurial.cext.parsers.index_get_parents_CAPI",
2957 NULL);
3020 NULL);
2958 if (caps != NULL)
3021 if (caps != NULL)
2959 PyModule_AddObject(mod, "index_get_parents_CAPI", caps);
3022 PyModule_AddObject(mod, "index_get_parents_CAPI", caps);
2960
3023
2961 #ifdef WITH_RUST
3024 #ifdef WITH_RUST
2962 rustlazyancestorsType.tp_new = PyType_GenericNew;
3025 rustlazyancestorsType.tp_new = PyType_GenericNew;
2963 if (PyType_Ready(&rustlazyancestorsType) < 0)
3026 if (PyType_Ready(&rustlazyancestorsType) < 0)
2964 return;
3027 return;
2965 Py_INCREF(&rustlazyancestorsType);
3028 Py_INCREF(&rustlazyancestorsType);
2966 PyModule_AddObject(mod, "rustlazyancestors",
3029 PyModule_AddObject(mod, "rustlazyancestors",
2967 (PyObject *)&rustlazyancestorsType);
3030 (PyObject *)&rustlazyancestorsType);
2968 #endif
3031 #endif
2969 }
3032 }
@@ -1,1012 +1,1016
1 # revlogdeltas.py - Logic around delta computation for revlog
1 # revlogdeltas.py - Logic around delta computation for revlog
2 #
2 #
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 # Copyright 2018 Octobus <contact@octobus.net>
4 # Copyright 2018 Octobus <contact@octobus.net>
5 #
5 #
6 # This software may be used and distributed according to the terms of the
6 # This software may be used and distributed according to the terms of the
7 # GNU General Public License version 2 or any later version.
7 # GNU General Public License version 2 or any later version.
8 """Helper class to compute deltas stored inside revlogs"""
8 """Helper class to compute deltas stored inside revlogs"""
9
9
10 from __future__ import absolute_import
10 from __future__ import absolute_import
11
11
12 import collections
12 import collections
13 import struct
13 import struct
14
14
15 # import stuff from node for others to import from revlog
15 # import stuff from node for others to import from revlog
16 from ..node import (
16 from ..node import (
17 nullrev,
17 nullrev,
18 )
18 )
19 from ..i18n import _
19 from ..i18n import _
20
20
21 from .constants import (
21 from .constants import (
22 REVIDX_ISCENSORED,
22 REVIDX_ISCENSORED,
23 REVIDX_RAWTEXT_CHANGING_FLAGS,
23 REVIDX_RAWTEXT_CHANGING_FLAGS,
24 )
24 )
25
25
26 from ..thirdparty import (
26 from ..thirdparty import (
27 attr,
27 attr,
28 )
28 )
29
29
30 from .. import (
30 from .. import (
31 error,
31 error,
32 mdiff,
32 mdiff,
33 util,
33 )
34 )
34
35
35 # maximum <delta-chain-data>/<revision-text-length> ratio
36 # maximum <delta-chain-data>/<revision-text-length> ratio
36 LIMIT_DELTA2TEXT = 2
37 LIMIT_DELTA2TEXT = 2
37
38
38 class _testrevlog(object):
39 class _testrevlog(object):
39 """minimalist fake revlog to use in doctests"""
40 """minimalist fake revlog to use in doctests"""
40
41
41 def __init__(self, data, density=0.5, mingap=0, snapshot=()):
42 def __init__(self, data, density=0.5, mingap=0, snapshot=()):
42 """data is an list of revision payload boundaries"""
43 """data is an list of revision payload boundaries"""
43 self._data = data
44 self._data = data
44 self._srdensitythreshold = density
45 self._srdensitythreshold = density
45 self._srmingapsize = mingap
46 self._srmingapsize = mingap
46 self._snapshot = set(snapshot)
47 self._snapshot = set(snapshot)
47 self.index = None
48 self.index = None
48
49
49 def start(self, rev):
50 def start(self, rev):
50 if rev == nullrev:
51 if rev == nullrev:
51 return 0
52 return 0
52 if rev == 0:
53 if rev == 0:
53 return 0
54 return 0
54 return self._data[rev - 1]
55 return self._data[rev - 1]
55
56
56 def end(self, rev):
57 def end(self, rev):
57 if rev == nullrev:
58 if rev == nullrev:
58 return 0
59 return 0
59 return self._data[rev]
60 return self._data[rev]
60
61
61 def length(self, rev):
62 def length(self, rev):
62 return self.end(rev) - self.start(rev)
63 return self.end(rev) - self.start(rev)
63
64
64 def __len__(self):
65 def __len__(self):
65 return len(self._data)
66 return len(self._data)
66
67
67 def issnapshot(self, rev):
68 def issnapshot(self, rev):
68 if rev == nullrev:
69 if rev == nullrev:
69 return True
70 return True
70 return rev in self._snapshot
71 return rev in self._snapshot
71
72
72 def slicechunk(revlog, revs, targetsize=None):
73 def slicechunk(revlog, revs, targetsize=None):
73 """slice revs to reduce the amount of unrelated data to be read from disk.
74 """slice revs to reduce the amount of unrelated data to be read from disk.
74
75
75 ``revs`` is sliced into groups that should be read in one time.
76 ``revs`` is sliced into groups that should be read in one time.
76 Assume that revs are sorted.
77 Assume that revs are sorted.
77
78
78 The initial chunk is sliced until the overall density (payload/chunks-span
79 The initial chunk is sliced until the overall density (payload/chunks-span
79 ratio) is above `revlog._srdensitythreshold`. No gap smaller than
80 ratio) is above `revlog._srdensitythreshold`. No gap smaller than
80 `revlog._srmingapsize` is skipped.
81 `revlog._srmingapsize` is skipped.
81
82
82 If `targetsize` is set, no chunk larger than `targetsize` will be yield.
83 If `targetsize` is set, no chunk larger than `targetsize` will be yield.
83 For consistency with other slicing choice, this limit won't go lower than
84 For consistency with other slicing choice, this limit won't go lower than
84 `revlog._srmingapsize`.
85 `revlog._srmingapsize`.
85
86
86 If individual revisions chunk are larger than this limit, they will still
87 If individual revisions chunk are larger than this limit, they will still
87 be raised individually.
88 be raised individually.
88
89
89 >>> data = [
90 >>> data = [
90 ... 5, #00 (5)
91 ... 5, #00 (5)
91 ... 10, #01 (5)
92 ... 10, #01 (5)
92 ... 12, #02 (2)
93 ... 12, #02 (2)
93 ... 12, #03 (empty)
94 ... 12, #03 (empty)
94 ... 27, #04 (15)
95 ... 27, #04 (15)
95 ... 31, #05 (4)
96 ... 31, #05 (4)
96 ... 31, #06 (empty)
97 ... 31, #06 (empty)
97 ... 42, #07 (11)
98 ... 42, #07 (11)
98 ... 47, #08 (5)
99 ... 47, #08 (5)
99 ... 47, #09 (empty)
100 ... 47, #09 (empty)
100 ... 48, #10 (1)
101 ... 48, #10 (1)
101 ... 51, #11 (3)
102 ... 51, #11 (3)
102 ... 74, #12 (23)
103 ... 74, #12 (23)
103 ... 85, #13 (11)
104 ... 85, #13 (11)
104 ... 86, #14 (1)
105 ... 86, #14 (1)
105 ... 91, #15 (5)
106 ... 91, #15 (5)
106 ... ]
107 ... ]
107 >>> revlog = _testrevlog(data, snapshot=range(16))
108 >>> revlog = _testrevlog(data, snapshot=range(16))
108
109
109 >>> list(slicechunk(revlog, list(range(16))))
110 >>> list(slicechunk(revlog, list(range(16))))
110 [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
111 [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
111 >>> list(slicechunk(revlog, [0, 15]))
112 >>> list(slicechunk(revlog, [0, 15]))
112 [[0], [15]]
113 [[0], [15]]
113 >>> list(slicechunk(revlog, [0, 11, 15]))
114 >>> list(slicechunk(revlog, [0, 11, 15]))
114 [[0], [11], [15]]
115 [[0], [11], [15]]
115 >>> list(slicechunk(revlog, [0, 11, 13, 15]))
116 >>> list(slicechunk(revlog, [0, 11, 13, 15]))
116 [[0], [11, 13, 15]]
117 [[0], [11, 13, 15]]
117 >>> list(slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
118 >>> list(slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
118 [[1, 2], [5, 8, 10, 11], [14]]
119 [[1, 2], [5, 8, 10, 11], [14]]
119
120
120 Slicing with a maximum chunk size
121 Slicing with a maximum chunk size
121 >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=15))
122 >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=15))
122 [[0], [11], [13], [15]]
123 [[0], [11], [13], [15]]
123 >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=20))
124 >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=20))
124 [[0], [11], [13, 15]]
125 [[0], [11], [13, 15]]
125
126
126 Slicing involving nullrev
127 Slicing involving nullrev
127 >>> list(slicechunk(revlog, [-1, 0, 11, 13, 15], targetsize=20))
128 >>> list(slicechunk(revlog, [-1, 0, 11, 13, 15], targetsize=20))
128 [[-1, 0], [11], [13, 15]]
129 [[-1, 0], [11], [13, 15]]
129 >>> list(slicechunk(revlog, [-1, 13, 15], targetsize=5))
130 >>> list(slicechunk(revlog, [-1, 13, 15], targetsize=5))
130 [[-1], [13], [15]]
131 [[-1], [13], [15]]
131 """
132 """
132 if targetsize is not None:
133 if targetsize is not None:
133 targetsize = max(targetsize, revlog._srmingapsize)
134 targetsize = max(targetsize, revlog._srmingapsize)
134 # targetsize should not be specified when evaluating delta candidates:
135 # targetsize should not be specified when evaluating delta candidates:
135 # * targetsize is used to ensure we stay within specification when reading,
136 # * targetsize is used to ensure we stay within specification when reading,
136 densityslicing = getattr(revlog.index, 'slicechunktodensity', None)
137 densityslicing = getattr(revlog.index, 'slicechunktodensity', None)
137 if densityslicing is None:
138 if densityslicing is None:
138 densityslicing = lambda x, y, z: _slicechunktodensity(revlog, x, y, z)
139 densityslicing = lambda x, y, z: _slicechunktodensity(revlog, x, y, z)
139 for chunk in densityslicing(revs,
140 for chunk in densityslicing(revs,
140 revlog._srdensitythreshold,
141 revlog._srdensitythreshold,
141 revlog._srmingapsize):
142 revlog._srmingapsize):
142 for subchunk in _slicechunktosize(revlog, chunk, targetsize):
143 for subchunk in _slicechunktosize(revlog, chunk, targetsize):
143 yield subchunk
144 yield subchunk
144
145
145 def _slicechunktosize(revlog, revs, targetsize=None):
146 def _slicechunktosize(revlog, revs, targetsize=None):
146 """slice revs to match the target size
147 """slice revs to match the target size
147
148
148 This is intended to be used on chunk that density slicing selected by that
149 This is intended to be used on chunk that density slicing selected by that
149 are still too large compared to the read garantee of revlog. This might
150 are still too large compared to the read garantee of revlog. This might
150 happens when "minimal gap size" interrupted the slicing or when chain are
151 happens when "minimal gap size" interrupted the slicing or when chain are
151 built in a way that create large blocks next to each other.
152 built in a way that create large blocks next to each other.
152
153
153 >>> data = [
154 >>> data = [
154 ... 3, #0 (3)
155 ... 3, #0 (3)
155 ... 5, #1 (2)
156 ... 5, #1 (2)
156 ... 6, #2 (1)
157 ... 6, #2 (1)
157 ... 8, #3 (2)
158 ... 8, #3 (2)
158 ... 8, #4 (empty)
159 ... 8, #4 (empty)
159 ... 11, #5 (3)
160 ... 11, #5 (3)
160 ... 12, #6 (1)
161 ... 12, #6 (1)
161 ... 13, #7 (1)
162 ... 13, #7 (1)
162 ... 14, #8 (1)
163 ... 14, #8 (1)
163 ... ]
164 ... ]
164
165
165 == All snapshots cases ==
166 == All snapshots cases ==
166 >>> revlog = _testrevlog(data, snapshot=range(9))
167 >>> revlog = _testrevlog(data, snapshot=range(9))
167
168
168 Cases where chunk is already small enough
169 Cases where chunk is already small enough
169 >>> list(_slicechunktosize(revlog, [0], 3))
170 >>> list(_slicechunktosize(revlog, [0], 3))
170 [[0]]
171 [[0]]
171 >>> list(_slicechunktosize(revlog, [6, 7], 3))
172 >>> list(_slicechunktosize(revlog, [6, 7], 3))
172 [[6, 7]]
173 [[6, 7]]
173 >>> list(_slicechunktosize(revlog, [0], None))
174 >>> list(_slicechunktosize(revlog, [0], None))
174 [[0]]
175 [[0]]
175 >>> list(_slicechunktosize(revlog, [6, 7], None))
176 >>> list(_slicechunktosize(revlog, [6, 7], None))
176 [[6, 7]]
177 [[6, 7]]
177
178
178 cases where we need actual slicing
179 cases where we need actual slicing
179 >>> list(_slicechunktosize(revlog, [0, 1], 3))
180 >>> list(_slicechunktosize(revlog, [0, 1], 3))
180 [[0], [1]]
181 [[0], [1]]
181 >>> list(_slicechunktosize(revlog, [1, 3], 3))
182 >>> list(_slicechunktosize(revlog, [1, 3], 3))
182 [[1], [3]]
183 [[1], [3]]
183 >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
184 >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
184 [[1, 2], [3]]
185 [[1, 2], [3]]
185 >>> list(_slicechunktosize(revlog, [3, 5], 3))
186 >>> list(_slicechunktosize(revlog, [3, 5], 3))
186 [[3], [5]]
187 [[3], [5]]
187 >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
188 >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
188 [[3], [5]]
189 [[3], [5]]
189 >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
190 >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
190 [[5], [6, 7, 8]]
191 [[5], [6, 7, 8]]
191 >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
192 >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
192 [[0], [1, 2], [3], [5], [6, 7, 8]]
193 [[0], [1, 2], [3], [5], [6, 7, 8]]
193
194
194 Case with too large individual chunk (must return valid chunk)
195 Case with too large individual chunk (must return valid chunk)
195 >>> list(_slicechunktosize(revlog, [0, 1], 2))
196 >>> list(_slicechunktosize(revlog, [0, 1], 2))
196 [[0], [1]]
197 [[0], [1]]
197 >>> list(_slicechunktosize(revlog, [1, 3], 1))
198 >>> list(_slicechunktosize(revlog, [1, 3], 1))
198 [[1], [3]]
199 [[1], [3]]
199 >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
200 >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
200 [[3], [5]]
201 [[3], [5]]
201
202
202 == No Snapshot cases ==
203 == No Snapshot cases ==
203 >>> revlog = _testrevlog(data)
204 >>> revlog = _testrevlog(data)
204
205
205 Cases where chunk is already small enough
206 Cases where chunk is already small enough
206 >>> list(_slicechunktosize(revlog, [0], 3))
207 >>> list(_slicechunktosize(revlog, [0], 3))
207 [[0]]
208 [[0]]
208 >>> list(_slicechunktosize(revlog, [6, 7], 3))
209 >>> list(_slicechunktosize(revlog, [6, 7], 3))
209 [[6, 7]]
210 [[6, 7]]
210 >>> list(_slicechunktosize(revlog, [0], None))
211 >>> list(_slicechunktosize(revlog, [0], None))
211 [[0]]
212 [[0]]
212 >>> list(_slicechunktosize(revlog, [6, 7], None))
213 >>> list(_slicechunktosize(revlog, [6, 7], None))
213 [[6, 7]]
214 [[6, 7]]
214
215
215 cases where we need actual slicing
216 cases where we need actual slicing
216 >>> list(_slicechunktosize(revlog, [0, 1], 3))
217 >>> list(_slicechunktosize(revlog, [0, 1], 3))
217 [[0], [1]]
218 [[0], [1]]
218 >>> list(_slicechunktosize(revlog, [1, 3], 3))
219 >>> list(_slicechunktosize(revlog, [1, 3], 3))
219 [[1], [3]]
220 [[1], [3]]
220 >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
221 >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
221 [[1], [2, 3]]
222 [[1], [2, 3]]
222 >>> list(_slicechunktosize(revlog, [3, 5], 3))
223 >>> list(_slicechunktosize(revlog, [3, 5], 3))
223 [[3], [5]]
224 [[3], [5]]
224 >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
225 >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
225 [[3], [4, 5]]
226 [[3], [4, 5]]
226 >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
227 >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
227 [[5], [6, 7, 8]]
228 [[5], [6, 7, 8]]
228 >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
229 >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
229 [[0], [1, 2], [3], [5], [6, 7, 8]]
230 [[0], [1, 2], [3], [5], [6, 7, 8]]
230
231
231 Case with too large individual chunk (must return valid chunk)
232 Case with too large individual chunk (must return valid chunk)
232 >>> list(_slicechunktosize(revlog, [0, 1], 2))
233 >>> list(_slicechunktosize(revlog, [0, 1], 2))
233 [[0], [1]]
234 [[0], [1]]
234 >>> list(_slicechunktosize(revlog, [1, 3], 1))
235 >>> list(_slicechunktosize(revlog, [1, 3], 1))
235 [[1], [3]]
236 [[1], [3]]
236 >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
237 >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
237 [[3], [5]]
238 [[3], [5]]
238
239
239 == mixed case ==
240 == mixed case ==
240 >>> revlog = _testrevlog(data, snapshot=[0, 1, 2])
241 >>> revlog = _testrevlog(data, snapshot=[0, 1, 2])
241 >>> list(_slicechunktosize(revlog, list(range(9)), 5))
242 >>> list(_slicechunktosize(revlog, list(range(9)), 5))
242 [[0, 1], [2], [3, 4, 5], [6, 7, 8]]
243 [[0, 1], [2], [3, 4, 5], [6, 7, 8]]
243 """
244 """
244 assert targetsize is None or 0 <= targetsize
245 assert targetsize is None or 0 <= targetsize
245 startdata = revlog.start(revs[0])
246 startdata = revlog.start(revs[0])
246 enddata = revlog.end(revs[-1])
247 enddata = revlog.end(revs[-1])
247 fullspan = enddata - startdata
248 fullspan = enddata - startdata
248 if targetsize is None or fullspan <= targetsize:
249 if targetsize is None or fullspan <= targetsize:
249 yield revs
250 yield revs
250 return
251 return
251
252
252 startrevidx = 0
253 startrevidx = 0
253 endrevidx = 1
254 endrevidx = 1
254 iterrevs = enumerate(revs)
255 iterrevs = enumerate(revs)
255 next(iterrevs) # skip first rev.
256 next(iterrevs) # skip first rev.
256 # first step: get snapshots out of the way
257 # first step: get snapshots out of the way
257 for idx, r in iterrevs:
258 for idx, r in iterrevs:
258 span = revlog.end(r) - startdata
259 span = revlog.end(r) - startdata
259 snapshot = revlog.issnapshot(r)
260 snapshot = revlog.issnapshot(r)
260 if span <= targetsize and snapshot:
261 if span <= targetsize and snapshot:
261 endrevidx = idx + 1
262 endrevidx = idx + 1
262 else:
263 else:
263 chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)
264 chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)
264 if chunk:
265 if chunk:
265 yield chunk
266 yield chunk
266 startrevidx = idx
267 startrevidx = idx
267 startdata = revlog.start(r)
268 startdata = revlog.start(r)
268 endrevidx = idx + 1
269 endrevidx = idx + 1
269 if not snapshot:
270 if not snapshot:
270 break
271 break
271
272
272 # for the others, we use binary slicing to quickly converge toward valid
273 # for the others, we use binary slicing to quickly converge toward valid
273 # chunks (otherwise, we might end up looking for start/end of many
274 # chunks (otherwise, we might end up looking for start/end of many
274 # revisions). This logic is not looking for the perfect slicing point, it
275 # revisions). This logic is not looking for the perfect slicing point, it
275 # focuses on quickly converging toward valid chunks.
276 # focuses on quickly converging toward valid chunks.
276 nbitem = len(revs)
277 nbitem = len(revs)
277 while (enddata - startdata) > targetsize:
278 while (enddata - startdata) > targetsize:
278 endrevidx = nbitem
279 endrevidx = nbitem
279 if nbitem - startrevidx <= 1:
280 if nbitem - startrevidx <= 1:
280 break # protect against individual chunk larger than limit
281 break # protect against individual chunk larger than limit
281 localenddata = revlog.end(revs[endrevidx - 1])
282 localenddata = revlog.end(revs[endrevidx - 1])
282 span = localenddata - startdata
283 span = localenddata - startdata
283 while span > targetsize:
284 while span > targetsize:
284 if endrevidx - startrevidx <= 1:
285 if endrevidx - startrevidx <= 1:
285 break # protect against individual chunk larger than limit
286 break # protect against individual chunk larger than limit
286 endrevidx -= (endrevidx - startrevidx) // 2
287 endrevidx -= (endrevidx - startrevidx) // 2
287 localenddata = revlog.end(revs[endrevidx - 1])
288 localenddata = revlog.end(revs[endrevidx - 1])
288 span = localenddata - startdata
289 span = localenddata - startdata
289 chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)
290 chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)
290 if chunk:
291 if chunk:
291 yield chunk
292 yield chunk
292 startrevidx = endrevidx
293 startrevidx = endrevidx
293 startdata = revlog.start(revs[startrevidx])
294 startdata = revlog.start(revs[startrevidx])
294
295
295 chunk = _trimchunk(revlog, revs, startrevidx)
296 chunk = _trimchunk(revlog, revs, startrevidx)
296 if chunk:
297 if chunk:
297 yield chunk
298 yield chunk
298
299
299 def _slicechunktodensity(revlog, revs, targetdensity=0.5,
300 def _slicechunktodensity(revlog, revs, targetdensity=0.5,
300 mingapsize=0):
301 mingapsize=0):
301 """slice revs to reduce the amount of unrelated data to be read from disk.
302 """slice revs to reduce the amount of unrelated data to be read from disk.
302
303
303 ``revs`` is sliced into groups that should be read in one time.
304 ``revs`` is sliced into groups that should be read in one time.
304 Assume that revs are sorted.
305 Assume that revs are sorted.
305
306
306 The initial chunk is sliced until the overall density (payload/chunks-span
307 The initial chunk is sliced until the overall density (payload/chunks-span
307 ratio) is above `targetdensity`. No gap smaller than `mingapsize` is
308 ratio) is above `targetdensity`. No gap smaller than `mingapsize` is
308 skipped.
309 skipped.
309
310
310 >>> revlog = _testrevlog([
311 >>> revlog = _testrevlog([
311 ... 5, #00 (5)
312 ... 5, #00 (5)
312 ... 10, #01 (5)
313 ... 10, #01 (5)
313 ... 12, #02 (2)
314 ... 12, #02 (2)
314 ... 12, #03 (empty)
315 ... 12, #03 (empty)
315 ... 27, #04 (15)
316 ... 27, #04 (15)
316 ... 31, #05 (4)
317 ... 31, #05 (4)
317 ... 31, #06 (empty)
318 ... 31, #06 (empty)
318 ... 42, #07 (11)
319 ... 42, #07 (11)
319 ... 47, #08 (5)
320 ... 47, #08 (5)
320 ... 47, #09 (empty)
321 ... 47, #09 (empty)
321 ... 48, #10 (1)
322 ... 48, #10 (1)
322 ... 51, #11 (3)
323 ... 51, #11 (3)
323 ... 74, #12 (23)
324 ... 74, #12 (23)
324 ... 85, #13 (11)
325 ... 85, #13 (11)
325 ... 86, #14 (1)
326 ... 86, #14 (1)
326 ... 91, #15 (5)
327 ... 91, #15 (5)
327 ... ])
328 ... ])
328
329
329 >>> list(_slicechunktodensity(revlog, list(range(16))))
330 >>> list(_slicechunktodensity(revlog, list(range(16))))
330 [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
331 [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
331 >>> list(_slicechunktodensity(revlog, [0, 15]))
332 >>> list(_slicechunktodensity(revlog, [0, 15]))
332 [[0], [15]]
333 [[0], [15]]
333 >>> list(_slicechunktodensity(revlog, [0, 11, 15]))
334 >>> list(_slicechunktodensity(revlog, [0, 11, 15]))
334 [[0], [11], [15]]
335 [[0], [11], [15]]
335 >>> list(_slicechunktodensity(revlog, [0, 11, 13, 15]))
336 >>> list(_slicechunktodensity(revlog, [0, 11, 13, 15]))
336 [[0], [11, 13, 15]]
337 [[0], [11, 13, 15]]
337 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
338 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
338 [[1, 2], [5, 8, 10, 11], [14]]
339 [[1, 2], [5, 8, 10, 11], [14]]
339 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
340 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
340 ... mingapsize=20))
341 ... mingapsize=20))
341 [[1, 2, 3, 5, 8, 10, 11], [14]]
342 [[1, 2, 3, 5, 8, 10, 11], [14]]
342 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
343 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
343 ... targetdensity=0.95))
344 ... targetdensity=0.95))
344 [[1, 2], [5], [8, 10, 11], [14]]
345 [[1, 2], [5], [8, 10, 11], [14]]
345 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
346 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
346 ... targetdensity=0.95, mingapsize=12))
347 ... targetdensity=0.95, mingapsize=12))
347 [[1, 2], [5, 8, 10, 11], [14]]
348 [[1, 2], [5, 8, 10, 11], [14]]
348 """
349 """
349 start = revlog.start
350 start = revlog.start
350 length = revlog.length
351 length = revlog.length
351
352
352 if len(revs) <= 1:
353 if len(revs) <= 1:
353 yield revs
354 yield revs
354 return
355 return
355
356
356 deltachainspan = segmentspan(revlog, revs)
357 deltachainspan = segmentspan(revlog, revs)
357
358
358 if deltachainspan < mingapsize:
359 if deltachainspan < mingapsize:
359 yield revs
360 yield revs
360 return
361 return
361
362
362 readdata = deltachainspan
363 readdata = deltachainspan
363 chainpayload = sum(length(r) for r in revs)
364 chainpayload = sum(length(r) for r in revs)
364
365
365 if deltachainspan:
366 if deltachainspan:
366 density = chainpayload / float(deltachainspan)
367 density = chainpayload / float(deltachainspan)
367 else:
368 else:
368 density = 1.0
369 density = 1.0
369
370
370 if density >= targetdensity:
371 if density >= targetdensity:
371 yield revs
372 yield revs
372 return
373 return
373
374
374 # Store the gaps in a heap to have them sorted by decreasing size
375 # Store the gaps in a heap to have them sorted by decreasing size
375 gaps = []
376 gaps = []
376 prevend = None
377 prevend = None
377 for i, rev in enumerate(revs):
378 for i, rev in enumerate(revs):
378 revstart = start(rev)
379 revstart = start(rev)
379 revlen = length(rev)
380 revlen = length(rev)
380
381
381 # Skip empty revisions to form larger holes
382 # Skip empty revisions to form larger holes
382 if revlen == 0:
383 if revlen == 0:
383 continue
384 continue
384
385
385 if prevend is not None:
386 if prevend is not None:
386 gapsize = revstart - prevend
387 gapsize = revstart - prevend
387 # only consider holes that are large enough
388 # only consider holes that are large enough
388 if gapsize > mingapsize:
389 if gapsize > mingapsize:
389 gaps.append((gapsize, i))
390 gaps.append((gapsize, i))
390
391
391 prevend = revstart + revlen
392 prevend = revstart + revlen
392 # sort the gaps to pop them from largest to small
393 # sort the gaps to pop them from largest to small
393 gaps.sort()
394 gaps.sort()
394
395
395 # Collect the indices of the largest holes until the density is acceptable
396 # Collect the indices of the largest holes until the density is acceptable
396 selected = []
397 selected = []
397 while gaps and density < targetdensity:
398 while gaps and density < targetdensity:
398 gapsize, gapidx = gaps.pop()
399 gapsize, gapidx = gaps.pop()
399
400
400 selected.append(gapidx)
401 selected.append(gapidx)
401
402
402 # the gap sizes are stored as negatives to be sorted decreasingly
403 # the gap sizes are stored as negatives to be sorted decreasingly
403 # by the heap
404 # by the heap
404 readdata -= gapsize
405 readdata -= gapsize
405 if readdata > 0:
406 if readdata > 0:
406 density = chainpayload / float(readdata)
407 density = chainpayload / float(readdata)
407 else:
408 else:
408 density = 1.0
409 density = 1.0
409 selected.sort()
410 selected.sort()
410
411
411 # Cut the revs at collected indices
412 # Cut the revs at collected indices
412 previdx = 0
413 previdx = 0
413 for idx in selected:
414 for idx in selected:
414
415
415 chunk = _trimchunk(revlog, revs, previdx, idx)
416 chunk = _trimchunk(revlog, revs, previdx, idx)
416 if chunk:
417 if chunk:
417 yield chunk
418 yield chunk
418
419
419 previdx = idx
420 previdx = idx
420
421
421 chunk = _trimchunk(revlog, revs, previdx)
422 chunk = _trimchunk(revlog, revs, previdx)
422 if chunk:
423 if chunk:
423 yield chunk
424 yield chunk
424
425
425 def _trimchunk(revlog, revs, startidx, endidx=None):
426 def _trimchunk(revlog, revs, startidx, endidx=None):
426 """returns revs[startidx:endidx] without empty trailing revs
427 """returns revs[startidx:endidx] without empty trailing revs
427
428
428 Doctest Setup
429 Doctest Setup
429 >>> revlog = _testrevlog([
430 >>> revlog = _testrevlog([
430 ... 5, #0
431 ... 5, #0
431 ... 10, #1
432 ... 10, #1
432 ... 12, #2
433 ... 12, #2
433 ... 12, #3 (empty)
434 ... 12, #3 (empty)
434 ... 17, #4
435 ... 17, #4
435 ... 21, #5
436 ... 21, #5
436 ... 21, #6 (empty)
437 ... 21, #6 (empty)
437 ... ])
438 ... ])
438
439
439 Contiguous cases:
440 Contiguous cases:
440 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
441 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
441 [0, 1, 2, 3, 4, 5]
442 [0, 1, 2, 3, 4, 5]
442 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
443 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
443 [0, 1, 2, 3, 4]
444 [0, 1, 2, 3, 4]
444 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
445 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
445 [0, 1, 2]
446 [0, 1, 2]
446 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
447 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
447 [2]
448 [2]
448 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
449 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
449 [3, 4, 5]
450 [3, 4, 5]
450 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
451 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
451 [3, 4]
452 [3, 4]
452
453
453 Discontiguous cases:
454 Discontiguous cases:
454 >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
455 >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
455 [1, 3, 5]
456 [1, 3, 5]
456 >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
457 >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
457 [1]
458 [1]
458 >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
459 >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
459 [3, 5]
460 [3, 5]
460 >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
461 >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
461 [3, 5]
462 [3, 5]
462 """
463 """
463 length = revlog.length
464 length = revlog.length
464
465
465 if endidx is None:
466 if endidx is None:
466 endidx = len(revs)
467 endidx = len(revs)
467
468
468 # If we have a non-emtpy delta candidate, there are nothing to trim
469 # If we have a non-emtpy delta candidate, there are nothing to trim
469 if revs[endidx - 1] < len(revlog):
470 if revs[endidx - 1] < len(revlog):
470 # Trim empty revs at the end, except the very first revision of a chain
471 # Trim empty revs at the end, except the very first revision of a chain
471 while (endidx > 1
472 while (endidx > 1
472 and endidx > startidx
473 and endidx > startidx
473 and length(revs[endidx - 1]) == 0):
474 and length(revs[endidx - 1]) == 0):
474 endidx -= 1
475 endidx -= 1
475
476
476 return revs[startidx:endidx]
477 return revs[startidx:endidx]
477
478
478 def segmentspan(revlog, revs):
479 def segmentspan(revlog, revs):
479 """Get the byte span of a segment of revisions
480 """Get the byte span of a segment of revisions
480
481
481 revs is a sorted array of revision numbers
482 revs is a sorted array of revision numbers
482
483
483 >>> revlog = _testrevlog([
484 >>> revlog = _testrevlog([
484 ... 5, #0
485 ... 5, #0
485 ... 10, #1
486 ... 10, #1
486 ... 12, #2
487 ... 12, #2
487 ... 12, #3 (empty)
488 ... 12, #3 (empty)
488 ... 17, #4
489 ... 17, #4
489 ... ])
490 ... ])
490
491
491 >>> segmentspan(revlog, [0, 1, 2, 3, 4])
492 >>> segmentspan(revlog, [0, 1, 2, 3, 4])
492 17
493 17
493 >>> segmentspan(revlog, [0, 4])
494 >>> segmentspan(revlog, [0, 4])
494 17
495 17
495 >>> segmentspan(revlog, [3, 4])
496 >>> segmentspan(revlog, [3, 4])
496 5
497 5
497 >>> segmentspan(revlog, [1, 2, 3,])
498 >>> segmentspan(revlog, [1, 2, 3,])
498 7
499 7
499 >>> segmentspan(revlog, [1, 3])
500 >>> segmentspan(revlog, [1, 3])
500 7
501 7
501 """
502 """
502 if not revs:
503 if not revs:
503 return 0
504 return 0
504 end = revlog.end(revs[-1])
505 end = revlog.end(revs[-1])
505 return end - revlog.start(revs[0])
506 return end - revlog.start(revs[0])
506
507
507 def _textfromdelta(fh, revlog, baserev, delta, p1, p2, flags, expectednode):
508 def _textfromdelta(fh, revlog, baserev, delta, p1, p2, flags, expectednode):
508 """build full text from a (base, delta) pair and other metadata"""
509 """build full text from a (base, delta) pair and other metadata"""
509 # special case deltas which replace entire base; no need to decode
510 # special case deltas which replace entire base; no need to decode
510 # base revision. this neatly avoids censored bases, which throw when
511 # base revision. this neatly avoids censored bases, which throw when
511 # they're decoded.
512 # they're decoded.
512 hlen = struct.calcsize(">lll")
513 hlen = struct.calcsize(">lll")
513 if delta[:hlen] == mdiff.replacediffheader(revlog.rawsize(baserev),
514 if delta[:hlen] == mdiff.replacediffheader(revlog.rawsize(baserev),
514 len(delta) - hlen):
515 len(delta) - hlen):
515 fulltext = delta[hlen:]
516 fulltext = delta[hlen:]
516 else:
517 else:
517 # deltabase is rawtext before changed by flag processors, which is
518 # deltabase is rawtext before changed by flag processors, which is
518 # equivalent to non-raw text
519 # equivalent to non-raw text
519 basetext = revlog.revision(baserev, _df=fh, raw=False)
520 basetext = revlog.revision(baserev, _df=fh, raw=False)
520 fulltext = mdiff.patch(basetext, delta)
521 fulltext = mdiff.patch(basetext, delta)
521
522
522 try:
523 try:
523 res = revlog._processflags(fulltext, flags, 'read', raw=True)
524 res = revlog._processflags(fulltext, flags, 'read', raw=True)
524 fulltext, validatehash = res
525 fulltext, validatehash = res
525 if validatehash:
526 if validatehash:
526 revlog.checkhash(fulltext, expectednode, p1=p1, p2=p2)
527 revlog.checkhash(fulltext, expectednode, p1=p1, p2=p2)
527 if flags & REVIDX_ISCENSORED:
528 if flags & REVIDX_ISCENSORED:
528 raise error.StorageError(_('node %s is not censored') %
529 raise error.StorageError(_('node %s is not censored') %
529 expectednode)
530 expectednode)
530 except error.CensoredNodeError:
531 except error.CensoredNodeError:
531 # must pass the censored index flag to add censored revisions
532 # must pass the censored index flag to add censored revisions
532 if not flags & REVIDX_ISCENSORED:
533 if not flags & REVIDX_ISCENSORED:
533 raise
534 raise
534 return fulltext
535 return fulltext
535
536
536 @attr.s(slots=True, frozen=True)
537 @attr.s(slots=True, frozen=True)
537 class _deltainfo(object):
538 class _deltainfo(object):
538 distance = attr.ib()
539 distance = attr.ib()
539 deltalen = attr.ib()
540 deltalen = attr.ib()
540 data = attr.ib()
541 data = attr.ib()
541 base = attr.ib()
542 base = attr.ib()
542 chainbase = attr.ib()
543 chainbase = attr.ib()
543 chainlen = attr.ib()
544 chainlen = attr.ib()
544 compresseddeltalen = attr.ib()
545 compresseddeltalen = attr.ib()
545 snapshotdepth = attr.ib()
546 snapshotdepth = attr.ib()
546
547
547 def isgooddeltainfo(revlog, deltainfo, revinfo):
548 def isgooddeltainfo(revlog, deltainfo, revinfo):
548 """Returns True if the given delta is good. Good means that it is within
549 """Returns True if the given delta is good. Good means that it is within
549 the disk span, disk size, and chain length bounds that we know to be
550 the disk span, disk size, and chain length bounds that we know to be
550 performant."""
551 performant."""
551 if deltainfo is None:
552 if deltainfo is None:
552 return False
553 return False
553
554
554 # - 'deltainfo.distance' is the distance from the base revision --
555 # - 'deltainfo.distance' is the distance from the base revision --
555 # bounding it limits the amount of I/O we need to do.
556 # bounding it limits the amount of I/O we need to do.
556 # - 'deltainfo.compresseddeltalen' is the sum of the total size of
557 # - 'deltainfo.compresseddeltalen' is the sum of the total size of
557 # deltas we need to apply -- bounding it limits the amount of CPU
558 # deltas we need to apply -- bounding it limits the amount of CPU
558 # we consume.
559 # we consume.
559
560
560 textlen = revinfo.textlen
561 textlen = revinfo.textlen
561 defaultmax = textlen * 4
562 defaultmax = textlen * 4
562 maxdist = revlog._maxdeltachainspan
563 maxdist = revlog._maxdeltachainspan
563 if not maxdist:
564 if not maxdist:
564 maxdist = deltainfo.distance # ensure the conditional pass
565 maxdist = deltainfo.distance # ensure the conditional pass
565 maxdist = max(maxdist, defaultmax)
566 maxdist = max(maxdist, defaultmax)
566
567
567 # Bad delta from read span:
568 # Bad delta from read span:
568 #
569 #
569 # If the span of data read is larger than the maximum allowed.
570 # If the span of data read is larger than the maximum allowed.
570 #
571 #
571 # In the sparse-revlog case, we rely on the associated "sparse reading"
572 # In the sparse-revlog case, we rely on the associated "sparse reading"
572 # to avoid issue related to the span of data. In theory, it would be
573 # to avoid issue related to the span of data. In theory, it would be
573 # possible to build pathological revlog where delta pattern would lead
574 # possible to build pathological revlog where delta pattern would lead
574 # to too many reads. However, they do not happen in practice at all. So
575 # to too many reads. However, they do not happen in practice at all. So
575 # we skip the span check entirely.
576 # we skip the span check entirely.
576 if not revlog._sparserevlog and maxdist < deltainfo.distance:
577 if not revlog._sparserevlog and maxdist < deltainfo.distance:
577 return False
578 return False
578
579
579 # Bad delta from new delta size:
580 # Bad delta from new delta size:
580 #
581 #
581 # If the delta size is larger than the target text, storing the
582 # If the delta size is larger than the target text, storing the
582 # delta will be inefficient.
583 # delta will be inefficient.
583 if textlen < deltainfo.deltalen:
584 if textlen < deltainfo.deltalen:
584 return False
585 return False
585
586
586 # Bad delta from cumulated payload size:
587 # Bad delta from cumulated payload size:
587 #
588 #
588 # If the sum of delta get larger than K * target text length.
589 # If the sum of delta get larger than K * target text length.
589 if textlen * LIMIT_DELTA2TEXT < deltainfo.compresseddeltalen:
590 if textlen * LIMIT_DELTA2TEXT < deltainfo.compresseddeltalen:
590 return False
591 return False
591
592
592 # Bad delta from chain length:
593 # Bad delta from chain length:
593 #
594 #
594 # If the number of delta in the chain gets too high.
595 # If the number of delta in the chain gets too high.
595 if (revlog._maxchainlen
596 if (revlog._maxchainlen
596 and revlog._maxchainlen < deltainfo.chainlen):
597 and revlog._maxchainlen < deltainfo.chainlen):
597 return False
598 return False
598
599
599 # bad delta from intermediate snapshot size limit
600 # bad delta from intermediate snapshot size limit
600 #
601 #
601 # If an intermediate snapshot size is higher than the limit. The
602 # If an intermediate snapshot size is higher than the limit. The
602 # limit exist to prevent endless chain of intermediate delta to be
603 # limit exist to prevent endless chain of intermediate delta to be
603 # created.
604 # created.
604 if (deltainfo.snapshotdepth is not None and
605 if (deltainfo.snapshotdepth is not None and
605 (textlen >> deltainfo.snapshotdepth) < deltainfo.deltalen):
606 (textlen >> deltainfo.snapshotdepth) < deltainfo.deltalen):
606 return False
607 return False
607
608
608 # bad delta if new intermediate snapshot is larger than the previous
609 # bad delta if new intermediate snapshot is larger than the previous
609 # snapshot
610 # snapshot
610 if (deltainfo.snapshotdepth
611 if (deltainfo.snapshotdepth
611 and revlog.length(deltainfo.base) < deltainfo.deltalen):
612 and revlog.length(deltainfo.base) < deltainfo.deltalen):
612 return False
613 return False
613
614
614 return True
615 return True
615
616
616 # If a revision's full text is that much bigger than a base candidate full
617 # If a revision's full text is that much bigger than a base candidate full
617 # text's, it is very unlikely that it will produce a valid delta. We no longer
618 # text's, it is very unlikely that it will produce a valid delta. We no longer
618 # consider these candidates.
619 # consider these candidates.
619 LIMIT_BASE2TEXT = 500
620 LIMIT_BASE2TEXT = 500
620
621
621 def _candidategroups(revlog, textlen, p1, p2, cachedelta):
622 def _candidategroups(revlog, textlen, p1, p2, cachedelta):
622 """Provides group of revision to be tested as delta base
623 """Provides group of revision to be tested as delta base
623
624
624 This top level function focus on emitting groups with unique and worthwhile
625 This top level function focus on emitting groups with unique and worthwhile
625 content. See _raw_candidate_groups for details about the group order.
626 content. See _raw_candidate_groups for details about the group order.
626 """
627 """
627 # should we try to build a delta?
628 # should we try to build a delta?
628 if not (len(revlog) and revlog._storedeltachains):
629 if not (len(revlog) and revlog._storedeltachains):
629 yield None
630 yield None
630 return
631 return
631
632
632 deltalength = revlog.length
633 deltalength = revlog.length
633 deltaparent = revlog.deltaparent
634 deltaparent = revlog.deltaparent
634 sparse = revlog._sparserevlog
635 sparse = revlog._sparserevlog
635 good = None
636 good = None
636
637
637 deltas_limit = textlen * LIMIT_DELTA2TEXT
638 deltas_limit = textlen * LIMIT_DELTA2TEXT
638
639
639 tested = set([nullrev])
640 tested = set([nullrev])
640 candidates = _refinedgroups(revlog, p1, p2, cachedelta)
641 candidates = _refinedgroups(revlog, p1, p2, cachedelta)
641 while True:
642 while True:
642 temptative = candidates.send(good)
643 temptative = candidates.send(good)
643 if temptative is None:
644 if temptative is None:
644 break
645 break
645 group = []
646 group = []
646 for rev in temptative:
647 for rev in temptative:
647 # skip over empty delta (no need to include them in a chain)
648 # skip over empty delta (no need to include them in a chain)
648 while (revlog._generaldelta
649 while (revlog._generaldelta
649 and not (rev == nullrev
650 and not (rev == nullrev
650 or rev in tested
651 or rev in tested
651 or deltalength(rev))):
652 or deltalength(rev))):
652 tested.add(rev)
653 tested.add(rev)
653 rev = deltaparent(rev)
654 rev = deltaparent(rev)
654 # no need to try a delta against nullrev, this will be done as a
655 # no need to try a delta against nullrev, this will be done as a
655 # last resort.
656 # last resort.
656 if rev == nullrev:
657 if rev == nullrev:
657 continue
658 continue
658 # filter out revision we tested already
659 # filter out revision we tested already
659 if rev in tested:
660 if rev in tested:
660 continue
661 continue
661 tested.add(rev)
662 tested.add(rev)
662 # filter out delta base that will never produce good delta
663 # filter out delta base that will never produce good delta
663 if deltas_limit < revlog.length(rev):
664 if deltas_limit < revlog.length(rev):
664 continue
665 continue
665 if sparse and revlog.rawsize(rev) < (textlen // LIMIT_BASE2TEXT):
666 if sparse and revlog.rawsize(rev) < (textlen // LIMIT_BASE2TEXT):
666 continue
667 continue
667 # no delta for rawtext-changing revs (see "candelta" for why)
668 # no delta for rawtext-changing revs (see "candelta" for why)
668 if revlog.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
669 if revlog.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
669 continue
670 continue
670 # If we reach here, we are about to build and test a delta.
671 # If we reach here, we are about to build and test a delta.
671 # The delta building process will compute the chaininfo in all
672 # The delta building process will compute the chaininfo in all
672 # case, since that computation is cached, it is fine to access it
673 # case, since that computation is cached, it is fine to access it
673 # here too.
674 # here too.
674 chainlen, chainsize = revlog._chaininfo(rev)
675 chainlen, chainsize = revlog._chaininfo(rev)
675 # if chain will be too long, skip base
676 # if chain will be too long, skip base
676 if revlog._maxchainlen and chainlen >= revlog._maxchainlen:
677 if revlog._maxchainlen and chainlen >= revlog._maxchainlen:
677 continue
678 continue
678 # if chain already have too much data, skip base
679 # if chain already have too much data, skip base
679 if deltas_limit < chainsize:
680 if deltas_limit < chainsize:
680 continue
681 continue
681 group.append(rev)
682 group.append(rev)
682 if group:
683 if group:
683 # XXX: in the sparse revlog case, group can become large,
684 # XXX: in the sparse revlog case, group can become large,
684 # impacting performances. Some bounding or slicing mecanism
685 # impacting performances. Some bounding or slicing mecanism
685 # would help to reduce this impact.
686 # would help to reduce this impact.
686 good = yield tuple(group)
687 good = yield tuple(group)
687 yield None
688 yield None
688
689
689 def _findsnapshots(revlog, cache, start_rev):
690 def _findsnapshots(revlog, cache, start_rev):
690 """find snapshot from start_rev to tip"""
691 """find snapshot from start_rev to tip"""
691 deltaparent = revlog.deltaparent
692 if util.safehasattr(revlog.index, 'findsnapshots'):
692 issnapshot = revlog.issnapshot
693 revlog.index.findsnapshots(cache, start_rev)
693 for rev in revlog.revs(start_rev):
694 else:
694 if issnapshot(rev):
695 deltaparent = revlog.deltaparent
695 cache[deltaparent(rev)].append(rev)
696 issnapshot = revlog.issnapshot
697 for rev in revlog.revs(start_rev):
698 if issnapshot(rev):
699 cache[deltaparent(rev)].append(rev)
696
700
697 def _refinedgroups(revlog, p1, p2, cachedelta):
701 def _refinedgroups(revlog, p1, p2, cachedelta):
698 good = None
702 good = None
699 # First we try to reuse a the delta contained in the bundle.
703 # First we try to reuse a the delta contained in the bundle.
700 # (or from the source revlog)
704 # (or from the source revlog)
701 #
705 #
702 # This logic only applies to general delta repositories and can be disabled
706 # This logic only applies to general delta repositories and can be disabled
703 # through configuration. Disabling reuse source delta is useful when
707 # through configuration. Disabling reuse source delta is useful when
704 # we want to make sure we recomputed "optimal" deltas.
708 # we want to make sure we recomputed "optimal" deltas.
705 if cachedelta and revlog._generaldelta and revlog._lazydeltabase:
709 if cachedelta and revlog._generaldelta and revlog._lazydeltabase:
706 # Assume what we received from the server is a good choice
710 # Assume what we received from the server is a good choice
707 # build delta will reuse the cache
711 # build delta will reuse the cache
708 good = yield (cachedelta[0],)
712 good = yield (cachedelta[0],)
709 if good is not None:
713 if good is not None:
710 yield None
714 yield None
711 return
715 return
712 for candidates in _rawgroups(revlog, p1, p2, cachedelta):
716 for candidates in _rawgroups(revlog, p1, p2, cachedelta):
713 good = yield candidates
717 good = yield candidates
714 if good is not None:
718 if good is not None:
715 break
719 break
716
720
717 # If sparse revlog is enabled, we can try to refine the available deltas
721 # If sparse revlog is enabled, we can try to refine the available deltas
718 if not revlog._sparserevlog:
722 if not revlog._sparserevlog:
719 yield None
723 yield None
720 return
724 return
721
725
722 # if we have a refinable value, try to refine it
726 # if we have a refinable value, try to refine it
723 if good is not None and good not in (p1, p2) and revlog.issnapshot(good):
727 if good is not None and good not in (p1, p2) and revlog.issnapshot(good):
724 # refine snapshot down
728 # refine snapshot down
725 previous = None
729 previous = None
726 while previous != good:
730 while previous != good:
727 previous = good
731 previous = good
728 base = revlog.deltaparent(good)
732 base = revlog.deltaparent(good)
729 if base == nullrev:
733 if base == nullrev:
730 break
734 break
731 good = yield (base,)
735 good = yield (base,)
732 # refine snapshot up
736 # refine snapshot up
733 #
737 #
734 # XXX the _findsnapshots call can be expensive and is "duplicated" with
738 # XXX the _findsnapshots call can be expensive and is "duplicated" with
735 # the one done in `_rawgroups`. Once we start working on performance,
739 # the one done in `_rawgroups`. Once we start working on performance,
736 # we should make the two logics share this computation.
740 # we should make the two logics share this computation.
737 snapshots = collections.defaultdict(list)
741 snapshots = collections.defaultdict(list)
738 _findsnapshots(revlog, snapshots, good + 1)
742 _findsnapshots(revlog, snapshots, good + 1)
739 previous = None
743 previous = None
740 while good != previous:
744 while good != previous:
741 previous = good
745 previous = good
742 children = tuple(sorted(c for c in snapshots[good]))
746 children = tuple(sorted(c for c in snapshots[good]))
743 good = yield children
747 good = yield children
744
748
745 # we have found nothing
749 # we have found nothing
746 yield None
750 yield None
747
751
748 def _rawgroups(revlog, p1, p2, cachedelta):
752 def _rawgroups(revlog, p1, p2, cachedelta):
749 """Provides group of revision to be tested as delta base
753 """Provides group of revision to be tested as delta base
750
754
751 This lower level function focus on emitting delta theorically interresting
755 This lower level function focus on emitting delta theorically interresting
752 without looking it any practical details.
756 without looking it any practical details.
753
757
754 The group order aims at providing fast or small candidates first.
758 The group order aims at providing fast or small candidates first.
755 """
759 """
756 gdelta = revlog._generaldelta
760 gdelta = revlog._generaldelta
757 sparse = revlog._sparserevlog
761 sparse = revlog._sparserevlog
758 curr = len(revlog)
762 curr = len(revlog)
759 prev = curr - 1
763 prev = curr - 1
760 deltachain = lambda rev: revlog._deltachain(rev)[0]
764 deltachain = lambda rev: revlog._deltachain(rev)[0]
761
765
762 if gdelta:
766 if gdelta:
763 # exclude already lazy tested base if any
767 # exclude already lazy tested base if any
764 parents = [p for p in (p1, p2) if p != nullrev]
768 parents = [p for p in (p1, p2) if p != nullrev]
765
769
766 if not revlog._deltabothparents and len(parents) == 2:
770 if not revlog._deltabothparents and len(parents) == 2:
767 parents.sort()
771 parents.sort()
768 # To minimize the chance of having to build a fulltext,
772 # To minimize the chance of having to build a fulltext,
769 # pick first whichever parent is closest to us (max rev)
773 # pick first whichever parent is closest to us (max rev)
770 yield (parents[1],)
774 yield (parents[1],)
771 # then the other one (min rev) if the first did not fit
775 # then the other one (min rev) if the first did not fit
772 yield (parents[0],)
776 yield (parents[0],)
773 elif len(parents) > 0:
777 elif len(parents) > 0:
774 # Test all parents (1 or 2), and keep the best candidate
778 # Test all parents (1 or 2), and keep the best candidate
775 yield parents
779 yield parents
776
780
777 if sparse and parents:
781 if sparse and parents:
778 snapshots = collections.defaultdict(list) # map: base-rev: snapshot-rev
782 snapshots = collections.defaultdict(list) # map: base-rev: snapshot-rev
779 # See if we can use an existing snapshot in the parent chains to use as
783 # See if we can use an existing snapshot in the parent chains to use as
780 # a base for a new intermediate-snapshot
784 # a base for a new intermediate-snapshot
781 #
785 #
782 # search for snapshot in parents delta chain
786 # search for snapshot in parents delta chain
783 # map: snapshot-level: snapshot-rev
787 # map: snapshot-level: snapshot-rev
784 parents_snaps = collections.defaultdict(set)
788 parents_snaps = collections.defaultdict(set)
785 candidate_chains = [deltachain(p) for p in parents]
789 candidate_chains = [deltachain(p) for p in parents]
786 for chain in candidate_chains:
790 for chain in candidate_chains:
787 for idx, s in enumerate(chain):
791 for idx, s in enumerate(chain):
788 if not revlog.issnapshot(s):
792 if not revlog.issnapshot(s):
789 break
793 break
790 parents_snaps[idx].add(s)
794 parents_snaps[idx].add(s)
791 snapfloor = min(parents_snaps[0]) + 1
795 snapfloor = min(parents_snaps[0]) + 1
792 _findsnapshots(revlog, snapshots, snapfloor)
796 _findsnapshots(revlog, snapshots, snapfloor)
793 # search for the highest "unrelated" revision
797 # search for the highest "unrelated" revision
794 #
798 #
795 # Adding snapshots used by "unrelated" revision increase the odd we
799 # Adding snapshots used by "unrelated" revision increase the odd we
796 # reuse an independant, yet better snapshot chain.
800 # reuse an independant, yet better snapshot chain.
797 #
801 #
798 # XXX instead of building a set of revisions, we could lazily enumerate
802 # XXX instead of building a set of revisions, we could lazily enumerate
799 # over the chains. That would be more efficient, however we stick to
803 # over the chains. That would be more efficient, however we stick to
800 # simple code for now.
804 # simple code for now.
801 all_revs = set()
805 all_revs = set()
802 for chain in candidate_chains:
806 for chain in candidate_chains:
803 all_revs.update(chain)
807 all_revs.update(chain)
804 other = None
808 other = None
805 for r in revlog.revs(prev, snapfloor):
809 for r in revlog.revs(prev, snapfloor):
806 if r not in all_revs:
810 if r not in all_revs:
807 other = r
811 other = r
808 break
812 break
809 if other is not None:
813 if other is not None:
810 # To avoid unfair competition, we won't use unrelated intermediate
814 # To avoid unfair competition, we won't use unrelated intermediate
811 # snapshot that are deeper than the ones from the parent delta
815 # snapshot that are deeper than the ones from the parent delta
812 # chain.
816 # chain.
813 max_depth = max(parents_snaps.keys())
817 max_depth = max(parents_snaps.keys())
814 chain = deltachain(other)
818 chain = deltachain(other)
815 for idx, s in enumerate(chain):
819 for idx, s in enumerate(chain):
816 if s < snapfloor:
820 if s < snapfloor:
817 continue
821 continue
818 if max_depth < idx:
822 if max_depth < idx:
819 break
823 break
820 if not revlog.issnapshot(s):
824 if not revlog.issnapshot(s):
821 break
825 break
822 parents_snaps[idx].add(s)
826 parents_snaps[idx].add(s)
823 # Test them as possible intermediate snapshot base
827 # Test them as possible intermediate snapshot base
824 # We test them from highest to lowest level. High level one are more
828 # We test them from highest to lowest level. High level one are more
825 # likely to result in small delta
829 # likely to result in small delta
826 floor = None
830 floor = None
827 for idx, snaps in sorted(parents_snaps.items(), reverse=True):
831 for idx, snaps in sorted(parents_snaps.items(), reverse=True):
828 siblings = set()
832 siblings = set()
829 for s in snaps:
833 for s in snaps:
830 siblings.update(snapshots[s])
834 siblings.update(snapshots[s])
831 # Before considering making a new intermediate snapshot, we check
835 # Before considering making a new intermediate snapshot, we check
832 # if an existing snapshot, children of base we consider, would be
836 # if an existing snapshot, children of base we consider, would be
833 # suitable.
837 # suitable.
834 #
838 #
835 # It give a change to reuse a delta chain "unrelated" to the
839 # It give a change to reuse a delta chain "unrelated" to the
836 # current revision instead of starting our own. Without such
840 # current revision instead of starting our own. Without such
837 # re-use, topological branches would keep reopening new chains.
841 # re-use, topological branches would keep reopening new chains.
838 # Creating more and more snapshot as the repository grow.
842 # Creating more and more snapshot as the repository grow.
839
843
840 if floor is not None:
844 if floor is not None:
841 # We only do this for siblings created after the one in our
845 # We only do this for siblings created after the one in our
842 # parent's delta chain. Those created before has less chances
846 # parent's delta chain. Those created before has less chances
843 # to be valid base since our ancestors had to create a new
847 # to be valid base since our ancestors had to create a new
844 # snapshot.
848 # snapshot.
845 siblings = [r for r in siblings if floor < r]
849 siblings = [r for r in siblings if floor < r]
846 yield tuple(sorted(siblings))
850 yield tuple(sorted(siblings))
847 # then test the base from our parent's delta chain.
851 # then test the base from our parent's delta chain.
848 yield tuple(sorted(snaps))
852 yield tuple(sorted(snaps))
849 floor = min(snaps)
853 floor = min(snaps)
850 # No suitable base found in the parent chain, search if any full
854 # No suitable base found in the parent chain, search if any full
851 # snapshots emitted since parent's base would be a suitable base for an
855 # snapshots emitted since parent's base would be a suitable base for an
852 # intermediate snapshot.
856 # intermediate snapshot.
853 #
857 #
854 # It give a chance to reuse a delta chain unrelated to the current
858 # It give a chance to reuse a delta chain unrelated to the current
855 # revisions instead of starting our own. Without such re-use,
859 # revisions instead of starting our own. Without such re-use,
856 # topological branches would keep reopening new full chains. Creating
860 # topological branches would keep reopening new full chains. Creating
857 # more and more snapshot as the repository grow.
861 # more and more snapshot as the repository grow.
858 yield tuple(snapshots[nullrev])
862 yield tuple(snapshots[nullrev])
859
863
860 if not sparse:
864 if not sparse:
861 # other approach failed try against prev to hopefully save us a
865 # other approach failed try against prev to hopefully save us a
862 # fulltext.
866 # fulltext.
863 yield (prev,)
867 yield (prev,)
864
868
865 class deltacomputer(object):
869 class deltacomputer(object):
866 def __init__(self, revlog):
870 def __init__(self, revlog):
867 self.revlog = revlog
871 self.revlog = revlog
868
872
869 def buildtext(self, revinfo, fh):
873 def buildtext(self, revinfo, fh):
870 """Builds a fulltext version of a revision
874 """Builds a fulltext version of a revision
871
875
872 revinfo: _revisioninfo instance that contains all needed info
876 revinfo: _revisioninfo instance that contains all needed info
873 fh: file handle to either the .i or the .d revlog file,
877 fh: file handle to either the .i or the .d revlog file,
874 depending on whether it is inlined or not
878 depending on whether it is inlined or not
875 """
879 """
876 btext = revinfo.btext
880 btext = revinfo.btext
877 if btext[0] is not None:
881 if btext[0] is not None:
878 return btext[0]
882 return btext[0]
879
883
880 revlog = self.revlog
884 revlog = self.revlog
881 cachedelta = revinfo.cachedelta
885 cachedelta = revinfo.cachedelta
882 baserev = cachedelta[0]
886 baserev = cachedelta[0]
883 delta = cachedelta[1]
887 delta = cachedelta[1]
884
888
885 fulltext = btext[0] = _textfromdelta(fh, revlog, baserev, delta,
889 fulltext = btext[0] = _textfromdelta(fh, revlog, baserev, delta,
886 revinfo.p1, revinfo.p2,
890 revinfo.p1, revinfo.p2,
887 revinfo.flags, revinfo.node)
891 revinfo.flags, revinfo.node)
888 return fulltext
892 return fulltext
889
893
890 def _builddeltadiff(self, base, revinfo, fh):
894 def _builddeltadiff(self, base, revinfo, fh):
891 revlog = self.revlog
895 revlog = self.revlog
892 t = self.buildtext(revinfo, fh)
896 t = self.buildtext(revinfo, fh)
893 if revlog.iscensored(base):
897 if revlog.iscensored(base):
894 # deltas based on a censored revision must replace the
898 # deltas based on a censored revision must replace the
895 # full content in one patch, so delta works everywhere
899 # full content in one patch, so delta works everywhere
896 header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
900 header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
897 delta = header + t
901 delta = header + t
898 else:
902 else:
899 ptext = revlog.revision(base, _df=fh, raw=True)
903 ptext = revlog.revision(base, _df=fh, raw=True)
900 delta = mdiff.textdiff(ptext, t)
904 delta = mdiff.textdiff(ptext, t)
901
905
902 return delta
906 return delta
903
907
904 def _builddeltainfo(self, revinfo, base, fh):
908 def _builddeltainfo(self, revinfo, base, fh):
905 # can we use the cached delta?
909 # can we use the cached delta?
906 delta = None
910 delta = None
907 if revinfo.cachedelta:
911 if revinfo.cachedelta:
908 cachebase, cachediff = revinfo.cachedelta
912 cachebase, cachediff = revinfo.cachedelta
909 #check if the diff still apply
913 #check if the diff still apply
910 currentbase = cachebase
914 currentbase = cachebase
911 while (currentbase != nullrev
915 while (currentbase != nullrev
912 and currentbase != base
916 and currentbase != base
913 and self.revlog.length(currentbase) == 0):
917 and self.revlog.length(currentbase) == 0):
914 currentbase = self.revlog.deltaparent(currentbase)
918 currentbase = self.revlog.deltaparent(currentbase)
915 if currentbase == base:
919 if currentbase == base:
916 delta = revinfo.cachedelta[1]
920 delta = revinfo.cachedelta[1]
917 if delta is None:
921 if delta is None:
918 delta = self._builddeltadiff(base, revinfo, fh)
922 delta = self._builddeltadiff(base, revinfo, fh)
919 revlog = self.revlog
923 revlog = self.revlog
920 header, data = revlog.compress(delta)
924 header, data = revlog.compress(delta)
921 deltalen = len(header) + len(data)
925 deltalen = len(header) + len(data)
922 chainbase = revlog.chainbase(base)
926 chainbase = revlog.chainbase(base)
923 offset = revlog.end(len(revlog) - 1)
927 offset = revlog.end(len(revlog) - 1)
924 dist = deltalen + offset - revlog.start(chainbase)
928 dist = deltalen + offset - revlog.start(chainbase)
925 if revlog._generaldelta:
929 if revlog._generaldelta:
926 deltabase = base
930 deltabase = base
927 else:
931 else:
928 deltabase = chainbase
932 deltabase = chainbase
929 chainlen, compresseddeltalen = revlog._chaininfo(base)
933 chainlen, compresseddeltalen = revlog._chaininfo(base)
930 chainlen += 1
934 chainlen += 1
931 compresseddeltalen += deltalen
935 compresseddeltalen += deltalen
932
936
933 revlog = self.revlog
937 revlog = self.revlog
934 snapshotdepth = None
938 snapshotdepth = None
935 if deltabase == nullrev:
939 if deltabase == nullrev:
936 snapshotdepth = 0
940 snapshotdepth = 0
937 elif revlog._sparserevlog and revlog.issnapshot(deltabase):
941 elif revlog._sparserevlog and revlog.issnapshot(deltabase):
938 # A delta chain should always be one full snapshot,
942 # A delta chain should always be one full snapshot,
939 # zero or more semi-snapshots, and zero or more deltas
943 # zero or more semi-snapshots, and zero or more deltas
940 p1, p2 = revlog.rev(revinfo.p1), revlog.rev(revinfo.p2)
944 p1, p2 = revlog.rev(revinfo.p1), revlog.rev(revinfo.p2)
941 if deltabase not in (p1, p2) and revlog.issnapshot(deltabase):
945 if deltabase not in (p1, p2) and revlog.issnapshot(deltabase):
942 snapshotdepth = len(revlog._deltachain(deltabase)[0])
946 snapshotdepth = len(revlog._deltachain(deltabase)[0])
943
947
944 return _deltainfo(dist, deltalen, (header, data), deltabase,
948 return _deltainfo(dist, deltalen, (header, data), deltabase,
945 chainbase, chainlen, compresseddeltalen,
949 chainbase, chainlen, compresseddeltalen,
946 snapshotdepth)
950 snapshotdepth)
947
951
948 def _fullsnapshotinfo(self, fh, revinfo):
952 def _fullsnapshotinfo(self, fh, revinfo):
949 curr = len(self.revlog)
953 curr = len(self.revlog)
950 rawtext = self.buildtext(revinfo, fh)
954 rawtext = self.buildtext(revinfo, fh)
951 data = self.revlog.compress(rawtext)
955 data = self.revlog.compress(rawtext)
952 compresseddeltalen = deltalen = dist = len(data[1]) + len(data[0])
956 compresseddeltalen = deltalen = dist = len(data[1]) + len(data[0])
953 deltabase = chainbase = curr
957 deltabase = chainbase = curr
954 snapshotdepth = 0
958 snapshotdepth = 0
955 chainlen = 1
959 chainlen = 1
956
960
957 return _deltainfo(dist, deltalen, data, deltabase,
961 return _deltainfo(dist, deltalen, data, deltabase,
958 chainbase, chainlen, compresseddeltalen,
962 chainbase, chainlen, compresseddeltalen,
959 snapshotdepth)
963 snapshotdepth)
960
964
961 def finddeltainfo(self, revinfo, fh):
965 def finddeltainfo(self, revinfo, fh):
962 """Find an acceptable delta against a candidate revision
966 """Find an acceptable delta against a candidate revision
963
967
964 revinfo: information about the revision (instance of _revisioninfo)
968 revinfo: information about the revision (instance of _revisioninfo)
965 fh: file handle to either the .i or the .d revlog file,
969 fh: file handle to either the .i or the .d revlog file,
966 depending on whether it is inlined or not
970 depending on whether it is inlined or not
967
971
968 Returns the first acceptable candidate revision, as ordered by
972 Returns the first acceptable candidate revision, as ordered by
969 _candidategroups
973 _candidategroups
970
974
971 If no suitable deltabase is found, we return delta info for a full
975 If no suitable deltabase is found, we return delta info for a full
972 snapshot.
976 snapshot.
973 """
977 """
974 if not revinfo.textlen:
978 if not revinfo.textlen:
975 return self._fullsnapshotinfo(fh, revinfo)
979 return self._fullsnapshotinfo(fh, revinfo)
976
980
977 # no delta for flag processor revision (see "candelta" for why)
981 # no delta for flag processor revision (see "candelta" for why)
978 # not calling candelta since only one revision needs test, also to
982 # not calling candelta since only one revision needs test, also to
979 # avoid overhead fetching flags again.
983 # avoid overhead fetching flags again.
980 if revinfo.flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
984 if revinfo.flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
981 return self._fullsnapshotinfo(fh, revinfo)
985 return self._fullsnapshotinfo(fh, revinfo)
982
986
983 cachedelta = revinfo.cachedelta
987 cachedelta = revinfo.cachedelta
984 p1 = revinfo.p1
988 p1 = revinfo.p1
985 p2 = revinfo.p2
989 p2 = revinfo.p2
986 revlog = self.revlog
990 revlog = self.revlog
987
991
988 deltainfo = None
992 deltainfo = None
989 p1r, p2r = revlog.rev(p1), revlog.rev(p2)
993 p1r, p2r = revlog.rev(p1), revlog.rev(p2)
990 groups = _candidategroups(self.revlog, revinfo.textlen,
994 groups = _candidategroups(self.revlog, revinfo.textlen,
991 p1r, p2r, cachedelta)
995 p1r, p2r, cachedelta)
992 candidaterevs = next(groups)
996 candidaterevs = next(groups)
993 while candidaterevs is not None:
997 while candidaterevs is not None:
994 nominateddeltas = []
998 nominateddeltas = []
995 if deltainfo is not None:
999 if deltainfo is not None:
996 # if we already found a good delta,
1000 # if we already found a good delta,
997 # challenge it against refined candidates
1001 # challenge it against refined candidates
998 nominateddeltas.append(deltainfo)
1002 nominateddeltas.append(deltainfo)
999 for candidaterev in candidaterevs:
1003 for candidaterev in candidaterevs:
1000 candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)
1004 candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)
1001 if isgooddeltainfo(self.revlog, candidatedelta, revinfo):
1005 if isgooddeltainfo(self.revlog, candidatedelta, revinfo):
1002 nominateddeltas.append(candidatedelta)
1006 nominateddeltas.append(candidatedelta)
1003 if nominateddeltas:
1007 if nominateddeltas:
1004 deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
1008 deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
1005 if deltainfo is not None:
1009 if deltainfo is not None:
1006 candidaterevs = groups.send(deltainfo.base)
1010 candidaterevs = groups.send(deltainfo.base)
1007 else:
1011 else:
1008 candidaterevs = next(groups)
1012 candidaterevs = next(groups)
1009
1013
1010 if deltainfo is None:
1014 if deltainfo is None:
1011 deltainfo = self._fullsnapshotinfo(fh, revinfo)
1015 deltainfo = self._fullsnapshotinfo(fh, revinfo)
1012 return deltainfo
1016 return deltainfo
General Comments 0
You need to be logged in to leave comments. Login now