##// END OF EJS Templates
revlogv2: preserve the compression mode on disk...
marmoute -
r48025:6bfa6c2c default
parent child Browse files
Show More
@@ -1,3037 +1,3040 b''
1 /*
1 /*
2 parsers.c - efficient content parsing
2 parsers.c - efficient content parsing
3
3
4 Copyright 2008 Olivia Mackall <olivia@selenic.com> and others
4 Copyright 2008 Olivia Mackall <olivia@selenic.com> and others
5
5
6 This software may be used and distributed according to the terms of
6 This software may be used and distributed according to the terms of
7 the GNU General Public License, incorporated herein by reference.
7 the GNU General Public License, incorporated herein by reference.
8 */
8 */
9
9
10 #define PY_SSIZE_T_CLEAN
10 #define PY_SSIZE_T_CLEAN
11 #include <Python.h>
11 #include <Python.h>
12 #include <assert.h>
12 #include <assert.h>
13 #include <ctype.h>
13 #include <ctype.h>
14 #include <limits.h>
14 #include <limits.h>
15 #include <stddef.h>
15 #include <stddef.h>
16 #include <stdlib.h>
16 #include <stdlib.h>
17 #include <string.h>
17 #include <string.h>
18 #include <structmember.h>
18 #include <structmember.h>
19
19
20 #include "bitmanipulation.h"
20 #include "bitmanipulation.h"
21 #include "charencode.h"
21 #include "charencode.h"
22 #include "compat.h"
22 #include "compat.h"
23 #include "revlog.h"
23 #include "revlog.h"
24 #include "util.h"
24 #include "util.h"
25
25
26 #ifdef IS_PY3K
26 #ifdef IS_PY3K
27 /* The mapping of Python types is meant to be temporary to get Python
27 /* The mapping of Python types is meant to be temporary to get Python
28 * 3 to compile. We should remove this once Python 3 support is fully
28 * 3 to compile. We should remove this once Python 3 support is fully
29 * supported and proper types are used in the extensions themselves. */
29 * supported and proper types are used in the extensions themselves. */
30 #define PyInt_Check PyLong_Check
30 #define PyInt_Check PyLong_Check
31 #define PyInt_FromLong PyLong_FromLong
31 #define PyInt_FromLong PyLong_FromLong
32 #define PyInt_FromSsize_t PyLong_FromSsize_t
32 #define PyInt_FromSsize_t PyLong_FromSsize_t
33 #define PyInt_AsLong PyLong_AsLong
33 #define PyInt_AsLong PyLong_AsLong
34 #endif
34 #endif
35
35
36 typedef struct indexObjectStruct indexObject;
36 typedef struct indexObjectStruct indexObject;
37
37
38 typedef struct {
38 typedef struct {
39 int children[16];
39 int children[16];
40 } nodetreenode;
40 } nodetreenode;
41
41
42 typedef struct {
42 typedef struct {
43 int abi_version;
43 int abi_version;
44 Py_ssize_t (*index_length)(const indexObject *);
44 Py_ssize_t (*index_length)(const indexObject *);
45 const char *(*index_node)(indexObject *, Py_ssize_t);
45 const char *(*index_node)(indexObject *, Py_ssize_t);
46 int (*index_parents)(PyObject *, int, int *);
46 int (*index_parents)(PyObject *, int, int *);
47 } Revlog_CAPI;
47 } Revlog_CAPI;
48
48
49 /*
49 /*
50 * A base-16 trie for fast node->rev mapping.
50 * A base-16 trie for fast node->rev mapping.
51 *
51 *
52 * Positive value is index of the next node in the trie
52 * Positive value is index of the next node in the trie
53 * Negative value is a leaf: -(rev + 2)
53 * Negative value is a leaf: -(rev + 2)
54 * Zero is empty
54 * Zero is empty
55 */
55 */
56 typedef struct {
56 typedef struct {
57 indexObject *index;
57 indexObject *index;
58 nodetreenode *nodes;
58 nodetreenode *nodes;
59 Py_ssize_t nodelen;
59 Py_ssize_t nodelen;
60 size_t length; /* # nodes in use */
60 size_t length; /* # nodes in use */
61 size_t capacity; /* # nodes allocated */
61 size_t capacity; /* # nodes allocated */
62 int depth; /* maximum depth of tree */
62 int depth; /* maximum depth of tree */
63 int splits; /* # splits performed */
63 int splits; /* # splits performed */
64 } nodetree;
64 } nodetree;
65
65
66 typedef struct {
66 typedef struct {
67 PyObject_HEAD /* ; */
67 PyObject_HEAD /* ; */
68 nodetree nt;
68 nodetree nt;
69 } nodetreeObject;
69 } nodetreeObject;
70
70
71 /*
71 /*
72 * This class has two behaviors.
72 * This class has two behaviors.
73 *
73 *
74 * When used in a list-like way (with integer keys), we decode an
74 * When used in a list-like way (with integer keys), we decode an
75 * entry in a RevlogNG index file on demand. We have limited support for
75 * entry in a RevlogNG index file on demand. We have limited support for
76 * integer-keyed insert and delete, only at elements right before the
76 * integer-keyed insert and delete, only at elements right before the
77 * end.
77 * end.
78 *
78 *
79 * With string keys, we lazily perform a reverse mapping from node to
79 * With string keys, we lazily perform a reverse mapping from node to
80 * rev, using a base-16 trie.
80 * rev, using a base-16 trie.
81 */
81 */
82 struct indexObjectStruct {
82 struct indexObjectStruct {
83 PyObject_HEAD
83 PyObject_HEAD
84 /* Type-specific fields go here. */
84 /* Type-specific fields go here. */
85 PyObject *data; /* raw bytes of index */
85 PyObject *data; /* raw bytes of index */
86 Py_ssize_t nodelen; /* digest size of the hash, 20 for SHA-1 */
86 Py_ssize_t nodelen; /* digest size of the hash, 20 for SHA-1 */
87 PyObject *nullentry; /* fast path for references to null */
87 PyObject *nullentry; /* fast path for references to null */
88 Py_buffer buf; /* buffer of data */
88 Py_buffer buf; /* buffer of data */
89 const char **offsets; /* populated on demand */
89 const char **offsets; /* populated on demand */
90 Py_ssize_t length; /* current on-disk number of elements */
90 Py_ssize_t length; /* current on-disk number of elements */
91 unsigned new_length; /* number of added elements */
91 unsigned new_length; /* number of added elements */
92 unsigned added_length; /* space reserved for added elements */
92 unsigned added_length; /* space reserved for added elements */
93 char *added; /* populated on demand */
93 char *added; /* populated on demand */
94 PyObject *headrevs; /* cache, invalidated on changes */
94 PyObject *headrevs; /* cache, invalidated on changes */
95 PyObject *filteredrevs; /* filtered revs set */
95 PyObject *filteredrevs; /* filtered revs set */
96 nodetree nt; /* base-16 trie */
96 nodetree nt; /* base-16 trie */
97 int ntinitialized; /* 0 or 1 */
97 int ntinitialized; /* 0 or 1 */
98 int ntrev; /* last rev scanned */
98 int ntrev; /* last rev scanned */
99 int ntlookups; /* # lookups */
99 int ntlookups; /* # lookups */
100 int ntmisses; /* # lookups that miss the cache */
100 int ntmisses; /* # lookups that miss the cache */
101 int inlined;
101 int inlined;
102 long entry_size; /* size of index headers. Differs in v1 v.s. v2 format
102 long entry_size; /* size of index headers. Differs in v1 v.s. v2 format
103 */
103 */
104 char format_version; /* size of index headers. Differs in v1 v.s. v2
104 char format_version; /* size of index headers. Differs in v1 v.s. v2
105 format */
105 format */
106 };
106 };
107
107
108 static Py_ssize_t index_length(const indexObject *self)
108 static Py_ssize_t index_length(const indexObject *self)
109 {
109 {
110 return self->length + self->new_length;
110 return self->length + self->new_length;
111 }
111 }
112
112
113 static const char nullid[32] = {0};
113 static const char nullid[32] = {0};
114 static const Py_ssize_t nullrev = -1;
114 static const Py_ssize_t nullrev = -1;
115
115
116 static Py_ssize_t inline_scan(indexObject *self, const char **offsets);
116 static Py_ssize_t inline_scan(indexObject *self, const char **offsets);
117
117
118 static int index_find_node(indexObject *self, const char *node);
118 static int index_find_node(indexObject *self, const char *node);
119
119
120 #if LONG_MAX == 0x7fffffffL
120 #if LONG_MAX == 0x7fffffffL
121 static const char *const tuple_format = PY23("Kiiiiiis#KiB", "Kiiiiiiy#KiB");
121 static const char *const tuple_format = PY23("Kiiiiiis#KiB", "Kiiiiiiy#KiB");
122 #else
122 #else
123 static const char *const tuple_format = PY23("kiiiiiis#kiB", "kiiiiiiy#kiB");
123 static const char *const tuple_format = PY23("kiiiiiis#kiB", "kiiiiiiy#kiB");
124 #endif
124 #endif
125
125
126 /* A RevlogNG v1 index entry is 64 bytes long. */
126 /* A RevlogNG v1 index entry is 64 bytes long. */
127 static const long v1_entry_size = 64;
127 static const long v1_entry_size = 64;
128
128
129 /* A Revlogv2 index entry is 96 bytes long. */
129 /* A Revlogv2 index entry is 96 bytes long. */
130 static const long v2_entry_size = 96;
130 static const long v2_entry_size = 96;
131
131
132 static const long format_v1 = 1; /* Internal only, could be any number */
132 static const long format_v1 = 1; /* Internal only, could be any number */
133 static const long format_v2 = 2; /* Internal only, could be any number */
133 static const long format_v2 = 2; /* Internal only, could be any number */
134
134
135 static const char comp_mode_inline = 2;
135 static const char comp_mode_inline = 2;
136
136
137 static void raise_revlog_error(void)
137 static void raise_revlog_error(void)
138 {
138 {
139 PyObject *mod = NULL, *dict = NULL, *errclass = NULL;
139 PyObject *mod = NULL, *dict = NULL, *errclass = NULL;
140
140
141 mod = PyImport_ImportModule("mercurial.error");
141 mod = PyImport_ImportModule("mercurial.error");
142 if (mod == NULL) {
142 if (mod == NULL) {
143 goto cleanup;
143 goto cleanup;
144 }
144 }
145
145
146 dict = PyModule_GetDict(mod);
146 dict = PyModule_GetDict(mod);
147 if (dict == NULL) {
147 if (dict == NULL) {
148 goto cleanup;
148 goto cleanup;
149 }
149 }
150 Py_INCREF(dict);
150 Py_INCREF(dict);
151
151
152 errclass = PyDict_GetItemString(dict, "RevlogError");
152 errclass = PyDict_GetItemString(dict, "RevlogError");
153 if (errclass == NULL) {
153 if (errclass == NULL) {
154 PyErr_SetString(PyExc_SystemError,
154 PyErr_SetString(PyExc_SystemError,
155 "could not find RevlogError");
155 "could not find RevlogError");
156 goto cleanup;
156 goto cleanup;
157 }
157 }
158
158
159 /* value of exception is ignored by callers */
159 /* value of exception is ignored by callers */
160 PyErr_SetString(errclass, "RevlogError");
160 PyErr_SetString(errclass, "RevlogError");
161
161
162 cleanup:
162 cleanup:
163 Py_XDECREF(dict);
163 Py_XDECREF(dict);
164 Py_XDECREF(mod);
164 Py_XDECREF(mod);
165 }
165 }
166
166
167 /*
167 /*
168 * Return a pointer to the beginning of a RevlogNG record.
168 * Return a pointer to the beginning of a RevlogNG record.
169 */
169 */
170 static const char *index_deref(indexObject *self, Py_ssize_t pos)
170 static const char *index_deref(indexObject *self, Py_ssize_t pos)
171 {
171 {
172 if (pos >= self->length)
172 if (pos >= self->length)
173 return self->added + (pos - self->length) * self->entry_size;
173 return self->added + (pos - self->length) * self->entry_size;
174
174
175 if (self->inlined && pos > 0) {
175 if (self->inlined && pos > 0) {
176 if (self->offsets == NULL) {
176 if (self->offsets == NULL) {
177 Py_ssize_t ret;
177 Py_ssize_t ret;
178 self->offsets =
178 self->offsets =
179 PyMem_Malloc(self->length * sizeof(*self->offsets));
179 PyMem_Malloc(self->length * sizeof(*self->offsets));
180 if (self->offsets == NULL)
180 if (self->offsets == NULL)
181 return (const char *)PyErr_NoMemory();
181 return (const char *)PyErr_NoMemory();
182 ret = inline_scan(self, self->offsets);
182 ret = inline_scan(self, self->offsets);
183 if (ret == -1) {
183 if (ret == -1) {
184 return NULL;
184 return NULL;
185 };
185 };
186 }
186 }
187 return self->offsets[pos];
187 return self->offsets[pos];
188 }
188 }
189
189
190 return (const char *)(self->buf.buf) + pos * self->entry_size;
190 return (const char *)(self->buf.buf) + pos * self->entry_size;
191 }
191 }
192
192
193 /*
193 /*
194 * Get parents of the given rev.
194 * Get parents of the given rev.
195 *
195 *
196 * The specified rev must be valid and must not be nullrev. A returned
196 * The specified rev must be valid and must not be nullrev. A returned
197 * parent revision may be nullrev, but is guaranteed to be in valid range.
197 * parent revision may be nullrev, but is guaranteed to be in valid range.
198 */
198 */
199 static inline int index_get_parents(indexObject *self, Py_ssize_t rev, int *ps,
199 static inline int index_get_parents(indexObject *self, Py_ssize_t rev, int *ps,
200 int maxrev)
200 int maxrev)
201 {
201 {
202 const char *data = index_deref(self, rev);
202 const char *data = index_deref(self, rev);
203
203
204 ps[0] = getbe32(data + 24);
204 ps[0] = getbe32(data + 24);
205 ps[1] = getbe32(data + 28);
205 ps[1] = getbe32(data + 28);
206
206
207 /* If index file is corrupted, ps[] may point to invalid revisions. So
207 /* If index file is corrupted, ps[] may point to invalid revisions. So
208 * there is a risk of buffer overflow to trust them unconditionally. */
208 * there is a risk of buffer overflow to trust them unconditionally. */
209 if (ps[0] < -1 || ps[0] > maxrev || ps[1] < -1 || ps[1] > maxrev) {
209 if (ps[0] < -1 || ps[0] > maxrev || ps[1] < -1 || ps[1] > maxrev) {
210 PyErr_SetString(PyExc_ValueError, "parent out of range");
210 PyErr_SetString(PyExc_ValueError, "parent out of range");
211 return -1;
211 return -1;
212 }
212 }
213 return 0;
213 return 0;
214 }
214 }
215
215
216 /*
216 /*
217 * Get parents of the given rev.
217 * Get parents of the given rev.
218 *
218 *
219 * If the specified rev is out of range, IndexError will be raised. If the
219 * If the specified rev is out of range, IndexError will be raised. If the
220 * revlog entry is corrupted, ValueError may be raised.
220 * revlog entry is corrupted, ValueError may be raised.
221 *
221 *
222 * Returns 0 on success or -1 on failure.
222 * Returns 0 on success or -1 on failure.
223 */
223 */
224 static int HgRevlogIndex_GetParents(PyObject *op, int rev, int *ps)
224 static int HgRevlogIndex_GetParents(PyObject *op, int rev, int *ps)
225 {
225 {
226 int tiprev;
226 int tiprev;
227 if (!op || !HgRevlogIndex_Check(op) || !ps) {
227 if (!op || !HgRevlogIndex_Check(op) || !ps) {
228 PyErr_BadInternalCall();
228 PyErr_BadInternalCall();
229 return -1;
229 return -1;
230 }
230 }
231 tiprev = (int)index_length((indexObject *)op) - 1;
231 tiprev = (int)index_length((indexObject *)op) - 1;
232 if (rev < -1 || rev > tiprev) {
232 if (rev < -1 || rev > tiprev) {
233 PyErr_Format(PyExc_IndexError, "rev out of range: %d", rev);
233 PyErr_Format(PyExc_IndexError, "rev out of range: %d", rev);
234 return -1;
234 return -1;
235 } else if (rev == -1) {
235 } else if (rev == -1) {
236 ps[0] = ps[1] = -1;
236 ps[0] = ps[1] = -1;
237 return 0;
237 return 0;
238 } else {
238 } else {
239 return index_get_parents((indexObject *)op, rev, ps, tiprev);
239 return index_get_parents((indexObject *)op, rev, ps, tiprev);
240 }
240 }
241 }
241 }
242
242
243 static inline int64_t index_get_start(indexObject *self, Py_ssize_t rev)
243 static inline int64_t index_get_start(indexObject *self, Py_ssize_t rev)
244 {
244 {
245 const char *data;
245 const char *data;
246 uint64_t offset;
246 uint64_t offset;
247
247
248 if (rev == nullrev)
248 if (rev == nullrev)
249 return 0;
249 return 0;
250
250
251 data = index_deref(self, rev);
251 data = index_deref(self, rev);
252 offset = getbe32(data + 4);
252 offset = getbe32(data + 4);
253 if (rev == 0) {
253 if (rev == 0) {
254 /* mask out version number for the first entry */
254 /* mask out version number for the first entry */
255 offset &= 0xFFFF;
255 offset &= 0xFFFF;
256 } else {
256 } else {
257 uint32_t offset_high = getbe32(data);
257 uint32_t offset_high = getbe32(data);
258 offset |= ((uint64_t)offset_high) << 32;
258 offset |= ((uint64_t)offset_high) << 32;
259 }
259 }
260 return (int64_t)(offset >> 16);
260 return (int64_t)(offset >> 16);
261 }
261 }
262
262
263 static inline int index_get_length(indexObject *self, Py_ssize_t rev)
263 static inline int index_get_length(indexObject *self, Py_ssize_t rev)
264 {
264 {
265 const char *data;
265 const char *data;
266 int tmp;
266 int tmp;
267
267
268 if (rev == nullrev)
268 if (rev == nullrev)
269 return 0;
269 return 0;
270
270
271 data = index_deref(self, rev);
271 data = index_deref(self, rev);
272
272
273 tmp = (int)getbe32(data + 8);
273 tmp = (int)getbe32(data + 8);
274 if (tmp < 0) {
274 if (tmp < 0) {
275 PyErr_Format(PyExc_OverflowError,
275 PyErr_Format(PyExc_OverflowError,
276 "revlog entry size out of bound (%d)", tmp);
276 "revlog entry size out of bound (%d)", tmp);
277 return -1;
277 return -1;
278 }
278 }
279 return tmp;
279 return tmp;
280 }
280 }
281
281
282 /*
282 /*
283 * RevlogNG format (all in big endian, data may be inlined):
283 * RevlogNG format (all in big endian, data may be inlined):
284 * 6 bytes: offset
284 * 6 bytes: offset
285 * 2 bytes: flags
285 * 2 bytes: flags
286 * 4 bytes: compressed length
286 * 4 bytes: compressed length
287 * 4 bytes: uncompressed length
287 * 4 bytes: uncompressed length
288 * 4 bytes: base revision
288 * 4 bytes: base revision
289 * 4 bytes: link revision
289 * 4 bytes: link revision
290 * 4 bytes: parent 1 revision
290 * 4 bytes: parent 1 revision
291 * 4 bytes: parent 2 revision
291 * 4 bytes: parent 2 revision
292 * 32 bytes: nodeid (only 20 bytes used with SHA-1)
292 * 32 bytes: nodeid (only 20 bytes used with SHA-1)
293 */
293 */
294 static PyObject *index_get(indexObject *self, Py_ssize_t pos)
294 static PyObject *index_get(indexObject *self, Py_ssize_t pos)
295 {
295 {
296 uint64_t offset_flags, sidedata_offset;
296 uint64_t offset_flags, sidedata_offset;
297 int comp_len, uncomp_len, base_rev, link_rev, parent_1, parent_2,
297 int comp_len, uncomp_len, base_rev, link_rev, parent_1, parent_2,
298 sidedata_comp_len;
298 sidedata_comp_len;
299 char data_comp_mode;
299 char data_comp_mode;
300 const char *c_node_id;
300 const char *c_node_id;
301 const char *data;
301 const char *data;
302 Py_ssize_t length = index_length(self);
302 Py_ssize_t length = index_length(self);
303
303
304 if (pos == nullrev) {
304 if (pos == nullrev) {
305 Py_INCREF(self->nullentry);
305 Py_INCREF(self->nullentry);
306 return self->nullentry;
306 return self->nullentry;
307 }
307 }
308
308
309 if (pos < 0 || pos >= length) {
309 if (pos < 0 || pos >= length) {
310 PyErr_SetString(PyExc_IndexError, "revlog index out of range");
310 PyErr_SetString(PyExc_IndexError, "revlog index out of range");
311 return NULL;
311 return NULL;
312 }
312 }
313
313
314 data = index_deref(self, pos);
314 data = index_deref(self, pos);
315 if (data == NULL)
315 if (data == NULL)
316 return NULL;
316 return NULL;
317
317
318 offset_flags = getbe32(data + 4);
318 offset_flags = getbe32(data + 4);
319 /*
319 /*
320 * The first entry on-disk needs the version number masked out,
320 * The first entry on-disk needs the version number masked out,
321 * but this doesn't apply if entries are added to an empty index.
321 * but this doesn't apply if entries are added to an empty index.
322 */
322 */
323 if (self->length && pos == 0)
323 if (self->length && pos == 0)
324 offset_flags &= 0xFFFF;
324 offset_flags &= 0xFFFF;
325 else {
325 else {
326 uint32_t offset_high = getbe32(data);
326 uint32_t offset_high = getbe32(data);
327 offset_flags |= ((uint64_t)offset_high) << 32;
327 offset_flags |= ((uint64_t)offset_high) << 32;
328 }
328 }
329
329
330 comp_len = getbe32(data + 8);
330 comp_len = getbe32(data + 8);
331 uncomp_len = getbe32(data + 12);
331 uncomp_len = getbe32(data + 12);
332 base_rev = getbe32(data + 16);
332 base_rev = getbe32(data + 16);
333 link_rev = getbe32(data + 20);
333 link_rev = getbe32(data + 20);
334 parent_1 = getbe32(data + 24);
334 parent_1 = getbe32(data + 24);
335 parent_2 = getbe32(data + 28);
335 parent_2 = getbe32(data + 28);
336 c_node_id = data + 32;
336 c_node_id = data + 32;
337
337
338 if (self->format_version == format_v1) {
338 if (self->format_version == format_v1) {
339 sidedata_offset = 0;
339 sidedata_offset = 0;
340 sidedata_comp_len = 0;
340 sidedata_comp_len = 0;
341 data_comp_mode = comp_mode_inline;
341 } else {
342 } else {
342 sidedata_offset = getbe64(data + 64);
343 sidedata_offset = getbe64(data + 64);
343 sidedata_comp_len = getbe32(data + 72);
344 sidedata_comp_len = getbe32(data + 72);
345 data_comp_mode = data[76];
344 }
346 }
345
347
346 data_comp_mode = comp_mode_inline;
347 return Py_BuildValue(tuple_format, offset_flags, comp_len, uncomp_len,
348 return Py_BuildValue(tuple_format, offset_flags, comp_len, uncomp_len,
348 base_rev, link_rev, parent_1, parent_2, c_node_id,
349 base_rev, link_rev, parent_1, parent_2, c_node_id,
349 self->nodelen, sidedata_offset, sidedata_comp_len,
350 self->nodelen, sidedata_offset, sidedata_comp_len,
350 data_comp_mode);
351 data_comp_mode);
351 }
352 }
352 /*
353 /*
353 * Pack header information in binary
354 * Pack header information in binary
354 */
355 */
355 static PyObject *index_pack_header(indexObject *self, PyObject *args)
356 static PyObject *index_pack_header(indexObject *self, PyObject *args)
356 {
357 {
357 int header;
358 int header;
358 char out[4];
359 char out[4];
359 if (!PyArg_ParseTuple(args, "I", &header)) {
360 if (!PyArg_ParseTuple(args, "I", &header)) {
360 return NULL;
361 return NULL;
361 }
362 }
362 if (self->format_version != format_v1) {
363 if (self->format_version != format_v1) {
363 PyErr_Format(PyExc_RuntimeError,
364 PyErr_Format(PyExc_RuntimeError,
364 "version header should go in the docket, not the "
365 "version header should go in the docket, not the "
365 "index: %lu",
366 "index: %lu",
366 header);
367 header);
367 return NULL;
368 return NULL;
368 }
369 }
369 putbe32(header, out);
370 putbe32(header, out);
370 return PyBytes_FromStringAndSize(out, 4);
371 return PyBytes_FromStringAndSize(out, 4);
371 }
372 }
372 /*
373 /*
373 * Return the raw binary string representing a revision
374 * Return the raw binary string representing a revision
374 */
375 */
375 static PyObject *index_entry_binary(indexObject *self, PyObject *value)
376 static PyObject *index_entry_binary(indexObject *self, PyObject *value)
376 {
377 {
377 long rev;
378 long rev;
378 const char *data;
379 const char *data;
379 Py_ssize_t length = index_length(self);
380 Py_ssize_t length = index_length(self);
380
381
381 if (!pylong_to_long(value, &rev)) {
382 if (!pylong_to_long(value, &rev)) {
382 return NULL;
383 return NULL;
383 }
384 }
384 if (rev < 0 || rev >= length) {
385 if (rev < 0 || rev >= length) {
385 PyErr_Format(PyExc_ValueError, "revlog index out of range: %ld",
386 PyErr_Format(PyExc_ValueError, "revlog index out of range: %ld",
386 rev);
387 rev);
387 return NULL;
388 return NULL;
388 };
389 };
389
390
390 data = index_deref(self, rev);
391 data = index_deref(self, rev);
391 if (data == NULL)
392 if (data == NULL)
392 return NULL;
393 return NULL;
393 if (rev == 0 && self->format_version == format_v1) {
394 if (rev == 0 && self->format_version == format_v1) {
394 /* the header is eating the start of the first entry */
395 /* the header is eating the start of the first entry */
395 return PyBytes_FromStringAndSize(data + 4,
396 return PyBytes_FromStringAndSize(data + 4,
396 self->entry_size - 4);
397 self->entry_size - 4);
397 }
398 }
398 return PyBytes_FromStringAndSize(data, self->entry_size);
399 return PyBytes_FromStringAndSize(data, self->entry_size);
399 }
400 }
400
401
401 /*
402 /*
402 * Return the hash of node corresponding to the given rev.
403 * Return the hash of node corresponding to the given rev.
403 */
404 */
404 static const char *index_node(indexObject *self, Py_ssize_t pos)
405 static const char *index_node(indexObject *self, Py_ssize_t pos)
405 {
406 {
406 Py_ssize_t length = index_length(self);
407 Py_ssize_t length = index_length(self);
407 const char *data;
408 const char *data;
408
409
409 if (pos == nullrev)
410 if (pos == nullrev)
410 return nullid;
411 return nullid;
411
412
412 if (pos >= length)
413 if (pos >= length)
413 return NULL;
414 return NULL;
414
415
415 data = index_deref(self, pos);
416 data = index_deref(self, pos);
416 return data ? data + 32 : NULL;
417 return data ? data + 32 : NULL;
417 }
418 }
418
419
419 /*
420 /*
420 * Return the hash of the node corresponding to the given rev. The
421 * Return the hash of the node corresponding to the given rev. The
421 * rev is assumed to be existing. If not, an exception is set.
422 * rev is assumed to be existing. If not, an exception is set.
422 */
423 */
423 static const char *index_node_existing(indexObject *self, Py_ssize_t pos)
424 static const char *index_node_existing(indexObject *self, Py_ssize_t pos)
424 {
425 {
425 const char *node = index_node(self, pos);
426 const char *node = index_node(self, pos);
426 if (node == NULL) {
427 if (node == NULL) {
427 PyErr_Format(PyExc_IndexError, "could not access rev %d",
428 PyErr_Format(PyExc_IndexError, "could not access rev %d",
428 (int)pos);
429 (int)pos);
429 }
430 }
430 return node;
431 return node;
431 }
432 }
432
433
433 static int nt_insert(nodetree *self, const char *node, int rev);
434 static int nt_insert(nodetree *self, const char *node, int rev);
434
435
435 static int node_check(Py_ssize_t nodelen, PyObject *obj, char **node)
436 static int node_check(Py_ssize_t nodelen, PyObject *obj, char **node)
436 {
437 {
437 Py_ssize_t thisnodelen;
438 Py_ssize_t thisnodelen;
438 if (PyBytes_AsStringAndSize(obj, node, &thisnodelen) == -1)
439 if (PyBytes_AsStringAndSize(obj, node, &thisnodelen) == -1)
439 return -1;
440 return -1;
440 if (nodelen == thisnodelen)
441 if (nodelen == thisnodelen)
441 return 0;
442 return 0;
442 PyErr_Format(PyExc_ValueError, "node len %zd != expected node len %zd",
443 PyErr_Format(PyExc_ValueError, "node len %zd != expected node len %zd",
443 thisnodelen, nodelen);
444 thisnodelen, nodelen);
444 return -1;
445 return -1;
445 }
446 }
446
447
447 static PyObject *index_append(indexObject *self, PyObject *obj)
448 static PyObject *index_append(indexObject *self, PyObject *obj)
448 {
449 {
449 uint64_t offset_flags, sidedata_offset;
450 uint64_t offset_flags, sidedata_offset;
450 int rev, comp_len, uncomp_len, base_rev, link_rev, parent_1, parent_2;
451 int rev, comp_len, uncomp_len, base_rev, link_rev, parent_1, parent_2;
451 char data_comp_mode;
452 char data_comp_mode;
452 Py_ssize_t c_node_id_len, sidedata_comp_len;
453 Py_ssize_t c_node_id_len, sidedata_comp_len;
453 const char *c_node_id;
454 const char *c_node_id;
454 char *data;
455 char *data;
455
456
456 if (!PyArg_ParseTuple(obj, tuple_format, &offset_flags, &comp_len,
457 if (!PyArg_ParseTuple(obj, tuple_format, &offset_flags, &comp_len,
457 &uncomp_len, &base_rev, &link_rev, &parent_1,
458 &uncomp_len, &base_rev, &link_rev, &parent_1,
458 &parent_2, &c_node_id, &c_node_id_len,
459 &parent_2, &c_node_id, &c_node_id_len,
459 &sidedata_offset, &sidedata_comp_len,
460 &sidedata_offset, &sidedata_comp_len,
460 &data_comp_mode)) {
461 &data_comp_mode)) {
461 PyErr_SetString(PyExc_TypeError, "11-tuple required");
462 PyErr_SetString(PyExc_TypeError, "11-tuple required");
462 return NULL;
463 return NULL;
463 }
464 }
464
465
465 if (c_node_id_len != self->nodelen) {
466 if (c_node_id_len != self->nodelen) {
466 PyErr_SetString(PyExc_TypeError, "invalid node");
467 PyErr_SetString(PyExc_TypeError, "invalid node");
467 return NULL;
468 return NULL;
468 }
469 }
469 if (data_comp_mode != comp_mode_inline) {
470 if (self->format_version == format_v1 &&
471 data_comp_mode != comp_mode_inline) {
470 PyErr_Format(PyExc_ValueError,
472 PyErr_Format(PyExc_ValueError,
471 "invalid data compression mode: %i",
473 "invalid data compression mode: %i",
472 data_comp_mode);
474 data_comp_mode);
473 return NULL;
475 return NULL;
474 }
476 }
475
477
476 if (self->new_length == self->added_length) {
478 if (self->new_length == self->added_length) {
477 size_t new_added_length =
479 size_t new_added_length =
478 self->added_length ? self->added_length * 2 : 4096;
480 self->added_length ? self->added_length * 2 : 4096;
479 void *new_added = PyMem_Realloc(
481 void *new_added = PyMem_Realloc(
480 self->added, new_added_length * self->entry_size);
482 self->added, new_added_length * self->entry_size);
481 if (!new_added)
483 if (!new_added)
482 return PyErr_NoMemory();
484 return PyErr_NoMemory();
483 self->added = new_added;
485 self->added = new_added;
484 self->added_length = new_added_length;
486 self->added_length = new_added_length;
485 }
487 }
486 rev = self->length + self->new_length;
488 rev = self->length + self->new_length;
487 data = self->added + self->entry_size * self->new_length++;
489 data = self->added + self->entry_size * self->new_length++;
488 putbe32(offset_flags >> 32, data);
490 putbe32(offset_flags >> 32, data);
489 putbe32(offset_flags & 0xffffffffU, data + 4);
491 putbe32(offset_flags & 0xffffffffU, data + 4);
490 putbe32(comp_len, data + 8);
492 putbe32(comp_len, data + 8);
491 putbe32(uncomp_len, data + 12);
493 putbe32(uncomp_len, data + 12);
492 putbe32(base_rev, data + 16);
494 putbe32(base_rev, data + 16);
493 putbe32(link_rev, data + 20);
495 putbe32(link_rev, data + 20);
494 putbe32(parent_1, data + 24);
496 putbe32(parent_1, data + 24);
495 putbe32(parent_2, data + 28);
497 putbe32(parent_2, data + 28);
496 memcpy(data + 32, c_node_id, c_node_id_len);
498 memcpy(data + 32, c_node_id, c_node_id_len);
497 /* Padding since SHA-1 is only 20 bytes for now */
499 /* Padding since SHA-1 is only 20 bytes for now */
498 memset(data + 32 + c_node_id_len, 0, 32 - c_node_id_len);
500 memset(data + 32 + c_node_id_len, 0, 32 - c_node_id_len);
499 if (self->format_version == format_v2) {
501 if (self->format_version == format_v2) {
500 putbe64(sidedata_offset, data + 64);
502 putbe64(sidedata_offset, data + 64);
501 putbe32(sidedata_comp_len, data + 72);
503 putbe32(sidedata_comp_len, data + 72);
504 data[76] = (char)data_comp_mode;
502 /* Padding for 96 bytes alignment */
505 /* Padding for 96 bytes alignment */
503 memset(data + 76, 0, self->entry_size - 76);
506 memset(data + 77, 0, self->entry_size - 77);
504 }
507 }
505
508
506 if (self->ntinitialized)
509 if (self->ntinitialized)
507 nt_insert(&self->nt, c_node_id, rev);
510 nt_insert(&self->nt, c_node_id, rev);
508
511
509 Py_CLEAR(self->headrevs);
512 Py_CLEAR(self->headrevs);
510 Py_RETURN_NONE;
513 Py_RETURN_NONE;
511 }
514 }
512
515
513 /* Replace an existing index entry's sidedata offset and length with new ones.
516 /* Replace an existing index entry's sidedata offset and length with new ones.
514 This cannot be used outside of the context of sidedata rewriting,
517 This cannot be used outside of the context of sidedata rewriting,
515 inside the transaction that creates the given revision. */
518 inside the transaction that creates the given revision. */
516 static PyObject *index_replace_sidedata_info(indexObject *self, PyObject *args)
519 static PyObject *index_replace_sidedata_info(indexObject *self, PyObject *args)
517 {
520 {
518 uint64_t offset_flags, sidedata_offset;
521 uint64_t offset_flags, sidedata_offset;
519 int rev;
522 int rev;
520 Py_ssize_t sidedata_comp_len;
523 Py_ssize_t sidedata_comp_len;
521 char *data;
524 char *data;
522 #if LONG_MAX == 0x7fffffffL
525 #if LONG_MAX == 0x7fffffffL
523 const char *const sidedata_format = PY23("nKiK", "nKiK");
526 const char *const sidedata_format = PY23("nKiK", "nKiK");
524 #else
527 #else
525 const char *const sidedata_format = PY23("nkik", "nkik");
528 const char *const sidedata_format = PY23("nkik", "nkik");
526 #endif
529 #endif
527
530
528 if (self->entry_size == v1_entry_size || self->inlined) {
531 if (self->entry_size == v1_entry_size || self->inlined) {
529 /*
532 /*
530 There is a bug in the transaction handling when going from an
533 There is a bug in the transaction handling when going from an
531 inline revlog to a separate index and data file. Turn it off until
534 inline revlog to a separate index and data file. Turn it off until
532 it's fixed, since v2 revlogs sometimes get rewritten on exchange.
535 it's fixed, since v2 revlogs sometimes get rewritten on exchange.
533 See issue6485.
536 See issue6485.
534 */
537 */
535 raise_revlog_error();
538 raise_revlog_error();
536 return NULL;
539 return NULL;
537 }
540 }
538
541
539 if (!PyArg_ParseTuple(args, sidedata_format, &rev, &sidedata_offset,
542 if (!PyArg_ParseTuple(args, sidedata_format, &rev, &sidedata_offset,
540 &sidedata_comp_len, &offset_flags))
543 &sidedata_comp_len, &offset_flags))
541 return NULL;
544 return NULL;
542
545
543 if (rev < 0 || rev >= index_length(self)) {
546 if (rev < 0 || rev >= index_length(self)) {
544 PyErr_SetString(PyExc_IndexError, "revision outside index");
547 PyErr_SetString(PyExc_IndexError, "revision outside index");
545 return NULL;
548 return NULL;
546 }
549 }
547 if (rev < self->length) {
550 if (rev < self->length) {
548 PyErr_SetString(
551 PyErr_SetString(
549 PyExc_IndexError,
552 PyExc_IndexError,
550 "cannot rewrite entries outside of this transaction");
553 "cannot rewrite entries outside of this transaction");
551 return NULL;
554 return NULL;
552 }
555 }
553
556
554 /* Find the newly added node, offset from the "already on-disk" length
557 /* Find the newly added node, offset from the "already on-disk" length
555 */
558 */
556 data = self->added + self->entry_size * (rev - self->length);
559 data = self->added + self->entry_size * (rev - self->length);
557 putbe64(offset_flags, data);
560 putbe64(offset_flags, data);
558 putbe64(sidedata_offset, data + 64);
561 putbe64(sidedata_offset, data + 64);
559 putbe32(sidedata_comp_len, data + 72);
562 putbe32(sidedata_comp_len, data + 72);
560
563
561 Py_RETURN_NONE;
564 Py_RETURN_NONE;
562 }
565 }
563
566
564 static PyObject *index_stats(indexObject *self)
567 static PyObject *index_stats(indexObject *self)
565 {
568 {
566 PyObject *obj = PyDict_New();
569 PyObject *obj = PyDict_New();
567 PyObject *s = NULL;
570 PyObject *s = NULL;
568 PyObject *t = NULL;
571 PyObject *t = NULL;
569
572
570 if (obj == NULL)
573 if (obj == NULL)
571 return NULL;
574 return NULL;
572
575
573 #define istat(__n, __d) \
576 #define istat(__n, __d) \
574 do { \
577 do { \
575 s = PyBytes_FromString(__d); \
578 s = PyBytes_FromString(__d); \
576 t = PyInt_FromSsize_t(self->__n); \
579 t = PyInt_FromSsize_t(self->__n); \
577 if (!s || !t) \
580 if (!s || !t) \
578 goto bail; \
581 goto bail; \
579 if (PyDict_SetItem(obj, s, t) == -1) \
582 if (PyDict_SetItem(obj, s, t) == -1) \
580 goto bail; \
583 goto bail; \
581 Py_CLEAR(s); \
584 Py_CLEAR(s); \
582 Py_CLEAR(t); \
585 Py_CLEAR(t); \
583 } while (0)
586 } while (0)
584
587
585 if (self->added_length)
588 if (self->added_length)
586 istat(new_length, "index entries added");
589 istat(new_length, "index entries added");
587 istat(length, "revs in memory");
590 istat(length, "revs in memory");
588 istat(ntlookups, "node trie lookups");
591 istat(ntlookups, "node trie lookups");
589 istat(ntmisses, "node trie misses");
592 istat(ntmisses, "node trie misses");
590 istat(ntrev, "node trie last rev scanned");
593 istat(ntrev, "node trie last rev scanned");
591 if (self->ntinitialized) {
594 if (self->ntinitialized) {
592 istat(nt.capacity, "node trie capacity");
595 istat(nt.capacity, "node trie capacity");
593 istat(nt.depth, "node trie depth");
596 istat(nt.depth, "node trie depth");
594 istat(nt.length, "node trie count");
597 istat(nt.length, "node trie count");
595 istat(nt.splits, "node trie splits");
598 istat(nt.splits, "node trie splits");
596 }
599 }
597
600
598 #undef istat
601 #undef istat
599
602
600 return obj;
603 return obj;
601
604
602 bail:
605 bail:
603 Py_XDECREF(obj);
606 Py_XDECREF(obj);
604 Py_XDECREF(s);
607 Py_XDECREF(s);
605 Py_XDECREF(t);
608 Py_XDECREF(t);
606 return NULL;
609 return NULL;
607 }
610 }
608
611
609 /*
612 /*
610 * When we cache a list, we want to be sure the caller can't mutate
613 * When we cache a list, we want to be sure the caller can't mutate
611 * the cached copy.
614 * the cached copy.
612 */
615 */
613 static PyObject *list_copy(PyObject *list)
616 static PyObject *list_copy(PyObject *list)
614 {
617 {
615 Py_ssize_t len = PyList_GET_SIZE(list);
618 Py_ssize_t len = PyList_GET_SIZE(list);
616 PyObject *newlist = PyList_New(len);
619 PyObject *newlist = PyList_New(len);
617 Py_ssize_t i;
620 Py_ssize_t i;
618
621
619 if (newlist == NULL)
622 if (newlist == NULL)
620 return NULL;
623 return NULL;
621
624
622 for (i = 0; i < len; i++) {
625 for (i = 0; i < len; i++) {
623 PyObject *obj = PyList_GET_ITEM(list, i);
626 PyObject *obj = PyList_GET_ITEM(list, i);
624 Py_INCREF(obj);
627 Py_INCREF(obj);
625 PyList_SET_ITEM(newlist, i, obj);
628 PyList_SET_ITEM(newlist, i, obj);
626 }
629 }
627
630
628 return newlist;
631 return newlist;
629 }
632 }
630
633
631 static int check_filter(PyObject *filter, Py_ssize_t arg)
634 static int check_filter(PyObject *filter, Py_ssize_t arg)
632 {
635 {
633 if (filter) {
636 if (filter) {
634 PyObject *arglist, *result;
637 PyObject *arglist, *result;
635 int isfiltered;
638 int isfiltered;
636
639
637 arglist = Py_BuildValue("(n)", arg);
640 arglist = Py_BuildValue("(n)", arg);
638 if (!arglist) {
641 if (!arglist) {
639 return -1;
642 return -1;
640 }
643 }
641
644
642 result = PyObject_Call(filter, arglist, NULL);
645 result = PyObject_Call(filter, arglist, NULL);
643 Py_DECREF(arglist);
646 Py_DECREF(arglist);
644 if (!result) {
647 if (!result) {
645 return -1;
648 return -1;
646 }
649 }
647
650
648 /* PyObject_IsTrue returns 1 if true, 0 if false, -1 if error,
651 /* PyObject_IsTrue returns 1 if true, 0 if false, -1 if error,
649 * same as this function, so we can just return it directly.*/
652 * same as this function, so we can just return it directly.*/
650 isfiltered = PyObject_IsTrue(result);
653 isfiltered = PyObject_IsTrue(result);
651 Py_DECREF(result);
654 Py_DECREF(result);
652 return isfiltered;
655 return isfiltered;
653 } else {
656 } else {
654 return 0;
657 return 0;
655 }
658 }
656 }
659 }
657
660
658 static inline void set_phase_from_parents(char *phases, int parent_1,
661 static inline void set_phase_from_parents(char *phases, int parent_1,
659 int parent_2, Py_ssize_t i)
662 int parent_2, Py_ssize_t i)
660 {
663 {
661 if (parent_1 >= 0 && phases[parent_1] > phases[i])
664 if (parent_1 >= 0 && phases[parent_1] > phases[i])
662 phases[i] = phases[parent_1];
665 phases[i] = phases[parent_1];
663 if (parent_2 >= 0 && phases[parent_2] > phases[i])
666 if (parent_2 >= 0 && phases[parent_2] > phases[i])
664 phases[i] = phases[parent_2];
667 phases[i] = phases[parent_2];
665 }
668 }
666
669
667 static PyObject *reachableroots2(indexObject *self, PyObject *args)
670 static PyObject *reachableroots2(indexObject *self, PyObject *args)
668 {
671 {
669
672
670 /* Input */
673 /* Input */
671 long minroot;
674 long minroot;
672 PyObject *includepatharg = NULL;
675 PyObject *includepatharg = NULL;
673 int includepath = 0;
676 int includepath = 0;
674 /* heads and roots are lists */
677 /* heads and roots are lists */
675 PyObject *heads = NULL;
678 PyObject *heads = NULL;
676 PyObject *roots = NULL;
679 PyObject *roots = NULL;
677 PyObject *reachable = NULL;
680 PyObject *reachable = NULL;
678
681
679 PyObject *val;
682 PyObject *val;
680 Py_ssize_t len = index_length(self);
683 Py_ssize_t len = index_length(self);
681 long revnum;
684 long revnum;
682 Py_ssize_t k;
685 Py_ssize_t k;
683 Py_ssize_t i;
686 Py_ssize_t i;
684 Py_ssize_t l;
687 Py_ssize_t l;
685 int r;
688 int r;
686 int parents[2];
689 int parents[2];
687
690
688 /* Internal data structure:
691 /* Internal data structure:
689 * tovisit: array of length len+1 (all revs + nullrev), filled upto
692 * tovisit: array of length len+1 (all revs + nullrev), filled upto
690 * lentovisit
693 * lentovisit
691 *
694 *
692 * revstates: array of length len+1 (all revs + nullrev) */
695 * revstates: array of length len+1 (all revs + nullrev) */
693 int *tovisit = NULL;
696 int *tovisit = NULL;
694 long lentovisit = 0;
697 long lentovisit = 0;
695 enum { RS_SEEN = 1, RS_ROOT = 2, RS_REACHABLE = 4 };
698 enum { RS_SEEN = 1, RS_ROOT = 2, RS_REACHABLE = 4 };
696 char *revstates = NULL;
699 char *revstates = NULL;
697
700
698 /* Get arguments */
701 /* Get arguments */
699 if (!PyArg_ParseTuple(args, "lO!O!O!", &minroot, &PyList_Type, &heads,
702 if (!PyArg_ParseTuple(args, "lO!O!O!", &minroot, &PyList_Type, &heads,
700 &PyList_Type, &roots, &PyBool_Type,
703 &PyList_Type, &roots, &PyBool_Type,
701 &includepatharg))
704 &includepatharg))
702 goto bail;
705 goto bail;
703
706
704 if (includepatharg == Py_True)
707 if (includepatharg == Py_True)
705 includepath = 1;
708 includepath = 1;
706
709
707 /* Initialize return set */
710 /* Initialize return set */
708 reachable = PyList_New(0);
711 reachable = PyList_New(0);
709 if (reachable == NULL)
712 if (reachable == NULL)
710 goto bail;
713 goto bail;
711
714
712 /* Initialize internal datastructures */
715 /* Initialize internal datastructures */
713 tovisit = (int *)malloc((len + 1) * sizeof(int));
716 tovisit = (int *)malloc((len + 1) * sizeof(int));
714 if (tovisit == NULL) {
717 if (tovisit == NULL) {
715 PyErr_NoMemory();
718 PyErr_NoMemory();
716 goto bail;
719 goto bail;
717 }
720 }
718
721
719 revstates = (char *)calloc(len + 1, 1);
722 revstates = (char *)calloc(len + 1, 1);
720 if (revstates == NULL) {
723 if (revstates == NULL) {
721 PyErr_NoMemory();
724 PyErr_NoMemory();
722 goto bail;
725 goto bail;
723 }
726 }
724
727
725 l = PyList_GET_SIZE(roots);
728 l = PyList_GET_SIZE(roots);
726 for (i = 0; i < l; i++) {
729 for (i = 0; i < l; i++) {
727 revnum = PyInt_AsLong(PyList_GET_ITEM(roots, i));
730 revnum = PyInt_AsLong(PyList_GET_ITEM(roots, i));
728 if (revnum == -1 && PyErr_Occurred())
731 if (revnum == -1 && PyErr_Occurred())
729 goto bail;
732 goto bail;
730 /* If root is out of range, e.g. wdir(), it must be unreachable
733 /* If root is out of range, e.g. wdir(), it must be unreachable
731 * from heads. So we can just ignore it. */
734 * from heads. So we can just ignore it. */
732 if (revnum + 1 < 0 || revnum + 1 >= len + 1)
735 if (revnum + 1 < 0 || revnum + 1 >= len + 1)
733 continue;
736 continue;
734 revstates[revnum + 1] |= RS_ROOT;
737 revstates[revnum + 1] |= RS_ROOT;
735 }
738 }
736
739
737 /* Populate tovisit with all the heads */
740 /* Populate tovisit with all the heads */
738 l = PyList_GET_SIZE(heads);
741 l = PyList_GET_SIZE(heads);
739 for (i = 0; i < l; i++) {
742 for (i = 0; i < l; i++) {
740 revnum = PyInt_AsLong(PyList_GET_ITEM(heads, i));
743 revnum = PyInt_AsLong(PyList_GET_ITEM(heads, i));
741 if (revnum == -1 && PyErr_Occurred())
744 if (revnum == -1 && PyErr_Occurred())
742 goto bail;
745 goto bail;
743 if (revnum + 1 < 0 || revnum + 1 >= len + 1) {
746 if (revnum + 1 < 0 || revnum + 1 >= len + 1) {
744 PyErr_SetString(PyExc_IndexError, "head out of range");
747 PyErr_SetString(PyExc_IndexError, "head out of range");
745 goto bail;
748 goto bail;
746 }
749 }
747 if (!(revstates[revnum + 1] & RS_SEEN)) {
750 if (!(revstates[revnum + 1] & RS_SEEN)) {
748 tovisit[lentovisit++] = (int)revnum;
751 tovisit[lentovisit++] = (int)revnum;
749 revstates[revnum + 1] |= RS_SEEN;
752 revstates[revnum + 1] |= RS_SEEN;
750 }
753 }
751 }
754 }
752
755
753 /* Visit the tovisit list and find the reachable roots */
756 /* Visit the tovisit list and find the reachable roots */
754 k = 0;
757 k = 0;
755 while (k < lentovisit) {
758 while (k < lentovisit) {
756 /* Add the node to reachable if it is a root*/
759 /* Add the node to reachable if it is a root*/
757 revnum = tovisit[k++];
760 revnum = tovisit[k++];
758 if (revstates[revnum + 1] & RS_ROOT) {
761 if (revstates[revnum + 1] & RS_ROOT) {
759 revstates[revnum + 1] |= RS_REACHABLE;
762 revstates[revnum + 1] |= RS_REACHABLE;
760 val = PyInt_FromLong(revnum);
763 val = PyInt_FromLong(revnum);
761 if (val == NULL)
764 if (val == NULL)
762 goto bail;
765 goto bail;
763 r = PyList_Append(reachable, val);
766 r = PyList_Append(reachable, val);
764 Py_DECREF(val);
767 Py_DECREF(val);
765 if (r < 0)
768 if (r < 0)
766 goto bail;
769 goto bail;
767 if (includepath == 0)
770 if (includepath == 0)
768 continue;
771 continue;
769 }
772 }
770
773
771 /* Add its parents to the list of nodes to visit */
774 /* Add its parents to the list of nodes to visit */
772 if (revnum == nullrev)
775 if (revnum == nullrev)
773 continue;
776 continue;
774 r = index_get_parents(self, revnum, parents, (int)len - 1);
777 r = index_get_parents(self, revnum, parents, (int)len - 1);
775 if (r < 0)
778 if (r < 0)
776 goto bail;
779 goto bail;
777 for (i = 0; i < 2; i++) {
780 for (i = 0; i < 2; i++) {
778 if (!(revstates[parents[i] + 1] & RS_SEEN) &&
781 if (!(revstates[parents[i] + 1] & RS_SEEN) &&
779 parents[i] >= minroot) {
782 parents[i] >= minroot) {
780 tovisit[lentovisit++] = parents[i];
783 tovisit[lentovisit++] = parents[i];
781 revstates[parents[i] + 1] |= RS_SEEN;
784 revstates[parents[i] + 1] |= RS_SEEN;
782 }
785 }
783 }
786 }
784 }
787 }
785
788
786 /* Find all the nodes in between the roots we found and the heads
789 /* Find all the nodes in between the roots we found and the heads
787 * and add them to the reachable set */
790 * and add them to the reachable set */
788 if (includepath == 1) {
791 if (includepath == 1) {
789 long minidx = minroot;
792 long minidx = minroot;
790 if (minidx < 0)
793 if (minidx < 0)
791 minidx = 0;
794 minidx = 0;
792 for (i = minidx; i < len; i++) {
795 for (i = minidx; i < len; i++) {
793 if (!(revstates[i + 1] & RS_SEEN))
796 if (!(revstates[i + 1] & RS_SEEN))
794 continue;
797 continue;
795 r = index_get_parents(self, i, parents, (int)len - 1);
798 r = index_get_parents(self, i, parents, (int)len - 1);
796 /* Corrupted index file, error is set from
799 /* Corrupted index file, error is set from
797 * index_get_parents */
800 * index_get_parents */
798 if (r < 0)
801 if (r < 0)
799 goto bail;
802 goto bail;
800 if (((revstates[parents[0] + 1] |
803 if (((revstates[parents[0] + 1] |
801 revstates[parents[1] + 1]) &
804 revstates[parents[1] + 1]) &
802 RS_REACHABLE) &&
805 RS_REACHABLE) &&
803 !(revstates[i + 1] & RS_REACHABLE)) {
806 !(revstates[i + 1] & RS_REACHABLE)) {
804 revstates[i + 1] |= RS_REACHABLE;
807 revstates[i + 1] |= RS_REACHABLE;
805 val = PyInt_FromSsize_t(i);
808 val = PyInt_FromSsize_t(i);
806 if (val == NULL)
809 if (val == NULL)
807 goto bail;
810 goto bail;
808 r = PyList_Append(reachable, val);
811 r = PyList_Append(reachable, val);
809 Py_DECREF(val);
812 Py_DECREF(val);
810 if (r < 0)
813 if (r < 0)
811 goto bail;
814 goto bail;
812 }
815 }
813 }
816 }
814 }
817 }
815
818
816 free(revstates);
819 free(revstates);
817 free(tovisit);
820 free(tovisit);
818 return reachable;
821 return reachable;
819 bail:
822 bail:
820 Py_XDECREF(reachable);
823 Py_XDECREF(reachable);
821 free(revstates);
824 free(revstates);
822 free(tovisit);
825 free(tovisit);
823 return NULL;
826 return NULL;
824 }
827 }
825
828
826 static int add_roots_get_min(indexObject *self, PyObject *roots, char *phases,
829 static int add_roots_get_min(indexObject *self, PyObject *roots, char *phases,
827 char phase)
830 char phase)
828 {
831 {
829 Py_ssize_t len = index_length(self);
832 Py_ssize_t len = index_length(self);
830 PyObject *item;
833 PyObject *item;
831 PyObject *iterator;
834 PyObject *iterator;
832 int rev, minrev = -1;
835 int rev, minrev = -1;
833 char *node;
836 char *node;
834
837
835 if (!PySet_Check(roots)) {
838 if (!PySet_Check(roots)) {
836 PyErr_SetString(PyExc_TypeError,
839 PyErr_SetString(PyExc_TypeError,
837 "roots must be a set of nodes");
840 "roots must be a set of nodes");
838 return -2;
841 return -2;
839 }
842 }
840 iterator = PyObject_GetIter(roots);
843 iterator = PyObject_GetIter(roots);
841 if (iterator == NULL)
844 if (iterator == NULL)
842 return -2;
845 return -2;
843 while ((item = PyIter_Next(iterator))) {
846 while ((item = PyIter_Next(iterator))) {
844 if (node_check(self->nodelen, item, &node) == -1)
847 if (node_check(self->nodelen, item, &node) == -1)
845 goto failed;
848 goto failed;
846 rev = index_find_node(self, node);
849 rev = index_find_node(self, node);
847 /* null is implicitly public, so negative is invalid */
850 /* null is implicitly public, so negative is invalid */
848 if (rev < 0 || rev >= len)
851 if (rev < 0 || rev >= len)
849 goto failed;
852 goto failed;
850 phases[rev] = phase;
853 phases[rev] = phase;
851 if (minrev == -1 || minrev > rev)
854 if (minrev == -1 || minrev > rev)
852 minrev = rev;
855 minrev = rev;
853 Py_DECREF(item);
856 Py_DECREF(item);
854 }
857 }
855 Py_DECREF(iterator);
858 Py_DECREF(iterator);
856 return minrev;
859 return minrev;
857 failed:
860 failed:
858 Py_DECREF(iterator);
861 Py_DECREF(iterator);
859 Py_DECREF(item);
862 Py_DECREF(item);
860 return -2;
863 return -2;
861 }
864 }
862
865
863 static PyObject *compute_phases_map_sets(indexObject *self, PyObject *args)
866 static PyObject *compute_phases_map_sets(indexObject *self, PyObject *args)
864 {
867 {
865 /* 0: public (untracked), 1: draft, 2: secret, 32: archive,
868 /* 0: public (untracked), 1: draft, 2: secret, 32: archive,
866 96: internal */
869 96: internal */
867 static const char trackedphases[] = {1, 2, 32, 96};
870 static const char trackedphases[] = {1, 2, 32, 96};
868 PyObject *roots = Py_None;
871 PyObject *roots = Py_None;
869 PyObject *phasesetsdict = NULL;
872 PyObject *phasesetsdict = NULL;
870 PyObject *phasesets[4] = {NULL, NULL, NULL, NULL};
873 PyObject *phasesets[4] = {NULL, NULL, NULL, NULL};
871 Py_ssize_t len = index_length(self);
874 Py_ssize_t len = index_length(self);
872 char *phases = NULL;
875 char *phases = NULL;
873 int minphaserev = -1, rev, i;
876 int minphaserev = -1, rev, i;
874 const int numphases = (int)(sizeof(phasesets) / sizeof(phasesets[0]));
877 const int numphases = (int)(sizeof(phasesets) / sizeof(phasesets[0]));
875
878
876 if (!PyArg_ParseTuple(args, "O", &roots))
879 if (!PyArg_ParseTuple(args, "O", &roots))
877 return NULL;
880 return NULL;
878 if (roots == NULL || !PyDict_Check(roots)) {
881 if (roots == NULL || !PyDict_Check(roots)) {
879 PyErr_SetString(PyExc_TypeError, "roots must be a dictionary");
882 PyErr_SetString(PyExc_TypeError, "roots must be a dictionary");
880 return NULL;
883 return NULL;
881 }
884 }
882
885
883 phases = calloc(len, 1);
886 phases = calloc(len, 1);
884 if (phases == NULL) {
887 if (phases == NULL) {
885 PyErr_NoMemory();
888 PyErr_NoMemory();
886 return NULL;
889 return NULL;
887 }
890 }
888
891
889 for (i = 0; i < numphases; ++i) {
892 for (i = 0; i < numphases; ++i) {
890 PyObject *pyphase = PyInt_FromLong(trackedphases[i]);
893 PyObject *pyphase = PyInt_FromLong(trackedphases[i]);
891 PyObject *phaseroots = NULL;
894 PyObject *phaseroots = NULL;
892 if (pyphase == NULL)
895 if (pyphase == NULL)
893 goto release;
896 goto release;
894 phaseroots = PyDict_GetItem(roots, pyphase);
897 phaseroots = PyDict_GetItem(roots, pyphase);
895 Py_DECREF(pyphase);
898 Py_DECREF(pyphase);
896 if (phaseroots == NULL)
899 if (phaseroots == NULL)
897 continue;
900 continue;
898 rev = add_roots_get_min(self, phaseroots, phases,
901 rev = add_roots_get_min(self, phaseroots, phases,
899 trackedphases[i]);
902 trackedphases[i]);
900 if (rev == -2)
903 if (rev == -2)
901 goto release;
904 goto release;
902 if (rev != -1 && (minphaserev == -1 || rev < minphaserev))
905 if (rev != -1 && (minphaserev == -1 || rev < minphaserev))
903 minphaserev = rev;
906 minphaserev = rev;
904 }
907 }
905
908
906 for (i = 0; i < numphases; ++i) {
909 for (i = 0; i < numphases; ++i) {
907 phasesets[i] = PySet_New(NULL);
910 phasesets[i] = PySet_New(NULL);
908 if (phasesets[i] == NULL)
911 if (phasesets[i] == NULL)
909 goto release;
912 goto release;
910 }
913 }
911
914
912 if (minphaserev == -1)
915 if (minphaserev == -1)
913 minphaserev = len;
916 minphaserev = len;
914 for (rev = minphaserev; rev < len; ++rev) {
917 for (rev = minphaserev; rev < len; ++rev) {
915 PyObject *pyphase = NULL;
918 PyObject *pyphase = NULL;
916 PyObject *pyrev = NULL;
919 PyObject *pyrev = NULL;
917 int parents[2];
920 int parents[2];
918 /*
921 /*
919 * The parent lookup could be skipped for phaseroots, but
922 * The parent lookup could be skipped for phaseroots, but
920 * phase --force would historically not recompute them
923 * phase --force would historically not recompute them
921 * correctly, leaving descendents with a lower phase around.
924 * correctly, leaving descendents with a lower phase around.
922 * As such, unconditionally recompute the phase.
925 * As such, unconditionally recompute the phase.
923 */
926 */
924 if (index_get_parents(self, rev, parents, (int)len - 1) < 0)
927 if (index_get_parents(self, rev, parents, (int)len - 1) < 0)
925 goto release;
928 goto release;
926 set_phase_from_parents(phases, parents[0], parents[1], rev);
929 set_phase_from_parents(phases, parents[0], parents[1], rev);
927 switch (phases[rev]) {
930 switch (phases[rev]) {
928 case 0:
931 case 0:
929 continue;
932 continue;
930 case 1:
933 case 1:
931 pyphase = phasesets[0];
934 pyphase = phasesets[0];
932 break;
935 break;
933 case 2:
936 case 2:
934 pyphase = phasesets[1];
937 pyphase = phasesets[1];
935 break;
938 break;
936 case 32:
939 case 32:
937 pyphase = phasesets[2];
940 pyphase = phasesets[2];
938 break;
941 break;
939 case 96:
942 case 96:
940 pyphase = phasesets[3];
943 pyphase = phasesets[3];
941 break;
944 break;
942 default:
945 default:
943 /* this should never happen since the phase number is
946 /* this should never happen since the phase number is
944 * specified by this function. */
947 * specified by this function. */
945 PyErr_SetString(PyExc_SystemError,
948 PyErr_SetString(PyExc_SystemError,
946 "bad phase number in internal list");
949 "bad phase number in internal list");
947 goto release;
950 goto release;
948 }
951 }
949 pyrev = PyInt_FromLong(rev);
952 pyrev = PyInt_FromLong(rev);
950 if (pyrev == NULL)
953 if (pyrev == NULL)
951 goto release;
954 goto release;
952 if (PySet_Add(pyphase, pyrev) == -1) {
955 if (PySet_Add(pyphase, pyrev) == -1) {
953 Py_DECREF(pyrev);
956 Py_DECREF(pyrev);
954 goto release;
957 goto release;
955 }
958 }
956 Py_DECREF(pyrev);
959 Py_DECREF(pyrev);
957 }
960 }
958
961
959 phasesetsdict = _dict_new_presized(numphases);
962 phasesetsdict = _dict_new_presized(numphases);
960 if (phasesetsdict == NULL)
963 if (phasesetsdict == NULL)
961 goto release;
964 goto release;
962 for (i = 0; i < numphases; ++i) {
965 for (i = 0; i < numphases; ++i) {
963 PyObject *pyphase = PyInt_FromLong(trackedphases[i]);
966 PyObject *pyphase = PyInt_FromLong(trackedphases[i]);
964 if (pyphase == NULL)
967 if (pyphase == NULL)
965 goto release;
968 goto release;
966 if (PyDict_SetItem(phasesetsdict, pyphase, phasesets[i]) ==
969 if (PyDict_SetItem(phasesetsdict, pyphase, phasesets[i]) ==
967 -1) {
970 -1) {
968 Py_DECREF(pyphase);
971 Py_DECREF(pyphase);
969 goto release;
972 goto release;
970 }
973 }
971 Py_DECREF(phasesets[i]);
974 Py_DECREF(phasesets[i]);
972 phasesets[i] = NULL;
975 phasesets[i] = NULL;
973 }
976 }
974
977
975 return Py_BuildValue("nN", len, phasesetsdict);
978 return Py_BuildValue("nN", len, phasesetsdict);
976
979
977 release:
980 release:
978 for (i = 0; i < numphases; ++i)
981 for (i = 0; i < numphases; ++i)
979 Py_XDECREF(phasesets[i]);
982 Py_XDECREF(phasesets[i]);
980 Py_XDECREF(phasesetsdict);
983 Py_XDECREF(phasesetsdict);
981
984
982 free(phases);
985 free(phases);
983 return NULL;
986 return NULL;
984 }
987 }
985
988
986 static PyObject *index_headrevs(indexObject *self, PyObject *args)
989 static PyObject *index_headrevs(indexObject *self, PyObject *args)
987 {
990 {
988 Py_ssize_t i, j, len;
991 Py_ssize_t i, j, len;
989 char *nothead = NULL;
992 char *nothead = NULL;
990 PyObject *heads = NULL;
993 PyObject *heads = NULL;
991 PyObject *filter = NULL;
994 PyObject *filter = NULL;
992 PyObject *filteredrevs = Py_None;
995 PyObject *filteredrevs = Py_None;
993
996
994 if (!PyArg_ParseTuple(args, "|O", &filteredrevs)) {
997 if (!PyArg_ParseTuple(args, "|O", &filteredrevs)) {
995 return NULL;
998 return NULL;
996 }
999 }
997
1000
998 if (self->headrevs && filteredrevs == self->filteredrevs)
1001 if (self->headrevs && filteredrevs == self->filteredrevs)
999 return list_copy(self->headrevs);
1002 return list_copy(self->headrevs);
1000
1003
1001 Py_DECREF(self->filteredrevs);
1004 Py_DECREF(self->filteredrevs);
1002 self->filteredrevs = filteredrevs;
1005 self->filteredrevs = filteredrevs;
1003 Py_INCREF(filteredrevs);
1006 Py_INCREF(filteredrevs);
1004
1007
1005 if (filteredrevs != Py_None) {
1008 if (filteredrevs != Py_None) {
1006 filter = PyObject_GetAttrString(filteredrevs, "__contains__");
1009 filter = PyObject_GetAttrString(filteredrevs, "__contains__");
1007 if (!filter) {
1010 if (!filter) {
1008 PyErr_SetString(
1011 PyErr_SetString(
1009 PyExc_TypeError,
1012 PyExc_TypeError,
1010 "filteredrevs has no attribute __contains__");
1013 "filteredrevs has no attribute __contains__");
1011 goto bail;
1014 goto bail;
1012 }
1015 }
1013 }
1016 }
1014
1017
1015 len = index_length(self);
1018 len = index_length(self);
1016 heads = PyList_New(0);
1019 heads = PyList_New(0);
1017 if (heads == NULL)
1020 if (heads == NULL)
1018 goto bail;
1021 goto bail;
1019 if (len == 0) {
1022 if (len == 0) {
1020 PyObject *nullid = PyInt_FromLong(-1);
1023 PyObject *nullid = PyInt_FromLong(-1);
1021 if (nullid == NULL || PyList_Append(heads, nullid) == -1) {
1024 if (nullid == NULL || PyList_Append(heads, nullid) == -1) {
1022 Py_XDECREF(nullid);
1025 Py_XDECREF(nullid);
1023 goto bail;
1026 goto bail;
1024 }
1027 }
1025 goto done;
1028 goto done;
1026 }
1029 }
1027
1030
1028 nothead = calloc(len, 1);
1031 nothead = calloc(len, 1);
1029 if (nothead == NULL) {
1032 if (nothead == NULL) {
1030 PyErr_NoMemory();
1033 PyErr_NoMemory();
1031 goto bail;
1034 goto bail;
1032 }
1035 }
1033
1036
1034 for (i = len - 1; i >= 0; i--) {
1037 for (i = len - 1; i >= 0; i--) {
1035 int isfiltered;
1038 int isfiltered;
1036 int parents[2];
1039 int parents[2];
1037
1040
1038 /* If nothead[i] == 1, it means we've seen an unfiltered child
1041 /* If nothead[i] == 1, it means we've seen an unfiltered child
1039 * of this node already, and therefore this node is not
1042 * of this node already, and therefore this node is not
1040 * filtered. So we can skip the expensive check_filter step.
1043 * filtered. So we can skip the expensive check_filter step.
1041 */
1044 */
1042 if (nothead[i] != 1) {
1045 if (nothead[i] != 1) {
1043 isfiltered = check_filter(filter, i);
1046 isfiltered = check_filter(filter, i);
1044 if (isfiltered == -1) {
1047 if (isfiltered == -1) {
1045 PyErr_SetString(PyExc_TypeError,
1048 PyErr_SetString(PyExc_TypeError,
1046 "unable to check filter");
1049 "unable to check filter");
1047 goto bail;
1050 goto bail;
1048 }
1051 }
1049
1052
1050 if (isfiltered) {
1053 if (isfiltered) {
1051 nothead[i] = 1;
1054 nothead[i] = 1;
1052 continue;
1055 continue;
1053 }
1056 }
1054 }
1057 }
1055
1058
1056 if (index_get_parents(self, i, parents, (int)len - 1) < 0)
1059 if (index_get_parents(self, i, parents, (int)len - 1) < 0)
1057 goto bail;
1060 goto bail;
1058 for (j = 0; j < 2; j++) {
1061 for (j = 0; j < 2; j++) {
1059 if (parents[j] >= 0)
1062 if (parents[j] >= 0)
1060 nothead[parents[j]] = 1;
1063 nothead[parents[j]] = 1;
1061 }
1064 }
1062 }
1065 }
1063
1066
1064 for (i = 0; i < len; i++) {
1067 for (i = 0; i < len; i++) {
1065 PyObject *head;
1068 PyObject *head;
1066
1069
1067 if (nothead[i])
1070 if (nothead[i])
1068 continue;
1071 continue;
1069 head = PyInt_FromSsize_t(i);
1072 head = PyInt_FromSsize_t(i);
1070 if (head == NULL || PyList_Append(heads, head) == -1) {
1073 if (head == NULL || PyList_Append(heads, head) == -1) {
1071 Py_XDECREF(head);
1074 Py_XDECREF(head);
1072 goto bail;
1075 goto bail;
1073 }
1076 }
1074 }
1077 }
1075
1078
1076 done:
1079 done:
1077 self->headrevs = heads;
1080 self->headrevs = heads;
1078 Py_XDECREF(filter);
1081 Py_XDECREF(filter);
1079 free(nothead);
1082 free(nothead);
1080 return list_copy(self->headrevs);
1083 return list_copy(self->headrevs);
1081 bail:
1084 bail:
1082 Py_XDECREF(filter);
1085 Py_XDECREF(filter);
1083 Py_XDECREF(heads);
1086 Py_XDECREF(heads);
1084 free(nothead);
1087 free(nothead);
1085 return NULL;
1088 return NULL;
1086 }
1089 }
1087
1090
1088 /**
1091 /**
1089 * Obtain the base revision index entry.
1092 * Obtain the base revision index entry.
1090 *
1093 *
1091 * Callers must ensure that rev >= 0 or illegal memory access may occur.
1094 * Callers must ensure that rev >= 0 or illegal memory access may occur.
1092 */
1095 */
1093 static inline int index_baserev(indexObject *self, int rev)
1096 static inline int index_baserev(indexObject *self, int rev)
1094 {
1097 {
1095 const char *data;
1098 const char *data;
1096 int result;
1099 int result;
1097
1100
1098 data = index_deref(self, rev);
1101 data = index_deref(self, rev);
1099 if (data == NULL)
1102 if (data == NULL)
1100 return -2;
1103 return -2;
1101 result = getbe32(data + 16);
1104 result = getbe32(data + 16);
1102
1105
1103 if (result > rev) {
1106 if (result > rev) {
1104 PyErr_Format(
1107 PyErr_Format(
1105 PyExc_ValueError,
1108 PyExc_ValueError,
1106 "corrupted revlog, revision base above revision: %d, %d",
1109 "corrupted revlog, revision base above revision: %d, %d",
1107 rev, result);
1110 rev, result);
1108 return -2;
1111 return -2;
1109 }
1112 }
1110 if (result < -1) {
1113 if (result < -1) {
1111 PyErr_Format(
1114 PyErr_Format(
1112 PyExc_ValueError,
1115 PyExc_ValueError,
1113 "corrupted revlog, revision base out of range: %d, %d", rev,
1116 "corrupted revlog, revision base out of range: %d, %d", rev,
1114 result);
1117 result);
1115 return -2;
1118 return -2;
1116 }
1119 }
1117 return result;
1120 return result;
1118 }
1121 }
1119
1122
1120 /**
1123 /**
1121 * Find if a revision is a snapshot or not
1124 * Find if a revision is a snapshot or not
1122 *
1125 *
1123 * Only relevant for sparse-revlog case.
1126 * Only relevant for sparse-revlog case.
1124 * Callers must ensure that rev is in a valid range.
1127 * Callers must ensure that rev is in a valid range.
1125 */
1128 */
1126 static int index_issnapshotrev(indexObject *self, Py_ssize_t rev)
1129 static int index_issnapshotrev(indexObject *self, Py_ssize_t rev)
1127 {
1130 {
1128 int ps[2];
1131 int ps[2];
1129 Py_ssize_t base;
1132 Py_ssize_t base;
1130 while (rev >= 0) {
1133 while (rev >= 0) {
1131 base = (Py_ssize_t)index_baserev(self, rev);
1134 base = (Py_ssize_t)index_baserev(self, rev);
1132 if (base == rev) {
1135 if (base == rev) {
1133 base = -1;
1136 base = -1;
1134 }
1137 }
1135 if (base == -2) {
1138 if (base == -2) {
1136 assert(PyErr_Occurred());
1139 assert(PyErr_Occurred());
1137 return -1;
1140 return -1;
1138 }
1141 }
1139 if (base == -1) {
1142 if (base == -1) {
1140 return 1;
1143 return 1;
1141 }
1144 }
1142 if (index_get_parents(self, rev, ps, (int)rev) < 0) {
1145 if (index_get_parents(self, rev, ps, (int)rev) < 0) {
1143 assert(PyErr_Occurred());
1146 assert(PyErr_Occurred());
1144 return -1;
1147 return -1;
1145 };
1148 };
1146 if (base == ps[0] || base == ps[1]) {
1149 if (base == ps[0] || base == ps[1]) {
1147 return 0;
1150 return 0;
1148 }
1151 }
1149 rev = base;
1152 rev = base;
1150 }
1153 }
1151 return rev == -1;
1154 return rev == -1;
1152 }
1155 }
1153
1156
1154 static PyObject *index_issnapshot(indexObject *self, PyObject *value)
1157 static PyObject *index_issnapshot(indexObject *self, PyObject *value)
1155 {
1158 {
1156 long rev;
1159 long rev;
1157 int issnap;
1160 int issnap;
1158 Py_ssize_t length = index_length(self);
1161 Py_ssize_t length = index_length(self);
1159
1162
1160 if (!pylong_to_long(value, &rev)) {
1163 if (!pylong_to_long(value, &rev)) {
1161 return NULL;
1164 return NULL;
1162 }
1165 }
1163 if (rev < -1 || rev >= length) {
1166 if (rev < -1 || rev >= length) {
1164 PyErr_Format(PyExc_ValueError, "revlog index out of range: %ld",
1167 PyErr_Format(PyExc_ValueError, "revlog index out of range: %ld",
1165 rev);
1168 rev);
1166 return NULL;
1169 return NULL;
1167 };
1170 };
1168 issnap = index_issnapshotrev(self, (Py_ssize_t)rev);
1171 issnap = index_issnapshotrev(self, (Py_ssize_t)rev);
1169 if (issnap < 0) {
1172 if (issnap < 0) {
1170 return NULL;
1173 return NULL;
1171 };
1174 };
1172 return PyBool_FromLong((long)issnap);
1175 return PyBool_FromLong((long)issnap);
1173 }
1176 }
1174
1177
1175 static PyObject *index_findsnapshots(indexObject *self, PyObject *args)
1178 static PyObject *index_findsnapshots(indexObject *self, PyObject *args)
1176 {
1179 {
1177 Py_ssize_t start_rev;
1180 Py_ssize_t start_rev;
1178 PyObject *cache;
1181 PyObject *cache;
1179 Py_ssize_t base;
1182 Py_ssize_t base;
1180 Py_ssize_t rev;
1183 Py_ssize_t rev;
1181 PyObject *key = NULL;
1184 PyObject *key = NULL;
1182 PyObject *value = NULL;
1185 PyObject *value = NULL;
1183 const Py_ssize_t length = index_length(self);
1186 const Py_ssize_t length = index_length(self);
1184 if (!PyArg_ParseTuple(args, "O!n", &PyDict_Type, &cache, &start_rev)) {
1187 if (!PyArg_ParseTuple(args, "O!n", &PyDict_Type, &cache, &start_rev)) {
1185 return NULL;
1188 return NULL;
1186 }
1189 }
1187 for (rev = start_rev; rev < length; rev++) {
1190 for (rev = start_rev; rev < length; rev++) {
1188 int issnap;
1191 int issnap;
1189 PyObject *allvalues = NULL;
1192 PyObject *allvalues = NULL;
1190 issnap = index_issnapshotrev(self, rev);
1193 issnap = index_issnapshotrev(self, rev);
1191 if (issnap < 0) {
1194 if (issnap < 0) {
1192 goto bail;
1195 goto bail;
1193 }
1196 }
1194 if (issnap == 0) {
1197 if (issnap == 0) {
1195 continue;
1198 continue;
1196 }
1199 }
1197 base = (Py_ssize_t)index_baserev(self, rev);
1200 base = (Py_ssize_t)index_baserev(self, rev);
1198 if (base == rev) {
1201 if (base == rev) {
1199 base = -1;
1202 base = -1;
1200 }
1203 }
1201 if (base == -2) {
1204 if (base == -2) {
1202 assert(PyErr_Occurred());
1205 assert(PyErr_Occurred());
1203 goto bail;
1206 goto bail;
1204 }
1207 }
1205 key = PyInt_FromSsize_t(base);
1208 key = PyInt_FromSsize_t(base);
1206 allvalues = PyDict_GetItem(cache, key);
1209 allvalues = PyDict_GetItem(cache, key);
1207 if (allvalues == NULL && PyErr_Occurred()) {
1210 if (allvalues == NULL && PyErr_Occurred()) {
1208 goto bail;
1211 goto bail;
1209 }
1212 }
1210 if (allvalues == NULL) {
1213 if (allvalues == NULL) {
1211 int r;
1214 int r;
1212 allvalues = PyList_New(0);
1215 allvalues = PyList_New(0);
1213 if (!allvalues) {
1216 if (!allvalues) {
1214 goto bail;
1217 goto bail;
1215 }
1218 }
1216 r = PyDict_SetItem(cache, key, allvalues);
1219 r = PyDict_SetItem(cache, key, allvalues);
1217 Py_DECREF(allvalues);
1220 Py_DECREF(allvalues);
1218 if (r < 0) {
1221 if (r < 0) {
1219 goto bail;
1222 goto bail;
1220 }
1223 }
1221 }
1224 }
1222 value = PyInt_FromSsize_t(rev);
1225 value = PyInt_FromSsize_t(rev);
1223 if (PyList_Append(allvalues, value)) {
1226 if (PyList_Append(allvalues, value)) {
1224 goto bail;
1227 goto bail;
1225 }
1228 }
1226 Py_CLEAR(key);
1229 Py_CLEAR(key);
1227 Py_CLEAR(value);
1230 Py_CLEAR(value);
1228 }
1231 }
1229 Py_RETURN_NONE;
1232 Py_RETURN_NONE;
1230 bail:
1233 bail:
1231 Py_XDECREF(key);
1234 Py_XDECREF(key);
1232 Py_XDECREF(value);
1235 Py_XDECREF(value);
1233 return NULL;
1236 return NULL;
1234 }
1237 }
1235
1238
1236 static PyObject *index_deltachain(indexObject *self, PyObject *args)
1239 static PyObject *index_deltachain(indexObject *self, PyObject *args)
1237 {
1240 {
1238 int rev, generaldelta;
1241 int rev, generaldelta;
1239 PyObject *stoparg;
1242 PyObject *stoparg;
1240 int stoprev, iterrev, baserev = -1;
1243 int stoprev, iterrev, baserev = -1;
1241 int stopped;
1244 int stopped;
1242 PyObject *chain = NULL, *result = NULL;
1245 PyObject *chain = NULL, *result = NULL;
1243 const Py_ssize_t length = index_length(self);
1246 const Py_ssize_t length = index_length(self);
1244
1247
1245 if (!PyArg_ParseTuple(args, "iOi", &rev, &stoparg, &generaldelta)) {
1248 if (!PyArg_ParseTuple(args, "iOi", &rev, &stoparg, &generaldelta)) {
1246 return NULL;
1249 return NULL;
1247 }
1250 }
1248
1251
1249 if (PyInt_Check(stoparg)) {
1252 if (PyInt_Check(stoparg)) {
1250 stoprev = (int)PyInt_AsLong(stoparg);
1253 stoprev = (int)PyInt_AsLong(stoparg);
1251 if (stoprev == -1 && PyErr_Occurred()) {
1254 if (stoprev == -1 && PyErr_Occurred()) {
1252 return NULL;
1255 return NULL;
1253 }
1256 }
1254 } else if (stoparg == Py_None) {
1257 } else if (stoparg == Py_None) {
1255 stoprev = -2;
1258 stoprev = -2;
1256 } else {
1259 } else {
1257 PyErr_SetString(PyExc_ValueError,
1260 PyErr_SetString(PyExc_ValueError,
1258 "stoprev must be integer or None");
1261 "stoprev must be integer or None");
1259 return NULL;
1262 return NULL;
1260 }
1263 }
1261
1264
1262 if (rev < 0 || rev >= length) {
1265 if (rev < 0 || rev >= length) {
1263 PyErr_SetString(PyExc_ValueError, "revlog index out of range");
1266 PyErr_SetString(PyExc_ValueError, "revlog index out of range");
1264 return NULL;
1267 return NULL;
1265 }
1268 }
1266
1269
1267 chain = PyList_New(0);
1270 chain = PyList_New(0);
1268 if (chain == NULL) {
1271 if (chain == NULL) {
1269 return NULL;
1272 return NULL;
1270 }
1273 }
1271
1274
1272 baserev = index_baserev(self, rev);
1275 baserev = index_baserev(self, rev);
1273
1276
1274 /* This should never happen. */
1277 /* This should never happen. */
1275 if (baserev <= -2) {
1278 if (baserev <= -2) {
1276 /* Error should be set by index_deref() */
1279 /* Error should be set by index_deref() */
1277 assert(PyErr_Occurred());
1280 assert(PyErr_Occurred());
1278 goto bail;
1281 goto bail;
1279 }
1282 }
1280
1283
1281 iterrev = rev;
1284 iterrev = rev;
1282
1285
1283 while (iterrev != baserev && iterrev != stoprev) {
1286 while (iterrev != baserev && iterrev != stoprev) {
1284 PyObject *value = PyInt_FromLong(iterrev);
1287 PyObject *value = PyInt_FromLong(iterrev);
1285 if (value == NULL) {
1288 if (value == NULL) {
1286 goto bail;
1289 goto bail;
1287 }
1290 }
1288 if (PyList_Append(chain, value)) {
1291 if (PyList_Append(chain, value)) {
1289 Py_DECREF(value);
1292 Py_DECREF(value);
1290 goto bail;
1293 goto bail;
1291 }
1294 }
1292 Py_DECREF(value);
1295 Py_DECREF(value);
1293
1296
1294 if (generaldelta) {
1297 if (generaldelta) {
1295 iterrev = baserev;
1298 iterrev = baserev;
1296 } else {
1299 } else {
1297 iterrev--;
1300 iterrev--;
1298 }
1301 }
1299
1302
1300 if (iterrev < 0) {
1303 if (iterrev < 0) {
1301 break;
1304 break;
1302 }
1305 }
1303
1306
1304 if (iterrev >= length) {
1307 if (iterrev >= length) {
1305 PyErr_SetString(PyExc_IndexError,
1308 PyErr_SetString(PyExc_IndexError,
1306 "revision outside index");
1309 "revision outside index");
1307 return NULL;
1310 return NULL;
1308 }
1311 }
1309
1312
1310 baserev = index_baserev(self, iterrev);
1313 baserev = index_baserev(self, iterrev);
1311
1314
1312 /* This should never happen. */
1315 /* This should never happen. */
1313 if (baserev <= -2) {
1316 if (baserev <= -2) {
1314 /* Error should be set by index_deref() */
1317 /* Error should be set by index_deref() */
1315 assert(PyErr_Occurred());
1318 assert(PyErr_Occurred());
1316 goto bail;
1319 goto bail;
1317 }
1320 }
1318 }
1321 }
1319
1322
1320 if (iterrev == stoprev) {
1323 if (iterrev == stoprev) {
1321 stopped = 1;
1324 stopped = 1;
1322 } else {
1325 } else {
1323 PyObject *value = PyInt_FromLong(iterrev);
1326 PyObject *value = PyInt_FromLong(iterrev);
1324 if (value == NULL) {
1327 if (value == NULL) {
1325 goto bail;
1328 goto bail;
1326 }
1329 }
1327 if (PyList_Append(chain, value)) {
1330 if (PyList_Append(chain, value)) {
1328 Py_DECREF(value);
1331 Py_DECREF(value);
1329 goto bail;
1332 goto bail;
1330 }
1333 }
1331 Py_DECREF(value);
1334 Py_DECREF(value);
1332
1335
1333 stopped = 0;
1336 stopped = 0;
1334 }
1337 }
1335
1338
1336 if (PyList_Reverse(chain)) {
1339 if (PyList_Reverse(chain)) {
1337 goto bail;
1340 goto bail;
1338 }
1341 }
1339
1342
1340 result = Py_BuildValue("OO", chain, stopped ? Py_True : Py_False);
1343 result = Py_BuildValue("OO", chain, stopped ? Py_True : Py_False);
1341 Py_DECREF(chain);
1344 Py_DECREF(chain);
1342 return result;
1345 return result;
1343
1346
1344 bail:
1347 bail:
1345 Py_DECREF(chain);
1348 Py_DECREF(chain);
1346 return NULL;
1349 return NULL;
1347 }
1350 }
1348
1351
1349 static inline int64_t
1352 static inline int64_t
1350 index_segment_span(indexObject *self, Py_ssize_t start_rev, Py_ssize_t end_rev)
1353 index_segment_span(indexObject *self, Py_ssize_t start_rev, Py_ssize_t end_rev)
1351 {
1354 {
1352 int64_t start_offset;
1355 int64_t start_offset;
1353 int64_t end_offset;
1356 int64_t end_offset;
1354 int end_size;
1357 int end_size;
1355 start_offset = index_get_start(self, start_rev);
1358 start_offset = index_get_start(self, start_rev);
1356 if (start_offset < 0) {
1359 if (start_offset < 0) {
1357 return -1;
1360 return -1;
1358 }
1361 }
1359 end_offset = index_get_start(self, end_rev);
1362 end_offset = index_get_start(self, end_rev);
1360 if (end_offset < 0) {
1363 if (end_offset < 0) {
1361 return -1;
1364 return -1;
1362 }
1365 }
1363 end_size = index_get_length(self, end_rev);
1366 end_size = index_get_length(self, end_rev);
1364 if (end_size < 0) {
1367 if (end_size < 0) {
1365 return -1;
1368 return -1;
1366 }
1369 }
1367 if (end_offset < start_offset) {
1370 if (end_offset < start_offset) {
1368 PyErr_Format(PyExc_ValueError,
1371 PyErr_Format(PyExc_ValueError,
1369 "corrupted revlog index: inconsistent offset "
1372 "corrupted revlog index: inconsistent offset "
1370 "between revisions (%zd) and (%zd)",
1373 "between revisions (%zd) and (%zd)",
1371 start_rev, end_rev);
1374 start_rev, end_rev);
1372 return -1;
1375 return -1;
1373 }
1376 }
1374 return (end_offset - start_offset) + (int64_t)end_size;
1377 return (end_offset - start_offset) + (int64_t)end_size;
1375 }
1378 }
1376
1379
1377 /* returns endidx so that revs[startidx:endidx] has no empty trailing revs */
1380 /* returns endidx so that revs[startidx:endidx] has no empty trailing revs */
1378 static Py_ssize_t trim_endidx(indexObject *self, const Py_ssize_t *revs,
1381 static Py_ssize_t trim_endidx(indexObject *self, const Py_ssize_t *revs,
1379 Py_ssize_t startidx, Py_ssize_t endidx)
1382 Py_ssize_t startidx, Py_ssize_t endidx)
1380 {
1383 {
1381 int length;
1384 int length;
1382 while (endidx > 1 && endidx > startidx) {
1385 while (endidx > 1 && endidx > startidx) {
1383 length = index_get_length(self, revs[endidx - 1]);
1386 length = index_get_length(self, revs[endidx - 1]);
1384 if (length < 0) {
1387 if (length < 0) {
1385 return -1;
1388 return -1;
1386 }
1389 }
1387 if (length != 0) {
1390 if (length != 0) {
1388 break;
1391 break;
1389 }
1392 }
1390 endidx -= 1;
1393 endidx -= 1;
1391 }
1394 }
1392 return endidx;
1395 return endidx;
1393 }
1396 }
1394
1397
1395 struct Gap {
1398 struct Gap {
1396 int64_t size;
1399 int64_t size;
1397 Py_ssize_t idx;
1400 Py_ssize_t idx;
1398 };
1401 };
1399
1402
1400 static int gap_compare(const void *left, const void *right)
1403 static int gap_compare(const void *left, const void *right)
1401 {
1404 {
1402 const struct Gap *l_left = ((const struct Gap *)left);
1405 const struct Gap *l_left = ((const struct Gap *)left);
1403 const struct Gap *l_right = ((const struct Gap *)right);
1406 const struct Gap *l_right = ((const struct Gap *)right);
1404 if (l_left->size < l_right->size) {
1407 if (l_left->size < l_right->size) {
1405 return -1;
1408 return -1;
1406 } else if (l_left->size > l_right->size) {
1409 } else if (l_left->size > l_right->size) {
1407 return 1;
1410 return 1;
1408 }
1411 }
1409 return 0;
1412 return 0;
1410 }
1413 }
1411 static int Py_ssize_t_compare(const void *left, const void *right)
1414 static int Py_ssize_t_compare(const void *left, const void *right)
1412 {
1415 {
1413 const Py_ssize_t l_left = *(const Py_ssize_t *)left;
1416 const Py_ssize_t l_left = *(const Py_ssize_t *)left;
1414 const Py_ssize_t l_right = *(const Py_ssize_t *)right;
1417 const Py_ssize_t l_right = *(const Py_ssize_t *)right;
1415 if (l_left < l_right) {
1418 if (l_left < l_right) {
1416 return -1;
1419 return -1;
1417 } else if (l_left > l_right) {
1420 } else if (l_left > l_right) {
1418 return 1;
1421 return 1;
1419 }
1422 }
1420 return 0;
1423 return 0;
1421 }
1424 }
1422
1425
1423 static PyObject *index_slicechunktodensity(indexObject *self, PyObject *args)
1426 static PyObject *index_slicechunktodensity(indexObject *self, PyObject *args)
1424 {
1427 {
1425 /* method arguments */
1428 /* method arguments */
1426 PyObject *list_revs = NULL; /* revisions in the chain */
1429 PyObject *list_revs = NULL; /* revisions in the chain */
1427 double targetdensity = 0; /* min density to achieve */
1430 double targetdensity = 0; /* min density to achieve */
1428 Py_ssize_t mingapsize = 0; /* threshold to ignore gaps */
1431 Py_ssize_t mingapsize = 0; /* threshold to ignore gaps */
1429
1432
1430 /* other core variables */
1433 /* other core variables */
1431 Py_ssize_t idxlen = index_length(self);
1434 Py_ssize_t idxlen = index_length(self);
1432 Py_ssize_t i; /* used for various iteration */
1435 Py_ssize_t i; /* used for various iteration */
1433 PyObject *result = NULL; /* the final return of the function */
1436 PyObject *result = NULL; /* the final return of the function */
1434
1437
1435 /* generic information about the delta chain being slice */
1438 /* generic information about the delta chain being slice */
1436 Py_ssize_t num_revs = 0; /* size of the full delta chain */
1439 Py_ssize_t num_revs = 0; /* size of the full delta chain */
1437 Py_ssize_t *revs = NULL; /* native array of revision in the chain */
1440 Py_ssize_t *revs = NULL; /* native array of revision in the chain */
1438 int64_t chainpayload = 0; /* sum of all delta in the chain */
1441 int64_t chainpayload = 0; /* sum of all delta in the chain */
1439 int64_t deltachainspan = 0; /* distance from first byte to last byte */
1442 int64_t deltachainspan = 0; /* distance from first byte to last byte */
1440
1443
1441 /* variable used for slicing the delta chain */
1444 /* variable used for slicing the delta chain */
1442 int64_t readdata = 0; /* amount of data currently planned to be read */
1445 int64_t readdata = 0; /* amount of data currently planned to be read */
1443 double density = 0; /* ration of payload data compared to read ones */
1446 double density = 0; /* ration of payload data compared to read ones */
1444 int64_t previous_end;
1447 int64_t previous_end;
1445 struct Gap *gaps = NULL; /* array of notable gap in the chain */
1448 struct Gap *gaps = NULL; /* array of notable gap in the chain */
1446 Py_ssize_t num_gaps =
1449 Py_ssize_t num_gaps =
1447 0; /* total number of notable gap recorded so far */
1450 0; /* total number of notable gap recorded so far */
1448 Py_ssize_t *selected_indices = NULL; /* indices of gap skipped over */
1451 Py_ssize_t *selected_indices = NULL; /* indices of gap skipped over */
1449 Py_ssize_t num_selected = 0; /* number of gaps skipped */
1452 Py_ssize_t num_selected = 0; /* number of gaps skipped */
1450 PyObject *chunk = NULL; /* individual slice */
1453 PyObject *chunk = NULL; /* individual slice */
1451 PyObject *allchunks = NULL; /* all slices */
1454 PyObject *allchunks = NULL; /* all slices */
1452 Py_ssize_t previdx;
1455 Py_ssize_t previdx;
1453
1456
1454 /* parsing argument */
1457 /* parsing argument */
1455 if (!PyArg_ParseTuple(args, "O!dn", &PyList_Type, &list_revs,
1458 if (!PyArg_ParseTuple(args, "O!dn", &PyList_Type, &list_revs,
1456 &targetdensity, &mingapsize)) {
1459 &targetdensity, &mingapsize)) {
1457 goto bail;
1460 goto bail;
1458 }
1461 }
1459
1462
1460 /* If the delta chain contains a single element, we do not need slicing
1463 /* If the delta chain contains a single element, we do not need slicing
1461 */
1464 */
1462 num_revs = PyList_GET_SIZE(list_revs);
1465 num_revs = PyList_GET_SIZE(list_revs);
1463 if (num_revs <= 1) {
1466 if (num_revs <= 1) {
1464 result = PyTuple_Pack(1, list_revs);
1467 result = PyTuple_Pack(1, list_revs);
1465 goto done;
1468 goto done;
1466 }
1469 }
1467
1470
1468 /* Turn the python list into a native integer array (for efficiency) */
1471 /* Turn the python list into a native integer array (for efficiency) */
1469 revs = (Py_ssize_t *)calloc(num_revs, sizeof(Py_ssize_t));
1472 revs = (Py_ssize_t *)calloc(num_revs, sizeof(Py_ssize_t));
1470 if (revs == NULL) {
1473 if (revs == NULL) {
1471 PyErr_NoMemory();
1474 PyErr_NoMemory();
1472 goto bail;
1475 goto bail;
1473 }
1476 }
1474 for (i = 0; i < num_revs; i++) {
1477 for (i = 0; i < num_revs; i++) {
1475 Py_ssize_t revnum = PyInt_AsLong(PyList_GET_ITEM(list_revs, i));
1478 Py_ssize_t revnum = PyInt_AsLong(PyList_GET_ITEM(list_revs, i));
1476 if (revnum == -1 && PyErr_Occurred()) {
1479 if (revnum == -1 && PyErr_Occurred()) {
1477 goto bail;
1480 goto bail;
1478 }
1481 }
1479 if (revnum < nullrev || revnum >= idxlen) {
1482 if (revnum < nullrev || revnum >= idxlen) {
1480 PyErr_Format(PyExc_IndexError,
1483 PyErr_Format(PyExc_IndexError,
1481 "index out of range: %zd", revnum);
1484 "index out of range: %zd", revnum);
1482 goto bail;
1485 goto bail;
1483 }
1486 }
1484 revs[i] = revnum;
1487 revs[i] = revnum;
1485 }
1488 }
1486
1489
1487 /* Compute and check various property of the unsliced delta chain */
1490 /* Compute and check various property of the unsliced delta chain */
1488 deltachainspan = index_segment_span(self, revs[0], revs[num_revs - 1]);
1491 deltachainspan = index_segment_span(self, revs[0], revs[num_revs - 1]);
1489 if (deltachainspan < 0) {
1492 if (deltachainspan < 0) {
1490 goto bail;
1493 goto bail;
1491 }
1494 }
1492
1495
1493 if (deltachainspan <= mingapsize) {
1496 if (deltachainspan <= mingapsize) {
1494 result = PyTuple_Pack(1, list_revs);
1497 result = PyTuple_Pack(1, list_revs);
1495 goto done;
1498 goto done;
1496 }
1499 }
1497 chainpayload = 0;
1500 chainpayload = 0;
1498 for (i = 0; i < num_revs; i++) {
1501 for (i = 0; i < num_revs; i++) {
1499 int tmp = index_get_length(self, revs[i]);
1502 int tmp = index_get_length(self, revs[i]);
1500 if (tmp < 0) {
1503 if (tmp < 0) {
1501 goto bail;
1504 goto bail;
1502 }
1505 }
1503 chainpayload += tmp;
1506 chainpayload += tmp;
1504 }
1507 }
1505
1508
1506 readdata = deltachainspan;
1509 readdata = deltachainspan;
1507 density = 1.0;
1510 density = 1.0;
1508
1511
1509 if (0 < deltachainspan) {
1512 if (0 < deltachainspan) {
1510 density = (double)chainpayload / (double)deltachainspan;
1513 density = (double)chainpayload / (double)deltachainspan;
1511 }
1514 }
1512
1515
1513 if (density >= targetdensity) {
1516 if (density >= targetdensity) {
1514 result = PyTuple_Pack(1, list_revs);
1517 result = PyTuple_Pack(1, list_revs);
1515 goto done;
1518 goto done;
1516 }
1519 }
1517
1520
1518 /* if chain is too sparse, look for relevant gaps */
1521 /* if chain is too sparse, look for relevant gaps */
1519 gaps = (struct Gap *)calloc(num_revs, sizeof(struct Gap));
1522 gaps = (struct Gap *)calloc(num_revs, sizeof(struct Gap));
1520 if (gaps == NULL) {
1523 if (gaps == NULL) {
1521 PyErr_NoMemory();
1524 PyErr_NoMemory();
1522 goto bail;
1525 goto bail;
1523 }
1526 }
1524
1527
1525 previous_end = -1;
1528 previous_end = -1;
1526 for (i = 0; i < num_revs; i++) {
1529 for (i = 0; i < num_revs; i++) {
1527 int64_t revstart;
1530 int64_t revstart;
1528 int revsize;
1531 int revsize;
1529 revstart = index_get_start(self, revs[i]);
1532 revstart = index_get_start(self, revs[i]);
1530 if (revstart < 0) {
1533 if (revstart < 0) {
1531 goto bail;
1534 goto bail;
1532 };
1535 };
1533 revsize = index_get_length(self, revs[i]);
1536 revsize = index_get_length(self, revs[i]);
1534 if (revsize < 0) {
1537 if (revsize < 0) {
1535 goto bail;
1538 goto bail;
1536 };
1539 };
1537 if (revsize == 0) {
1540 if (revsize == 0) {
1538 continue;
1541 continue;
1539 }
1542 }
1540 if (previous_end >= 0) {
1543 if (previous_end >= 0) {
1541 int64_t gapsize = revstart - previous_end;
1544 int64_t gapsize = revstart - previous_end;
1542 if (gapsize > mingapsize) {
1545 if (gapsize > mingapsize) {
1543 gaps[num_gaps].size = gapsize;
1546 gaps[num_gaps].size = gapsize;
1544 gaps[num_gaps].idx = i;
1547 gaps[num_gaps].idx = i;
1545 num_gaps += 1;
1548 num_gaps += 1;
1546 }
1549 }
1547 }
1550 }
1548 previous_end = revstart + revsize;
1551 previous_end = revstart + revsize;
1549 }
1552 }
1550 if (num_gaps == 0) {
1553 if (num_gaps == 0) {
1551 result = PyTuple_Pack(1, list_revs);
1554 result = PyTuple_Pack(1, list_revs);
1552 goto done;
1555 goto done;
1553 }
1556 }
1554 qsort(gaps, num_gaps, sizeof(struct Gap), &gap_compare);
1557 qsort(gaps, num_gaps, sizeof(struct Gap), &gap_compare);
1555
1558
1556 /* Slice the largest gap first, they improve the density the most */
1559 /* Slice the largest gap first, they improve the density the most */
1557 selected_indices =
1560 selected_indices =
1558 (Py_ssize_t *)malloc((num_gaps + 1) * sizeof(Py_ssize_t));
1561 (Py_ssize_t *)malloc((num_gaps + 1) * sizeof(Py_ssize_t));
1559 if (selected_indices == NULL) {
1562 if (selected_indices == NULL) {
1560 PyErr_NoMemory();
1563 PyErr_NoMemory();
1561 goto bail;
1564 goto bail;
1562 }
1565 }
1563
1566
1564 for (i = num_gaps - 1; i >= 0; i--) {
1567 for (i = num_gaps - 1; i >= 0; i--) {
1565 selected_indices[num_selected] = gaps[i].idx;
1568 selected_indices[num_selected] = gaps[i].idx;
1566 readdata -= gaps[i].size;
1569 readdata -= gaps[i].size;
1567 num_selected += 1;
1570 num_selected += 1;
1568 if (readdata <= 0) {
1571 if (readdata <= 0) {
1569 density = 1.0;
1572 density = 1.0;
1570 } else {
1573 } else {
1571 density = (double)chainpayload / (double)readdata;
1574 density = (double)chainpayload / (double)readdata;
1572 }
1575 }
1573 if (density >= targetdensity) {
1576 if (density >= targetdensity) {
1574 break;
1577 break;
1575 }
1578 }
1576 }
1579 }
1577 qsort(selected_indices, num_selected, sizeof(Py_ssize_t),
1580 qsort(selected_indices, num_selected, sizeof(Py_ssize_t),
1578 &Py_ssize_t_compare);
1581 &Py_ssize_t_compare);
1579
1582
1580 /* create the resulting slice */
1583 /* create the resulting slice */
1581 allchunks = PyList_New(0);
1584 allchunks = PyList_New(0);
1582 if (allchunks == NULL) {
1585 if (allchunks == NULL) {
1583 goto bail;
1586 goto bail;
1584 }
1587 }
1585 previdx = 0;
1588 previdx = 0;
1586 selected_indices[num_selected] = num_revs;
1589 selected_indices[num_selected] = num_revs;
1587 for (i = 0; i <= num_selected; i++) {
1590 for (i = 0; i <= num_selected; i++) {
1588 Py_ssize_t idx = selected_indices[i];
1591 Py_ssize_t idx = selected_indices[i];
1589 Py_ssize_t endidx = trim_endidx(self, revs, previdx, idx);
1592 Py_ssize_t endidx = trim_endidx(self, revs, previdx, idx);
1590 if (endidx < 0) {
1593 if (endidx < 0) {
1591 goto bail;
1594 goto bail;
1592 }
1595 }
1593 if (previdx < endidx) {
1596 if (previdx < endidx) {
1594 chunk = PyList_GetSlice(list_revs, previdx, endidx);
1597 chunk = PyList_GetSlice(list_revs, previdx, endidx);
1595 if (chunk == NULL) {
1598 if (chunk == NULL) {
1596 goto bail;
1599 goto bail;
1597 }
1600 }
1598 if (PyList_Append(allchunks, chunk) == -1) {
1601 if (PyList_Append(allchunks, chunk) == -1) {
1599 goto bail;
1602 goto bail;
1600 }
1603 }
1601 Py_DECREF(chunk);
1604 Py_DECREF(chunk);
1602 chunk = NULL;
1605 chunk = NULL;
1603 }
1606 }
1604 previdx = idx;
1607 previdx = idx;
1605 }
1608 }
1606 result = allchunks;
1609 result = allchunks;
1607 goto done;
1610 goto done;
1608
1611
1609 bail:
1612 bail:
1610 Py_XDECREF(allchunks);
1613 Py_XDECREF(allchunks);
1611 Py_XDECREF(chunk);
1614 Py_XDECREF(chunk);
1612 done:
1615 done:
1613 free(revs);
1616 free(revs);
1614 free(gaps);
1617 free(gaps);
1615 free(selected_indices);
1618 free(selected_indices);
1616 return result;
1619 return result;
1617 }
1620 }
1618
1621
1619 static inline int nt_level(const char *node, Py_ssize_t level)
1622 static inline int nt_level(const char *node, Py_ssize_t level)
1620 {
1623 {
1621 int v = node[level >> 1];
1624 int v = node[level >> 1];
1622 if (!(level & 1))
1625 if (!(level & 1))
1623 v >>= 4;
1626 v >>= 4;
1624 return v & 0xf;
1627 return v & 0xf;
1625 }
1628 }
1626
1629
1627 /*
1630 /*
1628 * Return values:
1631 * Return values:
1629 *
1632 *
1630 * -4: match is ambiguous (multiple candidates)
1633 * -4: match is ambiguous (multiple candidates)
1631 * -2: not found
1634 * -2: not found
1632 * rest: valid rev
1635 * rest: valid rev
1633 */
1636 */
1634 static int nt_find(nodetree *self, const char *node, Py_ssize_t nodelen,
1637 static int nt_find(nodetree *self, const char *node, Py_ssize_t nodelen,
1635 int hex)
1638 int hex)
1636 {
1639 {
1637 int (*getnybble)(const char *, Py_ssize_t) = hex ? hexdigit : nt_level;
1640 int (*getnybble)(const char *, Py_ssize_t) = hex ? hexdigit : nt_level;
1638 int level, maxlevel, off;
1641 int level, maxlevel, off;
1639
1642
1640 /* If the input is binary, do a fast check for the nullid first. */
1643 /* If the input is binary, do a fast check for the nullid first. */
1641 if (!hex && nodelen == self->nodelen && node[0] == '\0' &&
1644 if (!hex && nodelen == self->nodelen && node[0] == '\0' &&
1642 node[1] == '\0' && memcmp(node, nullid, self->nodelen) == 0)
1645 node[1] == '\0' && memcmp(node, nullid, self->nodelen) == 0)
1643 return -1;
1646 return -1;
1644
1647
1645 if (hex)
1648 if (hex)
1646 maxlevel = nodelen;
1649 maxlevel = nodelen;
1647 else
1650 else
1648 maxlevel = 2 * nodelen;
1651 maxlevel = 2 * nodelen;
1649 if (maxlevel > 2 * self->nodelen)
1652 if (maxlevel > 2 * self->nodelen)
1650 maxlevel = 2 * self->nodelen;
1653 maxlevel = 2 * self->nodelen;
1651
1654
1652 for (level = off = 0; level < maxlevel; level++) {
1655 for (level = off = 0; level < maxlevel; level++) {
1653 int k = getnybble(node, level);
1656 int k = getnybble(node, level);
1654 nodetreenode *n = &self->nodes[off];
1657 nodetreenode *n = &self->nodes[off];
1655 int v = n->children[k];
1658 int v = n->children[k];
1656
1659
1657 if (v < 0) {
1660 if (v < 0) {
1658 const char *n;
1661 const char *n;
1659 Py_ssize_t i;
1662 Py_ssize_t i;
1660
1663
1661 v = -(v + 2);
1664 v = -(v + 2);
1662 n = index_node(self->index, v);
1665 n = index_node(self->index, v);
1663 if (n == NULL)
1666 if (n == NULL)
1664 return -2;
1667 return -2;
1665 for (i = level; i < maxlevel; i++)
1668 for (i = level; i < maxlevel; i++)
1666 if (getnybble(node, i) != nt_level(n, i))
1669 if (getnybble(node, i) != nt_level(n, i))
1667 return -2;
1670 return -2;
1668 return v;
1671 return v;
1669 }
1672 }
1670 if (v == 0)
1673 if (v == 0)
1671 return -2;
1674 return -2;
1672 off = v;
1675 off = v;
1673 }
1676 }
1674 /* multiple matches against an ambiguous prefix */
1677 /* multiple matches against an ambiguous prefix */
1675 return -4;
1678 return -4;
1676 }
1679 }
1677
1680
1678 static int nt_new(nodetree *self)
1681 static int nt_new(nodetree *self)
1679 {
1682 {
1680 if (self->length == self->capacity) {
1683 if (self->length == self->capacity) {
1681 size_t newcapacity;
1684 size_t newcapacity;
1682 nodetreenode *newnodes;
1685 nodetreenode *newnodes;
1683 newcapacity = self->capacity * 2;
1686 newcapacity = self->capacity * 2;
1684 if (newcapacity >= SIZE_MAX / sizeof(nodetreenode)) {
1687 if (newcapacity >= SIZE_MAX / sizeof(nodetreenode)) {
1685 PyErr_SetString(PyExc_MemoryError,
1688 PyErr_SetString(PyExc_MemoryError,
1686 "overflow in nt_new");
1689 "overflow in nt_new");
1687 return -1;
1690 return -1;
1688 }
1691 }
1689 newnodes =
1692 newnodes =
1690 realloc(self->nodes, newcapacity * sizeof(nodetreenode));
1693 realloc(self->nodes, newcapacity * sizeof(nodetreenode));
1691 if (newnodes == NULL) {
1694 if (newnodes == NULL) {
1692 PyErr_SetString(PyExc_MemoryError, "out of memory");
1695 PyErr_SetString(PyExc_MemoryError, "out of memory");
1693 return -1;
1696 return -1;
1694 }
1697 }
1695 self->capacity = newcapacity;
1698 self->capacity = newcapacity;
1696 self->nodes = newnodes;
1699 self->nodes = newnodes;
1697 memset(&self->nodes[self->length], 0,
1700 memset(&self->nodes[self->length], 0,
1698 sizeof(nodetreenode) * (self->capacity - self->length));
1701 sizeof(nodetreenode) * (self->capacity - self->length));
1699 }
1702 }
1700 return self->length++;
1703 return self->length++;
1701 }
1704 }
1702
1705
1703 static int nt_insert(nodetree *self, const char *node, int rev)
1706 static int nt_insert(nodetree *self, const char *node, int rev)
1704 {
1707 {
1705 int level = 0;
1708 int level = 0;
1706 int off = 0;
1709 int off = 0;
1707
1710
1708 while (level < 2 * self->nodelen) {
1711 while (level < 2 * self->nodelen) {
1709 int k = nt_level(node, level);
1712 int k = nt_level(node, level);
1710 nodetreenode *n;
1713 nodetreenode *n;
1711 int v;
1714 int v;
1712
1715
1713 n = &self->nodes[off];
1716 n = &self->nodes[off];
1714 v = n->children[k];
1717 v = n->children[k];
1715
1718
1716 if (v == 0) {
1719 if (v == 0) {
1717 n->children[k] = -rev - 2;
1720 n->children[k] = -rev - 2;
1718 return 0;
1721 return 0;
1719 }
1722 }
1720 if (v < 0) {
1723 if (v < 0) {
1721 const char *oldnode =
1724 const char *oldnode =
1722 index_node_existing(self->index, -(v + 2));
1725 index_node_existing(self->index, -(v + 2));
1723 int noff;
1726 int noff;
1724
1727
1725 if (oldnode == NULL)
1728 if (oldnode == NULL)
1726 return -1;
1729 return -1;
1727 if (!memcmp(oldnode, node, self->nodelen)) {
1730 if (!memcmp(oldnode, node, self->nodelen)) {
1728 n->children[k] = -rev - 2;
1731 n->children[k] = -rev - 2;
1729 return 0;
1732 return 0;
1730 }
1733 }
1731 noff = nt_new(self);
1734 noff = nt_new(self);
1732 if (noff == -1)
1735 if (noff == -1)
1733 return -1;
1736 return -1;
1734 /* self->nodes may have been changed by realloc */
1737 /* self->nodes may have been changed by realloc */
1735 self->nodes[off].children[k] = noff;
1738 self->nodes[off].children[k] = noff;
1736 off = noff;
1739 off = noff;
1737 n = &self->nodes[off];
1740 n = &self->nodes[off];
1738 n->children[nt_level(oldnode, ++level)] = v;
1741 n->children[nt_level(oldnode, ++level)] = v;
1739 if (level > self->depth)
1742 if (level > self->depth)
1740 self->depth = level;
1743 self->depth = level;
1741 self->splits += 1;
1744 self->splits += 1;
1742 } else {
1745 } else {
1743 level += 1;
1746 level += 1;
1744 off = v;
1747 off = v;
1745 }
1748 }
1746 }
1749 }
1747
1750
1748 return -1;
1751 return -1;
1749 }
1752 }
1750
1753
1751 static PyObject *ntobj_insert(nodetreeObject *self, PyObject *args)
1754 static PyObject *ntobj_insert(nodetreeObject *self, PyObject *args)
1752 {
1755 {
1753 Py_ssize_t rev;
1756 Py_ssize_t rev;
1754 const char *node;
1757 const char *node;
1755 Py_ssize_t length;
1758 Py_ssize_t length;
1756 if (!PyArg_ParseTuple(args, "n", &rev))
1759 if (!PyArg_ParseTuple(args, "n", &rev))
1757 return NULL;
1760 return NULL;
1758 length = index_length(self->nt.index);
1761 length = index_length(self->nt.index);
1759 if (rev < 0 || rev >= length) {
1762 if (rev < 0 || rev >= length) {
1760 PyErr_SetString(PyExc_ValueError, "revlog index out of range");
1763 PyErr_SetString(PyExc_ValueError, "revlog index out of range");
1761 return NULL;
1764 return NULL;
1762 }
1765 }
1763 node = index_node_existing(self->nt.index, rev);
1766 node = index_node_existing(self->nt.index, rev);
1764 if (nt_insert(&self->nt, node, (int)rev) == -1)
1767 if (nt_insert(&self->nt, node, (int)rev) == -1)
1765 return NULL;
1768 return NULL;
1766 Py_RETURN_NONE;
1769 Py_RETURN_NONE;
1767 }
1770 }
1768
1771
1769 static int nt_delete_node(nodetree *self, const char *node)
1772 static int nt_delete_node(nodetree *self, const char *node)
1770 {
1773 {
1771 /* rev==-2 happens to get encoded as 0, which is interpreted as not set
1774 /* rev==-2 happens to get encoded as 0, which is interpreted as not set
1772 */
1775 */
1773 return nt_insert(self, node, -2);
1776 return nt_insert(self, node, -2);
1774 }
1777 }
1775
1778
1776 static int nt_init(nodetree *self, indexObject *index, unsigned capacity)
1779 static int nt_init(nodetree *self, indexObject *index, unsigned capacity)
1777 {
1780 {
1778 /* Initialize before overflow-checking to avoid nt_dealloc() crash. */
1781 /* Initialize before overflow-checking to avoid nt_dealloc() crash. */
1779 self->nodes = NULL;
1782 self->nodes = NULL;
1780
1783
1781 self->index = index;
1784 self->index = index;
1782 /* The input capacity is in terms of revisions, while the field is in
1785 /* The input capacity is in terms of revisions, while the field is in
1783 * terms of nodetree nodes. */
1786 * terms of nodetree nodes. */
1784 self->capacity = (capacity < 4 ? 4 : capacity / 2);
1787 self->capacity = (capacity < 4 ? 4 : capacity / 2);
1785 self->nodelen = index->nodelen;
1788 self->nodelen = index->nodelen;
1786 self->depth = 0;
1789 self->depth = 0;
1787 self->splits = 0;
1790 self->splits = 0;
1788 if (self->capacity > SIZE_MAX / sizeof(nodetreenode)) {
1791 if (self->capacity > SIZE_MAX / sizeof(nodetreenode)) {
1789 PyErr_SetString(PyExc_ValueError, "overflow in init_nt");
1792 PyErr_SetString(PyExc_ValueError, "overflow in init_nt");
1790 return -1;
1793 return -1;
1791 }
1794 }
1792 self->nodes = calloc(self->capacity, sizeof(nodetreenode));
1795 self->nodes = calloc(self->capacity, sizeof(nodetreenode));
1793 if (self->nodes == NULL) {
1796 if (self->nodes == NULL) {
1794 PyErr_NoMemory();
1797 PyErr_NoMemory();
1795 return -1;
1798 return -1;
1796 }
1799 }
1797 self->length = 1;
1800 self->length = 1;
1798 return 0;
1801 return 0;
1799 }
1802 }
1800
1803
1801 static int ntobj_init(nodetreeObject *self, PyObject *args)
1804 static int ntobj_init(nodetreeObject *self, PyObject *args)
1802 {
1805 {
1803 PyObject *index;
1806 PyObject *index;
1804 unsigned capacity;
1807 unsigned capacity;
1805 if (!PyArg_ParseTuple(args, "O!I", &HgRevlogIndex_Type, &index,
1808 if (!PyArg_ParseTuple(args, "O!I", &HgRevlogIndex_Type, &index,
1806 &capacity))
1809 &capacity))
1807 return -1;
1810 return -1;
1808 Py_INCREF(index);
1811 Py_INCREF(index);
1809 return nt_init(&self->nt, (indexObject *)index, capacity);
1812 return nt_init(&self->nt, (indexObject *)index, capacity);
1810 }
1813 }
1811
1814
1812 static int nt_partialmatch(nodetree *self, const char *node, Py_ssize_t nodelen)
1815 static int nt_partialmatch(nodetree *self, const char *node, Py_ssize_t nodelen)
1813 {
1816 {
1814 return nt_find(self, node, nodelen, 1);
1817 return nt_find(self, node, nodelen, 1);
1815 }
1818 }
1816
1819
1817 /*
1820 /*
1818 * Find the length of the shortest unique prefix of node.
1821 * Find the length of the shortest unique prefix of node.
1819 *
1822 *
1820 * Return values:
1823 * Return values:
1821 *
1824 *
1822 * -3: error (exception set)
1825 * -3: error (exception set)
1823 * -2: not found (no exception set)
1826 * -2: not found (no exception set)
1824 * rest: length of shortest prefix
1827 * rest: length of shortest prefix
1825 */
1828 */
1826 static int nt_shortest(nodetree *self, const char *node)
1829 static int nt_shortest(nodetree *self, const char *node)
1827 {
1830 {
1828 int level, off;
1831 int level, off;
1829
1832
1830 for (level = off = 0; level < 2 * self->nodelen; level++) {
1833 for (level = off = 0; level < 2 * self->nodelen; level++) {
1831 int k, v;
1834 int k, v;
1832 nodetreenode *n = &self->nodes[off];
1835 nodetreenode *n = &self->nodes[off];
1833 k = nt_level(node, level);
1836 k = nt_level(node, level);
1834 v = n->children[k];
1837 v = n->children[k];
1835 if (v < 0) {
1838 if (v < 0) {
1836 const char *n;
1839 const char *n;
1837 v = -(v + 2);
1840 v = -(v + 2);
1838 n = index_node_existing(self->index, v);
1841 n = index_node_existing(self->index, v);
1839 if (n == NULL)
1842 if (n == NULL)
1840 return -3;
1843 return -3;
1841 if (memcmp(node, n, self->nodelen) != 0)
1844 if (memcmp(node, n, self->nodelen) != 0)
1842 /*
1845 /*
1843 * Found a unique prefix, but it wasn't for the
1846 * Found a unique prefix, but it wasn't for the
1844 * requested node (i.e the requested node does
1847 * requested node (i.e the requested node does
1845 * not exist).
1848 * not exist).
1846 */
1849 */
1847 return -2;
1850 return -2;
1848 return level + 1;
1851 return level + 1;
1849 }
1852 }
1850 if (v == 0)
1853 if (v == 0)
1851 return -2;
1854 return -2;
1852 off = v;
1855 off = v;
1853 }
1856 }
1854 /*
1857 /*
1855 * The node was still not unique after 40 hex digits, so this won't
1858 * The node was still not unique after 40 hex digits, so this won't
1856 * happen. Also, if we get here, then there's a programming error in
1859 * happen. Also, if we get here, then there's a programming error in
1857 * this file that made us insert a node longer than 40 hex digits.
1860 * this file that made us insert a node longer than 40 hex digits.
1858 */
1861 */
1859 PyErr_SetString(PyExc_Exception, "broken node tree");
1862 PyErr_SetString(PyExc_Exception, "broken node tree");
1860 return -3;
1863 return -3;
1861 }
1864 }
1862
1865
1863 static PyObject *ntobj_shortest(nodetreeObject *self, PyObject *args)
1866 static PyObject *ntobj_shortest(nodetreeObject *self, PyObject *args)
1864 {
1867 {
1865 PyObject *val;
1868 PyObject *val;
1866 char *node;
1869 char *node;
1867 int length;
1870 int length;
1868
1871
1869 if (!PyArg_ParseTuple(args, "O", &val))
1872 if (!PyArg_ParseTuple(args, "O", &val))
1870 return NULL;
1873 return NULL;
1871 if (node_check(self->nt.nodelen, val, &node) == -1)
1874 if (node_check(self->nt.nodelen, val, &node) == -1)
1872 return NULL;
1875 return NULL;
1873
1876
1874 length = nt_shortest(&self->nt, node);
1877 length = nt_shortest(&self->nt, node);
1875 if (length == -3)
1878 if (length == -3)
1876 return NULL;
1879 return NULL;
1877 if (length == -2) {
1880 if (length == -2) {
1878 raise_revlog_error();
1881 raise_revlog_error();
1879 return NULL;
1882 return NULL;
1880 }
1883 }
1881 return PyInt_FromLong(length);
1884 return PyInt_FromLong(length);
1882 }
1885 }
1883
1886
1884 static void nt_dealloc(nodetree *self)
1887 static void nt_dealloc(nodetree *self)
1885 {
1888 {
1886 free(self->nodes);
1889 free(self->nodes);
1887 self->nodes = NULL;
1890 self->nodes = NULL;
1888 }
1891 }
1889
1892
1890 static void ntobj_dealloc(nodetreeObject *self)
1893 static void ntobj_dealloc(nodetreeObject *self)
1891 {
1894 {
1892 Py_XDECREF(self->nt.index);
1895 Py_XDECREF(self->nt.index);
1893 nt_dealloc(&self->nt);
1896 nt_dealloc(&self->nt);
1894 PyObject_Del(self);
1897 PyObject_Del(self);
1895 }
1898 }
1896
1899
1897 static PyMethodDef ntobj_methods[] = {
1900 static PyMethodDef ntobj_methods[] = {
1898 {"insert", (PyCFunction)ntobj_insert, METH_VARARGS,
1901 {"insert", (PyCFunction)ntobj_insert, METH_VARARGS,
1899 "insert an index entry"},
1902 "insert an index entry"},
1900 {"shortest", (PyCFunction)ntobj_shortest, METH_VARARGS,
1903 {"shortest", (PyCFunction)ntobj_shortest, METH_VARARGS,
1901 "find length of shortest hex nodeid of a binary ID"},
1904 "find length of shortest hex nodeid of a binary ID"},
1902 {NULL} /* Sentinel */
1905 {NULL} /* Sentinel */
1903 };
1906 };
1904
1907
1905 static PyTypeObject nodetreeType = {
1908 static PyTypeObject nodetreeType = {
1906 PyVarObject_HEAD_INIT(NULL, 0) /* header */
1909 PyVarObject_HEAD_INIT(NULL, 0) /* header */
1907 "parsers.nodetree", /* tp_name */
1910 "parsers.nodetree", /* tp_name */
1908 sizeof(nodetreeObject), /* tp_basicsize */
1911 sizeof(nodetreeObject), /* tp_basicsize */
1909 0, /* tp_itemsize */
1912 0, /* tp_itemsize */
1910 (destructor)ntobj_dealloc, /* tp_dealloc */
1913 (destructor)ntobj_dealloc, /* tp_dealloc */
1911 0, /* tp_print */
1914 0, /* tp_print */
1912 0, /* tp_getattr */
1915 0, /* tp_getattr */
1913 0, /* tp_setattr */
1916 0, /* tp_setattr */
1914 0, /* tp_compare */
1917 0, /* tp_compare */
1915 0, /* tp_repr */
1918 0, /* tp_repr */
1916 0, /* tp_as_number */
1919 0, /* tp_as_number */
1917 0, /* tp_as_sequence */
1920 0, /* tp_as_sequence */
1918 0, /* tp_as_mapping */
1921 0, /* tp_as_mapping */
1919 0, /* tp_hash */
1922 0, /* tp_hash */
1920 0, /* tp_call */
1923 0, /* tp_call */
1921 0, /* tp_str */
1924 0, /* tp_str */
1922 0, /* tp_getattro */
1925 0, /* tp_getattro */
1923 0, /* tp_setattro */
1926 0, /* tp_setattro */
1924 0, /* tp_as_buffer */
1927 0, /* tp_as_buffer */
1925 Py_TPFLAGS_DEFAULT, /* tp_flags */
1928 Py_TPFLAGS_DEFAULT, /* tp_flags */
1926 "nodetree", /* tp_doc */
1929 "nodetree", /* tp_doc */
1927 0, /* tp_traverse */
1930 0, /* tp_traverse */
1928 0, /* tp_clear */
1931 0, /* tp_clear */
1929 0, /* tp_richcompare */
1932 0, /* tp_richcompare */
1930 0, /* tp_weaklistoffset */
1933 0, /* tp_weaklistoffset */
1931 0, /* tp_iter */
1934 0, /* tp_iter */
1932 0, /* tp_iternext */
1935 0, /* tp_iternext */
1933 ntobj_methods, /* tp_methods */
1936 ntobj_methods, /* tp_methods */
1934 0, /* tp_members */
1937 0, /* tp_members */
1935 0, /* tp_getset */
1938 0, /* tp_getset */
1936 0, /* tp_base */
1939 0, /* tp_base */
1937 0, /* tp_dict */
1940 0, /* tp_dict */
1938 0, /* tp_descr_get */
1941 0, /* tp_descr_get */
1939 0, /* tp_descr_set */
1942 0, /* tp_descr_set */
1940 0, /* tp_dictoffset */
1943 0, /* tp_dictoffset */
1941 (initproc)ntobj_init, /* tp_init */
1944 (initproc)ntobj_init, /* tp_init */
1942 0, /* tp_alloc */
1945 0, /* tp_alloc */
1943 };
1946 };
1944
1947
1945 static int index_init_nt(indexObject *self)
1948 static int index_init_nt(indexObject *self)
1946 {
1949 {
1947 if (!self->ntinitialized) {
1950 if (!self->ntinitialized) {
1948 if (nt_init(&self->nt, self, (int)self->length) == -1) {
1951 if (nt_init(&self->nt, self, (int)self->length) == -1) {
1949 nt_dealloc(&self->nt);
1952 nt_dealloc(&self->nt);
1950 return -1;
1953 return -1;
1951 }
1954 }
1952 if (nt_insert(&self->nt, nullid, -1) == -1) {
1955 if (nt_insert(&self->nt, nullid, -1) == -1) {
1953 nt_dealloc(&self->nt);
1956 nt_dealloc(&self->nt);
1954 return -1;
1957 return -1;
1955 }
1958 }
1956 self->ntinitialized = 1;
1959 self->ntinitialized = 1;
1957 self->ntrev = (int)index_length(self);
1960 self->ntrev = (int)index_length(self);
1958 self->ntlookups = 1;
1961 self->ntlookups = 1;
1959 self->ntmisses = 0;
1962 self->ntmisses = 0;
1960 }
1963 }
1961 return 0;
1964 return 0;
1962 }
1965 }
1963
1966
1964 /*
1967 /*
1965 * Return values:
1968 * Return values:
1966 *
1969 *
1967 * -3: error (exception set)
1970 * -3: error (exception set)
1968 * -2: not found (no exception set)
1971 * -2: not found (no exception set)
1969 * rest: valid rev
1972 * rest: valid rev
1970 */
1973 */
1971 static int index_find_node(indexObject *self, const char *node)
1974 static int index_find_node(indexObject *self, const char *node)
1972 {
1975 {
1973 int rev;
1976 int rev;
1974
1977
1975 if (index_init_nt(self) == -1)
1978 if (index_init_nt(self) == -1)
1976 return -3;
1979 return -3;
1977
1980
1978 self->ntlookups++;
1981 self->ntlookups++;
1979 rev = nt_find(&self->nt, node, self->nodelen, 0);
1982 rev = nt_find(&self->nt, node, self->nodelen, 0);
1980 if (rev >= -1)
1983 if (rev >= -1)
1981 return rev;
1984 return rev;
1982
1985
1983 /*
1986 /*
1984 * For the first handful of lookups, we scan the entire index,
1987 * For the first handful of lookups, we scan the entire index,
1985 * and cache only the matching nodes. This optimizes for cases
1988 * and cache only the matching nodes. This optimizes for cases
1986 * like "hg tip", where only a few nodes are accessed.
1989 * like "hg tip", where only a few nodes are accessed.
1987 *
1990 *
1988 * After that, we cache every node we visit, using a single
1991 * After that, we cache every node we visit, using a single
1989 * scan amortized over multiple lookups. This gives the best
1992 * scan amortized over multiple lookups. This gives the best
1990 * bulk performance, e.g. for "hg log".
1993 * bulk performance, e.g. for "hg log".
1991 */
1994 */
1992 if (self->ntmisses++ < 4) {
1995 if (self->ntmisses++ < 4) {
1993 for (rev = self->ntrev - 1; rev >= 0; rev--) {
1996 for (rev = self->ntrev - 1; rev >= 0; rev--) {
1994 const char *n = index_node_existing(self, rev);
1997 const char *n = index_node_existing(self, rev);
1995 if (n == NULL)
1998 if (n == NULL)
1996 return -3;
1999 return -3;
1997 if (memcmp(node, n, self->nodelen) == 0) {
2000 if (memcmp(node, n, self->nodelen) == 0) {
1998 if (nt_insert(&self->nt, n, rev) == -1)
2001 if (nt_insert(&self->nt, n, rev) == -1)
1999 return -3;
2002 return -3;
2000 break;
2003 break;
2001 }
2004 }
2002 }
2005 }
2003 } else {
2006 } else {
2004 for (rev = self->ntrev - 1; rev >= 0; rev--) {
2007 for (rev = self->ntrev - 1; rev >= 0; rev--) {
2005 const char *n = index_node_existing(self, rev);
2008 const char *n = index_node_existing(self, rev);
2006 if (n == NULL)
2009 if (n == NULL)
2007 return -3;
2010 return -3;
2008 if (nt_insert(&self->nt, n, rev) == -1) {
2011 if (nt_insert(&self->nt, n, rev) == -1) {
2009 self->ntrev = rev + 1;
2012 self->ntrev = rev + 1;
2010 return -3;
2013 return -3;
2011 }
2014 }
2012 if (memcmp(node, n, self->nodelen) == 0) {
2015 if (memcmp(node, n, self->nodelen) == 0) {
2013 break;
2016 break;
2014 }
2017 }
2015 }
2018 }
2016 self->ntrev = rev;
2019 self->ntrev = rev;
2017 }
2020 }
2018
2021
2019 if (rev >= 0)
2022 if (rev >= 0)
2020 return rev;
2023 return rev;
2021 return -2;
2024 return -2;
2022 }
2025 }
2023
2026
2024 static PyObject *index_getitem(indexObject *self, PyObject *value)
2027 static PyObject *index_getitem(indexObject *self, PyObject *value)
2025 {
2028 {
2026 char *node;
2029 char *node;
2027 int rev;
2030 int rev;
2028
2031
2029 if (PyInt_Check(value)) {
2032 if (PyInt_Check(value)) {
2030 long idx;
2033 long idx;
2031 if (!pylong_to_long(value, &idx)) {
2034 if (!pylong_to_long(value, &idx)) {
2032 return NULL;
2035 return NULL;
2033 }
2036 }
2034 return index_get(self, idx);
2037 return index_get(self, idx);
2035 }
2038 }
2036
2039
2037 if (node_check(self->nodelen, value, &node) == -1)
2040 if (node_check(self->nodelen, value, &node) == -1)
2038 return NULL;
2041 return NULL;
2039 rev = index_find_node(self, node);
2042 rev = index_find_node(self, node);
2040 if (rev >= -1)
2043 if (rev >= -1)
2041 return PyInt_FromLong(rev);
2044 return PyInt_FromLong(rev);
2042 if (rev == -2)
2045 if (rev == -2)
2043 raise_revlog_error();
2046 raise_revlog_error();
2044 return NULL;
2047 return NULL;
2045 }
2048 }
2046
2049
2047 /*
2050 /*
2048 * Fully populate the radix tree.
2051 * Fully populate the radix tree.
2049 */
2052 */
2050 static int index_populate_nt(indexObject *self)
2053 static int index_populate_nt(indexObject *self)
2051 {
2054 {
2052 int rev;
2055 int rev;
2053 if (self->ntrev > 0) {
2056 if (self->ntrev > 0) {
2054 for (rev = self->ntrev - 1; rev >= 0; rev--) {
2057 for (rev = self->ntrev - 1; rev >= 0; rev--) {
2055 const char *n = index_node_existing(self, rev);
2058 const char *n = index_node_existing(self, rev);
2056 if (n == NULL)
2059 if (n == NULL)
2057 return -1;
2060 return -1;
2058 if (nt_insert(&self->nt, n, rev) == -1)
2061 if (nt_insert(&self->nt, n, rev) == -1)
2059 return -1;
2062 return -1;
2060 }
2063 }
2061 self->ntrev = -1;
2064 self->ntrev = -1;
2062 }
2065 }
2063 return 0;
2066 return 0;
2064 }
2067 }
2065
2068
2066 static PyObject *index_partialmatch(indexObject *self, PyObject *args)
2069 static PyObject *index_partialmatch(indexObject *self, PyObject *args)
2067 {
2070 {
2068 const char *fullnode;
2071 const char *fullnode;
2069 Py_ssize_t nodelen;
2072 Py_ssize_t nodelen;
2070 char *node;
2073 char *node;
2071 int rev, i;
2074 int rev, i;
2072
2075
2073 if (!PyArg_ParseTuple(args, PY23("s#", "y#"), &node, &nodelen))
2076 if (!PyArg_ParseTuple(args, PY23("s#", "y#"), &node, &nodelen))
2074 return NULL;
2077 return NULL;
2075
2078
2076 if (nodelen < 1) {
2079 if (nodelen < 1) {
2077 PyErr_SetString(PyExc_ValueError, "key too short");
2080 PyErr_SetString(PyExc_ValueError, "key too short");
2078 return NULL;
2081 return NULL;
2079 }
2082 }
2080
2083
2081 if (nodelen > 2 * self->nodelen) {
2084 if (nodelen > 2 * self->nodelen) {
2082 PyErr_SetString(PyExc_ValueError, "key too long");
2085 PyErr_SetString(PyExc_ValueError, "key too long");
2083 return NULL;
2086 return NULL;
2084 }
2087 }
2085
2088
2086 for (i = 0; i < nodelen; i++)
2089 for (i = 0; i < nodelen; i++)
2087 hexdigit(node, i);
2090 hexdigit(node, i);
2088 if (PyErr_Occurred()) {
2091 if (PyErr_Occurred()) {
2089 /* input contains non-hex characters */
2092 /* input contains non-hex characters */
2090 PyErr_Clear();
2093 PyErr_Clear();
2091 Py_RETURN_NONE;
2094 Py_RETURN_NONE;
2092 }
2095 }
2093
2096
2094 if (index_init_nt(self) == -1)
2097 if (index_init_nt(self) == -1)
2095 return NULL;
2098 return NULL;
2096 if (index_populate_nt(self) == -1)
2099 if (index_populate_nt(self) == -1)
2097 return NULL;
2100 return NULL;
2098 rev = nt_partialmatch(&self->nt, node, nodelen);
2101 rev = nt_partialmatch(&self->nt, node, nodelen);
2099
2102
2100 switch (rev) {
2103 switch (rev) {
2101 case -4:
2104 case -4:
2102 raise_revlog_error();
2105 raise_revlog_error();
2103 return NULL;
2106 return NULL;
2104 case -2:
2107 case -2:
2105 Py_RETURN_NONE;
2108 Py_RETURN_NONE;
2106 case -1:
2109 case -1:
2107 return PyBytes_FromStringAndSize(nullid, self->nodelen);
2110 return PyBytes_FromStringAndSize(nullid, self->nodelen);
2108 }
2111 }
2109
2112
2110 fullnode = index_node_existing(self, rev);
2113 fullnode = index_node_existing(self, rev);
2111 if (fullnode == NULL) {
2114 if (fullnode == NULL) {
2112 return NULL;
2115 return NULL;
2113 }
2116 }
2114 return PyBytes_FromStringAndSize(fullnode, self->nodelen);
2117 return PyBytes_FromStringAndSize(fullnode, self->nodelen);
2115 }
2118 }
2116
2119
2117 static PyObject *index_shortest(indexObject *self, PyObject *args)
2120 static PyObject *index_shortest(indexObject *self, PyObject *args)
2118 {
2121 {
2119 PyObject *val;
2122 PyObject *val;
2120 char *node;
2123 char *node;
2121 int length;
2124 int length;
2122
2125
2123 if (!PyArg_ParseTuple(args, "O", &val))
2126 if (!PyArg_ParseTuple(args, "O", &val))
2124 return NULL;
2127 return NULL;
2125 if (node_check(self->nodelen, val, &node) == -1)
2128 if (node_check(self->nodelen, val, &node) == -1)
2126 return NULL;
2129 return NULL;
2127
2130
2128 self->ntlookups++;
2131 self->ntlookups++;
2129 if (index_init_nt(self) == -1)
2132 if (index_init_nt(self) == -1)
2130 return NULL;
2133 return NULL;
2131 if (index_populate_nt(self) == -1)
2134 if (index_populate_nt(self) == -1)
2132 return NULL;
2135 return NULL;
2133 length = nt_shortest(&self->nt, node);
2136 length = nt_shortest(&self->nt, node);
2134 if (length == -3)
2137 if (length == -3)
2135 return NULL;
2138 return NULL;
2136 if (length == -2) {
2139 if (length == -2) {
2137 raise_revlog_error();
2140 raise_revlog_error();
2138 return NULL;
2141 return NULL;
2139 }
2142 }
2140 return PyInt_FromLong(length);
2143 return PyInt_FromLong(length);
2141 }
2144 }
2142
2145
2143 static PyObject *index_m_get(indexObject *self, PyObject *args)
2146 static PyObject *index_m_get(indexObject *self, PyObject *args)
2144 {
2147 {
2145 PyObject *val;
2148 PyObject *val;
2146 char *node;
2149 char *node;
2147 int rev;
2150 int rev;
2148
2151
2149 if (!PyArg_ParseTuple(args, "O", &val))
2152 if (!PyArg_ParseTuple(args, "O", &val))
2150 return NULL;
2153 return NULL;
2151 if (node_check(self->nodelen, val, &node) == -1)
2154 if (node_check(self->nodelen, val, &node) == -1)
2152 return NULL;
2155 return NULL;
2153 rev = index_find_node(self, node);
2156 rev = index_find_node(self, node);
2154 if (rev == -3)
2157 if (rev == -3)
2155 return NULL;
2158 return NULL;
2156 if (rev == -2)
2159 if (rev == -2)
2157 Py_RETURN_NONE;
2160 Py_RETURN_NONE;
2158 return PyInt_FromLong(rev);
2161 return PyInt_FromLong(rev);
2159 }
2162 }
2160
2163
2161 static int index_contains(indexObject *self, PyObject *value)
2164 static int index_contains(indexObject *self, PyObject *value)
2162 {
2165 {
2163 char *node;
2166 char *node;
2164
2167
2165 if (PyInt_Check(value)) {
2168 if (PyInt_Check(value)) {
2166 long rev;
2169 long rev;
2167 if (!pylong_to_long(value, &rev)) {
2170 if (!pylong_to_long(value, &rev)) {
2168 return -1;
2171 return -1;
2169 }
2172 }
2170 return rev >= -1 && rev < index_length(self);
2173 return rev >= -1 && rev < index_length(self);
2171 }
2174 }
2172
2175
2173 if (node_check(self->nodelen, value, &node) == -1)
2176 if (node_check(self->nodelen, value, &node) == -1)
2174 return -1;
2177 return -1;
2175
2178
2176 switch (index_find_node(self, node)) {
2179 switch (index_find_node(self, node)) {
2177 case -3:
2180 case -3:
2178 return -1;
2181 return -1;
2179 case -2:
2182 case -2:
2180 return 0;
2183 return 0;
2181 default:
2184 default:
2182 return 1;
2185 return 1;
2183 }
2186 }
2184 }
2187 }
2185
2188
2186 static PyObject *index_m_has_node(indexObject *self, PyObject *args)
2189 static PyObject *index_m_has_node(indexObject *self, PyObject *args)
2187 {
2190 {
2188 int ret = index_contains(self, args);
2191 int ret = index_contains(self, args);
2189 if (ret < 0)
2192 if (ret < 0)
2190 return NULL;
2193 return NULL;
2191 return PyBool_FromLong((long)ret);
2194 return PyBool_FromLong((long)ret);
2192 }
2195 }
2193
2196
2194 static PyObject *index_m_rev(indexObject *self, PyObject *val)
2197 static PyObject *index_m_rev(indexObject *self, PyObject *val)
2195 {
2198 {
2196 char *node;
2199 char *node;
2197 int rev;
2200 int rev;
2198
2201
2199 if (node_check(self->nodelen, val, &node) == -1)
2202 if (node_check(self->nodelen, val, &node) == -1)
2200 return NULL;
2203 return NULL;
2201 rev = index_find_node(self, node);
2204 rev = index_find_node(self, node);
2202 if (rev >= -1)
2205 if (rev >= -1)
2203 return PyInt_FromLong(rev);
2206 return PyInt_FromLong(rev);
2204 if (rev == -2)
2207 if (rev == -2)
2205 raise_revlog_error();
2208 raise_revlog_error();
2206 return NULL;
2209 return NULL;
2207 }
2210 }
2208
2211
2209 typedef uint64_t bitmask;
2212 typedef uint64_t bitmask;
2210
2213
2211 /*
2214 /*
2212 * Given a disjoint set of revs, return all candidates for the
2215 * Given a disjoint set of revs, return all candidates for the
2213 * greatest common ancestor. In revset notation, this is the set
2216 * greatest common ancestor. In revset notation, this is the set
2214 * "heads(::a and ::b and ...)"
2217 * "heads(::a and ::b and ...)"
2215 */
2218 */
2216 static PyObject *find_gca_candidates(indexObject *self, const int *revs,
2219 static PyObject *find_gca_candidates(indexObject *self, const int *revs,
2217 int revcount)
2220 int revcount)
2218 {
2221 {
2219 const bitmask allseen = (1ull << revcount) - 1;
2222 const bitmask allseen = (1ull << revcount) - 1;
2220 const bitmask poison = 1ull << revcount;
2223 const bitmask poison = 1ull << revcount;
2221 PyObject *gca = PyList_New(0);
2224 PyObject *gca = PyList_New(0);
2222 int i, v, interesting;
2225 int i, v, interesting;
2223 int maxrev = -1;
2226 int maxrev = -1;
2224 bitmask sp;
2227 bitmask sp;
2225 bitmask *seen;
2228 bitmask *seen;
2226
2229
2227 if (gca == NULL)
2230 if (gca == NULL)
2228 return PyErr_NoMemory();
2231 return PyErr_NoMemory();
2229
2232
2230 for (i = 0; i < revcount; i++) {
2233 for (i = 0; i < revcount; i++) {
2231 if (revs[i] > maxrev)
2234 if (revs[i] > maxrev)
2232 maxrev = revs[i];
2235 maxrev = revs[i];
2233 }
2236 }
2234
2237
2235 seen = calloc(sizeof(*seen), maxrev + 1);
2238 seen = calloc(sizeof(*seen), maxrev + 1);
2236 if (seen == NULL) {
2239 if (seen == NULL) {
2237 Py_DECREF(gca);
2240 Py_DECREF(gca);
2238 return PyErr_NoMemory();
2241 return PyErr_NoMemory();
2239 }
2242 }
2240
2243
2241 for (i = 0; i < revcount; i++)
2244 for (i = 0; i < revcount; i++)
2242 seen[revs[i]] = 1ull << i;
2245 seen[revs[i]] = 1ull << i;
2243
2246
2244 interesting = revcount;
2247 interesting = revcount;
2245
2248
2246 for (v = maxrev; v >= 0 && interesting; v--) {
2249 for (v = maxrev; v >= 0 && interesting; v--) {
2247 bitmask sv = seen[v];
2250 bitmask sv = seen[v];
2248 int parents[2];
2251 int parents[2];
2249
2252
2250 if (!sv)
2253 if (!sv)
2251 continue;
2254 continue;
2252
2255
2253 if (sv < poison) {
2256 if (sv < poison) {
2254 interesting -= 1;
2257 interesting -= 1;
2255 if (sv == allseen) {
2258 if (sv == allseen) {
2256 PyObject *obj = PyInt_FromLong(v);
2259 PyObject *obj = PyInt_FromLong(v);
2257 if (obj == NULL)
2260 if (obj == NULL)
2258 goto bail;
2261 goto bail;
2259 if (PyList_Append(gca, obj) == -1) {
2262 if (PyList_Append(gca, obj) == -1) {
2260 Py_DECREF(obj);
2263 Py_DECREF(obj);
2261 goto bail;
2264 goto bail;
2262 }
2265 }
2263 sv |= poison;
2266 sv |= poison;
2264 for (i = 0; i < revcount; i++) {
2267 for (i = 0; i < revcount; i++) {
2265 if (revs[i] == v)
2268 if (revs[i] == v)
2266 goto done;
2269 goto done;
2267 }
2270 }
2268 }
2271 }
2269 }
2272 }
2270 if (index_get_parents(self, v, parents, maxrev) < 0)
2273 if (index_get_parents(self, v, parents, maxrev) < 0)
2271 goto bail;
2274 goto bail;
2272
2275
2273 for (i = 0; i < 2; i++) {
2276 for (i = 0; i < 2; i++) {
2274 int p = parents[i];
2277 int p = parents[i];
2275 if (p == -1)
2278 if (p == -1)
2276 continue;
2279 continue;
2277 sp = seen[p];
2280 sp = seen[p];
2278 if (sv < poison) {
2281 if (sv < poison) {
2279 if (sp == 0) {
2282 if (sp == 0) {
2280 seen[p] = sv;
2283 seen[p] = sv;
2281 interesting++;
2284 interesting++;
2282 } else if (sp != sv)
2285 } else if (sp != sv)
2283 seen[p] |= sv;
2286 seen[p] |= sv;
2284 } else {
2287 } else {
2285 if (sp && sp < poison)
2288 if (sp && sp < poison)
2286 interesting--;
2289 interesting--;
2287 seen[p] = sv;
2290 seen[p] = sv;
2288 }
2291 }
2289 }
2292 }
2290 }
2293 }
2291
2294
2292 done:
2295 done:
2293 free(seen);
2296 free(seen);
2294 return gca;
2297 return gca;
2295 bail:
2298 bail:
2296 free(seen);
2299 free(seen);
2297 Py_XDECREF(gca);
2300 Py_XDECREF(gca);
2298 return NULL;
2301 return NULL;
2299 }
2302 }
2300
2303
2301 /*
2304 /*
2302 * Given a disjoint set of revs, return the subset with the longest
2305 * Given a disjoint set of revs, return the subset with the longest
2303 * path to the root.
2306 * path to the root.
2304 */
2307 */
2305 static PyObject *find_deepest(indexObject *self, PyObject *revs)
2308 static PyObject *find_deepest(indexObject *self, PyObject *revs)
2306 {
2309 {
2307 const Py_ssize_t revcount = PyList_GET_SIZE(revs);
2310 const Py_ssize_t revcount = PyList_GET_SIZE(revs);
2308 static const Py_ssize_t capacity = 24;
2311 static const Py_ssize_t capacity = 24;
2309 int *depth, *interesting = NULL;
2312 int *depth, *interesting = NULL;
2310 int i, j, v, ninteresting;
2313 int i, j, v, ninteresting;
2311 PyObject *dict = NULL, *keys = NULL;
2314 PyObject *dict = NULL, *keys = NULL;
2312 long *seen = NULL;
2315 long *seen = NULL;
2313 int maxrev = -1;
2316 int maxrev = -1;
2314 long final;
2317 long final;
2315
2318
2316 if (revcount > capacity) {
2319 if (revcount > capacity) {
2317 PyErr_Format(PyExc_OverflowError,
2320 PyErr_Format(PyExc_OverflowError,
2318 "bitset size (%ld) > capacity (%ld)",
2321 "bitset size (%ld) > capacity (%ld)",
2319 (long)revcount, (long)capacity);
2322 (long)revcount, (long)capacity);
2320 return NULL;
2323 return NULL;
2321 }
2324 }
2322
2325
2323 for (i = 0; i < revcount; i++) {
2326 for (i = 0; i < revcount; i++) {
2324 int n = (int)PyInt_AsLong(PyList_GET_ITEM(revs, i));
2327 int n = (int)PyInt_AsLong(PyList_GET_ITEM(revs, i));
2325 if (n > maxrev)
2328 if (n > maxrev)
2326 maxrev = n;
2329 maxrev = n;
2327 }
2330 }
2328
2331
2329 depth = calloc(sizeof(*depth), maxrev + 1);
2332 depth = calloc(sizeof(*depth), maxrev + 1);
2330 if (depth == NULL)
2333 if (depth == NULL)
2331 return PyErr_NoMemory();
2334 return PyErr_NoMemory();
2332
2335
2333 seen = calloc(sizeof(*seen), maxrev + 1);
2336 seen = calloc(sizeof(*seen), maxrev + 1);
2334 if (seen == NULL) {
2337 if (seen == NULL) {
2335 PyErr_NoMemory();
2338 PyErr_NoMemory();
2336 goto bail;
2339 goto bail;
2337 }
2340 }
2338
2341
2339 interesting = calloc(sizeof(*interesting), ((size_t)1) << revcount);
2342 interesting = calloc(sizeof(*interesting), ((size_t)1) << revcount);
2340 if (interesting == NULL) {
2343 if (interesting == NULL) {
2341 PyErr_NoMemory();
2344 PyErr_NoMemory();
2342 goto bail;
2345 goto bail;
2343 }
2346 }
2344
2347
2345 if (PyList_Sort(revs) == -1)
2348 if (PyList_Sort(revs) == -1)
2346 goto bail;
2349 goto bail;
2347
2350
2348 for (i = 0; i < revcount; i++) {
2351 for (i = 0; i < revcount; i++) {
2349 int n = (int)PyInt_AsLong(PyList_GET_ITEM(revs, i));
2352 int n = (int)PyInt_AsLong(PyList_GET_ITEM(revs, i));
2350 long b = 1l << i;
2353 long b = 1l << i;
2351 depth[n] = 1;
2354 depth[n] = 1;
2352 seen[n] = b;
2355 seen[n] = b;
2353 interesting[b] = 1;
2356 interesting[b] = 1;
2354 }
2357 }
2355
2358
2356 /* invariant: ninteresting is the number of non-zero entries in
2359 /* invariant: ninteresting is the number of non-zero entries in
2357 * interesting. */
2360 * interesting. */
2358 ninteresting = (int)revcount;
2361 ninteresting = (int)revcount;
2359
2362
2360 for (v = maxrev; v >= 0 && ninteresting > 1; v--) {
2363 for (v = maxrev; v >= 0 && ninteresting > 1; v--) {
2361 int dv = depth[v];
2364 int dv = depth[v];
2362 int parents[2];
2365 int parents[2];
2363 long sv;
2366 long sv;
2364
2367
2365 if (dv == 0)
2368 if (dv == 0)
2366 continue;
2369 continue;
2367
2370
2368 sv = seen[v];
2371 sv = seen[v];
2369 if (index_get_parents(self, v, parents, maxrev) < 0)
2372 if (index_get_parents(self, v, parents, maxrev) < 0)
2370 goto bail;
2373 goto bail;
2371
2374
2372 for (i = 0; i < 2; i++) {
2375 for (i = 0; i < 2; i++) {
2373 int p = parents[i];
2376 int p = parents[i];
2374 long sp;
2377 long sp;
2375 int dp;
2378 int dp;
2376
2379
2377 if (p == -1)
2380 if (p == -1)
2378 continue;
2381 continue;
2379
2382
2380 dp = depth[p];
2383 dp = depth[p];
2381 sp = seen[p];
2384 sp = seen[p];
2382 if (dp <= dv) {
2385 if (dp <= dv) {
2383 depth[p] = dv + 1;
2386 depth[p] = dv + 1;
2384 if (sp != sv) {
2387 if (sp != sv) {
2385 interesting[sv] += 1;
2388 interesting[sv] += 1;
2386 seen[p] = sv;
2389 seen[p] = sv;
2387 if (sp) {
2390 if (sp) {
2388 interesting[sp] -= 1;
2391 interesting[sp] -= 1;
2389 if (interesting[sp] == 0)
2392 if (interesting[sp] == 0)
2390 ninteresting -= 1;
2393 ninteresting -= 1;
2391 }
2394 }
2392 }
2395 }
2393 } else if (dv == dp - 1) {
2396 } else if (dv == dp - 1) {
2394 long nsp = sp | sv;
2397 long nsp = sp | sv;
2395 if (nsp == sp)
2398 if (nsp == sp)
2396 continue;
2399 continue;
2397 seen[p] = nsp;
2400 seen[p] = nsp;
2398 interesting[sp] -= 1;
2401 interesting[sp] -= 1;
2399 if (interesting[sp] == 0)
2402 if (interesting[sp] == 0)
2400 ninteresting -= 1;
2403 ninteresting -= 1;
2401 if (interesting[nsp] == 0)
2404 if (interesting[nsp] == 0)
2402 ninteresting += 1;
2405 ninteresting += 1;
2403 interesting[nsp] += 1;
2406 interesting[nsp] += 1;
2404 }
2407 }
2405 }
2408 }
2406 interesting[sv] -= 1;
2409 interesting[sv] -= 1;
2407 if (interesting[sv] == 0)
2410 if (interesting[sv] == 0)
2408 ninteresting -= 1;
2411 ninteresting -= 1;
2409 }
2412 }
2410
2413
2411 final = 0;
2414 final = 0;
2412 j = ninteresting;
2415 j = ninteresting;
2413 for (i = 0; i < (int)(2 << revcount) && j > 0; i++) {
2416 for (i = 0; i < (int)(2 << revcount) && j > 0; i++) {
2414 if (interesting[i] == 0)
2417 if (interesting[i] == 0)
2415 continue;
2418 continue;
2416 final |= i;
2419 final |= i;
2417 j -= 1;
2420 j -= 1;
2418 }
2421 }
2419 if (final == 0) {
2422 if (final == 0) {
2420 keys = PyList_New(0);
2423 keys = PyList_New(0);
2421 goto bail;
2424 goto bail;
2422 }
2425 }
2423
2426
2424 dict = PyDict_New();
2427 dict = PyDict_New();
2425 if (dict == NULL)
2428 if (dict == NULL)
2426 goto bail;
2429 goto bail;
2427
2430
2428 for (i = 0; i < revcount; i++) {
2431 for (i = 0; i < revcount; i++) {
2429 PyObject *key;
2432 PyObject *key;
2430
2433
2431 if ((final & (1 << i)) == 0)
2434 if ((final & (1 << i)) == 0)
2432 continue;
2435 continue;
2433
2436
2434 key = PyList_GET_ITEM(revs, i);
2437 key = PyList_GET_ITEM(revs, i);
2435 Py_INCREF(key);
2438 Py_INCREF(key);
2436 Py_INCREF(Py_None);
2439 Py_INCREF(Py_None);
2437 if (PyDict_SetItem(dict, key, Py_None) == -1) {
2440 if (PyDict_SetItem(dict, key, Py_None) == -1) {
2438 Py_DECREF(key);
2441 Py_DECREF(key);
2439 Py_DECREF(Py_None);
2442 Py_DECREF(Py_None);
2440 goto bail;
2443 goto bail;
2441 }
2444 }
2442 }
2445 }
2443
2446
2444 keys = PyDict_Keys(dict);
2447 keys = PyDict_Keys(dict);
2445
2448
2446 bail:
2449 bail:
2447 free(depth);
2450 free(depth);
2448 free(seen);
2451 free(seen);
2449 free(interesting);
2452 free(interesting);
2450 Py_XDECREF(dict);
2453 Py_XDECREF(dict);
2451
2454
2452 return keys;
2455 return keys;
2453 }
2456 }
2454
2457
2455 /*
2458 /*
2456 * Given a (possibly overlapping) set of revs, return all the
2459 * Given a (possibly overlapping) set of revs, return all the
2457 * common ancestors heads: heads(::args[0] and ::a[1] and ...)
2460 * common ancestors heads: heads(::args[0] and ::a[1] and ...)
2458 */
2461 */
2459 static PyObject *index_commonancestorsheads(indexObject *self, PyObject *args)
2462 static PyObject *index_commonancestorsheads(indexObject *self, PyObject *args)
2460 {
2463 {
2461 PyObject *ret = NULL;
2464 PyObject *ret = NULL;
2462 Py_ssize_t argcount, i, len;
2465 Py_ssize_t argcount, i, len;
2463 bitmask repeat = 0;
2466 bitmask repeat = 0;
2464 int revcount = 0;
2467 int revcount = 0;
2465 int *revs;
2468 int *revs;
2466
2469
2467 argcount = PySequence_Length(args);
2470 argcount = PySequence_Length(args);
2468 revs = PyMem_Malloc(argcount * sizeof(*revs));
2471 revs = PyMem_Malloc(argcount * sizeof(*revs));
2469 if (argcount > 0 && revs == NULL)
2472 if (argcount > 0 && revs == NULL)
2470 return PyErr_NoMemory();
2473 return PyErr_NoMemory();
2471 len = index_length(self);
2474 len = index_length(self);
2472
2475
2473 for (i = 0; i < argcount; i++) {
2476 for (i = 0; i < argcount; i++) {
2474 static const int capacity = 24;
2477 static const int capacity = 24;
2475 PyObject *obj = PySequence_GetItem(args, i);
2478 PyObject *obj = PySequence_GetItem(args, i);
2476 bitmask x;
2479 bitmask x;
2477 long val;
2480 long val;
2478
2481
2479 if (!PyInt_Check(obj)) {
2482 if (!PyInt_Check(obj)) {
2480 PyErr_SetString(PyExc_TypeError,
2483 PyErr_SetString(PyExc_TypeError,
2481 "arguments must all be ints");
2484 "arguments must all be ints");
2482 Py_DECREF(obj);
2485 Py_DECREF(obj);
2483 goto bail;
2486 goto bail;
2484 }
2487 }
2485 val = PyInt_AsLong(obj);
2488 val = PyInt_AsLong(obj);
2486 Py_DECREF(obj);
2489 Py_DECREF(obj);
2487 if (val == -1) {
2490 if (val == -1) {
2488 ret = PyList_New(0);
2491 ret = PyList_New(0);
2489 goto done;
2492 goto done;
2490 }
2493 }
2491 if (val < 0 || val >= len) {
2494 if (val < 0 || val >= len) {
2492 PyErr_SetString(PyExc_IndexError, "index out of range");
2495 PyErr_SetString(PyExc_IndexError, "index out of range");
2493 goto bail;
2496 goto bail;
2494 }
2497 }
2495 /* this cheesy bloom filter lets us avoid some more
2498 /* this cheesy bloom filter lets us avoid some more
2496 * expensive duplicate checks in the common set-is-disjoint
2499 * expensive duplicate checks in the common set-is-disjoint
2497 * case */
2500 * case */
2498 x = 1ull << (val & 0x3f);
2501 x = 1ull << (val & 0x3f);
2499 if (repeat & x) {
2502 if (repeat & x) {
2500 int k;
2503 int k;
2501 for (k = 0; k < revcount; k++) {
2504 for (k = 0; k < revcount; k++) {
2502 if (val == revs[k])
2505 if (val == revs[k])
2503 goto duplicate;
2506 goto duplicate;
2504 }
2507 }
2505 } else
2508 } else
2506 repeat |= x;
2509 repeat |= x;
2507 if (revcount >= capacity) {
2510 if (revcount >= capacity) {
2508 PyErr_Format(PyExc_OverflowError,
2511 PyErr_Format(PyExc_OverflowError,
2509 "bitset size (%d) > capacity (%d)",
2512 "bitset size (%d) > capacity (%d)",
2510 revcount, capacity);
2513 revcount, capacity);
2511 goto bail;
2514 goto bail;
2512 }
2515 }
2513 revs[revcount++] = (int)val;
2516 revs[revcount++] = (int)val;
2514 duplicate:;
2517 duplicate:;
2515 }
2518 }
2516
2519
2517 if (revcount == 0) {
2520 if (revcount == 0) {
2518 ret = PyList_New(0);
2521 ret = PyList_New(0);
2519 goto done;
2522 goto done;
2520 }
2523 }
2521 if (revcount == 1) {
2524 if (revcount == 1) {
2522 PyObject *obj;
2525 PyObject *obj;
2523 ret = PyList_New(1);
2526 ret = PyList_New(1);
2524 if (ret == NULL)
2527 if (ret == NULL)
2525 goto bail;
2528 goto bail;
2526 obj = PyInt_FromLong(revs[0]);
2529 obj = PyInt_FromLong(revs[0]);
2527 if (obj == NULL)
2530 if (obj == NULL)
2528 goto bail;
2531 goto bail;
2529 PyList_SET_ITEM(ret, 0, obj);
2532 PyList_SET_ITEM(ret, 0, obj);
2530 goto done;
2533 goto done;
2531 }
2534 }
2532
2535
2533 ret = find_gca_candidates(self, revs, revcount);
2536 ret = find_gca_candidates(self, revs, revcount);
2534 if (ret == NULL)
2537 if (ret == NULL)
2535 goto bail;
2538 goto bail;
2536
2539
2537 done:
2540 done:
2538 PyMem_Free(revs);
2541 PyMem_Free(revs);
2539 return ret;
2542 return ret;
2540
2543
2541 bail:
2544 bail:
2542 PyMem_Free(revs);
2545 PyMem_Free(revs);
2543 Py_XDECREF(ret);
2546 Py_XDECREF(ret);
2544 return NULL;
2547 return NULL;
2545 }
2548 }
2546
2549
2547 /*
2550 /*
2548 * Given a (possibly overlapping) set of revs, return the greatest
2551 * Given a (possibly overlapping) set of revs, return the greatest
2549 * common ancestors: those with the longest path to the root.
2552 * common ancestors: those with the longest path to the root.
2550 */
2553 */
2551 static PyObject *index_ancestors(indexObject *self, PyObject *args)
2554 static PyObject *index_ancestors(indexObject *self, PyObject *args)
2552 {
2555 {
2553 PyObject *ret;
2556 PyObject *ret;
2554 PyObject *gca = index_commonancestorsheads(self, args);
2557 PyObject *gca = index_commonancestorsheads(self, args);
2555 if (gca == NULL)
2558 if (gca == NULL)
2556 return NULL;
2559 return NULL;
2557
2560
2558 if (PyList_GET_SIZE(gca) <= 1) {
2561 if (PyList_GET_SIZE(gca) <= 1) {
2559 return gca;
2562 return gca;
2560 }
2563 }
2561
2564
2562 ret = find_deepest(self, gca);
2565 ret = find_deepest(self, gca);
2563 Py_DECREF(gca);
2566 Py_DECREF(gca);
2564 return ret;
2567 return ret;
2565 }
2568 }
2566
2569
2567 /*
2570 /*
2568 * Invalidate any trie entries introduced by added revs.
2571 * Invalidate any trie entries introduced by added revs.
2569 */
2572 */
2570 static void index_invalidate_added(indexObject *self, Py_ssize_t start)
2573 static void index_invalidate_added(indexObject *self, Py_ssize_t start)
2571 {
2574 {
2572 Py_ssize_t i, len;
2575 Py_ssize_t i, len;
2573
2576
2574 len = self->length + self->new_length;
2577 len = self->length + self->new_length;
2575 i = start - self->length;
2578 i = start - self->length;
2576 if (i < 0)
2579 if (i < 0)
2577 return;
2580 return;
2578
2581
2579 for (i = start; i < len; i++)
2582 for (i = start; i < len; i++)
2580 nt_delete_node(&self->nt, index_deref(self, i) + 32);
2583 nt_delete_node(&self->nt, index_deref(self, i) + 32);
2581
2584
2582 self->new_length = start - self->length;
2585 self->new_length = start - self->length;
2583 }
2586 }
2584
2587
2585 /*
2588 /*
2586 * Delete a numeric range of revs, which must be at the end of the
2589 * Delete a numeric range of revs, which must be at the end of the
2587 * range.
2590 * range.
2588 */
2591 */
2589 static int index_slice_del(indexObject *self, PyObject *item)
2592 static int index_slice_del(indexObject *self, PyObject *item)
2590 {
2593 {
2591 Py_ssize_t start, stop, step, slicelength;
2594 Py_ssize_t start, stop, step, slicelength;
2592 Py_ssize_t length = index_length(self) + 1;
2595 Py_ssize_t length = index_length(self) + 1;
2593 int ret = 0;
2596 int ret = 0;
2594
2597
2595 /* Argument changed from PySliceObject* to PyObject* in Python 3. */
2598 /* Argument changed from PySliceObject* to PyObject* in Python 3. */
2596 #ifdef IS_PY3K
2599 #ifdef IS_PY3K
2597 if (PySlice_GetIndicesEx(item, length, &start, &stop, &step,
2600 if (PySlice_GetIndicesEx(item, length, &start, &stop, &step,
2598 &slicelength) < 0)
2601 &slicelength) < 0)
2599 #else
2602 #else
2600 if (PySlice_GetIndicesEx((PySliceObject *)item, length, &start, &stop,
2603 if (PySlice_GetIndicesEx((PySliceObject *)item, length, &start, &stop,
2601 &step, &slicelength) < 0)
2604 &step, &slicelength) < 0)
2602 #endif
2605 #endif
2603 return -1;
2606 return -1;
2604
2607
2605 if (slicelength <= 0)
2608 if (slicelength <= 0)
2606 return 0;
2609 return 0;
2607
2610
2608 if ((step < 0 && start < stop) || (step > 0 && start > stop))
2611 if ((step < 0 && start < stop) || (step > 0 && start > stop))
2609 stop = start;
2612 stop = start;
2610
2613
2611 if (step < 0) {
2614 if (step < 0) {
2612 stop = start + 1;
2615 stop = start + 1;
2613 start = stop + step * (slicelength - 1) - 1;
2616 start = stop + step * (slicelength - 1) - 1;
2614 step = -step;
2617 step = -step;
2615 }
2618 }
2616
2619
2617 if (step != 1) {
2620 if (step != 1) {
2618 PyErr_SetString(PyExc_ValueError,
2621 PyErr_SetString(PyExc_ValueError,
2619 "revlog index delete requires step size of 1");
2622 "revlog index delete requires step size of 1");
2620 return -1;
2623 return -1;
2621 }
2624 }
2622
2625
2623 if (stop != length - 1) {
2626 if (stop != length - 1) {
2624 PyErr_SetString(PyExc_IndexError,
2627 PyErr_SetString(PyExc_IndexError,
2625 "revlog index deletion indices are invalid");
2628 "revlog index deletion indices are invalid");
2626 return -1;
2629 return -1;
2627 }
2630 }
2628
2631
2629 if (start < self->length) {
2632 if (start < self->length) {
2630 if (self->ntinitialized) {
2633 if (self->ntinitialized) {
2631 Py_ssize_t i;
2634 Py_ssize_t i;
2632
2635
2633 for (i = start; i < self->length; i++) {
2636 for (i = start; i < self->length; i++) {
2634 const char *node = index_node_existing(self, i);
2637 const char *node = index_node_existing(self, i);
2635 if (node == NULL)
2638 if (node == NULL)
2636 return -1;
2639 return -1;
2637
2640
2638 nt_delete_node(&self->nt, node);
2641 nt_delete_node(&self->nt, node);
2639 }
2642 }
2640 if (self->new_length)
2643 if (self->new_length)
2641 index_invalidate_added(self, self->length);
2644 index_invalidate_added(self, self->length);
2642 if (self->ntrev > start)
2645 if (self->ntrev > start)
2643 self->ntrev = (int)start;
2646 self->ntrev = (int)start;
2644 } else if (self->new_length) {
2647 } else if (self->new_length) {
2645 self->new_length = 0;
2648 self->new_length = 0;
2646 }
2649 }
2647
2650
2648 self->length = start;
2651 self->length = start;
2649 goto done;
2652 goto done;
2650 }
2653 }
2651
2654
2652 if (self->ntinitialized) {
2655 if (self->ntinitialized) {
2653 index_invalidate_added(self, start);
2656 index_invalidate_added(self, start);
2654 if (self->ntrev > start)
2657 if (self->ntrev > start)
2655 self->ntrev = (int)start;
2658 self->ntrev = (int)start;
2656 } else {
2659 } else {
2657 self->new_length = start - self->length;
2660 self->new_length = start - self->length;
2658 }
2661 }
2659 done:
2662 done:
2660 Py_CLEAR(self->headrevs);
2663 Py_CLEAR(self->headrevs);
2661 return ret;
2664 return ret;
2662 }
2665 }
2663
2666
2664 /*
2667 /*
2665 * Supported ops:
2668 * Supported ops:
2666 *
2669 *
2667 * slice deletion
2670 * slice deletion
2668 * string assignment (extend node->rev mapping)
2671 * string assignment (extend node->rev mapping)
2669 * string deletion (shrink node->rev mapping)
2672 * string deletion (shrink node->rev mapping)
2670 */
2673 */
2671 static int index_assign_subscript(indexObject *self, PyObject *item,
2674 static int index_assign_subscript(indexObject *self, PyObject *item,
2672 PyObject *value)
2675 PyObject *value)
2673 {
2676 {
2674 char *node;
2677 char *node;
2675 long rev;
2678 long rev;
2676
2679
2677 if (PySlice_Check(item) && value == NULL)
2680 if (PySlice_Check(item) && value == NULL)
2678 return index_slice_del(self, item);
2681 return index_slice_del(self, item);
2679
2682
2680 if (node_check(self->nodelen, item, &node) == -1)
2683 if (node_check(self->nodelen, item, &node) == -1)
2681 return -1;
2684 return -1;
2682
2685
2683 if (value == NULL)
2686 if (value == NULL)
2684 return self->ntinitialized ? nt_delete_node(&self->nt, node)
2687 return self->ntinitialized ? nt_delete_node(&self->nt, node)
2685 : 0;
2688 : 0;
2686 rev = PyInt_AsLong(value);
2689 rev = PyInt_AsLong(value);
2687 if (rev > INT_MAX || rev < 0) {
2690 if (rev > INT_MAX || rev < 0) {
2688 if (!PyErr_Occurred())
2691 if (!PyErr_Occurred())
2689 PyErr_SetString(PyExc_ValueError, "rev out of range");
2692 PyErr_SetString(PyExc_ValueError, "rev out of range");
2690 return -1;
2693 return -1;
2691 }
2694 }
2692
2695
2693 if (index_init_nt(self) == -1)
2696 if (index_init_nt(self) == -1)
2694 return -1;
2697 return -1;
2695 return nt_insert(&self->nt, node, (int)rev);
2698 return nt_insert(&self->nt, node, (int)rev);
2696 }
2699 }
2697
2700
2698 /*
2701 /*
2699 * Find all RevlogNG entries in an index that has inline data. Update
2702 * Find all RevlogNG entries in an index that has inline data. Update
2700 * the optional "offsets" table with those entries.
2703 * the optional "offsets" table with those entries.
2701 */
2704 */
2702 static Py_ssize_t inline_scan(indexObject *self, const char **offsets)
2705 static Py_ssize_t inline_scan(indexObject *self, const char **offsets)
2703 {
2706 {
2704 const char *data = (const char *)self->buf.buf;
2707 const char *data = (const char *)self->buf.buf;
2705 Py_ssize_t pos = 0;
2708 Py_ssize_t pos = 0;
2706 Py_ssize_t end = self->buf.len;
2709 Py_ssize_t end = self->buf.len;
2707 long incr = self->entry_size;
2710 long incr = self->entry_size;
2708 Py_ssize_t len = 0;
2711 Py_ssize_t len = 0;
2709
2712
2710 while (pos + self->entry_size <= end && pos >= 0) {
2713 while (pos + self->entry_size <= end && pos >= 0) {
2711 uint32_t comp_len, sidedata_comp_len = 0;
2714 uint32_t comp_len, sidedata_comp_len = 0;
2712 /* 3rd element of header is length of compressed inline data */
2715 /* 3rd element of header is length of compressed inline data */
2713 comp_len = getbe32(data + pos + 8);
2716 comp_len = getbe32(data + pos + 8);
2714 if (self->entry_size == v2_entry_size) {
2717 if (self->entry_size == v2_entry_size) {
2715 sidedata_comp_len = getbe32(data + pos + 72);
2718 sidedata_comp_len = getbe32(data + pos + 72);
2716 }
2719 }
2717 incr = self->entry_size + comp_len + sidedata_comp_len;
2720 incr = self->entry_size + comp_len + sidedata_comp_len;
2718 if (offsets)
2721 if (offsets)
2719 offsets[len] = data + pos;
2722 offsets[len] = data + pos;
2720 len++;
2723 len++;
2721 pos += incr;
2724 pos += incr;
2722 }
2725 }
2723
2726
2724 if (pos != end) {
2727 if (pos != end) {
2725 if (!PyErr_Occurred())
2728 if (!PyErr_Occurred())
2726 PyErr_SetString(PyExc_ValueError, "corrupt index file");
2729 PyErr_SetString(PyExc_ValueError, "corrupt index file");
2727 return -1;
2730 return -1;
2728 }
2731 }
2729
2732
2730 return len;
2733 return len;
2731 }
2734 }
2732
2735
2733 static int index_init(indexObject *self, PyObject *args, PyObject *kwargs)
2736 static int index_init(indexObject *self, PyObject *args, PyObject *kwargs)
2734 {
2737 {
2735 PyObject *data_obj, *inlined_obj, *revlogv2;
2738 PyObject *data_obj, *inlined_obj, *revlogv2;
2736 Py_ssize_t size;
2739 Py_ssize_t size;
2737
2740
2738 static char *kwlist[] = {"data", "inlined", "revlogv2", NULL};
2741 static char *kwlist[] = {"data", "inlined", "revlogv2", NULL};
2739
2742
2740 /* Initialize before argument-checking to avoid index_dealloc() crash.
2743 /* Initialize before argument-checking to avoid index_dealloc() crash.
2741 */
2744 */
2742 self->added = NULL;
2745 self->added = NULL;
2743 self->new_length = 0;
2746 self->new_length = 0;
2744 self->added_length = 0;
2747 self->added_length = 0;
2745 self->data = NULL;
2748 self->data = NULL;
2746 memset(&self->buf, 0, sizeof(self->buf));
2749 memset(&self->buf, 0, sizeof(self->buf));
2747 self->headrevs = NULL;
2750 self->headrevs = NULL;
2748 self->filteredrevs = Py_None;
2751 self->filteredrevs = Py_None;
2749 Py_INCREF(Py_None);
2752 Py_INCREF(Py_None);
2750 self->ntinitialized = 0;
2753 self->ntinitialized = 0;
2751 self->offsets = NULL;
2754 self->offsets = NULL;
2752 self->nodelen = 20;
2755 self->nodelen = 20;
2753 self->nullentry = NULL;
2756 self->nullentry = NULL;
2754
2757
2755 revlogv2 = NULL;
2758 revlogv2 = NULL;
2756 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "OO|O", kwlist,
2759 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "OO|O", kwlist,
2757 &data_obj, &inlined_obj, &revlogv2))
2760 &data_obj, &inlined_obj, &revlogv2))
2758 return -1;
2761 return -1;
2759 if (!PyObject_CheckBuffer(data_obj)) {
2762 if (!PyObject_CheckBuffer(data_obj)) {
2760 PyErr_SetString(PyExc_TypeError,
2763 PyErr_SetString(PyExc_TypeError,
2761 "data does not support buffer interface");
2764 "data does not support buffer interface");
2762 return -1;
2765 return -1;
2763 }
2766 }
2764 if (self->nodelen < 20 || self->nodelen > (Py_ssize_t)sizeof(nullid)) {
2767 if (self->nodelen < 20 || self->nodelen > (Py_ssize_t)sizeof(nullid)) {
2765 PyErr_SetString(PyExc_RuntimeError, "unsupported node size");
2768 PyErr_SetString(PyExc_RuntimeError, "unsupported node size");
2766 return -1;
2769 return -1;
2767 }
2770 }
2768
2771
2769 if (revlogv2 && PyObject_IsTrue(revlogv2)) {
2772 if (revlogv2 && PyObject_IsTrue(revlogv2)) {
2770 self->format_version = format_v2;
2773 self->format_version = format_v2;
2771 self->entry_size = v2_entry_size;
2774 self->entry_size = v2_entry_size;
2772 } else {
2775 } else {
2773 self->format_version = format_v1;
2776 self->format_version = format_v1;
2774 self->entry_size = v1_entry_size;
2777 self->entry_size = v1_entry_size;
2775 }
2778 }
2776
2779
2777 self->nullentry = Py_BuildValue(PY23("iiiiiiis#iiB", "iiiiiiiy#iiB"), 0,
2780 self->nullentry = Py_BuildValue(PY23("iiiiiiis#iiB", "iiiiiiiy#iiB"), 0,
2778 0, 0, -1, -1, -1, -1, nullid,
2781 0, 0, -1, -1, -1, -1, nullid,
2779 self->nodelen, 0, 0, comp_mode_inline);
2782 self->nodelen, 0, 0, comp_mode_inline);
2780
2783
2781 if (!self->nullentry)
2784 if (!self->nullentry)
2782 return -1;
2785 return -1;
2783 PyObject_GC_UnTrack(self->nullentry);
2786 PyObject_GC_UnTrack(self->nullentry);
2784
2787
2785 if (PyObject_GetBuffer(data_obj, &self->buf, PyBUF_SIMPLE) == -1)
2788 if (PyObject_GetBuffer(data_obj, &self->buf, PyBUF_SIMPLE) == -1)
2786 return -1;
2789 return -1;
2787 size = self->buf.len;
2790 size = self->buf.len;
2788
2791
2789 self->inlined = inlined_obj && PyObject_IsTrue(inlined_obj);
2792 self->inlined = inlined_obj && PyObject_IsTrue(inlined_obj);
2790 self->data = data_obj;
2793 self->data = data_obj;
2791
2794
2792 self->ntlookups = self->ntmisses = 0;
2795 self->ntlookups = self->ntmisses = 0;
2793 self->ntrev = -1;
2796 self->ntrev = -1;
2794 Py_INCREF(self->data);
2797 Py_INCREF(self->data);
2795
2798
2796 if (self->inlined) {
2799 if (self->inlined) {
2797 Py_ssize_t len = inline_scan(self, NULL);
2800 Py_ssize_t len = inline_scan(self, NULL);
2798 if (len == -1)
2801 if (len == -1)
2799 goto bail;
2802 goto bail;
2800 self->length = len;
2803 self->length = len;
2801 } else {
2804 } else {
2802 if (size % self->entry_size) {
2805 if (size % self->entry_size) {
2803 PyErr_SetString(PyExc_ValueError, "corrupt index file");
2806 PyErr_SetString(PyExc_ValueError, "corrupt index file");
2804 goto bail;
2807 goto bail;
2805 }
2808 }
2806 self->length = size / self->entry_size;
2809 self->length = size / self->entry_size;
2807 }
2810 }
2808
2811
2809 return 0;
2812 return 0;
2810 bail:
2813 bail:
2811 return -1;
2814 return -1;
2812 }
2815 }
2813
2816
2814 static PyObject *index_nodemap(indexObject *self)
2817 static PyObject *index_nodemap(indexObject *self)
2815 {
2818 {
2816 Py_INCREF(self);
2819 Py_INCREF(self);
2817 return (PyObject *)self;
2820 return (PyObject *)self;
2818 }
2821 }
2819
2822
2820 static void _index_clearcaches(indexObject *self)
2823 static void _index_clearcaches(indexObject *self)
2821 {
2824 {
2822 if (self->offsets) {
2825 if (self->offsets) {
2823 PyMem_Free((void *)self->offsets);
2826 PyMem_Free((void *)self->offsets);
2824 self->offsets = NULL;
2827 self->offsets = NULL;
2825 }
2828 }
2826 if (self->ntinitialized) {
2829 if (self->ntinitialized) {
2827 nt_dealloc(&self->nt);
2830 nt_dealloc(&self->nt);
2828 }
2831 }
2829 self->ntinitialized = 0;
2832 self->ntinitialized = 0;
2830 Py_CLEAR(self->headrevs);
2833 Py_CLEAR(self->headrevs);
2831 }
2834 }
2832
2835
2833 static PyObject *index_clearcaches(indexObject *self)
2836 static PyObject *index_clearcaches(indexObject *self)
2834 {
2837 {
2835 _index_clearcaches(self);
2838 _index_clearcaches(self);
2836 self->ntrev = -1;
2839 self->ntrev = -1;
2837 self->ntlookups = self->ntmisses = 0;
2840 self->ntlookups = self->ntmisses = 0;
2838 Py_RETURN_NONE;
2841 Py_RETURN_NONE;
2839 }
2842 }
2840
2843
2841 static void index_dealloc(indexObject *self)
2844 static void index_dealloc(indexObject *self)
2842 {
2845 {
2843 _index_clearcaches(self);
2846 _index_clearcaches(self);
2844 Py_XDECREF(self->filteredrevs);
2847 Py_XDECREF(self->filteredrevs);
2845 if (self->buf.buf) {
2848 if (self->buf.buf) {
2846 PyBuffer_Release(&self->buf);
2849 PyBuffer_Release(&self->buf);
2847 memset(&self->buf, 0, sizeof(self->buf));
2850 memset(&self->buf, 0, sizeof(self->buf));
2848 }
2851 }
2849 Py_XDECREF(self->data);
2852 Py_XDECREF(self->data);
2850 PyMem_Free(self->added);
2853 PyMem_Free(self->added);
2851 Py_XDECREF(self->nullentry);
2854 Py_XDECREF(self->nullentry);
2852 PyObject_Del(self);
2855 PyObject_Del(self);
2853 }
2856 }
2854
2857
2855 static PySequenceMethods index_sequence_methods = {
2858 static PySequenceMethods index_sequence_methods = {
2856 (lenfunc)index_length, /* sq_length */
2859 (lenfunc)index_length, /* sq_length */
2857 0, /* sq_concat */
2860 0, /* sq_concat */
2858 0, /* sq_repeat */
2861 0, /* sq_repeat */
2859 (ssizeargfunc)index_get, /* sq_item */
2862 (ssizeargfunc)index_get, /* sq_item */
2860 0, /* sq_slice */
2863 0, /* sq_slice */
2861 0, /* sq_ass_item */
2864 0, /* sq_ass_item */
2862 0, /* sq_ass_slice */
2865 0, /* sq_ass_slice */
2863 (objobjproc)index_contains, /* sq_contains */
2866 (objobjproc)index_contains, /* sq_contains */
2864 };
2867 };
2865
2868
2866 static PyMappingMethods index_mapping_methods = {
2869 static PyMappingMethods index_mapping_methods = {
2867 (lenfunc)index_length, /* mp_length */
2870 (lenfunc)index_length, /* mp_length */
2868 (binaryfunc)index_getitem, /* mp_subscript */
2871 (binaryfunc)index_getitem, /* mp_subscript */
2869 (objobjargproc)index_assign_subscript, /* mp_ass_subscript */
2872 (objobjargproc)index_assign_subscript, /* mp_ass_subscript */
2870 };
2873 };
2871
2874
2872 static PyMethodDef index_methods[] = {
2875 static PyMethodDef index_methods[] = {
2873 {"ancestors", (PyCFunction)index_ancestors, METH_VARARGS,
2876 {"ancestors", (PyCFunction)index_ancestors, METH_VARARGS,
2874 "return the gca set of the given revs"},
2877 "return the gca set of the given revs"},
2875 {"commonancestorsheads", (PyCFunction)index_commonancestorsheads,
2878 {"commonancestorsheads", (PyCFunction)index_commonancestorsheads,
2876 METH_VARARGS,
2879 METH_VARARGS,
2877 "return the heads of the common ancestors of the given revs"},
2880 "return the heads of the common ancestors of the given revs"},
2878 {"clearcaches", (PyCFunction)index_clearcaches, METH_NOARGS,
2881 {"clearcaches", (PyCFunction)index_clearcaches, METH_NOARGS,
2879 "clear the index caches"},
2882 "clear the index caches"},
2880 {"get", (PyCFunction)index_m_get, METH_VARARGS, "get an index entry"},
2883 {"get", (PyCFunction)index_m_get, METH_VARARGS, "get an index entry"},
2881 {"get_rev", (PyCFunction)index_m_get, METH_VARARGS,
2884 {"get_rev", (PyCFunction)index_m_get, METH_VARARGS,
2882 "return `rev` associated with a node or None"},
2885 "return `rev` associated with a node or None"},
2883 {"has_node", (PyCFunction)index_m_has_node, METH_O,
2886 {"has_node", (PyCFunction)index_m_has_node, METH_O,
2884 "return True if the node exist in the index"},
2887 "return True if the node exist in the index"},
2885 {"rev", (PyCFunction)index_m_rev, METH_O,
2888 {"rev", (PyCFunction)index_m_rev, METH_O,
2886 "return `rev` associated with a node or raise RevlogError"},
2889 "return `rev` associated with a node or raise RevlogError"},
2887 {"computephasesmapsets", (PyCFunction)compute_phases_map_sets, METH_VARARGS,
2890 {"computephasesmapsets", (PyCFunction)compute_phases_map_sets, METH_VARARGS,
2888 "compute phases"},
2891 "compute phases"},
2889 {"reachableroots2", (PyCFunction)reachableroots2, METH_VARARGS,
2892 {"reachableroots2", (PyCFunction)reachableroots2, METH_VARARGS,
2890 "reachableroots"},
2893 "reachableroots"},
2891 {"replace_sidedata_info", (PyCFunction)index_replace_sidedata_info,
2894 {"replace_sidedata_info", (PyCFunction)index_replace_sidedata_info,
2892 METH_VARARGS, "replace an existing index entry with a new value"},
2895 METH_VARARGS, "replace an existing index entry with a new value"},
2893 {"headrevs", (PyCFunction)index_headrevs, METH_VARARGS,
2896 {"headrevs", (PyCFunction)index_headrevs, METH_VARARGS,
2894 "get head revisions"}, /* Can do filtering since 3.2 */
2897 "get head revisions"}, /* Can do filtering since 3.2 */
2895 {"headrevsfiltered", (PyCFunction)index_headrevs, METH_VARARGS,
2898 {"headrevsfiltered", (PyCFunction)index_headrevs, METH_VARARGS,
2896 "get filtered head revisions"}, /* Can always do filtering */
2899 "get filtered head revisions"}, /* Can always do filtering */
2897 {"issnapshot", (PyCFunction)index_issnapshot, METH_O,
2900 {"issnapshot", (PyCFunction)index_issnapshot, METH_O,
2898 "True if the object is a snapshot"},
2901 "True if the object is a snapshot"},
2899 {"findsnapshots", (PyCFunction)index_findsnapshots, METH_VARARGS,
2902 {"findsnapshots", (PyCFunction)index_findsnapshots, METH_VARARGS,
2900 "Gather snapshot data in a cache dict"},
2903 "Gather snapshot data in a cache dict"},
2901 {"deltachain", (PyCFunction)index_deltachain, METH_VARARGS,
2904 {"deltachain", (PyCFunction)index_deltachain, METH_VARARGS,
2902 "determine revisions with deltas to reconstruct fulltext"},
2905 "determine revisions with deltas to reconstruct fulltext"},
2903 {"slicechunktodensity", (PyCFunction)index_slicechunktodensity,
2906 {"slicechunktodensity", (PyCFunction)index_slicechunktodensity,
2904 METH_VARARGS, "determine revisions with deltas to reconstruct fulltext"},
2907 METH_VARARGS, "determine revisions with deltas to reconstruct fulltext"},
2905 {"append", (PyCFunction)index_append, METH_O, "append an index entry"},
2908 {"append", (PyCFunction)index_append, METH_O, "append an index entry"},
2906 {"partialmatch", (PyCFunction)index_partialmatch, METH_VARARGS,
2909 {"partialmatch", (PyCFunction)index_partialmatch, METH_VARARGS,
2907 "match a potentially ambiguous node ID"},
2910 "match a potentially ambiguous node ID"},
2908 {"shortest", (PyCFunction)index_shortest, METH_VARARGS,
2911 {"shortest", (PyCFunction)index_shortest, METH_VARARGS,
2909 "find length of shortest hex nodeid of a binary ID"},
2912 "find length of shortest hex nodeid of a binary ID"},
2910 {"stats", (PyCFunction)index_stats, METH_NOARGS, "stats for the index"},
2913 {"stats", (PyCFunction)index_stats, METH_NOARGS, "stats for the index"},
2911 {"entry_binary", (PyCFunction)index_entry_binary, METH_O,
2914 {"entry_binary", (PyCFunction)index_entry_binary, METH_O,
2912 "return an entry in binary form"},
2915 "return an entry in binary form"},
2913 {"pack_header", (PyCFunction)index_pack_header, METH_VARARGS,
2916 {"pack_header", (PyCFunction)index_pack_header, METH_VARARGS,
2914 "pack the revlog header information into binary"},
2917 "pack the revlog header information into binary"},
2915 {NULL} /* Sentinel */
2918 {NULL} /* Sentinel */
2916 };
2919 };
2917
2920
2918 static PyGetSetDef index_getset[] = {
2921 static PyGetSetDef index_getset[] = {
2919 {"nodemap", (getter)index_nodemap, NULL, "nodemap", NULL},
2922 {"nodemap", (getter)index_nodemap, NULL, "nodemap", NULL},
2920 {NULL} /* Sentinel */
2923 {NULL} /* Sentinel */
2921 };
2924 };
2922
2925
2923 static PyMemberDef index_members[] = {
2926 static PyMemberDef index_members[] = {
2924 {"entry_size", T_LONG, offsetof(indexObject, entry_size), 0,
2927 {"entry_size", T_LONG, offsetof(indexObject, entry_size), 0,
2925 "size of an index entry"},
2928 "size of an index entry"},
2926 {NULL} /* Sentinel */
2929 {NULL} /* Sentinel */
2927 };
2930 };
2928
2931
2929 PyTypeObject HgRevlogIndex_Type = {
2932 PyTypeObject HgRevlogIndex_Type = {
2930 PyVarObject_HEAD_INIT(NULL, 0) /* header */
2933 PyVarObject_HEAD_INIT(NULL, 0) /* header */
2931 "parsers.index", /* tp_name */
2934 "parsers.index", /* tp_name */
2932 sizeof(indexObject), /* tp_basicsize */
2935 sizeof(indexObject), /* tp_basicsize */
2933 0, /* tp_itemsize */
2936 0, /* tp_itemsize */
2934 (destructor)index_dealloc, /* tp_dealloc */
2937 (destructor)index_dealloc, /* tp_dealloc */
2935 0, /* tp_print */
2938 0, /* tp_print */
2936 0, /* tp_getattr */
2939 0, /* tp_getattr */
2937 0, /* tp_setattr */
2940 0, /* tp_setattr */
2938 0, /* tp_compare */
2941 0, /* tp_compare */
2939 0, /* tp_repr */
2942 0, /* tp_repr */
2940 0, /* tp_as_number */
2943 0, /* tp_as_number */
2941 &index_sequence_methods, /* tp_as_sequence */
2944 &index_sequence_methods, /* tp_as_sequence */
2942 &index_mapping_methods, /* tp_as_mapping */
2945 &index_mapping_methods, /* tp_as_mapping */
2943 0, /* tp_hash */
2946 0, /* tp_hash */
2944 0, /* tp_call */
2947 0, /* tp_call */
2945 0, /* tp_str */
2948 0, /* tp_str */
2946 0, /* tp_getattro */
2949 0, /* tp_getattro */
2947 0, /* tp_setattro */
2950 0, /* tp_setattro */
2948 0, /* tp_as_buffer */
2951 0, /* tp_as_buffer */
2949 Py_TPFLAGS_DEFAULT, /* tp_flags */
2952 Py_TPFLAGS_DEFAULT, /* tp_flags */
2950 "revlog index", /* tp_doc */
2953 "revlog index", /* tp_doc */
2951 0, /* tp_traverse */
2954 0, /* tp_traverse */
2952 0, /* tp_clear */
2955 0, /* tp_clear */
2953 0, /* tp_richcompare */
2956 0, /* tp_richcompare */
2954 0, /* tp_weaklistoffset */
2957 0, /* tp_weaklistoffset */
2955 0, /* tp_iter */
2958 0, /* tp_iter */
2956 0, /* tp_iternext */
2959 0, /* tp_iternext */
2957 index_methods, /* tp_methods */
2960 index_methods, /* tp_methods */
2958 index_members, /* tp_members */
2961 index_members, /* tp_members */
2959 index_getset, /* tp_getset */
2962 index_getset, /* tp_getset */
2960 0, /* tp_base */
2963 0, /* tp_base */
2961 0, /* tp_dict */
2964 0, /* tp_dict */
2962 0, /* tp_descr_get */
2965 0, /* tp_descr_get */
2963 0, /* tp_descr_set */
2966 0, /* tp_descr_set */
2964 0, /* tp_dictoffset */
2967 0, /* tp_dictoffset */
2965 (initproc)index_init, /* tp_init */
2968 (initproc)index_init, /* tp_init */
2966 0, /* tp_alloc */
2969 0, /* tp_alloc */
2967 };
2970 };
2968
2971
2969 /*
2972 /*
2970 * returns a tuple of the form (index, cache) with elements as
2973 * returns a tuple of the form (index, cache) with elements as
2971 * follows:
2974 * follows:
2972 *
2975 *
2973 * index: an index object that lazily parses Revlog (v1 or v2) records
2976 * index: an index object that lazily parses Revlog (v1 or v2) records
2974 * cache: if data is inlined, a tuple (0, index_file_content), else None
2977 * cache: if data is inlined, a tuple (0, index_file_content), else None
2975 * index_file_content could be a string, or a buffer
2978 * index_file_content could be a string, or a buffer
2976 *
2979 *
2977 * added complications are for backwards compatibility
2980 * added complications are for backwards compatibility
2978 */
2981 */
2979 PyObject *parse_index2(PyObject *self, PyObject *args, PyObject *kwargs)
2982 PyObject *parse_index2(PyObject *self, PyObject *args, PyObject *kwargs)
2980 {
2983 {
2981 PyObject *cache = NULL;
2984 PyObject *cache = NULL;
2982 indexObject *idx;
2985 indexObject *idx;
2983 int ret;
2986 int ret;
2984
2987
2985 idx = PyObject_New(indexObject, &HgRevlogIndex_Type);
2988 idx = PyObject_New(indexObject, &HgRevlogIndex_Type);
2986 if (idx == NULL)
2989 if (idx == NULL)
2987 goto bail;
2990 goto bail;
2988
2991
2989 ret = index_init(idx, args, kwargs);
2992 ret = index_init(idx, args, kwargs);
2990 if (ret == -1)
2993 if (ret == -1)
2991 goto bail;
2994 goto bail;
2992
2995
2993 if (idx->inlined) {
2996 if (idx->inlined) {
2994 cache = Py_BuildValue("iO", 0, idx->data);
2997 cache = Py_BuildValue("iO", 0, idx->data);
2995 if (cache == NULL)
2998 if (cache == NULL)
2996 goto bail;
2999 goto bail;
2997 } else {
3000 } else {
2998 cache = Py_None;
3001 cache = Py_None;
2999 Py_INCREF(cache);
3002 Py_INCREF(cache);
3000 }
3003 }
3001
3004
3002 return Py_BuildValue("NN", idx, cache);
3005 return Py_BuildValue("NN", idx, cache);
3003
3006
3004 bail:
3007 bail:
3005 Py_XDECREF(idx);
3008 Py_XDECREF(idx);
3006 Py_XDECREF(cache);
3009 Py_XDECREF(cache);
3007 return NULL;
3010 return NULL;
3008 }
3011 }
3009
3012
3010 static Revlog_CAPI CAPI = {
3013 static Revlog_CAPI CAPI = {
3011 /* increment the abi_version field upon each change in the Revlog_CAPI
3014 /* increment the abi_version field upon each change in the Revlog_CAPI
3012 struct or in the ABI of the listed functions */
3015 struct or in the ABI of the listed functions */
3013 2,
3016 2,
3014 index_length,
3017 index_length,
3015 index_node,
3018 index_node,
3016 HgRevlogIndex_GetParents,
3019 HgRevlogIndex_GetParents,
3017 };
3020 };
3018
3021
3019 void revlog_module_init(PyObject *mod)
3022 void revlog_module_init(PyObject *mod)
3020 {
3023 {
3021 PyObject *caps = NULL;
3024 PyObject *caps = NULL;
3022 HgRevlogIndex_Type.tp_new = PyType_GenericNew;
3025 HgRevlogIndex_Type.tp_new = PyType_GenericNew;
3023 if (PyType_Ready(&HgRevlogIndex_Type) < 0)
3026 if (PyType_Ready(&HgRevlogIndex_Type) < 0)
3024 return;
3027 return;
3025 Py_INCREF(&HgRevlogIndex_Type);
3028 Py_INCREF(&HgRevlogIndex_Type);
3026 PyModule_AddObject(mod, "index", (PyObject *)&HgRevlogIndex_Type);
3029 PyModule_AddObject(mod, "index", (PyObject *)&HgRevlogIndex_Type);
3027
3030
3028 nodetreeType.tp_new = PyType_GenericNew;
3031 nodetreeType.tp_new = PyType_GenericNew;
3029 if (PyType_Ready(&nodetreeType) < 0)
3032 if (PyType_Ready(&nodetreeType) < 0)
3030 return;
3033 return;
3031 Py_INCREF(&nodetreeType);
3034 Py_INCREF(&nodetreeType);
3032 PyModule_AddObject(mod, "nodetree", (PyObject *)&nodetreeType);
3035 PyModule_AddObject(mod, "nodetree", (PyObject *)&nodetreeType);
3033
3036
3034 caps = PyCapsule_New(&CAPI, "mercurial.cext.parsers.revlog_CAPI", NULL);
3037 caps = PyCapsule_New(&CAPI, "mercurial.cext.parsers.revlog_CAPI", NULL);
3035 if (caps != NULL)
3038 if (caps != NULL)
3036 PyModule_AddObject(mod, "revlog_CAPI", caps);
3039 PyModule_AddObject(mod, "revlog_CAPI", caps);
3037 }
3040 }
@@ -1,417 +1,415 b''
1 # parsers.py - Python implementation of parsers.c
1 # parsers.py - Python implementation of parsers.c
2 #
2 #
3 # Copyright 2009 Olivia Mackall <olivia@selenic.com> and others
3 # Copyright 2009 Olivia Mackall <olivia@selenic.com> and others
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import absolute_import
8 from __future__ import absolute_import
9
9
10 import struct
10 import struct
11 import zlib
11 import zlib
12
12
13 from ..node import (
13 from ..node import (
14 nullrev,
14 nullrev,
15 sha1nodeconstants,
15 sha1nodeconstants,
16 )
16 )
17 from .. import (
17 from .. import (
18 error,
18 error,
19 pycompat,
19 pycompat,
20 util,
20 util,
21 )
21 )
22
22
23 from ..revlogutils import nodemap as nodemaputil
23 from ..revlogutils import nodemap as nodemaputil
24 from ..revlogutils import constants as revlog_constants
24 from ..revlogutils import constants as revlog_constants
25
25
26 stringio = pycompat.bytesio
26 stringio = pycompat.bytesio
27
27
28
28
29 _pack = struct.pack
29 _pack = struct.pack
30 _unpack = struct.unpack
30 _unpack = struct.unpack
31 _compress = zlib.compress
31 _compress = zlib.compress
32 _decompress = zlib.decompress
32 _decompress = zlib.decompress
33
33
34 # Some code below makes tuples directly because it's more convenient. However,
34 # Some code below makes tuples directly because it's more convenient. However,
35 # code outside this module should always use dirstatetuple.
35 # code outside this module should always use dirstatetuple.
36 def dirstatetuple(*x):
36 def dirstatetuple(*x):
37 # x is a tuple
37 # x is a tuple
38 return x
38 return x
39
39
40
40
41 def gettype(q):
41 def gettype(q):
42 return int(q & 0xFFFF)
42 return int(q & 0xFFFF)
43
43
44
44
45 def offset_type(offset, type):
45 def offset_type(offset, type):
46 return int(int(offset) << 16 | type)
46 return int(int(offset) << 16 | type)
47
47
48
48
49 class BaseIndexObject(object):
49 class BaseIndexObject(object):
50 # Format of an index entry according to Python's `struct` language
50 # Format of an index entry according to Python's `struct` language
51 index_format = revlog_constants.INDEX_ENTRY_V1
51 index_format = revlog_constants.INDEX_ENTRY_V1
52 # Size of a C unsigned long long int, platform independent
52 # Size of a C unsigned long long int, platform independent
53 big_int_size = struct.calcsize(b'>Q')
53 big_int_size = struct.calcsize(b'>Q')
54 # Size of a C long int, platform independent
54 # Size of a C long int, platform independent
55 int_size = struct.calcsize(b'>i')
55 int_size = struct.calcsize(b'>i')
56 # An empty index entry, used as a default value to be overridden, or nullrev
56 # An empty index entry, used as a default value to be overridden, or nullrev
57 null_item = (
57 null_item = (
58 0,
58 0,
59 0,
59 0,
60 0,
60 0,
61 -1,
61 -1,
62 -1,
62 -1,
63 -1,
63 -1,
64 -1,
64 -1,
65 sha1nodeconstants.nullid,
65 sha1nodeconstants.nullid,
66 0,
66 0,
67 0,
67 0,
68 revlog_constants.COMP_MODE_INLINE,
68 revlog_constants.COMP_MODE_INLINE,
69 )
69 )
70
70
71 @util.propertycache
71 @util.propertycache
72 def entry_size(self):
72 def entry_size(self):
73 return self.index_format.size
73 return self.index_format.size
74
74
75 @property
75 @property
76 def nodemap(self):
76 def nodemap(self):
77 msg = b"index.nodemap is deprecated, use index.[has_node|rev|get_rev]"
77 msg = b"index.nodemap is deprecated, use index.[has_node|rev|get_rev]"
78 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
78 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
79 return self._nodemap
79 return self._nodemap
80
80
81 @util.propertycache
81 @util.propertycache
82 def _nodemap(self):
82 def _nodemap(self):
83 nodemap = nodemaputil.NodeMap({sha1nodeconstants.nullid: nullrev})
83 nodemap = nodemaputil.NodeMap({sha1nodeconstants.nullid: nullrev})
84 for r in range(0, len(self)):
84 for r in range(0, len(self)):
85 n = self[r][7]
85 n = self[r][7]
86 nodemap[n] = r
86 nodemap[n] = r
87 return nodemap
87 return nodemap
88
88
89 def has_node(self, node):
89 def has_node(self, node):
90 """return True if the node exist in the index"""
90 """return True if the node exist in the index"""
91 return node in self._nodemap
91 return node in self._nodemap
92
92
93 def rev(self, node):
93 def rev(self, node):
94 """return a revision for a node
94 """return a revision for a node
95
95
96 If the node is unknown, raise a RevlogError"""
96 If the node is unknown, raise a RevlogError"""
97 return self._nodemap[node]
97 return self._nodemap[node]
98
98
99 def get_rev(self, node):
99 def get_rev(self, node):
100 """return a revision for a node
100 """return a revision for a node
101
101
102 If the node is unknown, return None"""
102 If the node is unknown, return None"""
103 return self._nodemap.get(node)
103 return self._nodemap.get(node)
104
104
105 def _stripnodes(self, start):
105 def _stripnodes(self, start):
106 if '_nodemap' in vars(self):
106 if '_nodemap' in vars(self):
107 for r in range(start, len(self)):
107 for r in range(start, len(self)):
108 n = self[r][7]
108 n = self[r][7]
109 del self._nodemap[n]
109 del self._nodemap[n]
110
110
111 def clearcaches(self):
111 def clearcaches(self):
112 self.__dict__.pop('_nodemap', None)
112 self.__dict__.pop('_nodemap', None)
113
113
114 def __len__(self):
114 def __len__(self):
115 return self._lgt + len(self._extra)
115 return self._lgt + len(self._extra)
116
116
117 def append(self, tup):
117 def append(self, tup):
118 if '_nodemap' in vars(self):
118 if '_nodemap' in vars(self):
119 self._nodemap[tup[7]] = len(self)
119 self._nodemap[tup[7]] = len(self)
120 data = self._pack_entry(tup)
120 data = self._pack_entry(tup)
121 self._extra.append(data)
121 self._extra.append(data)
122
122
123 def _pack_entry(self, entry):
123 def _pack_entry(self, entry):
124 assert entry[8] == 0
124 assert entry[8] == 0
125 assert entry[9] == 0
125 assert entry[9] == 0
126 return self.index_format.pack(*entry[:8])
126 return self.index_format.pack(*entry[:8])
127
127
128 def _check_index(self, i):
128 def _check_index(self, i):
129 if not isinstance(i, int):
129 if not isinstance(i, int):
130 raise TypeError(b"expecting int indexes")
130 raise TypeError(b"expecting int indexes")
131 if i < 0 or i >= len(self):
131 if i < 0 or i >= len(self):
132 raise IndexError
132 raise IndexError
133
133
134 def __getitem__(self, i):
134 def __getitem__(self, i):
135 if i == -1:
135 if i == -1:
136 return self.null_item
136 return self.null_item
137 self._check_index(i)
137 self._check_index(i)
138 if i >= self._lgt:
138 if i >= self._lgt:
139 data = self._extra[i - self._lgt]
139 data = self._extra[i - self._lgt]
140 else:
140 else:
141 index = self._calculate_index(i)
141 index = self._calculate_index(i)
142 data = self._data[index : index + self.entry_size]
142 data = self._data[index : index + self.entry_size]
143 r = self._unpack_entry(data)
143 r = self._unpack_entry(data)
144 if self._lgt and i == 0:
144 if self._lgt and i == 0:
145 r = (offset_type(0, gettype(r[0])),) + r[1:]
145 r = (offset_type(0, gettype(r[0])),) + r[1:]
146 return r
146 return r
147
147
148 def _unpack_entry(self, data):
148 def _unpack_entry(self, data):
149 r = self.index_format.unpack(data)
149 r = self.index_format.unpack(data)
150 r = r + (0, 0, revlog_constants.COMP_MODE_INLINE)
150 r = r + (0, 0, revlog_constants.COMP_MODE_INLINE)
151 return r
151 return r
152
152
153 def pack_header(self, header):
153 def pack_header(self, header):
154 """pack header information as binary"""
154 """pack header information as binary"""
155 v_fmt = revlog_constants.INDEX_HEADER
155 v_fmt = revlog_constants.INDEX_HEADER
156 return v_fmt.pack(header)
156 return v_fmt.pack(header)
157
157
158 def entry_binary(self, rev):
158 def entry_binary(self, rev):
159 """return the raw binary string representing a revision"""
159 """return the raw binary string representing a revision"""
160 entry = self[rev]
160 entry = self[rev]
161 p = revlog_constants.INDEX_ENTRY_V1.pack(*entry[:8])
161 p = revlog_constants.INDEX_ENTRY_V1.pack(*entry[:8])
162 if rev == 0:
162 if rev == 0:
163 p = p[revlog_constants.INDEX_HEADER.size :]
163 p = p[revlog_constants.INDEX_HEADER.size :]
164 return p
164 return p
165
165
166
166
167 class IndexObject(BaseIndexObject):
167 class IndexObject(BaseIndexObject):
168 def __init__(self, data):
168 def __init__(self, data):
169 assert len(data) % self.entry_size == 0, (
169 assert len(data) % self.entry_size == 0, (
170 len(data),
170 len(data),
171 self.entry_size,
171 self.entry_size,
172 len(data) % self.entry_size,
172 len(data) % self.entry_size,
173 )
173 )
174 self._data = data
174 self._data = data
175 self._lgt = len(data) // self.entry_size
175 self._lgt = len(data) // self.entry_size
176 self._extra = []
176 self._extra = []
177
177
178 def _calculate_index(self, i):
178 def _calculate_index(self, i):
179 return i * self.entry_size
179 return i * self.entry_size
180
180
181 def __delitem__(self, i):
181 def __delitem__(self, i):
182 if not isinstance(i, slice) or not i.stop == -1 or i.step is not None:
182 if not isinstance(i, slice) or not i.stop == -1 or i.step is not None:
183 raise ValueError(b"deleting slices only supports a:-1 with step 1")
183 raise ValueError(b"deleting slices only supports a:-1 with step 1")
184 i = i.start
184 i = i.start
185 self._check_index(i)
185 self._check_index(i)
186 self._stripnodes(i)
186 self._stripnodes(i)
187 if i < self._lgt:
187 if i < self._lgt:
188 self._data = self._data[: i * self.entry_size]
188 self._data = self._data[: i * self.entry_size]
189 self._lgt = i
189 self._lgt = i
190 self._extra = []
190 self._extra = []
191 else:
191 else:
192 self._extra = self._extra[: i - self._lgt]
192 self._extra = self._extra[: i - self._lgt]
193
193
194
194
195 class PersistentNodeMapIndexObject(IndexObject):
195 class PersistentNodeMapIndexObject(IndexObject):
196 """a Debug oriented class to test persistent nodemap
196 """a Debug oriented class to test persistent nodemap
197
197
198 We need a simple python object to test API and higher level behavior. See
198 We need a simple python object to test API and higher level behavior. See
199 the Rust implementation for more serious usage. This should be used only
199 the Rust implementation for more serious usage. This should be used only
200 through the dedicated `devel.persistent-nodemap` config.
200 through the dedicated `devel.persistent-nodemap` config.
201 """
201 """
202
202
203 def nodemap_data_all(self):
203 def nodemap_data_all(self):
204 """Return bytes containing a full serialization of a nodemap
204 """Return bytes containing a full serialization of a nodemap
205
205
206 The nodemap should be valid for the full set of revisions in the
206 The nodemap should be valid for the full set of revisions in the
207 index."""
207 index."""
208 return nodemaputil.persistent_data(self)
208 return nodemaputil.persistent_data(self)
209
209
210 def nodemap_data_incremental(self):
210 def nodemap_data_incremental(self):
211 """Return bytes containing a incremental update to persistent nodemap
211 """Return bytes containing a incremental update to persistent nodemap
212
212
213 This containst the data for an append-only update of the data provided
213 This containst the data for an append-only update of the data provided
214 in the last call to `update_nodemap_data`.
214 in the last call to `update_nodemap_data`.
215 """
215 """
216 if self._nm_root is None:
216 if self._nm_root is None:
217 return None
217 return None
218 docket = self._nm_docket
218 docket = self._nm_docket
219 changed, data = nodemaputil.update_persistent_data(
219 changed, data = nodemaputil.update_persistent_data(
220 self, self._nm_root, self._nm_max_idx, self._nm_docket.tip_rev
220 self, self._nm_root, self._nm_max_idx, self._nm_docket.tip_rev
221 )
221 )
222
222
223 self._nm_root = self._nm_max_idx = self._nm_docket = None
223 self._nm_root = self._nm_max_idx = self._nm_docket = None
224 return docket, changed, data
224 return docket, changed, data
225
225
226 def update_nodemap_data(self, docket, nm_data):
226 def update_nodemap_data(self, docket, nm_data):
227 """provide full block of persisted binary data for a nodemap
227 """provide full block of persisted binary data for a nodemap
228
228
229 The data are expected to come from disk. See `nodemap_data_all` for a
229 The data are expected to come from disk. See `nodemap_data_all` for a
230 produceur of such data."""
230 produceur of such data."""
231 if nm_data is not None:
231 if nm_data is not None:
232 self._nm_root, self._nm_max_idx = nodemaputil.parse_data(nm_data)
232 self._nm_root, self._nm_max_idx = nodemaputil.parse_data(nm_data)
233 if self._nm_root:
233 if self._nm_root:
234 self._nm_docket = docket
234 self._nm_docket = docket
235 else:
235 else:
236 self._nm_root = self._nm_max_idx = self._nm_docket = None
236 self._nm_root = self._nm_max_idx = self._nm_docket = None
237
237
238
238
239 class InlinedIndexObject(BaseIndexObject):
239 class InlinedIndexObject(BaseIndexObject):
240 def __init__(self, data, inline=0):
240 def __init__(self, data, inline=0):
241 self._data = data
241 self._data = data
242 self._lgt = self._inline_scan(None)
242 self._lgt = self._inline_scan(None)
243 self._inline_scan(self._lgt)
243 self._inline_scan(self._lgt)
244 self._extra = []
244 self._extra = []
245
245
246 def _inline_scan(self, lgt):
246 def _inline_scan(self, lgt):
247 off = 0
247 off = 0
248 if lgt is not None:
248 if lgt is not None:
249 self._offsets = [0] * lgt
249 self._offsets = [0] * lgt
250 count = 0
250 count = 0
251 while off <= len(self._data) - self.entry_size:
251 while off <= len(self._data) - self.entry_size:
252 start = off + self.big_int_size
252 start = off + self.big_int_size
253 (s,) = struct.unpack(
253 (s,) = struct.unpack(
254 b'>i',
254 b'>i',
255 self._data[start : start + self.int_size],
255 self._data[start : start + self.int_size],
256 )
256 )
257 if lgt is not None:
257 if lgt is not None:
258 self._offsets[count] = off
258 self._offsets[count] = off
259 count += 1
259 count += 1
260 off += self.entry_size + s
260 off += self.entry_size + s
261 if off != len(self._data):
261 if off != len(self._data):
262 raise ValueError(b"corrupted data")
262 raise ValueError(b"corrupted data")
263 return count
263 return count
264
264
265 def __delitem__(self, i):
265 def __delitem__(self, i):
266 if not isinstance(i, slice) or not i.stop == -1 or i.step is not None:
266 if not isinstance(i, slice) or not i.stop == -1 or i.step is not None:
267 raise ValueError(b"deleting slices only supports a:-1 with step 1")
267 raise ValueError(b"deleting slices only supports a:-1 with step 1")
268 i = i.start
268 i = i.start
269 self._check_index(i)
269 self._check_index(i)
270 self._stripnodes(i)
270 self._stripnodes(i)
271 if i < self._lgt:
271 if i < self._lgt:
272 self._offsets = self._offsets[:i]
272 self._offsets = self._offsets[:i]
273 self._lgt = i
273 self._lgt = i
274 self._extra = []
274 self._extra = []
275 else:
275 else:
276 self._extra = self._extra[: i - self._lgt]
276 self._extra = self._extra[: i - self._lgt]
277
277
278 def _calculate_index(self, i):
278 def _calculate_index(self, i):
279 return self._offsets[i]
279 return self._offsets[i]
280
280
281
281
282 def parse_index2(data, inline, revlogv2=False):
282 def parse_index2(data, inline, revlogv2=False):
283 if not inline:
283 if not inline:
284 cls = IndexObject2 if revlogv2 else IndexObject
284 cls = IndexObject2 if revlogv2 else IndexObject
285 return cls(data), None
285 return cls(data), None
286 cls = InlinedIndexObject2 if revlogv2 else InlinedIndexObject
286 cls = InlinedIndexObject2 if revlogv2 else InlinedIndexObject
287 return cls(data, inline), (0, data)
287 return cls(data, inline), (0, data)
288
288
289
289
290 class Index2Mixin(object):
290 class Index2Mixin(object):
291 index_format = revlog_constants.INDEX_ENTRY_V2
291 index_format = revlog_constants.INDEX_ENTRY_V2
292
292
293 def replace_sidedata_info(
293 def replace_sidedata_info(
294 self, rev, sidedata_offset, sidedata_length, offset_flags
294 self, rev, sidedata_offset, sidedata_length, offset_flags
295 ):
295 ):
296 """
296 """
297 Replace an existing index entry's sidedata offset and length with new
297 Replace an existing index entry's sidedata offset and length with new
298 ones.
298 ones.
299 This cannot be used outside of the context of sidedata rewriting,
299 This cannot be used outside of the context of sidedata rewriting,
300 inside the transaction that creates the revision `rev`.
300 inside the transaction that creates the revision `rev`.
301 """
301 """
302 if rev < 0:
302 if rev < 0:
303 raise KeyError
303 raise KeyError
304 self._check_index(rev)
304 self._check_index(rev)
305 if rev < self._lgt:
305 if rev < self._lgt:
306 msg = b"cannot rewrite entries outside of this transaction"
306 msg = b"cannot rewrite entries outside of this transaction"
307 raise KeyError(msg)
307 raise KeyError(msg)
308 else:
308 else:
309 entry = list(self[rev])
309 entry = list(self[rev])
310 entry[0] = offset_flags
310 entry[0] = offset_flags
311 entry[8] = sidedata_offset
311 entry[8] = sidedata_offset
312 entry[9] = sidedata_length
312 entry[9] = sidedata_length
313 entry = tuple(entry)
313 entry = tuple(entry)
314 new = self._pack_entry(entry)
314 new = self._pack_entry(entry)
315 self._extra[rev - self._lgt] = new
315 self._extra[rev - self._lgt] = new
316
316
317 def _unpack_entry(self, data):
317 def _unpack_entry(self, data):
318 return self.index_format.unpack(data) + (
318 return self.index_format.unpack(data)
319 revlog_constants.COMP_MODE_INLINE,
320 )
321
319
322 def _pack_entry(self, entry):
320 def _pack_entry(self, entry):
323 return self.index_format.pack(*entry[:10])
321 return self.index_format.pack(*entry[:11])
324
322
325 def entry_binary(self, rev):
323 def entry_binary(self, rev):
326 """return the raw binary string representing a revision"""
324 """return the raw binary string representing a revision"""
327 entry = self[rev]
325 entry = self[rev]
328 return self._pack_entry(entry)
326 return self._pack_entry(entry)
329
327
330 def pack_header(self, header):
328 def pack_header(self, header):
331 """pack header information as binary"""
329 """pack header information as binary"""
332 msg = 'version header should go in the docket, not the index: %d'
330 msg = 'version header should go in the docket, not the index: %d'
333 msg %= header
331 msg %= header
334 raise error.ProgrammingError(msg)
332 raise error.ProgrammingError(msg)
335
333
336
334
337 class IndexObject2(Index2Mixin, IndexObject):
335 class IndexObject2(Index2Mixin, IndexObject):
338 pass
336 pass
339
337
340
338
341 class InlinedIndexObject2(Index2Mixin, InlinedIndexObject):
339 class InlinedIndexObject2(Index2Mixin, InlinedIndexObject):
342 def _inline_scan(self, lgt):
340 def _inline_scan(self, lgt):
343 sidedata_length_pos = 72
341 sidedata_length_pos = 72
344 off = 0
342 off = 0
345 if lgt is not None:
343 if lgt is not None:
346 self._offsets = [0] * lgt
344 self._offsets = [0] * lgt
347 count = 0
345 count = 0
348 while off <= len(self._data) - self.entry_size:
346 while off <= len(self._data) - self.entry_size:
349 start = off + self.big_int_size
347 start = off + self.big_int_size
350 (data_size,) = struct.unpack(
348 (data_size,) = struct.unpack(
351 b'>i',
349 b'>i',
352 self._data[start : start + self.int_size],
350 self._data[start : start + self.int_size],
353 )
351 )
354 start = off + sidedata_length_pos
352 start = off + sidedata_length_pos
355 (side_data_size,) = struct.unpack(
353 (side_data_size,) = struct.unpack(
356 b'>i', self._data[start : start + self.int_size]
354 b'>i', self._data[start : start + self.int_size]
357 )
355 )
358 if lgt is not None:
356 if lgt is not None:
359 self._offsets[count] = off
357 self._offsets[count] = off
360 count += 1
358 count += 1
361 off += self.entry_size + data_size + side_data_size
359 off += self.entry_size + data_size + side_data_size
362 if off != len(self._data):
360 if off != len(self._data):
363 raise ValueError(b"corrupted data")
361 raise ValueError(b"corrupted data")
364 return count
362 return count
365
363
366
364
367 def parse_index_devel_nodemap(data, inline):
365 def parse_index_devel_nodemap(data, inline):
368 """like parse_index2, but alway return a PersistentNodeMapIndexObject"""
366 """like parse_index2, but alway return a PersistentNodeMapIndexObject"""
369 return PersistentNodeMapIndexObject(data), None
367 return PersistentNodeMapIndexObject(data), None
370
368
371
369
372 def parse_dirstate(dmap, copymap, st):
370 def parse_dirstate(dmap, copymap, st):
373 parents = [st[:20], st[20:40]]
371 parents = [st[:20], st[20:40]]
374 # dereference fields so they will be local in loop
372 # dereference fields so they will be local in loop
375 format = b">cllll"
373 format = b">cllll"
376 e_size = struct.calcsize(format)
374 e_size = struct.calcsize(format)
377 pos1 = 40
375 pos1 = 40
378 l = len(st)
376 l = len(st)
379
377
380 # the inner loop
378 # the inner loop
381 while pos1 < l:
379 while pos1 < l:
382 pos2 = pos1 + e_size
380 pos2 = pos1 + e_size
383 e = _unpack(b">cllll", st[pos1:pos2]) # a literal here is faster
381 e = _unpack(b">cllll", st[pos1:pos2]) # a literal here is faster
384 pos1 = pos2 + e[4]
382 pos1 = pos2 + e[4]
385 f = st[pos2:pos1]
383 f = st[pos2:pos1]
386 if b'\0' in f:
384 if b'\0' in f:
387 f, c = f.split(b'\0')
385 f, c = f.split(b'\0')
388 copymap[f] = c
386 copymap[f] = c
389 dmap[f] = e[:4]
387 dmap[f] = e[:4]
390 return parents
388 return parents
391
389
392
390
393 def pack_dirstate(dmap, copymap, pl, now):
391 def pack_dirstate(dmap, copymap, pl, now):
394 now = int(now)
392 now = int(now)
395 cs = stringio()
393 cs = stringio()
396 write = cs.write
394 write = cs.write
397 write(b"".join(pl))
395 write(b"".join(pl))
398 for f, e in pycompat.iteritems(dmap):
396 for f, e in pycompat.iteritems(dmap):
399 if e[0] == b'n' and e[3] == now:
397 if e[0] == b'n' and e[3] == now:
400 # The file was last modified "simultaneously" with the current
398 # The file was last modified "simultaneously" with the current
401 # write to dirstate (i.e. within the same second for file-
399 # write to dirstate (i.e. within the same second for file-
402 # systems with a granularity of 1 sec). This commonly happens
400 # systems with a granularity of 1 sec). This commonly happens
403 # for at least a couple of files on 'update'.
401 # for at least a couple of files on 'update'.
404 # The user could change the file without changing its size
402 # The user could change the file without changing its size
405 # within the same second. Invalidate the file's mtime in
403 # within the same second. Invalidate the file's mtime in
406 # dirstate, forcing future 'status' calls to compare the
404 # dirstate, forcing future 'status' calls to compare the
407 # contents of the file if the size is the same. This prevents
405 # contents of the file if the size is the same. This prevents
408 # mistakenly treating such files as clean.
406 # mistakenly treating such files as clean.
409 e = dirstatetuple(e[0], e[1], e[2], -1)
407 e = dirstatetuple(e[0], e[1], e[2], -1)
410 dmap[f] = e
408 dmap[f] = e
411
409
412 if f in copymap:
410 if f in copymap:
413 f = b"%s\0%s" % (f, copymap[f])
411 f = b"%s\0%s" % (f, copymap[f])
414 e = _pack(b">cllll", e[0], e[1], e[2], e[3], len(f))
412 e = _pack(b">cllll", e[0], e[1], e[2], e[3], len(f))
415 write(e)
413 write(e)
416 write(f)
414 write(f)
417 return cs.getvalue()
415 return cs.getvalue()
@@ -1,164 +1,165 b''
1 # revlogdeltas.py - constant used for revlog logic.
1 # revlogdeltas.py - constant used for revlog logic.
2 #
2 #
3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 # Copyright 2018 Octobus <contact@octobus.net>
4 # Copyright 2018 Octobus <contact@octobus.net>
5 #
5 #
6 # This software may be used and distributed according to the terms of the
6 # This software may be used and distributed according to the terms of the
7 # GNU General Public License version 2 or any later version.
7 # GNU General Public License version 2 or any later version.
8 """Helper class to compute deltas stored inside revlogs"""
8 """Helper class to compute deltas stored inside revlogs"""
9
9
10 from __future__ import absolute_import
10 from __future__ import absolute_import
11
11
12 import struct
12 import struct
13
13
14 from ..interfaces import repository
14 from ..interfaces import repository
15
15
16 ### Internal utily constants
16 ### Internal utily constants
17
17
18 KIND_CHANGELOG = 1001 # over 256 to not be comparable with a bytes
18 KIND_CHANGELOG = 1001 # over 256 to not be comparable with a bytes
19 KIND_MANIFESTLOG = 1002
19 KIND_MANIFESTLOG = 1002
20 KIND_FILELOG = 1003
20 KIND_FILELOG = 1003
21 KIND_OTHER = 1004
21 KIND_OTHER = 1004
22
22
23 ALL_KINDS = {
23 ALL_KINDS = {
24 KIND_CHANGELOG,
24 KIND_CHANGELOG,
25 KIND_MANIFESTLOG,
25 KIND_MANIFESTLOG,
26 KIND_FILELOG,
26 KIND_FILELOG,
27 KIND_OTHER,
27 KIND_OTHER,
28 }
28 }
29
29
30 ### main revlog header
30 ### main revlog header
31
31
32 INDEX_HEADER = struct.Struct(b">I")
32 INDEX_HEADER = struct.Struct(b">I")
33
33
34 ## revlog version
34 ## revlog version
35 REVLOGV0 = 0
35 REVLOGV0 = 0
36 REVLOGV1 = 1
36 REVLOGV1 = 1
37 # Dummy value until file format is finalized.
37 # Dummy value until file format is finalized.
38 REVLOGV2 = 0xDEAD
38 REVLOGV2 = 0xDEAD
39
39
40 ## global revlog header flags
40 ## global revlog header flags
41 # Shared across v1 and v2.
41 # Shared across v1 and v2.
42 FLAG_INLINE_DATA = 1 << 16
42 FLAG_INLINE_DATA = 1 << 16
43 # Only used by v1, implied by v2.
43 # Only used by v1, implied by v2.
44 FLAG_GENERALDELTA = 1 << 17
44 FLAG_GENERALDELTA = 1 << 17
45 REVLOG_DEFAULT_FLAGS = FLAG_INLINE_DATA
45 REVLOG_DEFAULT_FLAGS = FLAG_INLINE_DATA
46 REVLOG_DEFAULT_FORMAT = REVLOGV1
46 REVLOG_DEFAULT_FORMAT = REVLOGV1
47 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
47 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
48 REVLOGV0_FLAGS = 0
48 REVLOGV0_FLAGS = 0
49 REVLOGV1_FLAGS = FLAG_INLINE_DATA | FLAG_GENERALDELTA
49 REVLOGV1_FLAGS = FLAG_INLINE_DATA | FLAG_GENERALDELTA
50 REVLOGV2_FLAGS = FLAG_INLINE_DATA
50 REVLOGV2_FLAGS = FLAG_INLINE_DATA
51
51
52 ### individual entry
52 ### individual entry
53
53
54 ## index v0:
54 ## index v0:
55 # 4 bytes: offset
55 # 4 bytes: offset
56 # 4 bytes: compressed length
56 # 4 bytes: compressed length
57 # 4 bytes: base rev
57 # 4 bytes: base rev
58 # 4 bytes: link rev
58 # 4 bytes: link rev
59 # 20 bytes: parent 1 nodeid
59 # 20 bytes: parent 1 nodeid
60 # 20 bytes: parent 2 nodeid
60 # 20 bytes: parent 2 nodeid
61 # 20 bytes: nodeid
61 # 20 bytes: nodeid
62 INDEX_ENTRY_V0 = struct.Struct(b">4l20s20s20s")
62 INDEX_ENTRY_V0 = struct.Struct(b">4l20s20s20s")
63
63
64 ## index v1
64 ## index v1
65 # 6 bytes: offset
65 # 6 bytes: offset
66 # 2 bytes: flags
66 # 2 bytes: flags
67 # 4 bytes: compressed length
67 # 4 bytes: compressed length
68 # 4 bytes: uncompressed length
68 # 4 bytes: uncompressed length
69 # 4 bytes: base rev
69 # 4 bytes: base rev
70 # 4 bytes: link rev
70 # 4 bytes: link rev
71 # 4 bytes: parent 1 rev
71 # 4 bytes: parent 1 rev
72 # 4 bytes: parent 2 rev
72 # 4 bytes: parent 2 rev
73 # 32 bytes: nodeid
73 # 32 bytes: nodeid
74 INDEX_ENTRY_V1 = struct.Struct(b">Qiiiiii20s12x")
74 INDEX_ENTRY_V1 = struct.Struct(b">Qiiiiii20s12x")
75 assert INDEX_ENTRY_V1.size == 32 * 2
75 assert INDEX_ENTRY_V1.size == 32 * 2
76
76
77 # 6 bytes: offset
77 # 6 bytes: offset
78 # 2 bytes: flags
78 # 2 bytes: flags
79 # 4 bytes: compressed length
79 # 4 bytes: compressed length
80 # 4 bytes: uncompressed length
80 # 4 bytes: uncompressed length
81 # 4 bytes: base rev
81 # 4 bytes: base rev
82 # 4 bytes: link rev
82 # 4 bytes: link rev
83 # 4 bytes: parent 1 rev
83 # 4 bytes: parent 1 rev
84 # 4 bytes: parent 2 rev
84 # 4 bytes: parent 2 rev
85 # 32 bytes: nodeid
85 # 32 bytes: nodeid
86 # 8 bytes: sidedata offset
86 # 8 bytes: sidedata offset
87 # 4 bytes: sidedata compressed length
87 # 4 bytes: sidedata compressed length
88 # 20 bytes: Padding to align to 96 bytes (see RevlogV2Plan wiki page)
88 # 1 bytes: compression mode (2 lower bit are data_compression_mode)
89 INDEX_ENTRY_V2 = struct.Struct(b">Qiiiiii20s12xQi20x")
89 # 19 bytes: Padding to align to 96 bytes (see RevlogV2Plan wiki page)
90 assert INDEX_ENTRY_V2.size == 32 * 3
90 INDEX_ENTRY_V2 = struct.Struct(b">Qiiiiii20s12xQiB19x")
91 assert INDEX_ENTRY_V2.size == 32 * 3, INDEX_ENTRY_V2.size
91
92
92 # revlog index flags
93 # revlog index flags
93
94
94 # For historical reasons, revlog's internal flags were exposed via the
95 # For historical reasons, revlog's internal flags were exposed via the
95 # wire protocol and are even exposed in parts of the storage APIs.
96 # wire protocol and are even exposed in parts of the storage APIs.
96
97
97 # revision has censor metadata, must be verified
98 # revision has censor metadata, must be verified
98 REVIDX_ISCENSORED = repository.REVISION_FLAG_CENSORED
99 REVIDX_ISCENSORED = repository.REVISION_FLAG_CENSORED
99 # revision hash does not match data (narrowhg)
100 # revision hash does not match data (narrowhg)
100 REVIDX_ELLIPSIS = repository.REVISION_FLAG_ELLIPSIS
101 REVIDX_ELLIPSIS = repository.REVISION_FLAG_ELLIPSIS
101 # revision data is stored externally
102 # revision data is stored externally
102 REVIDX_EXTSTORED = repository.REVISION_FLAG_EXTSTORED
103 REVIDX_EXTSTORED = repository.REVISION_FLAG_EXTSTORED
103 # revision changes files in a way that could affect copy tracing.
104 # revision changes files in a way that could affect copy tracing.
104 REVIDX_HASCOPIESINFO = repository.REVISION_FLAG_HASCOPIESINFO
105 REVIDX_HASCOPIESINFO = repository.REVISION_FLAG_HASCOPIESINFO
105 REVIDX_DEFAULT_FLAGS = 0
106 REVIDX_DEFAULT_FLAGS = 0
106 # stable order in which flags need to be processed and their processors applied
107 # stable order in which flags need to be processed and their processors applied
107 REVIDX_FLAGS_ORDER = [
108 REVIDX_FLAGS_ORDER = [
108 REVIDX_ISCENSORED,
109 REVIDX_ISCENSORED,
109 REVIDX_ELLIPSIS,
110 REVIDX_ELLIPSIS,
110 REVIDX_EXTSTORED,
111 REVIDX_EXTSTORED,
111 REVIDX_HASCOPIESINFO,
112 REVIDX_HASCOPIESINFO,
112 ]
113 ]
113
114
114 # bitmark for flags that could cause rawdata content change
115 # bitmark for flags that could cause rawdata content change
115 REVIDX_RAWTEXT_CHANGING_FLAGS = REVIDX_ISCENSORED | REVIDX_EXTSTORED
116 REVIDX_RAWTEXT_CHANGING_FLAGS = REVIDX_ISCENSORED | REVIDX_EXTSTORED
116
117
117 ## chunk compression mode constants:
118 ## chunk compression mode constants:
118 # These constants are used in revlog version >=2 to denote the compression used
119 # These constants are used in revlog version >=2 to denote the compression used
119 # for a chunk.
120 # for a chunk.
120
121
121 # Chunk use a compression mode stored "inline" at the start of the chunk
122 # Chunk use a compression mode stored "inline" at the start of the chunk
122 # itself. This is the mode always used for revlog version "0" and "1"
123 # itself. This is the mode always used for revlog version "0" and "1"
123 COMP_MODE_INLINE = 2
124 COMP_MODE_INLINE = 2
124
125
125 SUPPORTED_FLAGS = {
126 SUPPORTED_FLAGS = {
126 REVLOGV0: REVLOGV0_FLAGS,
127 REVLOGV0: REVLOGV0_FLAGS,
127 REVLOGV1: REVLOGV1_FLAGS,
128 REVLOGV1: REVLOGV1_FLAGS,
128 REVLOGV2: REVLOGV2_FLAGS,
129 REVLOGV2: REVLOGV2_FLAGS,
129 }
130 }
130
131
131 _no = lambda flags: False
132 _no = lambda flags: False
132 _yes = lambda flags: True
133 _yes = lambda flags: True
133
134
134
135
135 def _from_flag(flag):
136 def _from_flag(flag):
136 return lambda flags: bool(flags & flag)
137 return lambda flags: bool(flags & flag)
137
138
138
139
139 FEATURES_BY_VERSION = {
140 FEATURES_BY_VERSION = {
140 REVLOGV0: {
141 REVLOGV0: {
141 b'inline': _no,
142 b'inline': _no,
142 b'generaldelta': _no,
143 b'generaldelta': _no,
143 b'sidedata': False,
144 b'sidedata': False,
144 b'docket': False,
145 b'docket': False,
145 },
146 },
146 REVLOGV1: {
147 REVLOGV1: {
147 b'inline': _from_flag(FLAG_INLINE_DATA),
148 b'inline': _from_flag(FLAG_INLINE_DATA),
148 b'generaldelta': _from_flag(FLAG_GENERALDELTA),
149 b'generaldelta': _from_flag(FLAG_GENERALDELTA),
149 b'sidedata': False,
150 b'sidedata': False,
150 b'docket': False,
151 b'docket': False,
151 },
152 },
152 REVLOGV2: {
153 REVLOGV2: {
153 # The point of inline-revlog is to reduce the number of files used in
154 # The point of inline-revlog is to reduce the number of files used in
154 # the store. Using a docket defeat this purpose. So we needs other
155 # the store. Using a docket defeat this purpose. So we needs other
155 # means to reduce the number of files for revlogv2.
156 # means to reduce the number of files for revlogv2.
156 b'inline': _no,
157 b'inline': _no,
157 b'generaldelta': _yes,
158 b'generaldelta': _yes,
158 b'sidedata': True,
159 b'sidedata': True,
159 b'docket': True,
160 b'docket': True,
160 },
161 },
161 }
162 }
162
163
163
164
164 SPARSE_REVLOG_MAX_CHAIN_LENGTH = 1000
165 SPARSE_REVLOG_MAX_CHAIN_LENGTH = 1000
General Comments 0
You need to be logged in to leave comments. Login now