##// END OF EJS Templates
revlog: have an explicit "pack_header" method...
marmoute -
r47811:d57386e5 default
parent child Browse files
Show More
@@ -1,3018 +1,3028 b''
1 /*
1 /*
2 parsers.c - efficient content parsing
2 parsers.c - efficient content parsing
3
3
4 Copyright 2008 Olivia Mackall <olivia@selenic.com> and others
4 Copyright 2008 Olivia Mackall <olivia@selenic.com> and others
5
5
6 This software may be used and distributed according to the terms of
6 This software may be used and distributed according to the terms of
7 the GNU General Public License, incorporated herein by reference.
7 the GNU General Public License, incorporated herein by reference.
8 */
8 */
9
9
10 #define PY_SSIZE_T_CLEAN
10 #define PY_SSIZE_T_CLEAN
11 #include <Python.h>
11 #include <Python.h>
12 #include <assert.h>
12 #include <assert.h>
13 #include <ctype.h>
13 #include <ctype.h>
14 #include <limits.h>
14 #include <limits.h>
15 #include <stddef.h>
15 #include <stddef.h>
16 #include <stdlib.h>
16 #include <stdlib.h>
17 #include <string.h>
17 #include <string.h>
18 #include <structmember.h>
18 #include <structmember.h>
19
19
20 #include "bitmanipulation.h"
20 #include "bitmanipulation.h"
21 #include "charencode.h"
21 #include "charencode.h"
22 #include "compat.h"
22 #include "compat.h"
23 #include "revlog.h"
23 #include "revlog.h"
24 #include "util.h"
24 #include "util.h"
25
25
26 #ifdef IS_PY3K
26 #ifdef IS_PY3K
27 /* The mapping of Python types is meant to be temporary to get Python
27 /* The mapping of Python types is meant to be temporary to get Python
28 * 3 to compile. We should remove this once Python 3 support is fully
28 * 3 to compile. We should remove this once Python 3 support is fully
29 * supported and proper types are used in the extensions themselves. */
29 * supported and proper types are used in the extensions themselves. */
30 #define PyInt_Check PyLong_Check
30 #define PyInt_Check PyLong_Check
31 #define PyInt_FromLong PyLong_FromLong
31 #define PyInt_FromLong PyLong_FromLong
32 #define PyInt_FromSsize_t PyLong_FromSsize_t
32 #define PyInt_FromSsize_t PyLong_FromSsize_t
33 #define PyInt_AsLong PyLong_AsLong
33 #define PyInt_AsLong PyLong_AsLong
34 #endif
34 #endif
35
35
36 typedef struct indexObjectStruct indexObject;
36 typedef struct indexObjectStruct indexObject;
37
37
38 typedef struct {
38 typedef struct {
39 int children[16];
39 int children[16];
40 } nodetreenode;
40 } nodetreenode;
41
41
42 typedef struct {
42 typedef struct {
43 int abi_version;
43 int abi_version;
44 Py_ssize_t (*index_length)(const indexObject *);
44 Py_ssize_t (*index_length)(const indexObject *);
45 const char *(*index_node)(indexObject *, Py_ssize_t);
45 const char *(*index_node)(indexObject *, Py_ssize_t);
46 int (*index_parents)(PyObject *, int, int *);
46 int (*index_parents)(PyObject *, int, int *);
47 } Revlog_CAPI;
47 } Revlog_CAPI;
48
48
49 /*
49 /*
50 * A base-16 trie for fast node->rev mapping.
50 * A base-16 trie for fast node->rev mapping.
51 *
51 *
52 * Positive value is index of the next node in the trie
52 * Positive value is index of the next node in the trie
53 * Negative value is a leaf: -(rev + 2)
53 * Negative value is a leaf: -(rev + 2)
54 * Zero is empty
54 * Zero is empty
55 */
55 */
56 typedef struct {
56 typedef struct {
57 indexObject *index;
57 indexObject *index;
58 nodetreenode *nodes;
58 nodetreenode *nodes;
59 Py_ssize_t nodelen;
59 Py_ssize_t nodelen;
60 size_t length; /* # nodes in use */
60 size_t length; /* # nodes in use */
61 size_t capacity; /* # nodes allocated */
61 size_t capacity; /* # nodes allocated */
62 int depth; /* maximum depth of tree */
62 int depth; /* maximum depth of tree */
63 int splits; /* # splits performed */
63 int splits; /* # splits performed */
64 } nodetree;
64 } nodetree;
65
65
66 typedef struct {
66 typedef struct {
67 PyObject_HEAD /* ; */
67 PyObject_HEAD /* ; */
68 nodetree nt;
68 nodetree nt;
69 } nodetreeObject;
69 } nodetreeObject;
70
70
71 /*
71 /*
72 * This class has two behaviors.
72 * This class has two behaviors.
73 *
73 *
74 * When used in a list-like way (with integer keys), we decode an
74 * When used in a list-like way (with integer keys), we decode an
75 * entry in a RevlogNG index file on demand. We have limited support for
75 * entry in a RevlogNG index file on demand. We have limited support for
76 * integer-keyed insert and delete, only at elements right before the
76 * integer-keyed insert and delete, only at elements right before the
77 * end.
77 * end.
78 *
78 *
79 * With string keys, we lazily perform a reverse mapping from node to
79 * With string keys, we lazily perform a reverse mapping from node to
80 * rev, using a base-16 trie.
80 * rev, using a base-16 trie.
81 */
81 */
82 struct indexObjectStruct {
82 struct indexObjectStruct {
83 PyObject_HEAD
83 PyObject_HEAD
84 /* Type-specific fields go here. */
84 /* Type-specific fields go here. */
85 PyObject *data; /* raw bytes of index */
85 PyObject *data; /* raw bytes of index */
86 Py_ssize_t nodelen; /* digest size of the hash, 20 for SHA-1 */
86 Py_ssize_t nodelen; /* digest size of the hash, 20 for SHA-1 */
87 PyObject *nullentry; /* fast path for references to null */
87 PyObject *nullentry; /* fast path for references to null */
88 Py_buffer buf; /* buffer of data */
88 Py_buffer buf; /* buffer of data */
89 const char **offsets; /* populated on demand */
89 const char **offsets; /* populated on demand */
90 Py_ssize_t length; /* current on-disk number of elements */
90 Py_ssize_t length; /* current on-disk number of elements */
91 unsigned new_length; /* number of added elements */
91 unsigned new_length; /* number of added elements */
92 unsigned added_length; /* space reserved for added elements */
92 unsigned added_length; /* space reserved for added elements */
93 char *added; /* populated on demand */
93 char *added; /* populated on demand */
94 PyObject *headrevs; /* cache, invalidated on changes */
94 PyObject *headrevs; /* cache, invalidated on changes */
95 PyObject *filteredrevs; /* filtered revs set */
95 PyObject *filteredrevs; /* filtered revs set */
96 nodetree nt; /* base-16 trie */
96 nodetree nt; /* base-16 trie */
97 int ntinitialized; /* 0 or 1 */
97 int ntinitialized; /* 0 or 1 */
98 int ntrev; /* last rev scanned */
98 int ntrev; /* last rev scanned */
99 int ntlookups; /* # lookups */
99 int ntlookups; /* # lookups */
100 int ntmisses; /* # lookups that miss the cache */
100 int ntmisses; /* # lookups that miss the cache */
101 int inlined;
101 int inlined;
102 long hdrsize; /* size of index headers. Differs in v1 v.s. v2 format */
102 long hdrsize; /* size of index headers. Differs in v1 v.s. v2 format */
103 };
103 };
104
104
105 static Py_ssize_t index_length(const indexObject *self)
105 static Py_ssize_t index_length(const indexObject *self)
106 {
106 {
107 return self->length + self->new_length;
107 return self->length + self->new_length;
108 }
108 }
109
109
110 static const char nullid[32] = {0};
110 static const char nullid[32] = {0};
111 static const Py_ssize_t nullrev = -1;
111 static const Py_ssize_t nullrev = -1;
112
112
113 static Py_ssize_t inline_scan(indexObject *self, const char **offsets);
113 static Py_ssize_t inline_scan(indexObject *self, const char **offsets);
114
114
115 static int index_find_node(indexObject *self, const char *node);
115 static int index_find_node(indexObject *self, const char *node);
116
116
117 #if LONG_MAX == 0x7fffffffL
117 #if LONG_MAX == 0x7fffffffL
118 static const char *const v1_tuple_format = PY23("Kiiiiiis#", "Kiiiiiiy#");
118 static const char *const v1_tuple_format = PY23("Kiiiiiis#", "Kiiiiiiy#");
119 static const char *const v2_tuple_format = PY23("Kiiiiiis#Ki", "Kiiiiiiy#Ki");
119 static const char *const v2_tuple_format = PY23("Kiiiiiis#Ki", "Kiiiiiiy#Ki");
120 #else
120 #else
121 static const char *const v1_tuple_format = PY23("kiiiiiis#", "kiiiiiiy#");
121 static const char *const v1_tuple_format = PY23("kiiiiiis#", "kiiiiiiy#");
122 static const char *const v2_tuple_format = PY23("kiiiiiis#ki", "kiiiiiiy#ki");
122 static const char *const v2_tuple_format = PY23("kiiiiiis#ki", "kiiiiiiy#ki");
123 #endif
123 #endif
124
124
125 /* A RevlogNG v1 index entry is 64 bytes long. */
125 /* A RevlogNG v1 index entry is 64 bytes long. */
126 static const long v1_hdrsize = 64;
126 static const long v1_hdrsize = 64;
127
127
128 /* A Revlogv2 index entry is 96 bytes long. */
128 /* A Revlogv2 index entry is 96 bytes long. */
129 static const long v2_hdrsize = 96;
129 static const long v2_hdrsize = 96;
130
130
131 static void raise_revlog_error(void)
131 static void raise_revlog_error(void)
132 {
132 {
133 PyObject *mod = NULL, *dict = NULL, *errclass = NULL;
133 PyObject *mod = NULL, *dict = NULL, *errclass = NULL;
134
134
135 mod = PyImport_ImportModule("mercurial.error");
135 mod = PyImport_ImportModule("mercurial.error");
136 if (mod == NULL) {
136 if (mod == NULL) {
137 goto cleanup;
137 goto cleanup;
138 }
138 }
139
139
140 dict = PyModule_GetDict(mod);
140 dict = PyModule_GetDict(mod);
141 if (dict == NULL) {
141 if (dict == NULL) {
142 goto cleanup;
142 goto cleanup;
143 }
143 }
144 Py_INCREF(dict);
144 Py_INCREF(dict);
145
145
146 errclass = PyDict_GetItemString(dict, "RevlogError");
146 errclass = PyDict_GetItemString(dict, "RevlogError");
147 if (errclass == NULL) {
147 if (errclass == NULL) {
148 PyErr_SetString(PyExc_SystemError,
148 PyErr_SetString(PyExc_SystemError,
149 "could not find RevlogError");
149 "could not find RevlogError");
150 goto cleanup;
150 goto cleanup;
151 }
151 }
152
152
153 /* value of exception is ignored by callers */
153 /* value of exception is ignored by callers */
154 PyErr_SetString(errclass, "RevlogError");
154 PyErr_SetString(errclass, "RevlogError");
155
155
156 cleanup:
156 cleanup:
157 Py_XDECREF(dict);
157 Py_XDECREF(dict);
158 Py_XDECREF(mod);
158 Py_XDECREF(mod);
159 }
159 }
160
160
161 /*
161 /*
162 * Return a pointer to the beginning of a RevlogNG record.
162 * Return a pointer to the beginning of a RevlogNG record.
163 */
163 */
164 static const char *index_deref(indexObject *self, Py_ssize_t pos)
164 static const char *index_deref(indexObject *self, Py_ssize_t pos)
165 {
165 {
166 if (pos >= self->length)
166 if (pos >= self->length)
167 return self->added + (pos - self->length) * self->hdrsize;
167 return self->added + (pos - self->length) * self->hdrsize;
168
168
169 if (self->inlined && pos > 0) {
169 if (self->inlined && pos > 0) {
170 if (self->offsets == NULL) {
170 if (self->offsets == NULL) {
171 Py_ssize_t ret;
171 Py_ssize_t ret;
172 self->offsets =
172 self->offsets =
173 PyMem_Malloc(self->length * sizeof(*self->offsets));
173 PyMem_Malloc(self->length * sizeof(*self->offsets));
174 if (self->offsets == NULL)
174 if (self->offsets == NULL)
175 return (const char *)PyErr_NoMemory();
175 return (const char *)PyErr_NoMemory();
176 ret = inline_scan(self, self->offsets);
176 ret = inline_scan(self, self->offsets);
177 if (ret == -1) {
177 if (ret == -1) {
178 return NULL;
178 return NULL;
179 };
179 };
180 }
180 }
181 return self->offsets[pos];
181 return self->offsets[pos];
182 }
182 }
183
183
184 return (const char *)(self->buf.buf) + pos * self->hdrsize;
184 return (const char *)(self->buf.buf) + pos * self->hdrsize;
185 }
185 }
186
186
187 /*
187 /*
188 * Get parents of the given rev.
188 * Get parents of the given rev.
189 *
189 *
190 * The specified rev must be valid and must not be nullrev. A returned
190 * The specified rev must be valid and must not be nullrev. A returned
191 * parent revision may be nullrev, but is guaranteed to be in valid range.
191 * parent revision may be nullrev, but is guaranteed to be in valid range.
192 */
192 */
193 static inline int index_get_parents(indexObject *self, Py_ssize_t rev, int *ps,
193 static inline int index_get_parents(indexObject *self, Py_ssize_t rev, int *ps,
194 int maxrev)
194 int maxrev)
195 {
195 {
196 const char *data = index_deref(self, rev);
196 const char *data = index_deref(self, rev);
197
197
198 ps[0] = getbe32(data + 24);
198 ps[0] = getbe32(data + 24);
199 ps[1] = getbe32(data + 28);
199 ps[1] = getbe32(data + 28);
200
200
201 /* If index file is corrupted, ps[] may point to invalid revisions. So
201 /* If index file is corrupted, ps[] may point to invalid revisions. So
202 * there is a risk of buffer overflow to trust them unconditionally. */
202 * there is a risk of buffer overflow to trust them unconditionally. */
203 if (ps[0] < -1 || ps[0] > maxrev || ps[1] < -1 || ps[1] > maxrev) {
203 if (ps[0] < -1 || ps[0] > maxrev || ps[1] < -1 || ps[1] > maxrev) {
204 PyErr_SetString(PyExc_ValueError, "parent out of range");
204 PyErr_SetString(PyExc_ValueError, "parent out of range");
205 return -1;
205 return -1;
206 }
206 }
207 return 0;
207 return 0;
208 }
208 }
209
209
210 /*
210 /*
211 * Get parents of the given rev.
211 * Get parents of the given rev.
212 *
212 *
213 * If the specified rev is out of range, IndexError will be raised. If the
213 * If the specified rev is out of range, IndexError will be raised. If the
214 * revlog entry is corrupted, ValueError may be raised.
214 * revlog entry is corrupted, ValueError may be raised.
215 *
215 *
216 * Returns 0 on success or -1 on failure.
216 * Returns 0 on success or -1 on failure.
217 */
217 */
218 static int HgRevlogIndex_GetParents(PyObject *op, int rev, int *ps)
218 static int HgRevlogIndex_GetParents(PyObject *op, int rev, int *ps)
219 {
219 {
220 int tiprev;
220 int tiprev;
221 if (!op || !HgRevlogIndex_Check(op) || !ps) {
221 if (!op || !HgRevlogIndex_Check(op) || !ps) {
222 PyErr_BadInternalCall();
222 PyErr_BadInternalCall();
223 return -1;
223 return -1;
224 }
224 }
225 tiprev = (int)index_length((indexObject *)op) - 1;
225 tiprev = (int)index_length((indexObject *)op) - 1;
226 if (rev < -1 || rev > tiprev) {
226 if (rev < -1 || rev > tiprev) {
227 PyErr_Format(PyExc_IndexError, "rev out of range: %d", rev);
227 PyErr_Format(PyExc_IndexError, "rev out of range: %d", rev);
228 return -1;
228 return -1;
229 } else if (rev == -1) {
229 } else if (rev == -1) {
230 ps[0] = ps[1] = -1;
230 ps[0] = ps[1] = -1;
231 return 0;
231 return 0;
232 } else {
232 } else {
233 return index_get_parents((indexObject *)op, rev, ps, tiprev);
233 return index_get_parents((indexObject *)op, rev, ps, tiprev);
234 }
234 }
235 }
235 }
236
236
237 static inline int64_t index_get_start(indexObject *self, Py_ssize_t rev)
237 static inline int64_t index_get_start(indexObject *self, Py_ssize_t rev)
238 {
238 {
239 const char *data;
239 const char *data;
240 uint64_t offset;
240 uint64_t offset;
241
241
242 if (rev == nullrev)
242 if (rev == nullrev)
243 return 0;
243 return 0;
244
244
245 data = index_deref(self, rev);
245 data = index_deref(self, rev);
246 offset = getbe32(data + 4);
246 offset = getbe32(data + 4);
247 if (rev == 0) {
247 if (rev == 0) {
248 /* mask out version number for the first entry */
248 /* mask out version number for the first entry */
249 offset &= 0xFFFF;
249 offset &= 0xFFFF;
250 } else {
250 } else {
251 uint32_t offset_high = getbe32(data);
251 uint32_t offset_high = getbe32(data);
252 offset |= ((uint64_t)offset_high) << 32;
252 offset |= ((uint64_t)offset_high) << 32;
253 }
253 }
254 return (int64_t)(offset >> 16);
254 return (int64_t)(offset >> 16);
255 }
255 }
256
256
257 static inline int index_get_length(indexObject *self, Py_ssize_t rev)
257 static inline int index_get_length(indexObject *self, Py_ssize_t rev)
258 {
258 {
259 const char *data;
259 const char *data;
260 int tmp;
260 int tmp;
261
261
262 if (rev == nullrev)
262 if (rev == nullrev)
263 return 0;
263 return 0;
264
264
265 data = index_deref(self, rev);
265 data = index_deref(self, rev);
266
266
267 tmp = (int)getbe32(data + 8);
267 tmp = (int)getbe32(data + 8);
268 if (tmp < 0) {
268 if (tmp < 0) {
269 PyErr_Format(PyExc_OverflowError,
269 PyErr_Format(PyExc_OverflowError,
270 "revlog entry size out of bound (%d)", tmp);
270 "revlog entry size out of bound (%d)", tmp);
271 return -1;
271 return -1;
272 }
272 }
273 return tmp;
273 return tmp;
274 }
274 }
275
275
276 /*
276 /*
277 * RevlogNG format (all in big endian, data may be inlined):
277 * RevlogNG format (all in big endian, data may be inlined):
278 * 6 bytes: offset
278 * 6 bytes: offset
279 * 2 bytes: flags
279 * 2 bytes: flags
280 * 4 bytes: compressed length
280 * 4 bytes: compressed length
281 * 4 bytes: uncompressed length
281 * 4 bytes: uncompressed length
282 * 4 bytes: base revision
282 * 4 bytes: base revision
283 * 4 bytes: link revision
283 * 4 bytes: link revision
284 * 4 bytes: parent 1 revision
284 * 4 bytes: parent 1 revision
285 * 4 bytes: parent 2 revision
285 * 4 bytes: parent 2 revision
286 * 32 bytes: nodeid (only 20 bytes used with SHA-1)
286 * 32 bytes: nodeid (only 20 bytes used with SHA-1)
287 */
287 */
288 static PyObject *index_get(indexObject *self, Py_ssize_t pos)
288 static PyObject *index_get(indexObject *self, Py_ssize_t pos)
289 {
289 {
290 uint64_t offset_flags, sidedata_offset;
290 uint64_t offset_flags, sidedata_offset;
291 int comp_len, uncomp_len, base_rev, link_rev, parent_1, parent_2,
291 int comp_len, uncomp_len, base_rev, link_rev, parent_1, parent_2,
292 sidedata_comp_len;
292 sidedata_comp_len;
293 const char *c_node_id;
293 const char *c_node_id;
294 const char *data;
294 const char *data;
295 Py_ssize_t length = index_length(self);
295 Py_ssize_t length = index_length(self);
296
296
297 if (pos == nullrev) {
297 if (pos == nullrev) {
298 Py_INCREF(self->nullentry);
298 Py_INCREF(self->nullentry);
299 return self->nullentry;
299 return self->nullentry;
300 }
300 }
301
301
302 if (pos < 0 || pos >= length) {
302 if (pos < 0 || pos >= length) {
303 PyErr_SetString(PyExc_IndexError, "revlog index out of range");
303 PyErr_SetString(PyExc_IndexError, "revlog index out of range");
304 return NULL;
304 return NULL;
305 }
305 }
306
306
307 data = index_deref(self, pos);
307 data = index_deref(self, pos);
308 if (data == NULL)
308 if (data == NULL)
309 return NULL;
309 return NULL;
310
310
311 offset_flags = getbe32(data + 4);
311 offset_flags = getbe32(data + 4);
312 /*
312 /*
313 * The first entry on-disk needs the version number masked out,
313 * The first entry on-disk needs the version number masked out,
314 * but this doesn't apply if entries are added to an empty index.
314 * but this doesn't apply if entries are added to an empty index.
315 */
315 */
316 if (self->length && pos == 0)
316 if (self->length && pos == 0)
317 offset_flags &= 0xFFFF;
317 offset_flags &= 0xFFFF;
318 else {
318 else {
319 uint32_t offset_high = getbe32(data);
319 uint32_t offset_high = getbe32(data);
320 offset_flags |= ((uint64_t)offset_high) << 32;
320 offset_flags |= ((uint64_t)offset_high) << 32;
321 }
321 }
322
322
323 comp_len = getbe32(data + 8);
323 comp_len = getbe32(data + 8);
324 uncomp_len = getbe32(data + 12);
324 uncomp_len = getbe32(data + 12);
325 base_rev = getbe32(data + 16);
325 base_rev = getbe32(data + 16);
326 link_rev = getbe32(data + 20);
326 link_rev = getbe32(data + 20);
327 parent_1 = getbe32(data + 24);
327 parent_1 = getbe32(data + 24);
328 parent_2 = getbe32(data + 28);
328 parent_2 = getbe32(data + 28);
329 c_node_id = data + 32;
329 c_node_id = data + 32;
330
330
331 if (self->hdrsize == v1_hdrsize) {
331 if (self->hdrsize == v1_hdrsize) {
332 return Py_BuildValue(v1_tuple_format, offset_flags, comp_len,
332 return Py_BuildValue(v1_tuple_format, offset_flags, comp_len,
333 uncomp_len, base_rev, link_rev, parent_1,
333 uncomp_len, base_rev, link_rev, parent_1,
334 parent_2, c_node_id, self->nodelen);
334 parent_2, c_node_id, self->nodelen);
335 } else {
335 } else {
336 sidedata_offset = getbe64(data + 64);
336 sidedata_offset = getbe64(data + 64);
337 sidedata_comp_len = getbe32(data + 72);
337 sidedata_comp_len = getbe32(data + 72);
338
338
339 return Py_BuildValue(v2_tuple_format, offset_flags, comp_len,
339 return Py_BuildValue(v2_tuple_format, offset_flags, comp_len,
340 uncomp_len, base_rev, link_rev, parent_1,
340 uncomp_len, base_rev, link_rev, parent_1,
341 parent_2, c_node_id, self->nodelen,
341 parent_2, c_node_id, self->nodelen,
342 sidedata_offset, sidedata_comp_len);
342 sidedata_offset, sidedata_comp_len);
343 }
343 }
344 }
344 }
345 /*
345 /*
346 * Pack header information in binary
347 */
348 static PyObject *index_pack_header(indexObject *self, PyObject *args)
349 {
350 int header;
351 char out[4];
352 if (!PyArg_ParseTuple(args, "I", &header)) {
353 return NULL;
354 }
355 putbe32(header, out);
356 return PyBytes_FromStringAndSize(out, 4);
357 }
358 /*
346 * Return the raw binary string representing a revision
359 * Return the raw binary string representing a revision
347 */
360 */
348 static PyObject *index_entry_binary(indexObject *self, PyObject *args)
361 static PyObject *index_entry_binary(indexObject *self, PyObject *value)
349 {
362 {
350 long rev;
363 long rev;
351 int header;
352 const char *data;
364 const char *data;
353 char entry[v2_hdrsize];
354
355 Py_ssize_t length = index_length(self);
365 Py_ssize_t length = index_length(self);
356
366
357 if (!PyArg_ParseTuple(args, "lI", &rev, &header)) {
367 if (!pylong_to_long(value, &rev)) {
358 return NULL;
368 return NULL;
359 }
369 }
360 if (rev < 0 || rev >= length) {
370 if (rev < 0 || rev >= length) {
361 PyErr_Format(PyExc_ValueError, "revlog index out of range: %ld",
371 PyErr_Format(PyExc_ValueError, "revlog index out of range: %ld",
362 rev);
372 rev);
363 return NULL;
373 return NULL;
364 };
374 };
365
375
366 data = index_deref(self, rev);
376 data = index_deref(self, rev);
367 if (data == NULL)
377 if (data == NULL)
368 return NULL;
378 return NULL;
369 if (rev == 0) {
379 if (rev == 0) {
370 /* put the header at the start of the first entry */
380 /* the header is eating the start of the first entry */
371 memcpy(entry, data, self->hdrsize);
381 return PyBytes_FromStringAndSize(data + 4, self->hdrsize - 4);
372 putbe32(header, entry);
373 return PyBytes_FromStringAndSize(entry, self->hdrsize);
374 }
382 }
375 return PyBytes_FromStringAndSize(data, self->hdrsize);
383 return PyBytes_FromStringAndSize(data, self->hdrsize);
376 }
384 }
377
385
378 /*
386 /*
379 * Return the hash of node corresponding to the given rev.
387 * Return the hash of node corresponding to the given rev.
380 */
388 */
381 static const char *index_node(indexObject *self, Py_ssize_t pos)
389 static const char *index_node(indexObject *self, Py_ssize_t pos)
382 {
390 {
383 Py_ssize_t length = index_length(self);
391 Py_ssize_t length = index_length(self);
384 const char *data;
392 const char *data;
385
393
386 if (pos == nullrev)
394 if (pos == nullrev)
387 return nullid;
395 return nullid;
388
396
389 if (pos >= length)
397 if (pos >= length)
390 return NULL;
398 return NULL;
391
399
392 data = index_deref(self, pos);
400 data = index_deref(self, pos);
393 return data ? data + 32 : NULL;
401 return data ? data + 32 : NULL;
394 }
402 }
395
403
396 /*
404 /*
397 * Return the hash of the node corresponding to the given rev. The
405 * Return the hash of the node corresponding to the given rev. The
398 * rev is assumed to be existing. If not, an exception is set.
406 * rev is assumed to be existing. If not, an exception is set.
399 */
407 */
400 static const char *index_node_existing(indexObject *self, Py_ssize_t pos)
408 static const char *index_node_existing(indexObject *self, Py_ssize_t pos)
401 {
409 {
402 const char *node = index_node(self, pos);
410 const char *node = index_node(self, pos);
403 if (node == NULL) {
411 if (node == NULL) {
404 PyErr_Format(PyExc_IndexError, "could not access rev %d",
412 PyErr_Format(PyExc_IndexError, "could not access rev %d",
405 (int)pos);
413 (int)pos);
406 }
414 }
407 return node;
415 return node;
408 }
416 }
409
417
410 static int nt_insert(nodetree *self, const char *node, int rev);
418 static int nt_insert(nodetree *self, const char *node, int rev);
411
419
412 static int node_check(Py_ssize_t nodelen, PyObject *obj, char **node)
420 static int node_check(Py_ssize_t nodelen, PyObject *obj, char **node)
413 {
421 {
414 Py_ssize_t thisnodelen;
422 Py_ssize_t thisnodelen;
415 if (PyBytes_AsStringAndSize(obj, node, &thisnodelen) == -1)
423 if (PyBytes_AsStringAndSize(obj, node, &thisnodelen) == -1)
416 return -1;
424 return -1;
417 if (nodelen == thisnodelen)
425 if (nodelen == thisnodelen)
418 return 0;
426 return 0;
419 PyErr_Format(PyExc_ValueError, "node len %zd != expected node len %zd",
427 PyErr_Format(PyExc_ValueError, "node len %zd != expected node len %zd",
420 thisnodelen, nodelen);
428 thisnodelen, nodelen);
421 return -1;
429 return -1;
422 }
430 }
423
431
424 static PyObject *index_append(indexObject *self, PyObject *obj)
432 static PyObject *index_append(indexObject *self, PyObject *obj)
425 {
433 {
426 uint64_t offset_flags, sidedata_offset;
434 uint64_t offset_flags, sidedata_offset;
427 int rev, comp_len, uncomp_len, base_rev, link_rev, parent_1, parent_2;
435 int rev, comp_len, uncomp_len, base_rev, link_rev, parent_1, parent_2;
428 Py_ssize_t c_node_id_len, sidedata_comp_len;
436 Py_ssize_t c_node_id_len, sidedata_comp_len;
429 const char *c_node_id;
437 const char *c_node_id;
430 char *data;
438 char *data;
431
439
432 if (self->hdrsize == v1_hdrsize) {
440 if (self->hdrsize == v1_hdrsize) {
433 if (!PyArg_ParseTuple(obj, v1_tuple_format, &offset_flags,
441 if (!PyArg_ParseTuple(obj, v1_tuple_format, &offset_flags,
434 &comp_len, &uncomp_len, &base_rev,
442 &comp_len, &uncomp_len, &base_rev,
435 &link_rev, &parent_1, &parent_2,
443 &link_rev, &parent_1, &parent_2,
436 &c_node_id, &c_node_id_len)) {
444 &c_node_id, &c_node_id_len)) {
437 PyErr_SetString(PyExc_TypeError, "8-tuple required");
445 PyErr_SetString(PyExc_TypeError, "8-tuple required");
438 return NULL;
446 return NULL;
439 }
447 }
440 } else {
448 } else {
441 if (!PyArg_ParseTuple(obj, v2_tuple_format, &offset_flags,
449 if (!PyArg_ParseTuple(obj, v2_tuple_format, &offset_flags,
442 &comp_len, &uncomp_len, &base_rev,
450 &comp_len, &uncomp_len, &base_rev,
443 &link_rev, &parent_1, &parent_2,
451 &link_rev, &parent_1, &parent_2,
444 &c_node_id, &c_node_id_len,
452 &c_node_id, &c_node_id_len,
445 &sidedata_offset, &sidedata_comp_len)) {
453 &sidedata_offset, &sidedata_comp_len)) {
446 PyErr_SetString(PyExc_TypeError, "10-tuple required");
454 PyErr_SetString(PyExc_TypeError, "10-tuple required");
447 return NULL;
455 return NULL;
448 }
456 }
449 }
457 }
450
458
451 if (c_node_id_len != self->nodelen) {
459 if (c_node_id_len != self->nodelen) {
452 PyErr_SetString(PyExc_TypeError, "invalid node");
460 PyErr_SetString(PyExc_TypeError, "invalid node");
453 return NULL;
461 return NULL;
454 }
462 }
455
463
456 if (self->new_length == self->added_length) {
464 if (self->new_length == self->added_length) {
457 size_t new_added_length =
465 size_t new_added_length =
458 self->added_length ? self->added_length * 2 : 4096;
466 self->added_length ? self->added_length * 2 : 4096;
459 void *new_added = PyMem_Realloc(self->added, new_added_length *
467 void *new_added = PyMem_Realloc(self->added, new_added_length *
460 self->hdrsize);
468 self->hdrsize);
461 if (!new_added)
469 if (!new_added)
462 return PyErr_NoMemory();
470 return PyErr_NoMemory();
463 self->added = new_added;
471 self->added = new_added;
464 self->added_length = new_added_length;
472 self->added_length = new_added_length;
465 }
473 }
466 rev = self->length + self->new_length;
474 rev = self->length + self->new_length;
467 data = self->added + self->hdrsize * self->new_length++;
475 data = self->added + self->hdrsize * self->new_length++;
468 putbe32(offset_flags >> 32, data);
476 putbe32(offset_flags >> 32, data);
469 putbe32(offset_flags & 0xffffffffU, data + 4);
477 putbe32(offset_flags & 0xffffffffU, data + 4);
470 putbe32(comp_len, data + 8);
478 putbe32(comp_len, data + 8);
471 putbe32(uncomp_len, data + 12);
479 putbe32(uncomp_len, data + 12);
472 putbe32(base_rev, data + 16);
480 putbe32(base_rev, data + 16);
473 putbe32(link_rev, data + 20);
481 putbe32(link_rev, data + 20);
474 putbe32(parent_1, data + 24);
482 putbe32(parent_1, data + 24);
475 putbe32(parent_2, data + 28);
483 putbe32(parent_2, data + 28);
476 memcpy(data + 32, c_node_id, c_node_id_len);
484 memcpy(data + 32, c_node_id, c_node_id_len);
477 /* Padding since SHA-1 is only 20 bytes for now */
485 /* Padding since SHA-1 is only 20 bytes for now */
478 memset(data + 32 + c_node_id_len, 0, 32 - c_node_id_len);
486 memset(data + 32 + c_node_id_len, 0, 32 - c_node_id_len);
479 if (self->hdrsize != v1_hdrsize) {
487 if (self->hdrsize != v1_hdrsize) {
480 putbe64(sidedata_offset, data + 64);
488 putbe64(sidedata_offset, data + 64);
481 putbe32(sidedata_comp_len, data + 72);
489 putbe32(sidedata_comp_len, data + 72);
482 /* Padding for 96 bytes alignment */
490 /* Padding for 96 bytes alignment */
483 memset(data + 76, 0, self->hdrsize - 76);
491 memset(data + 76, 0, self->hdrsize - 76);
484 }
492 }
485
493
486 if (self->ntinitialized)
494 if (self->ntinitialized)
487 nt_insert(&self->nt, c_node_id, rev);
495 nt_insert(&self->nt, c_node_id, rev);
488
496
489 Py_CLEAR(self->headrevs);
497 Py_CLEAR(self->headrevs);
490 Py_RETURN_NONE;
498 Py_RETURN_NONE;
491 }
499 }
492
500
493 /* Replace an existing index entry's sidedata offset and length with new ones.
501 /* Replace an existing index entry's sidedata offset and length with new ones.
494 This cannot be used outside of the context of sidedata rewriting,
502 This cannot be used outside of the context of sidedata rewriting,
495 inside the transaction that creates the given revision. */
503 inside the transaction that creates the given revision. */
496 static PyObject *index_replace_sidedata_info(indexObject *self, PyObject *args)
504 static PyObject *index_replace_sidedata_info(indexObject *self, PyObject *args)
497 {
505 {
498 uint64_t sidedata_offset;
506 uint64_t sidedata_offset;
499 int rev;
507 int rev;
500 Py_ssize_t sidedata_comp_len;
508 Py_ssize_t sidedata_comp_len;
501 char *data;
509 char *data;
502 #if LONG_MAX == 0x7fffffffL
510 #if LONG_MAX == 0x7fffffffL
503 const char *const sidedata_format = PY23("nKi", "nKi");
511 const char *const sidedata_format = PY23("nKi", "nKi");
504 #else
512 #else
505 const char *const sidedata_format = PY23("nki", "nki");
513 const char *const sidedata_format = PY23("nki", "nki");
506 #endif
514 #endif
507
515
508 if (self->hdrsize == v1_hdrsize || self->inlined) {
516 if (self->hdrsize == v1_hdrsize || self->inlined) {
509 /*
517 /*
510 There is a bug in the transaction handling when going from an
518 There is a bug in the transaction handling when going from an
511 inline revlog to a separate index and data file. Turn it off until
519 inline revlog to a separate index and data file. Turn it off until
512 it's fixed, since v2 revlogs sometimes get rewritten on exchange.
520 it's fixed, since v2 revlogs sometimes get rewritten on exchange.
513 See issue6485.
521 See issue6485.
514 */
522 */
515 raise_revlog_error();
523 raise_revlog_error();
516 return NULL;
524 return NULL;
517 }
525 }
518
526
519 if (!PyArg_ParseTuple(args, sidedata_format, &rev, &sidedata_offset,
527 if (!PyArg_ParseTuple(args, sidedata_format, &rev, &sidedata_offset,
520 &sidedata_comp_len))
528 &sidedata_comp_len))
521 return NULL;
529 return NULL;
522
530
523 if (rev < 0 || rev >= index_length(self)) {
531 if (rev < 0 || rev >= index_length(self)) {
524 PyErr_SetString(PyExc_IndexError, "revision outside index");
532 PyErr_SetString(PyExc_IndexError, "revision outside index");
525 return NULL;
533 return NULL;
526 }
534 }
527 if (rev < self->length) {
535 if (rev < self->length) {
528 PyErr_SetString(
536 PyErr_SetString(
529 PyExc_IndexError,
537 PyExc_IndexError,
530 "cannot rewrite entries outside of this transaction");
538 "cannot rewrite entries outside of this transaction");
531 return NULL;
539 return NULL;
532 }
540 }
533
541
534 /* Find the newly added node, offset from the "already on-disk" length
542 /* Find the newly added node, offset from the "already on-disk" length
535 */
543 */
536 data = self->added + self->hdrsize * (rev - self->length);
544 data = self->added + self->hdrsize * (rev - self->length);
537 putbe64(sidedata_offset, data + 64);
545 putbe64(sidedata_offset, data + 64);
538 putbe32(sidedata_comp_len, data + 72);
546 putbe32(sidedata_comp_len, data + 72);
539
547
540 Py_RETURN_NONE;
548 Py_RETURN_NONE;
541 }
549 }
542
550
543 static PyObject *index_stats(indexObject *self)
551 static PyObject *index_stats(indexObject *self)
544 {
552 {
545 PyObject *obj = PyDict_New();
553 PyObject *obj = PyDict_New();
546 PyObject *s = NULL;
554 PyObject *s = NULL;
547 PyObject *t = NULL;
555 PyObject *t = NULL;
548
556
549 if (obj == NULL)
557 if (obj == NULL)
550 return NULL;
558 return NULL;
551
559
552 #define istat(__n, __d) \
560 #define istat(__n, __d) \
553 do { \
561 do { \
554 s = PyBytes_FromString(__d); \
562 s = PyBytes_FromString(__d); \
555 t = PyInt_FromSsize_t(self->__n); \
563 t = PyInt_FromSsize_t(self->__n); \
556 if (!s || !t) \
564 if (!s || !t) \
557 goto bail; \
565 goto bail; \
558 if (PyDict_SetItem(obj, s, t) == -1) \
566 if (PyDict_SetItem(obj, s, t) == -1) \
559 goto bail; \
567 goto bail; \
560 Py_CLEAR(s); \
568 Py_CLEAR(s); \
561 Py_CLEAR(t); \
569 Py_CLEAR(t); \
562 } while (0)
570 } while (0)
563
571
564 if (self->added_length)
572 if (self->added_length)
565 istat(new_length, "index entries added");
573 istat(new_length, "index entries added");
566 istat(length, "revs in memory");
574 istat(length, "revs in memory");
567 istat(ntlookups, "node trie lookups");
575 istat(ntlookups, "node trie lookups");
568 istat(ntmisses, "node trie misses");
576 istat(ntmisses, "node trie misses");
569 istat(ntrev, "node trie last rev scanned");
577 istat(ntrev, "node trie last rev scanned");
570 if (self->ntinitialized) {
578 if (self->ntinitialized) {
571 istat(nt.capacity, "node trie capacity");
579 istat(nt.capacity, "node trie capacity");
572 istat(nt.depth, "node trie depth");
580 istat(nt.depth, "node trie depth");
573 istat(nt.length, "node trie count");
581 istat(nt.length, "node trie count");
574 istat(nt.splits, "node trie splits");
582 istat(nt.splits, "node trie splits");
575 }
583 }
576
584
577 #undef istat
585 #undef istat
578
586
579 return obj;
587 return obj;
580
588
581 bail:
589 bail:
582 Py_XDECREF(obj);
590 Py_XDECREF(obj);
583 Py_XDECREF(s);
591 Py_XDECREF(s);
584 Py_XDECREF(t);
592 Py_XDECREF(t);
585 return NULL;
593 return NULL;
586 }
594 }
587
595
588 /*
596 /*
589 * When we cache a list, we want to be sure the caller can't mutate
597 * When we cache a list, we want to be sure the caller can't mutate
590 * the cached copy.
598 * the cached copy.
591 */
599 */
592 static PyObject *list_copy(PyObject *list)
600 static PyObject *list_copy(PyObject *list)
593 {
601 {
594 Py_ssize_t len = PyList_GET_SIZE(list);
602 Py_ssize_t len = PyList_GET_SIZE(list);
595 PyObject *newlist = PyList_New(len);
603 PyObject *newlist = PyList_New(len);
596 Py_ssize_t i;
604 Py_ssize_t i;
597
605
598 if (newlist == NULL)
606 if (newlist == NULL)
599 return NULL;
607 return NULL;
600
608
601 for (i = 0; i < len; i++) {
609 for (i = 0; i < len; i++) {
602 PyObject *obj = PyList_GET_ITEM(list, i);
610 PyObject *obj = PyList_GET_ITEM(list, i);
603 Py_INCREF(obj);
611 Py_INCREF(obj);
604 PyList_SET_ITEM(newlist, i, obj);
612 PyList_SET_ITEM(newlist, i, obj);
605 }
613 }
606
614
607 return newlist;
615 return newlist;
608 }
616 }
609
617
610 static int check_filter(PyObject *filter, Py_ssize_t arg)
618 static int check_filter(PyObject *filter, Py_ssize_t arg)
611 {
619 {
612 if (filter) {
620 if (filter) {
613 PyObject *arglist, *result;
621 PyObject *arglist, *result;
614 int isfiltered;
622 int isfiltered;
615
623
616 arglist = Py_BuildValue("(n)", arg);
624 arglist = Py_BuildValue("(n)", arg);
617 if (!arglist) {
625 if (!arglist) {
618 return -1;
626 return -1;
619 }
627 }
620
628
621 result = PyObject_Call(filter, arglist, NULL);
629 result = PyObject_Call(filter, arglist, NULL);
622 Py_DECREF(arglist);
630 Py_DECREF(arglist);
623 if (!result) {
631 if (!result) {
624 return -1;
632 return -1;
625 }
633 }
626
634
627 /* PyObject_IsTrue returns 1 if true, 0 if false, -1 if error,
635 /* PyObject_IsTrue returns 1 if true, 0 if false, -1 if error,
628 * same as this function, so we can just return it directly.*/
636 * same as this function, so we can just return it directly.*/
629 isfiltered = PyObject_IsTrue(result);
637 isfiltered = PyObject_IsTrue(result);
630 Py_DECREF(result);
638 Py_DECREF(result);
631 return isfiltered;
639 return isfiltered;
632 } else {
640 } else {
633 return 0;
641 return 0;
634 }
642 }
635 }
643 }
636
644
637 static inline void set_phase_from_parents(char *phases, int parent_1,
645 static inline void set_phase_from_parents(char *phases, int parent_1,
638 int parent_2, Py_ssize_t i)
646 int parent_2, Py_ssize_t i)
639 {
647 {
640 if (parent_1 >= 0 && phases[parent_1] > phases[i])
648 if (parent_1 >= 0 && phases[parent_1] > phases[i])
641 phases[i] = phases[parent_1];
649 phases[i] = phases[parent_1];
642 if (parent_2 >= 0 && phases[parent_2] > phases[i])
650 if (parent_2 >= 0 && phases[parent_2] > phases[i])
643 phases[i] = phases[parent_2];
651 phases[i] = phases[parent_2];
644 }
652 }
645
653
646 static PyObject *reachableroots2(indexObject *self, PyObject *args)
654 static PyObject *reachableroots2(indexObject *self, PyObject *args)
647 {
655 {
648
656
649 /* Input */
657 /* Input */
650 long minroot;
658 long minroot;
651 PyObject *includepatharg = NULL;
659 PyObject *includepatharg = NULL;
652 int includepath = 0;
660 int includepath = 0;
653 /* heads and roots are lists */
661 /* heads and roots are lists */
654 PyObject *heads = NULL;
662 PyObject *heads = NULL;
655 PyObject *roots = NULL;
663 PyObject *roots = NULL;
656 PyObject *reachable = NULL;
664 PyObject *reachable = NULL;
657
665
658 PyObject *val;
666 PyObject *val;
659 Py_ssize_t len = index_length(self);
667 Py_ssize_t len = index_length(self);
660 long revnum;
668 long revnum;
661 Py_ssize_t k;
669 Py_ssize_t k;
662 Py_ssize_t i;
670 Py_ssize_t i;
663 Py_ssize_t l;
671 Py_ssize_t l;
664 int r;
672 int r;
665 int parents[2];
673 int parents[2];
666
674
667 /* Internal data structure:
675 /* Internal data structure:
668 * tovisit: array of length len+1 (all revs + nullrev), filled upto
676 * tovisit: array of length len+1 (all revs + nullrev), filled upto
669 * lentovisit
677 * lentovisit
670 *
678 *
671 * revstates: array of length len+1 (all revs + nullrev) */
679 * revstates: array of length len+1 (all revs + nullrev) */
672 int *tovisit = NULL;
680 int *tovisit = NULL;
673 long lentovisit = 0;
681 long lentovisit = 0;
674 enum { RS_SEEN = 1, RS_ROOT = 2, RS_REACHABLE = 4 };
682 enum { RS_SEEN = 1, RS_ROOT = 2, RS_REACHABLE = 4 };
675 char *revstates = NULL;
683 char *revstates = NULL;
676
684
677 /* Get arguments */
685 /* Get arguments */
678 if (!PyArg_ParseTuple(args, "lO!O!O!", &minroot, &PyList_Type, &heads,
686 if (!PyArg_ParseTuple(args, "lO!O!O!", &minroot, &PyList_Type, &heads,
679 &PyList_Type, &roots, &PyBool_Type,
687 &PyList_Type, &roots, &PyBool_Type,
680 &includepatharg))
688 &includepatharg))
681 goto bail;
689 goto bail;
682
690
683 if (includepatharg == Py_True)
691 if (includepatharg == Py_True)
684 includepath = 1;
692 includepath = 1;
685
693
686 /* Initialize return set */
694 /* Initialize return set */
687 reachable = PyList_New(0);
695 reachable = PyList_New(0);
688 if (reachable == NULL)
696 if (reachable == NULL)
689 goto bail;
697 goto bail;
690
698
691 /* Initialize internal datastructures */
699 /* Initialize internal datastructures */
692 tovisit = (int *)malloc((len + 1) * sizeof(int));
700 tovisit = (int *)malloc((len + 1) * sizeof(int));
693 if (tovisit == NULL) {
701 if (tovisit == NULL) {
694 PyErr_NoMemory();
702 PyErr_NoMemory();
695 goto bail;
703 goto bail;
696 }
704 }
697
705
698 revstates = (char *)calloc(len + 1, 1);
706 revstates = (char *)calloc(len + 1, 1);
699 if (revstates == NULL) {
707 if (revstates == NULL) {
700 PyErr_NoMemory();
708 PyErr_NoMemory();
701 goto bail;
709 goto bail;
702 }
710 }
703
711
704 l = PyList_GET_SIZE(roots);
712 l = PyList_GET_SIZE(roots);
705 for (i = 0; i < l; i++) {
713 for (i = 0; i < l; i++) {
706 revnum = PyInt_AsLong(PyList_GET_ITEM(roots, i));
714 revnum = PyInt_AsLong(PyList_GET_ITEM(roots, i));
707 if (revnum == -1 && PyErr_Occurred())
715 if (revnum == -1 && PyErr_Occurred())
708 goto bail;
716 goto bail;
709 /* If root is out of range, e.g. wdir(), it must be unreachable
717 /* If root is out of range, e.g. wdir(), it must be unreachable
710 * from heads. So we can just ignore it. */
718 * from heads. So we can just ignore it. */
711 if (revnum + 1 < 0 || revnum + 1 >= len + 1)
719 if (revnum + 1 < 0 || revnum + 1 >= len + 1)
712 continue;
720 continue;
713 revstates[revnum + 1] |= RS_ROOT;
721 revstates[revnum + 1] |= RS_ROOT;
714 }
722 }
715
723
716 /* Populate tovisit with all the heads */
724 /* Populate tovisit with all the heads */
717 l = PyList_GET_SIZE(heads);
725 l = PyList_GET_SIZE(heads);
718 for (i = 0; i < l; i++) {
726 for (i = 0; i < l; i++) {
719 revnum = PyInt_AsLong(PyList_GET_ITEM(heads, i));
727 revnum = PyInt_AsLong(PyList_GET_ITEM(heads, i));
720 if (revnum == -1 && PyErr_Occurred())
728 if (revnum == -1 && PyErr_Occurred())
721 goto bail;
729 goto bail;
722 if (revnum + 1 < 0 || revnum + 1 >= len + 1) {
730 if (revnum + 1 < 0 || revnum + 1 >= len + 1) {
723 PyErr_SetString(PyExc_IndexError, "head out of range");
731 PyErr_SetString(PyExc_IndexError, "head out of range");
724 goto bail;
732 goto bail;
725 }
733 }
726 if (!(revstates[revnum + 1] & RS_SEEN)) {
734 if (!(revstates[revnum + 1] & RS_SEEN)) {
727 tovisit[lentovisit++] = (int)revnum;
735 tovisit[lentovisit++] = (int)revnum;
728 revstates[revnum + 1] |= RS_SEEN;
736 revstates[revnum + 1] |= RS_SEEN;
729 }
737 }
730 }
738 }
731
739
732 /* Visit the tovisit list and find the reachable roots */
740 /* Visit the tovisit list and find the reachable roots */
733 k = 0;
741 k = 0;
734 while (k < lentovisit) {
742 while (k < lentovisit) {
735 /* Add the node to reachable if it is a root*/
743 /* Add the node to reachable if it is a root*/
736 revnum = tovisit[k++];
744 revnum = tovisit[k++];
737 if (revstates[revnum + 1] & RS_ROOT) {
745 if (revstates[revnum + 1] & RS_ROOT) {
738 revstates[revnum + 1] |= RS_REACHABLE;
746 revstates[revnum + 1] |= RS_REACHABLE;
739 val = PyInt_FromLong(revnum);
747 val = PyInt_FromLong(revnum);
740 if (val == NULL)
748 if (val == NULL)
741 goto bail;
749 goto bail;
742 r = PyList_Append(reachable, val);
750 r = PyList_Append(reachable, val);
743 Py_DECREF(val);
751 Py_DECREF(val);
744 if (r < 0)
752 if (r < 0)
745 goto bail;
753 goto bail;
746 if (includepath == 0)
754 if (includepath == 0)
747 continue;
755 continue;
748 }
756 }
749
757
750 /* Add its parents to the list of nodes to visit */
758 /* Add its parents to the list of nodes to visit */
751 if (revnum == nullrev)
759 if (revnum == nullrev)
752 continue;
760 continue;
753 r = index_get_parents(self, revnum, parents, (int)len - 1);
761 r = index_get_parents(self, revnum, parents, (int)len - 1);
754 if (r < 0)
762 if (r < 0)
755 goto bail;
763 goto bail;
756 for (i = 0; i < 2; i++) {
764 for (i = 0; i < 2; i++) {
757 if (!(revstates[parents[i] + 1] & RS_SEEN) &&
765 if (!(revstates[parents[i] + 1] & RS_SEEN) &&
758 parents[i] >= minroot) {
766 parents[i] >= minroot) {
759 tovisit[lentovisit++] = parents[i];
767 tovisit[lentovisit++] = parents[i];
760 revstates[parents[i] + 1] |= RS_SEEN;
768 revstates[parents[i] + 1] |= RS_SEEN;
761 }
769 }
762 }
770 }
763 }
771 }
764
772
765 /* Find all the nodes in between the roots we found and the heads
773 /* Find all the nodes in between the roots we found and the heads
766 * and add them to the reachable set */
774 * and add them to the reachable set */
767 if (includepath == 1) {
775 if (includepath == 1) {
768 long minidx = minroot;
776 long minidx = minroot;
769 if (minidx < 0)
777 if (minidx < 0)
770 minidx = 0;
778 minidx = 0;
771 for (i = minidx; i < len; i++) {
779 for (i = minidx; i < len; i++) {
772 if (!(revstates[i + 1] & RS_SEEN))
780 if (!(revstates[i + 1] & RS_SEEN))
773 continue;
781 continue;
774 r = index_get_parents(self, i, parents, (int)len - 1);
782 r = index_get_parents(self, i, parents, (int)len - 1);
775 /* Corrupted index file, error is set from
783 /* Corrupted index file, error is set from
776 * index_get_parents */
784 * index_get_parents */
777 if (r < 0)
785 if (r < 0)
778 goto bail;
786 goto bail;
779 if (((revstates[parents[0] + 1] |
787 if (((revstates[parents[0] + 1] |
780 revstates[parents[1] + 1]) &
788 revstates[parents[1] + 1]) &
781 RS_REACHABLE) &&
789 RS_REACHABLE) &&
782 !(revstates[i + 1] & RS_REACHABLE)) {
790 !(revstates[i + 1] & RS_REACHABLE)) {
783 revstates[i + 1] |= RS_REACHABLE;
791 revstates[i + 1] |= RS_REACHABLE;
784 val = PyInt_FromSsize_t(i);
792 val = PyInt_FromSsize_t(i);
785 if (val == NULL)
793 if (val == NULL)
786 goto bail;
794 goto bail;
787 r = PyList_Append(reachable, val);
795 r = PyList_Append(reachable, val);
788 Py_DECREF(val);
796 Py_DECREF(val);
789 if (r < 0)
797 if (r < 0)
790 goto bail;
798 goto bail;
791 }
799 }
792 }
800 }
793 }
801 }
794
802
795 free(revstates);
803 free(revstates);
796 free(tovisit);
804 free(tovisit);
797 return reachable;
805 return reachable;
798 bail:
806 bail:
799 Py_XDECREF(reachable);
807 Py_XDECREF(reachable);
800 free(revstates);
808 free(revstates);
801 free(tovisit);
809 free(tovisit);
802 return NULL;
810 return NULL;
803 }
811 }
804
812
805 static int add_roots_get_min(indexObject *self, PyObject *roots, char *phases,
813 static int add_roots_get_min(indexObject *self, PyObject *roots, char *phases,
806 char phase)
814 char phase)
807 {
815 {
808 Py_ssize_t len = index_length(self);
816 Py_ssize_t len = index_length(self);
809 PyObject *item;
817 PyObject *item;
810 PyObject *iterator;
818 PyObject *iterator;
811 int rev, minrev = -1;
819 int rev, minrev = -1;
812 char *node;
820 char *node;
813
821
814 if (!PySet_Check(roots)) {
822 if (!PySet_Check(roots)) {
815 PyErr_SetString(PyExc_TypeError,
823 PyErr_SetString(PyExc_TypeError,
816 "roots must be a set of nodes");
824 "roots must be a set of nodes");
817 return -2;
825 return -2;
818 }
826 }
819 iterator = PyObject_GetIter(roots);
827 iterator = PyObject_GetIter(roots);
820 if (iterator == NULL)
828 if (iterator == NULL)
821 return -2;
829 return -2;
822 while ((item = PyIter_Next(iterator))) {
830 while ((item = PyIter_Next(iterator))) {
823 if (node_check(self->nodelen, item, &node) == -1)
831 if (node_check(self->nodelen, item, &node) == -1)
824 goto failed;
832 goto failed;
825 rev = index_find_node(self, node);
833 rev = index_find_node(self, node);
826 /* null is implicitly public, so negative is invalid */
834 /* null is implicitly public, so negative is invalid */
827 if (rev < 0 || rev >= len)
835 if (rev < 0 || rev >= len)
828 goto failed;
836 goto failed;
829 phases[rev] = phase;
837 phases[rev] = phase;
830 if (minrev == -1 || minrev > rev)
838 if (minrev == -1 || minrev > rev)
831 minrev = rev;
839 minrev = rev;
832 Py_DECREF(item);
840 Py_DECREF(item);
833 }
841 }
834 Py_DECREF(iterator);
842 Py_DECREF(iterator);
835 return minrev;
843 return minrev;
836 failed:
844 failed:
837 Py_DECREF(iterator);
845 Py_DECREF(iterator);
838 Py_DECREF(item);
846 Py_DECREF(item);
839 return -2;
847 return -2;
840 }
848 }
841
849
842 static PyObject *compute_phases_map_sets(indexObject *self, PyObject *args)
850 static PyObject *compute_phases_map_sets(indexObject *self, PyObject *args)
843 {
851 {
844 /* 0: public (untracked), 1: draft, 2: secret, 32: archive,
852 /* 0: public (untracked), 1: draft, 2: secret, 32: archive,
845 96: internal */
853 96: internal */
846 static const char trackedphases[] = {1, 2, 32, 96};
854 static const char trackedphases[] = {1, 2, 32, 96};
847 PyObject *roots = Py_None;
855 PyObject *roots = Py_None;
848 PyObject *phasesetsdict = NULL;
856 PyObject *phasesetsdict = NULL;
849 PyObject *phasesets[4] = {NULL, NULL, NULL, NULL};
857 PyObject *phasesets[4] = {NULL, NULL, NULL, NULL};
850 Py_ssize_t len = index_length(self);
858 Py_ssize_t len = index_length(self);
851 char *phases = NULL;
859 char *phases = NULL;
852 int minphaserev = -1, rev, i;
860 int minphaserev = -1, rev, i;
853 const int numphases = (int)(sizeof(phasesets) / sizeof(phasesets[0]));
861 const int numphases = (int)(sizeof(phasesets) / sizeof(phasesets[0]));
854
862
855 if (!PyArg_ParseTuple(args, "O", &roots))
863 if (!PyArg_ParseTuple(args, "O", &roots))
856 return NULL;
864 return NULL;
857 if (roots == NULL || !PyDict_Check(roots)) {
865 if (roots == NULL || !PyDict_Check(roots)) {
858 PyErr_SetString(PyExc_TypeError, "roots must be a dictionary");
866 PyErr_SetString(PyExc_TypeError, "roots must be a dictionary");
859 return NULL;
867 return NULL;
860 }
868 }
861
869
862 phases = calloc(len, 1);
870 phases = calloc(len, 1);
863 if (phases == NULL) {
871 if (phases == NULL) {
864 PyErr_NoMemory();
872 PyErr_NoMemory();
865 return NULL;
873 return NULL;
866 }
874 }
867
875
868 for (i = 0; i < numphases; ++i) {
876 for (i = 0; i < numphases; ++i) {
869 PyObject *pyphase = PyInt_FromLong(trackedphases[i]);
877 PyObject *pyphase = PyInt_FromLong(trackedphases[i]);
870 PyObject *phaseroots = NULL;
878 PyObject *phaseroots = NULL;
871 if (pyphase == NULL)
879 if (pyphase == NULL)
872 goto release;
880 goto release;
873 phaseroots = PyDict_GetItem(roots, pyphase);
881 phaseroots = PyDict_GetItem(roots, pyphase);
874 Py_DECREF(pyphase);
882 Py_DECREF(pyphase);
875 if (phaseroots == NULL)
883 if (phaseroots == NULL)
876 continue;
884 continue;
877 rev = add_roots_get_min(self, phaseroots, phases,
885 rev = add_roots_get_min(self, phaseroots, phases,
878 trackedphases[i]);
886 trackedphases[i]);
879 if (rev == -2)
887 if (rev == -2)
880 goto release;
888 goto release;
881 if (rev != -1 && (minphaserev == -1 || rev < minphaserev))
889 if (rev != -1 && (minphaserev == -1 || rev < minphaserev))
882 minphaserev = rev;
890 minphaserev = rev;
883 }
891 }
884
892
885 for (i = 0; i < numphases; ++i) {
893 for (i = 0; i < numphases; ++i) {
886 phasesets[i] = PySet_New(NULL);
894 phasesets[i] = PySet_New(NULL);
887 if (phasesets[i] == NULL)
895 if (phasesets[i] == NULL)
888 goto release;
896 goto release;
889 }
897 }
890
898
891 if (minphaserev == -1)
899 if (minphaserev == -1)
892 minphaserev = len;
900 minphaserev = len;
893 for (rev = minphaserev; rev < len; ++rev) {
901 for (rev = minphaserev; rev < len; ++rev) {
894 PyObject *pyphase = NULL;
902 PyObject *pyphase = NULL;
895 PyObject *pyrev = NULL;
903 PyObject *pyrev = NULL;
896 int parents[2];
904 int parents[2];
897 /*
905 /*
898 * The parent lookup could be skipped for phaseroots, but
906 * The parent lookup could be skipped for phaseroots, but
899 * phase --force would historically not recompute them
907 * phase --force would historically not recompute them
900 * correctly, leaving descendents with a lower phase around.
908 * correctly, leaving descendents with a lower phase around.
901 * As such, unconditionally recompute the phase.
909 * As such, unconditionally recompute the phase.
902 */
910 */
903 if (index_get_parents(self, rev, parents, (int)len - 1) < 0)
911 if (index_get_parents(self, rev, parents, (int)len - 1) < 0)
904 goto release;
912 goto release;
905 set_phase_from_parents(phases, parents[0], parents[1], rev);
913 set_phase_from_parents(phases, parents[0], parents[1], rev);
906 switch (phases[rev]) {
914 switch (phases[rev]) {
907 case 0:
915 case 0:
908 continue;
916 continue;
909 case 1:
917 case 1:
910 pyphase = phasesets[0];
918 pyphase = phasesets[0];
911 break;
919 break;
912 case 2:
920 case 2:
913 pyphase = phasesets[1];
921 pyphase = phasesets[1];
914 break;
922 break;
915 case 32:
923 case 32:
916 pyphase = phasesets[2];
924 pyphase = phasesets[2];
917 break;
925 break;
918 case 96:
926 case 96:
919 pyphase = phasesets[3];
927 pyphase = phasesets[3];
920 break;
928 break;
921 default:
929 default:
922 /* this should never happen since the phase number is
930 /* this should never happen since the phase number is
923 * specified by this function. */
931 * specified by this function. */
924 PyErr_SetString(PyExc_SystemError,
932 PyErr_SetString(PyExc_SystemError,
925 "bad phase number in internal list");
933 "bad phase number in internal list");
926 goto release;
934 goto release;
927 }
935 }
928 pyrev = PyInt_FromLong(rev);
936 pyrev = PyInt_FromLong(rev);
929 if (pyrev == NULL)
937 if (pyrev == NULL)
930 goto release;
938 goto release;
931 if (PySet_Add(pyphase, pyrev) == -1) {
939 if (PySet_Add(pyphase, pyrev) == -1) {
932 Py_DECREF(pyrev);
940 Py_DECREF(pyrev);
933 goto release;
941 goto release;
934 }
942 }
935 Py_DECREF(pyrev);
943 Py_DECREF(pyrev);
936 }
944 }
937
945
938 phasesetsdict = _dict_new_presized(numphases);
946 phasesetsdict = _dict_new_presized(numphases);
939 if (phasesetsdict == NULL)
947 if (phasesetsdict == NULL)
940 goto release;
948 goto release;
941 for (i = 0; i < numphases; ++i) {
949 for (i = 0; i < numphases; ++i) {
942 PyObject *pyphase = PyInt_FromLong(trackedphases[i]);
950 PyObject *pyphase = PyInt_FromLong(trackedphases[i]);
943 if (pyphase == NULL)
951 if (pyphase == NULL)
944 goto release;
952 goto release;
945 if (PyDict_SetItem(phasesetsdict, pyphase, phasesets[i]) ==
953 if (PyDict_SetItem(phasesetsdict, pyphase, phasesets[i]) ==
946 -1) {
954 -1) {
947 Py_DECREF(pyphase);
955 Py_DECREF(pyphase);
948 goto release;
956 goto release;
949 }
957 }
950 Py_DECREF(phasesets[i]);
958 Py_DECREF(phasesets[i]);
951 phasesets[i] = NULL;
959 phasesets[i] = NULL;
952 }
960 }
953
961
954 return Py_BuildValue("nN", len, phasesetsdict);
962 return Py_BuildValue("nN", len, phasesetsdict);
955
963
956 release:
964 release:
957 for (i = 0; i < numphases; ++i)
965 for (i = 0; i < numphases; ++i)
958 Py_XDECREF(phasesets[i]);
966 Py_XDECREF(phasesets[i]);
959 Py_XDECREF(phasesetsdict);
967 Py_XDECREF(phasesetsdict);
960
968
961 free(phases);
969 free(phases);
962 return NULL;
970 return NULL;
963 }
971 }
964
972
965 static PyObject *index_headrevs(indexObject *self, PyObject *args)
973 static PyObject *index_headrevs(indexObject *self, PyObject *args)
966 {
974 {
967 Py_ssize_t i, j, len;
975 Py_ssize_t i, j, len;
968 char *nothead = NULL;
976 char *nothead = NULL;
969 PyObject *heads = NULL;
977 PyObject *heads = NULL;
970 PyObject *filter = NULL;
978 PyObject *filter = NULL;
971 PyObject *filteredrevs = Py_None;
979 PyObject *filteredrevs = Py_None;
972
980
973 if (!PyArg_ParseTuple(args, "|O", &filteredrevs)) {
981 if (!PyArg_ParseTuple(args, "|O", &filteredrevs)) {
974 return NULL;
982 return NULL;
975 }
983 }
976
984
977 if (self->headrevs && filteredrevs == self->filteredrevs)
985 if (self->headrevs && filteredrevs == self->filteredrevs)
978 return list_copy(self->headrevs);
986 return list_copy(self->headrevs);
979
987
980 Py_DECREF(self->filteredrevs);
988 Py_DECREF(self->filteredrevs);
981 self->filteredrevs = filteredrevs;
989 self->filteredrevs = filteredrevs;
982 Py_INCREF(filteredrevs);
990 Py_INCREF(filteredrevs);
983
991
984 if (filteredrevs != Py_None) {
992 if (filteredrevs != Py_None) {
985 filter = PyObject_GetAttrString(filteredrevs, "__contains__");
993 filter = PyObject_GetAttrString(filteredrevs, "__contains__");
986 if (!filter) {
994 if (!filter) {
987 PyErr_SetString(
995 PyErr_SetString(
988 PyExc_TypeError,
996 PyExc_TypeError,
989 "filteredrevs has no attribute __contains__");
997 "filteredrevs has no attribute __contains__");
990 goto bail;
998 goto bail;
991 }
999 }
992 }
1000 }
993
1001
994 len = index_length(self);
1002 len = index_length(self);
995 heads = PyList_New(0);
1003 heads = PyList_New(0);
996 if (heads == NULL)
1004 if (heads == NULL)
997 goto bail;
1005 goto bail;
998 if (len == 0) {
1006 if (len == 0) {
999 PyObject *nullid = PyInt_FromLong(-1);
1007 PyObject *nullid = PyInt_FromLong(-1);
1000 if (nullid == NULL || PyList_Append(heads, nullid) == -1) {
1008 if (nullid == NULL || PyList_Append(heads, nullid) == -1) {
1001 Py_XDECREF(nullid);
1009 Py_XDECREF(nullid);
1002 goto bail;
1010 goto bail;
1003 }
1011 }
1004 goto done;
1012 goto done;
1005 }
1013 }
1006
1014
1007 nothead = calloc(len, 1);
1015 nothead = calloc(len, 1);
1008 if (nothead == NULL) {
1016 if (nothead == NULL) {
1009 PyErr_NoMemory();
1017 PyErr_NoMemory();
1010 goto bail;
1018 goto bail;
1011 }
1019 }
1012
1020
1013 for (i = len - 1; i >= 0; i--) {
1021 for (i = len - 1; i >= 0; i--) {
1014 int isfiltered;
1022 int isfiltered;
1015 int parents[2];
1023 int parents[2];
1016
1024
1017 /* If nothead[i] == 1, it means we've seen an unfiltered child
1025 /* If nothead[i] == 1, it means we've seen an unfiltered child
1018 * of this node already, and therefore this node is not
1026 * of this node already, and therefore this node is not
1019 * filtered. So we can skip the expensive check_filter step.
1027 * filtered. So we can skip the expensive check_filter step.
1020 */
1028 */
1021 if (nothead[i] != 1) {
1029 if (nothead[i] != 1) {
1022 isfiltered = check_filter(filter, i);
1030 isfiltered = check_filter(filter, i);
1023 if (isfiltered == -1) {
1031 if (isfiltered == -1) {
1024 PyErr_SetString(PyExc_TypeError,
1032 PyErr_SetString(PyExc_TypeError,
1025 "unable to check filter");
1033 "unable to check filter");
1026 goto bail;
1034 goto bail;
1027 }
1035 }
1028
1036
1029 if (isfiltered) {
1037 if (isfiltered) {
1030 nothead[i] = 1;
1038 nothead[i] = 1;
1031 continue;
1039 continue;
1032 }
1040 }
1033 }
1041 }
1034
1042
1035 if (index_get_parents(self, i, parents, (int)len - 1) < 0)
1043 if (index_get_parents(self, i, parents, (int)len - 1) < 0)
1036 goto bail;
1044 goto bail;
1037 for (j = 0; j < 2; j++) {
1045 for (j = 0; j < 2; j++) {
1038 if (parents[j] >= 0)
1046 if (parents[j] >= 0)
1039 nothead[parents[j]] = 1;
1047 nothead[parents[j]] = 1;
1040 }
1048 }
1041 }
1049 }
1042
1050
1043 for (i = 0; i < len; i++) {
1051 for (i = 0; i < len; i++) {
1044 PyObject *head;
1052 PyObject *head;
1045
1053
1046 if (nothead[i])
1054 if (nothead[i])
1047 continue;
1055 continue;
1048 head = PyInt_FromSsize_t(i);
1056 head = PyInt_FromSsize_t(i);
1049 if (head == NULL || PyList_Append(heads, head) == -1) {
1057 if (head == NULL || PyList_Append(heads, head) == -1) {
1050 Py_XDECREF(head);
1058 Py_XDECREF(head);
1051 goto bail;
1059 goto bail;
1052 }
1060 }
1053 }
1061 }
1054
1062
1055 done:
1063 done:
1056 self->headrevs = heads;
1064 self->headrevs = heads;
1057 Py_XDECREF(filter);
1065 Py_XDECREF(filter);
1058 free(nothead);
1066 free(nothead);
1059 return list_copy(self->headrevs);
1067 return list_copy(self->headrevs);
1060 bail:
1068 bail:
1061 Py_XDECREF(filter);
1069 Py_XDECREF(filter);
1062 Py_XDECREF(heads);
1070 Py_XDECREF(heads);
1063 free(nothead);
1071 free(nothead);
1064 return NULL;
1072 return NULL;
1065 }
1073 }
1066
1074
1067 /**
1075 /**
1068 * Obtain the base revision index entry.
1076 * Obtain the base revision index entry.
1069 *
1077 *
1070 * Callers must ensure that rev >= 0 or illegal memory access may occur.
1078 * Callers must ensure that rev >= 0 or illegal memory access may occur.
1071 */
1079 */
1072 static inline int index_baserev(indexObject *self, int rev)
1080 static inline int index_baserev(indexObject *self, int rev)
1073 {
1081 {
1074 const char *data;
1082 const char *data;
1075 int result;
1083 int result;
1076
1084
1077 data = index_deref(self, rev);
1085 data = index_deref(self, rev);
1078 if (data == NULL)
1086 if (data == NULL)
1079 return -2;
1087 return -2;
1080 result = getbe32(data + 16);
1088 result = getbe32(data + 16);
1081
1089
1082 if (result > rev) {
1090 if (result > rev) {
1083 PyErr_Format(
1091 PyErr_Format(
1084 PyExc_ValueError,
1092 PyExc_ValueError,
1085 "corrupted revlog, revision base above revision: %d, %d",
1093 "corrupted revlog, revision base above revision: %d, %d",
1086 rev, result);
1094 rev, result);
1087 return -2;
1095 return -2;
1088 }
1096 }
1089 if (result < -1) {
1097 if (result < -1) {
1090 PyErr_Format(
1098 PyErr_Format(
1091 PyExc_ValueError,
1099 PyExc_ValueError,
1092 "corrupted revlog, revision base out of range: %d, %d", rev,
1100 "corrupted revlog, revision base out of range: %d, %d", rev,
1093 result);
1101 result);
1094 return -2;
1102 return -2;
1095 }
1103 }
1096 return result;
1104 return result;
1097 }
1105 }
1098
1106
1099 /**
1107 /**
1100 * Find if a revision is a snapshot or not
1108 * Find if a revision is a snapshot or not
1101 *
1109 *
1102 * Only relevant for sparse-revlog case.
1110 * Only relevant for sparse-revlog case.
1103 * Callers must ensure that rev is in a valid range.
1111 * Callers must ensure that rev is in a valid range.
1104 */
1112 */
1105 static int index_issnapshotrev(indexObject *self, Py_ssize_t rev)
1113 static int index_issnapshotrev(indexObject *self, Py_ssize_t rev)
1106 {
1114 {
1107 int ps[2];
1115 int ps[2];
1108 Py_ssize_t base;
1116 Py_ssize_t base;
1109 while (rev >= 0) {
1117 while (rev >= 0) {
1110 base = (Py_ssize_t)index_baserev(self, rev);
1118 base = (Py_ssize_t)index_baserev(self, rev);
1111 if (base == rev) {
1119 if (base == rev) {
1112 base = -1;
1120 base = -1;
1113 }
1121 }
1114 if (base == -2) {
1122 if (base == -2) {
1115 assert(PyErr_Occurred());
1123 assert(PyErr_Occurred());
1116 return -1;
1124 return -1;
1117 }
1125 }
1118 if (base == -1) {
1126 if (base == -1) {
1119 return 1;
1127 return 1;
1120 }
1128 }
1121 if (index_get_parents(self, rev, ps, (int)rev) < 0) {
1129 if (index_get_parents(self, rev, ps, (int)rev) < 0) {
1122 assert(PyErr_Occurred());
1130 assert(PyErr_Occurred());
1123 return -1;
1131 return -1;
1124 };
1132 };
1125 if (base == ps[0] || base == ps[1]) {
1133 if (base == ps[0] || base == ps[1]) {
1126 return 0;
1134 return 0;
1127 }
1135 }
1128 rev = base;
1136 rev = base;
1129 }
1137 }
1130 return rev == -1;
1138 return rev == -1;
1131 }
1139 }
1132
1140
1133 static PyObject *index_issnapshot(indexObject *self, PyObject *value)
1141 static PyObject *index_issnapshot(indexObject *self, PyObject *value)
1134 {
1142 {
1135 long rev;
1143 long rev;
1136 int issnap;
1144 int issnap;
1137 Py_ssize_t length = index_length(self);
1145 Py_ssize_t length = index_length(self);
1138
1146
1139 if (!pylong_to_long(value, &rev)) {
1147 if (!pylong_to_long(value, &rev)) {
1140 return NULL;
1148 return NULL;
1141 }
1149 }
1142 if (rev < -1 || rev >= length) {
1150 if (rev < -1 || rev >= length) {
1143 PyErr_Format(PyExc_ValueError, "revlog index out of range: %ld",
1151 PyErr_Format(PyExc_ValueError, "revlog index out of range: %ld",
1144 rev);
1152 rev);
1145 return NULL;
1153 return NULL;
1146 };
1154 };
1147 issnap = index_issnapshotrev(self, (Py_ssize_t)rev);
1155 issnap = index_issnapshotrev(self, (Py_ssize_t)rev);
1148 if (issnap < 0) {
1156 if (issnap < 0) {
1149 return NULL;
1157 return NULL;
1150 };
1158 };
1151 return PyBool_FromLong((long)issnap);
1159 return PyBool_FromLong((long)issnap);
1152 }
1160 }
1153
1161
1154 static PyObject *index_findsnapshots(indexObject *self, PyObject *args)
1162 static PyObject *index_findsnapshots(indexObject *self, PyObject *args)
1155 {
1163 {
1156 Py_ssize_t start_rev;
1164 Py_ssize_t start_rev;
1157 PyObject *cache;
1165 PyObject *cache;
1158 Py_ssize_t base;
1166 Py_ssize_t base;
1159 Py_ssize_t rev;
1167 Py_ssize_t rev;
1160 PyObject *key = NULL;
1168 PyObject *key = NULL;
1161 PyObject *value = NULL;
1169 PyObject *value = NULL;
1162 const Py_ssize_t length = index_length(self);
1170 const Py_ssize_t length = index_length(self);
1163 if (!PyArg_ParseTuple(args, "O!n", &PyDict_Type, &cache, &start_rev)) {
1171 if (!PyArg_ParseTuple(args, "O!n", &PyDict_Type, &cache, &start_rev)) {
1164 return NULL;
1172 return NULL;
1165 }
1173 }
1166 for (rev = start_rev; rev < length; rev++) {
1174 for (rev = start_rev; rev < length; rev++) {
1167 int issnap;
1175 int issnap;
1168 PyObject *allvalues = NULL;
1176 PyObject *allvalues = NULL;
1169 issnap = index_issnapshotrev(self, rev);
1177 issnap = index_issnapshotrev(self, rev);
1170 if (issnap < 0) {
1178 if (issnap < 0) {
1171 goto bail;
1179 goto bail;
1172 }
1180 }
1173 if (issnap == 0) {
1181 if (issnap == 0) {
1174 continue;
1182 continue;
1175 }
1183 }
1176 base = (Py_ssize_t)index_baserev(self, rev);
1184 base = (Py_ssize_t)index_baserev(self, rev);
1177 if (base == rev) {
1185 if (base == rev) {
1178 base = -1;
1186 base = -1;
1179 }
1187 }
1180 if (base == -2) {
1188 if (base == -2) {
1181 assert(PyErr_Occurred());
1189 assert(PyErr_Occurred());
1182 goto bail;
1190 goto bail;
1183 }
1191 }
1184 key = PyInt_FromSsize_t(base);
1192 key = PyInt_FromSsize_t(base);
1185 allvalues = PyDict_GetItem(cache, key);
1193 allvalues = PyDict_GetItem(cache, key);
1186 if (allvalues == NULL && PyErr_Occurred()) {
1194 if (allvalues == NULL && PyErr_Occurred()) {
1187 goto bail;
1195 goto bail;
1188 }
1196 }
1189 if (allvalues == NULL) {
1197 if (allvalues == NULL) {
1190 int r;
1198 int r;
1191 allvalues = PyList_New(0);
1199 allvalues = PyList_New(0);
1192 if (!allvalues) {
1200 if (!allvalues) {
1193 goto bail;
1201 goto bail;
1194 }
1202 }
1195 r = PyDict_SetItem(cache, key, allvalues);
1203 r = PyDict_SetItem(cache, key, allvalues);
1196 Py_DECREF(allvalues);
1204 Py_DECREF(allvalues);
1197 if (r < 0) {
1205 if (r < 0) {
1198 goto bail;
1206 goto bail;
1199 }
1207 }
1200 }
1208 }
1201 value = PyInt_FromSsize_t(rev);
1209 value = PyInt_FromSsize_t(rev);
1202 if (PyList_Append(allvalues, value)) {
1210 if (PyList_Append(allvalues, value)) {
1203 goto bail;
1211 goto bail;
1204 }
1212 }
1205 Py_CLEAR(key);
1213 Py_CLEAR(key);
1206 Py_CLEAR(value);
1214 Py_CLEAR(value);
1207 }
1215 }
1208 Py_RETURN_NONE;
1216 Py_RETURN_NONE;
1209 bail:
1217 bail:
1210 Py_XDECREF(key);
1218 Py_XDECREF(key);
1211 Py_XDECREF(value);
1219 Py_XDECREF(value);
1212 return NULL;
1220 return NULL;
1213 }
1221 }
1214
1222
1215 static PyObject *index_deltachain(indexObject *self, PyObject *args)
1223 static PyObject *index_deltachain(indexObject *self, PyObject *args)
1216 {
1224 {
1217 int rev, generaldelta;
1225 int rev, generaldelta;
1218 PyObject *stoparg;
1226 PyObject *stoparg;
1219 int stoprev, iterrev, baserev = -1;
1227 int stoprev, iterrev, baserev = -1;
1220 int stopped;
1228 int stopped;
1221 PyObject *chain = NULL, *result = NULL;
1229 PyObject *chain = NULL, *result = NULL;
1222 const Py_ssize_t length = index_length(self);
1230 const Py_ssize_t length = index_length(self);
1223
1231
1224 if (!PyArg_ParseTuple(args, "iOi", &rev, &stoparg, &generaldelta)) {
1232 if (!PyArg_ParseTuple(args, "iOi", &rev, &stoparg, &generaldelta)) {
1225 return NULL;
1233 return NULL;
1226 }
1234 }
1227
1235
1228 if (PyInt_Check(stoparg)) {
1236 if (PyInt_Check(stoparg)) {
1229 stoprev = (int)PyInt_AsLong(stoparg);
1237 stoprev = (int)PyInt_AsLong(stoparg);
1230 if (stoprev == -1 && PyErr_Occurred()) {
1238 if (stoprev == -1 && PyErr_Occurred()) {
1231 return NULL;
1239 return NULL;
1232 }
1240 }
1233 } else if (stoparg == Py_None) {
1241 } else if (stoparg == Py_None) {
1234 stoprev = -2;
1242 stoprev = -2;
1235 } else {
1243 } else {
1236 PyErr_SetString(PyExc_ValueError,
1244 PyErr_SetString(PyExc_ValueError,
1237 "stoprev must be integer or None");
1245 "stoprev must be integer or None");
1238 return NULL;
1246 return NULL;
1239 }
1247 }
1240
1248
1241 if (rev < 0 || rev >= length) {
1249 if (rev < 0 || rev >= length) {
1242 PyErr_SetString(PyExc_ValueError, "revlog index out of range");
1250 PyErr_SetString(PyExc_ValueError, "revlog index out of range");
1243 return NULL;
1251 return NULL;
1244 }
1252 }
1245
1253
1246 chain = PyList_New(0);
1254 chain = PyList_New(0);
1247 if (chain == NULL) {
1255 if (chain == NULL) {
1248 return NULL;
1256 return NULL;
1249 }
1257 }
1250
1258
1251 baserev = index_baserev(self, rev);
1259 baserev = index_baserev(self, rev);
1252
1260
1253 /* This should never happen. */
1261 /* This should never happen. */
1254 if (baserev <= -2) {
1262 if (baserev <= -2) {
1255 /* Error should be set by index_deref() */
1263 /* Error should be set by index_deref() */
1256 assert(PyErr_Occurred());
1264 assert(PyErr_Occurred());
1257 goto bail;
1265 goto bail;
1258 }
1266 }
1259
1267
1260 iterrev = rev;
1268 iterrev = rev;
1261
1269
1262 while (iterrev != baserev && iterrev != stoprev) {
1270 while (iterrev != baserev && iterrev != stoprev) {
1263 PyObject *value = PyInt_FromLong(iterrev);
1271 PyObject *value = PyInt_FromLong(iterrev);
1264 if (value == NULL) {
1272 if (value == NULL) {
1265 goto bail;
1273 goto bail;
1266 }
1274 }
1267 if (PyList_Append(chain, value)) {
1275 if (PyList_Append(chain, value)) {
1268 Py_DECREF(value);
1276 Py_DECREF(value);
1269 goto bail;
1277 goto bail;
1270 }
1278 }
1271 Py_DECREF(value);
1279 Py_DECREF(value);
1272
1280
1273 if (generaldelta) {
1281 if (generaldelta) {
1274 iterrev = baserev;
1282 iterrev = baserev;
1275 } else {
1283 } else {
1276 iterrev--;
1284 iterrev--;
1277 }
1285 }
1278
1286
1279 if (iterrev < 0) {
1287 if (iterrev < 0) {
1280 break;
1288 break;
1281 }
1289 }
1282
1290
1283 if (iterrev >= length) {
1291 if (iterrev >= length) {
1284 PyErr_SetString(PyExc_IndexError,
1292 PyErr_SetString(PyExc_IndexError,
1285 "revision outside index");
1293 "revision outside index");
1286 return NULL;
1294 return NULL;
1287 }
1295 }
1288
1296
1289 baserev = index_baserev(self, iterrev);
1297 baserev = index_baserev(self, iterrev);
1290
1298
1291 /* This should never happen. */
1299 /* This should never happen. */
1292 if (baserev <= -2) {
1300 if (baserev <= -2) {
1293 /* Error should be set by index_deref() */
1301 /* Error should be set by index_deref() */
1294 assert(PyErr_Occurred());
1302 assert(PyErr_Occurred());
1295 goto bail;
1303 goto bail;
1296 }
1304 }
1297 }
1305 }
1298
1306
1299 if (iterrev == stoprev) {
1307 if (iterrev == stoprev) {
1300 stopped = 1;
1308 stopped = 1;
1301 } else {
1309 } else {
1302 PyObject *value = PyInt_FromLong(iterrev);
1310 PyObject *value = PyInt_FromLong(iterrev);
1303 if (value == NULL) {
1311 if (value == NULL) {
1304 goto bail;
1312 goto bail;
1305 }
1313 }
1306 if (PyList_Append(chain, value)) {
1314 if (PyList_Append(chain, value)) {
1307 Py_DECREF(value);
1315 Py_DECREF(value);
1308 goto bail;
1316 goto bail;
1309 }
1317 }
1310 Py_DECREF(value);
1318 Py_DECREF(value);
1311
1319
1312 stopped = 0;
1320 stopped = 0;
1313 }
1321 }
1314
1322
1315 if (PyList_Reverse(chain)) {
1323 if (PyList_Reverse(chain)) {
1316 goto bail;
1324 goto bail;
1317 }
1325 }
1318
1326
1319 result = Py_BuildValue("OO", chain, stopped ? Py_True : Py_False);
1327 result = Py_BuildValue("OO", chain, stopped ? Py_True : Py_False);
1320 Py_DECREF(chain);
1328 Py_DECREF(chain);
1321 return result;
1329 return result;
1322
1330
1323 bail:
1331 bail:
1324 Py_DECREF(chain);
1332 Py_DECREF(chain);
1325 return NULL;
1333 return NULL;
1326 }
1334 }
1327
1335
1328 static inline int64_t
1336 static inline int64_t
1329 index_segment_span(indexObject *self, Py_ssize_t start_rev, Py_ssize_t end_rev)
1337 index_segment_span(indexObject *self, Py_ssize_t start_rev, Py_ssize_t end_rev)
1330 {
1338 {
1331 int64_t start_offset;
1339 int64_t start_offset;
1332 int64_t end_offset;
1340 int64_t end_offset;
1333 int end_size;
1341 int end_size;
1334 start_offset = index_get_start(self, start_rev);
1342 start_offset = index_get_start(self, start_rev);
1335 if (start_offset < 0) {
1343 if (start_offset < 0) {
1336 return -1;
1344 return -1;
1337 }
1345 }
1338 end_offset = index_get_start(self, end_rev);
1346 end_offset = index_get_start(self, end_rev);
1339 if (end_offset < 0) {
1347 if (end_offset < 0) {
1340 return -1;
1348 return -1;
1341 }
1349 }
1342 end_size = index_get_length(self, end_rev);
1350 end_size = index_get_length(self, end_rev);
1343 if (end_size < 0) {
1351 if (end_size < 0) {
1344 return -1;
1352 return -1;
1345 }
1353 }
1346 if (end_offset < start_offset) {
1354 if (end_offset < start_offset) {
1347 PyErr_Format(PyExc_ValueError,
1355 PyErr_Format(PyExc_ValueError,
1348 "corrupted revlog index: inconsistent offset "
1356 "corrupted revlog index: inconsistent offset "
1349 "between revisions (%zd) and (%zd)",
1357 "between revisions (%zd) and (%zd)",
1350 start_rev, end_rev);
1358 start_rev, end_rev);
1351 return -1;
1359 return -1;
1352 }
1360 }
1353 return (end_offset - start_offset) + (int64_t)end_size;
1361 return (end_offset - start_offset) + (int64_t)end_size;
1354 }
1362 }
1355
1363
1356 /* returns endidx so that revs[startidx:endidx] has no empty trailing revs */
1364 /* returns endidx so that revs[startidx:endidx] has no empty trailing revs */
1357 static Py_ssize_t trim_endidx(indexObject *self, const Py_ssize_t *revs,
1365 static Py_ssize_t trim_endidx(indexObject *self, const Py_ssize_t *revs,
1358 Py_ssize_t startidx, Py_ssize_t endidx)
1366 Py_ssize_t startidx, Py_ssize_t endidx)
1359 {
1367 {
1360 int length;
1368 int length;
1361 while (endidx > 1 && endidx > startidx) {
1369 while (endidx > 1 && endidx > startidx) {
1362 length = index_get_length(self, revs[endidx - 1]);
1370 length = index_get_length(self, revs[endidx - 1]);
1363 if (length < 0) {
1371 if (length < 0) {
1364 return -1;
1372 return -1;
1365 }
1373 }
1366 if (length != 0) {
1374 if (length != 0) {
1367 break;
1375 break;
1368 }
1376 }
1369 endidx -= 1;
1377 endidx -= 1;
1370 }
1378 }
1371 return endidx;
1379 return endidx;
1372 }
1380 }
1373
1381
1374 struct Gap {
1382 struct Gap {
1375 int64_t size;
1383 int64_t size;
1376 Py_ssize_t idx;
1384 Py_ssize_t idx;
1377 };
1385 };
1378
1386
1379 static int gap_compare(const void *left, const void *right)
1387 static int gap_compare(const void *left, const void *right)
1380 {
1388 {
1381 const struct Gap *l_left = ((const struct Gap *)left);
1389 const struct Gap *l_left = ((const struct Gap *)left);
1382 const struct Gap *l_right = ((const struct Gap *)right);
1390 const struct Gap *l_right = ((const struct Gap *)right);
1383 if (l_left->size < l_right->size) {
1391 if (l_left->size < l_right->size) {
1384 return -1;
1392 return -1;
1385 } else if (l_left->size > l_right->size) {
1393 } else if (l_left->size > l_right->size) {
1386 return 1;
1394 return 1;
1387 }
1395 }
1388 return 0;
1396 return 0;
1389 }
1397 }
1390 static int Py_ssize_t_compare(const void *left, const void *right)
1398 static int Py_ssize_t_compare(const void *left, const void *right)
1391 {
1399 {
1392 const Py_ssize_t l_left = *(const Py_ssize_t *)left;
1400 const Py_ssize_t l_left = *(const Py_ssize_t *)left;
1393 const Py_ssize_t l_right = *(const Py_ssize_t *)right;
1401 const Py_ssize_t l_right = *(const Py_ssize_t *)right;
1394 if (l_left < l_right) {
1402 if (l_left < l_right) {
1395 return -1;
1403 return -1;
1396 } else if (l_left > l_right) {
1404 } else if (l_left > l_right) {
1397 return 1;
1405 return 1;
1398 }
1406 }
1399 return 0;
1407 return 0;
1400 }
1408 }
1401
1409
1402 static PyObject *index_slicechunktodensity(indexObject *self, PyObject *args)
1410 static PyObject *index_slicechunktodensity(indexObject *self, PyObject *args)
1403 {
1411 {
1404 /* method arguments */
1412 /* method arguments */
1405 PyObject *list_revs = NULL; /* revisions in the chain */
1413 PyObject *list_revs = NULL; /* revisions in the chain */
1406 double targetdensity = 0; /* min density to achieve */
1414 double targetdensity = 0; /* min density to achieve */
1407 Py_ssize_t mingapsize = 0; /* threshold to ignore gaps */
1415 Py_ssize_t mingapsize = 0; /* threshold to ignore gaps */
1408
1416
1409 /* other core variables */
1417 /* other core variables */
1410 Py_ssize_t idxlen = index_length(self);
1418 Py_ssize_t idxlen = index_length(self);
1411 Py_ssize_t i; /* used for various iteration */
1419 Py_ssize_t i; /* used for various iteration */
1412 PyObject *result = NULL; /* the final return of the function */
1420 PyObject *result = NULL; /* the final return of the function */
1413
1421
1414 /* generic information about the delta chain being slice */
1422 /* generic information about the delta chain being slice */
1415 Py_ssize_t num_revs = 0; /* size of the full delta chain */
1423 Py_ssize_t num_revs = 0; /* size of the full delta chain */
1416 Py_ssize_t *revs = NULL; /* native array of revision in the chain */
1424 Py_ssize_t *revs = NULL; /* native array of revision in the chain */
1417 int64_t chainpayload = 0; /* sum of all delta in the chain */
1425 int64_t chainpayload = 0; /* sum of all delta in the chain */
1418 int64_t deltachainspan = 0; /* distance from first byte to last byte */
1426 int64_t deltachainspan = 0; /* distance from first byte to last byte */
1419
1427
1420 /* variable used for slicing the delta chain */
1428 /* variable used for slicing the delta chain */
1421 int64_t readdata = 0; /* amount of data currently planned to be read */
1429 int64_t readdata = 0; /* amount of data currently planned to be read */
1422 double density = 0; /* ration of payload data compared to read ones */
1430 double density = 0; /* ration of payload data compared to read ones */
1423 int64_t previous_end;
1431 int64_t previous_end;
1424 struct Gap *gaps = NULL; /* array of notable gap in the chain */
1432 struct Gap *gaps = NULL; /* array of notable gap in the chain */
1425 Py_ssize_t num_gaps =
1433 Py_ssize_t num_gaps =
1426 0; /* total number of notable gap recorded so far */
1434 0; /* total number of notable gap recorded so far */
1427 Py_ssize_t *selected_indices = NULL; /* indices of gap skipped over */
1435 Py_ssize_t *selected_indices = NULL; /* indices of gap skipped over */
1428 Py_ssize_t num_selected = 0; /* number of gaps skipped */
1436 Py_ssize_t num_selected = 0; /* number of gaps skipped */
1429 PyObject *chunk = NULL; /* individual slice */
1437 PyObject *chunk = NULL; /* individual slice */
1430 PyObject *allchunks = NULL; /* all slices */
1438 PyObject *allchunks = NULL; /* all slices */
1431 Py_ssize_t previdx;
1439 Py_ssize_t previdx;
1432
1440
1433 /* parsing argument */
1441 /* parsing argument */
1434 if (!PyArg_ParseTuple(args, "O!dn", &PyList_Type, &list_revs,
1442 if (!PyArg_ParseTuple(args, "O!dn", &PyList_Type, &list_revs,
1435 &targetdensity, &mingapsize)) {
1443 &targetdensity, &mingapsize)) {
1436 goto bail;
1444 goto bail;
1437 }
1445 }
1438
1446
1439 /* If the delta chain contains a single element, we do not need slicing
1447 /* If the delta chain contains a single element, we do not need slicing
1440 */
1448 */
1441 num_revs = PyList_GET_SIZE(list_revs);
1449 num_revs = PyList_GET_SIZE(list_revs);
1442 if (num_revs <= 1) {
1450 if (num_revs <= 1) {
1443 result = PyTuple_Pack(1, list_revs);
1451 result = PyTuple_Pack(1, list_revs);
1444 goto done;
1452 goto done;
1445 }
1453 }
1446
1454
1447 /* Turn the python list into a native integer array (for efficiency) */
1455 /* Turn the python list into a native integer array (for efficiency) */
1448 revs = (Py_ssize_t *)calloc(num_revs, sizeof(Py_ssize_t));
1456 revs = (Py_ssize_t *)calloc(num_revs, sizeof(Py_ssize_t));
1449 if (revs == NULL) {
1457 if (revs == NULL) {
1450 PyErr_NoMemory();
1458 PyErr_NoMemory();
1451 goto bail;
1459 goto bail;
1452 }
1460 }
1453 for (i = 0; i < num_revs; i++) {
1461 for (i = 0; i < num_revs; i++) {
1454 Py_ssize_t revnum = PyInt_AsLong(PyList_GET_ITEM(list_revs, i));
1462 Py_ssize_t revnum = PyInt_AsLong(PyList_GET_ITEM(list_revs, i));
1455 if (revnum == -1 && PyErr_Occurred()) {
1463 if (revnum == -1 && PyErr_Occurred()) {
1456 goto bail;
1464 goto bail;
1457 }
1465 }
1458 if (revnum < nullrev || revnum >= idxlen) {
1466 if (revnum < nullrev || revnum >= idxlen) {
1459 PyErr_Format(PyExc_IndexError,
1467 PyErr_Format(PyExc_IndexError,
1460 "index out of range: %zd", revnum);
1468 "index out of range: %zd", revnum);
1461 goto bail;
1469 goto bail;
1462 }
1470 }
1463 revs[i] = revnum;
1471 revs[i] = revnum;
1464 }
1472 }
1465
1473
1466 /* Compute and check various property of the unsliced delta chain */
1474 /* Compute and check various property of the unsliced delta chain */
1467 deltachainspan = index_segment_span(self, revs[0], revs[num_revs - 1]);
1475 deltachainspan = index_segment_span(self, revs[0], revs[num_revs - 1]);
1468 if (deltachainspan < 0) {
1476 if (deltachainspan < 0) {
1469 goto bail;
1477 goto bail;
1470 }
1478 }
1471
1479
1472 if (deltachainspan <= mingapsize) {
1480 if (deltachainspan <= mingapsize) {
1473 result = PyTuple_Pack(1, list_revs);
1481 result = PyTuple_Pack(1, list_revs);
1474 goto done;
1482 goto done;
1475 }
1483 }
1476 chainpayload = 0;
1484 chainpayload = 0;
1477 for (i = 0; i < num_revs; i++) {
1485 for (i = 0; i < num_revs; i++) {
1478 int tmp = index_get_length(self, revs[i]);
1486 int tmp = index_get_length(self, revs[i]);
1479 if (tmp < 0) {
1487 if (tmp < 0) {
1480 goto bail;
1488 goto bail;
1481 }
1489 }
1482 chainpayload += tmp;
1490 chainpayload += tmp;
1483 }
1491 }
1484
1492
1485 readdata = deltachainspan;
1493 readdata = deltachainspan;
1486 density = 1.0;
1494 density = 1.0;
1487
1495
1488 if (0 < deltachainspan) {
1496 if (0 < deltachainspan) {
1489 density = (double)chainpayload / (double)deltachainspan;
1497 density = (double)chainpayload / (double)deltachainspan;
1490 }
1498 }
1491
1499
1492 if (density >= targetdensity) {
1500 if (density >= targetdensity) {
1493 result = PyTuple_Pack(1, list_revs);
1501 result = PyTuple_Pack(1, list_revs);
1494 goto done;
1502 goto done;
1495 }
1503 }
1496
1504
1497 /* if chain is too sparse, look for relevant gaps */
1505 /* if chain is too sparse, look for relevant gaps */
1498 gaps = (struct Gap *)calloc(num_revs, sizeof(struct Gap));
1506 gaps = (struct Gap *)calloc(num_revs, sizeof(struct Gap));
1499 if (gaps == NULL) {
1507 if (gaps == NULL) {
1500 PyErr_NoMemory();
1508 PyErr_NoMemory();
1501 goto bail;
1509 goto bail;
1502 }
1510 }
1503
1511
1504 previous_end = -1;
1512 previous_end = -1;
1505 for (i = 0; i < num_revs; i++) {
1513 for (i = 0; i < num_revs; i++) {
1506 int64_t revstart;
1514 int64_t revstart;
1507 int revsize;
1515 int revsize;
1508 revstart = index_get_start(self, revs[i]);
1516 revstart = index_get_start(self, revs[i]);
1509 if (revstart < 0) {
1517 if (revstart < 0) {
1510 goto bail;
1518 goto bail;
1511 };
1519 };
1512 revsize = index_get_length(self, revs[i]);
1520 revsize = index_get_length(self, revs[i]);
1513 if (revsize < 0) {
1521 if (revsize < 0) {
1514 goto bail;
1522 goto bail;
1515 };
1523 };
1516 if (revsize == 0) {
1524 if (revsize == 0) {
1517 continue;
1525 continue;
1518 }
1526 }
1519 if (previous_end >= 0) {
1527 if (previous_end >= 0) {
1520 int64_t gapsize = revstart - previous_end;
1528 int64_t gapsize = revstart - previous_end;
1521 if (gapsize > mingapsize) {
1529 if (gapsize > mingapsize) {
1522 gaps[num_gaps].size = gapsize;
1530 gaps[num_gaps].size = gapsize;
1523 gaps[num_gaps].idx = i;
1531 gaps[num_gaps].idx = i;
1524 num_gaps += 1;
1532 num_gaps += 1;
1525 }
1533 }
1526 }
1534 }
1527 previous_end = revstart + revsize;
1535 previous_end = revstart + revsize;
1528 }
1536 }
1529 if (num_gaps == 0) {
1537 if (num_gaps == 0) {
1530 result = PyTuple_Pack(1, list_revs);
1538 result = PyTuple_Pack(1, list_revs);
1531 goto done;
1539 goto done;
1532 }
1540 }
1533 qsort(gaps, num_gaps, sizeof(struct Gap), &gap_compare);
1541 qsort(gaps, num_gaps, sizeof(struct Gap), &gap_compare);
1534
1542
1535 /* Slice the largest gap first, they improve the density the most */
1543 /* Slice the largest gap first, they improve the density the most */
1536 selected_indices =
1544 selected_indices =
1537 (Py_ssize_t *)malloc((num_gaps + 1) * sizeof(Py_ssize_t));
1545 (Py_ssize_t *)malloc((num_gaps + 1) * sizeof(Py_ssize_t));
1538 if (selected_indices == NULL) {
1546 if (selected_indices == NULL) {
1539 PyErr_NoMemory();
1547 PyErr_NoMemory();
1540 goto bail;
1548 goto bail;
1541 }
1549 }
1542
1550
1543 for (i = num_gaps - 1; i >= 0; i--) {
1551 for (i = num_gaps - 1; i >= 0; i--) {
1544 selected_indices[num_selected] = gaps[i].idx;
1552 selected_indices[num_selected] = gaps[i].idx;
1545 readdata -= gaps[i].size;
1553 readdata -= gaps[i].size;
1546 num_selected += 1;
1554 num_selected += 1;
1547 if (readdata <= 0) {
1555 if (readdata <= 0) {
1548 density = 1.0;
1556 density = 1.0;
1549 } else {
1557 } else {
1550 density = (double)chainpayload / (double)readdata;
1558 density = (double)chainpayload / (double)readdata;
1551 }
1559 }
1552 if (density >= targetdensity) {
1560 if (density >= targetdensity) {
1553 break;
1561 break;
1554 }
1562 }
1555 }
1563 }
1556 qsort(selected_indices, num_selected, sizeof(Py_ssize_t),
1564 qsort(selected_indices, num_selected, sizeof(Py_ssize_t),
1557 &Py_ssize_t_compare);
1565 &Py_ssize_t_compare);
1558
1566
1559 /* create the resulting slice */
1567 /* create the resulting slice */
1560 allchunks = PyList_New(0);
1568 allchunks = PyList_New(0);
1561 if (allchunks == NULL) {
1569 if (allchunks == NULL) {
1562 goto bail;
1570 goto bail;
1563 }
1571 }
1564 previdx = 0;
1572 previdx = 0;
1565 selected_indices[num_selected] = num_revs;
1573 selected_indices[num_selected] = num_revs;
1566 for (i = 0; i <= num_selected; i++) {
1574 for (i = 0; i <= num_selected; i++) {
1567 Py_ssize_t idx = selected_indices[i];
1575 Py_ssize_t idx = selected_indices[i];
1568 Py_ssize_t endidx = trim_endidx(self, revs, previdx, idx);
1576 Py_ssize_t endidx = trim_endidx(self, revs, previdx, idx);
1569 if (endidx < 0) {
1577 if (endidx < 0) {
1570 goto bail;
1578 goto bail;
1571 }
1579 }
1572 if (previdx < endidx) {
1580 if (previdx < endidx) {
1573 chunk = PyList_GetSlice(list_revs, previdx, endidx);
1581 chunk = PyList_GetSlice(list_revs, previdx, endidx);
1574 if (chunk == NULL) {
1582 if (chunk == NULL) {
1575 goto bail;
1583 goto bail;
1576 }
1584 }
1577 if (PyList_Append(allchunks, chunk) == -1) {
1585 if (PyList_Append(allchunks, chunk) == -1) {
1578 goto bail;
1586 goto bail;
1579 }
1587 }
1580 Py_DECREF(chunk);
1588 Py_DECREF(chunk);
1581 chunk = NULL;
1589 chunk = NULL;
1582 }
1590 }
1583 previdx = idx;
1591 previdx = idx;
1584 }
1592 }
1585 result = allchunks;
1593 result = allchunks;
1586 goto done;
1594 goto done;
1587
1595
1588 bail:
1596 bail:
1589 Py_XDECREF(allchunks);
1597 Py_XDECREF(allchunks);
1590 Py_XDECREF(chunk);
1598 Py_XDECREF(chunk);
1591 done:
1599 done:
1592 free(revs);
1600 free(revs);
1593 free(gaps);
1601 free(gaps);
1594 free(selected_indices);
1602 free(selected_indices);
1595 return result;
1603 return result;
1596 }
1604 }
1597
1605
1598 static inline int nt_level(const char *node, Py_ssize_t level)
1606 static inline int nt_level(const char *node, Py_ssize_t level)
1599 {
1607 {
1600 int v = node[level >> 1];
1608 int v = node[level >> 1];
1601 if (!(level & 1))
1609 if (!(level & 1))
1602 v >>= 4;
1610 v >>= 4;
1603 return v & 0xf;
1611 return v & 0xf;
1604 }
1612 }
1605
1613
1606 /*
1614 /*
1607 * Return values:
1615 * Return values:
1608 *
1616 *
1609 * -4: match is ambiguous (multiple candidates)
1617 * -4: match is ambiguous (multiple candidates)
1610 * -2: not found
1618 * -2: not found
1611 * rest: valid rev
1619 * rest: valid rev
1612 */
1620 */
1613 static int nt_find(nodetree *self, const char *node, Py_ssize_t nodelen,
1621 static int nt_find(nodetree *self, const char *node, Py_ssize_t nodelen,
1614 int hex)
1622 int hex)
1615 {
1623 {
1616 int (*getnybble)(const char *, Py_ssize_t) = hex ? hexdigit : nt_level;
1624 int (*getnybble)(const char *, Py_ssize_t) = hex ? hexdigit : nt_level;
1617 int level, maxlevel, off;
1625 int level, maxlevel, off;
1618
1626
1619 /* If the input is binary, do a fast check for the nullid first. */
1627 /* If the input is binary, do a fast check for the nullid first. */
1620 if (!hex && nodelen == self->nodelen && node[0] == '\0' &&
1628 if (!hex && nodelen == self->nodelen && node[0] == '\0' &&
1621 node[1] == '\0' && memcmp(node, nullid, self->nodelen) == 0)
1629 node[1] == '\0' && memcmp(node, nullid, self->nodelen) == 0)
1622 return -1;
1630 return -1;
1623
1631
1624 if (hex)
1632 if (hex)
1625 maxlevel = nodelen;
1633 maxlevel = nodelen;
1626 else
1634 else
1627 maxlevel = 2 * nodelen;
1635 maxlevel = 2 * nodelen;
1628 if (maxlevel > 2 * self->nodelen)
1636 if (maxlevel > 2 * self->nodelen)
1629 maxlevel = 2 * self->nodelen;
1637 maxlevel = 2 * self->nodelen;
1630
1638
1631 for (level = off = 0; level < maxlevel; level++) {
1639 for (level = off = 0; level < maxlevel; level++) {
1632 int k = getnybble(node, level);
1640 int k = getnybble(node, level);
1633 nodetreenode *n = &self->nodes[off];
1641 nodetreenode *n = &self->nodes[off];
1634 int v = n->children[k];
1642 int v = n->children[k];
1635
1643
1636 if (v < 0) {
1644 if (v < 0) {
1637 const char *n;
1645 const char *n;
1638 Py_ssize_t i;
1646 Py_ssize_t i;
1639
1647
1640 v = -(v + 2);
1648 v = -(v + 2);
1641 n = index_node(self->index, v);
1649 n = index_node(self->index, v);
1642 if (n == NULL)
1650 if (n == NULL)
1643 return -2;
1651 return -2;
1644 for (i = level; i < maxlevel; i++)
1652 for (i = level; i < maxlevel; i++)
1645 if (getnybble(node, i) != nt_level(n, i))
1653 if (getnybble(node, i) != nt_level(n, i))
1646 return -2;
1654 return -2;
1647 return v;
1655 return v;
1648 }
1656 }
1649 if (v == 0)
1657 if (v == 0)
1650 return -2;
1658 return -2;
1651 off = v;
1659 off = v;
1652 }
1660 }
1653 /* multiple matches against an ambiguous prefix */
1661 /* multiple matches against an ambiguous prefix */
1654 return -4;
1662 return -4;
1655 }
1663 }
1656
1664
1657 static int nt_new(nodetree *self)
1665 static int nt_new(nodetree *self)
1658 {
1666 {
1659 if (self->length == self->capacity) {
1667 if (self->length == self->capacity) {
1660 size_t newcapacity;
1668 size_t newcapacity;
1661 nodetreenode *newnodes;
1669 nodetreenode *newnodes;
1662 newcapacity = self->capacity * 2;
1670 newcapacity = self->capacity * 2;
1663 if (newcapacity >= SIZE_MAX / sizeof(nodetreenode)) {
1671 if (newcapacity >= SIZE_MAX / sizeof(nodetreenode)) {
1664 PyErr_SetString(PyExc_MemoryError,
1672 PyErr_SetString(PyExc_MemoryError,
1665 "overflow in nt_new");
1673 "overflow in nt_new");
1666 return -1;
1674 return -1;
1667 }
1675 }
1668 newnodes =
1676 newnodes =
1669 realloc(self->nodes, newcapacity * sizeof(nodetreenode));
1677 realloc(self->nodes, newcapacity * sizeof(nodetreenode));
1670 if (newnodes == NULL) {
1678 if (newnodes == NULL) {
1671 PyErr_SetString(PyExc_MemoryError, "out of memory");
1679 PyErr_SetString(PyExc_MemoryError, "out of memory");
1672 return -1;
1680 return -1;
1673 }
1681 }
1674 self->capacity = newcapacity;
1682 self->capacity = newcapacity;
1675 self->nodes = newnodes;
1683 self->nodes = newnodes;
1676 memset(&self->nodes[self->length], 0,
1684 memset(&self->nodes[self->length], 0,
1677 sizeof(nodetreenode) * (self->capacity - self->length));
1685 sizeof(nodetreenode) * (self->capacity - self->length));
1678 }
1686 }
1679 return self->length++;
1687 return self->length++;
1680 }
1688 }
1681
1689
1682 static int nt_insert(nodetree *self, const char *node, int rev)
1690 static int nt_insert(nodetree *self, const char *node, int rev)
1683 {
1691 {
1684 int level = 0;
1692 int level = 0;
1685 int off = 0;
1693 int off = 0;
1686
1694
1687 while (level < 2 * self->nodelen) {
1695 while (level < 2 * self->nodelen) {
1688 int k = nt_level(node, level);
1696 int k = nt_level(node, level);
1689 nodetreenode *n;
1697 nodetreenode *n;
1690 int v;
1698 int v;
1691
1699
1692 n = &self->nodes[off];
1700 n = &self->nodes[off];
1693 v = n->children[k];
1701 v = n->children[k];
1694
1702
1695 if (v == 0) {
1703 if (v == 0) {
1696 n->children[k] = -rev - 2;
1704 n->children[k] = -rev - 2;
1697 return 0;
1705 return 0;
1698 }
1706 }
1699 if (v < 0) {
1707 if (v < 0) {
1700 const char *oldnode =
1708 const char *oldnode =
1701 index_node_existing(self->index, -(v + 2));
1709 index_node_existing(self->index, -(v + 2));
1702 int noff;
1710 int noff;
1703
1711
1704 if (oldnode == NULL)
1712 if (oldnode == NULL)
1705 return -1;
1713 return -1;
1706 if (!memcmp(oldnode, node, self->nodelen)) {
1714 if (!memcmp(oldnode, node, self->nodelen)) {
1707 n->children[k] = -rev - 2;
1715 n->children[k] = -rev - 2;
1708 return 0;
1716 return 0;
1709 }
1717 }
1710 noff = nt_new(self);
1718 noff = nt_new(self);
1711 if (noff == -1)
1719 if (noff == -1)
1712 return -1;
1720 return -1;
1713 /* self->nodes may have been changed by realloc */
1721 /* self->nodes may have been changed by realloc */
1714 self->nodes[off].children[k] = noff;
1722 self->nodes[off].children[k] = noff;
1715 off = noff;
1723 off = noff;
1716 n = &self->nodes[off];
1724 n = &self->nodes[off];
1717 n->children[nt_level(oldnode, ++level)] = v;
1725 n->children[nt_level(oldnode, ++level)] = v;
1718 if (level > self->depth)
1726 if (level > self->depth)
1719 self->depth = level;
1727 self->depth = level;
1720 self->splits += 1;
1728 self->splits += 1;
1721 } else {
1729 } else {
1722 level += 1;
1730 level += 1;
1723 off = v;
1731 off = v;
1724 }
1732 }
1725 }
1733 }
1726
1734
1727 return -1;
1735 return -1;
1728 }
1736 }
1729
1737
1730 static PyObject *ntobj_insert(nodetreeObject *self, PyObject *args)
1738 static PyObject *ntobj_insert(nodetreeObject *self, PyObject *args)
1731 {
1739 {
1732 Py_ssize_t rev;
1740 Py_ssize_t rev;
1733 const char *node;
1741 const char *node;
1734 Py_ssize_t length;
1742 Py_ssize_t length;
1735 if (!PyArg_ParseTuple(args, "n", &rev))
1743 if (!PyArg_ParseTuple(args, "n", &rev))
1736 return NULL;
1744 return NULL;
1737 length = index_length(self->nt.index);
1745 length = index_length(self->nt.index);
1738 if (rev < 0 || rev >= length) {
1746 if (rev < 0 || rev >= length) {
1739 PyErr_SetString(PyExc_ValueError, "revlog index out of range");
1747 PyErr_SetString(PyExc_ValueError, "revlog index out of range");
1740 return NULL;
1748 return NULL;
1741 }
1749 }
1742 node = index_node_existing(self->nt.index, rev);
1750 node = index_node_existing(self->nt.index, rev);
1743 if (nt_insert(&self->nt, node, (int)rev) == -1)
1751 if (nt_insert(&self->nt, node, (int)rev) == -1)
1744 return NULL;
1752 return NULL;
1745 Py_RETURN_NONE;
1753 Py_RETURN_NONE;
1746 }
1754 }
1747
1755
1748 static int nt_delete_node(nodetree *self, const char *node)
1756 static int nt_delete_node(nodetree *self, const char *node)
1749 {
1757 {
1750 /* rev==-2 happens to get encoded as 0, which is interpreted as not set
1758 /* rev==-2 happens to get encoded as 0, which is interpreted as not set
1751 */
1759 */
1752 return nt_insert(self, node, -2);
1760 return nt_insert(self, node, -2);
1753 }
1761 }
1754
1762
/*
 * Initialize a nodetree in place.
 *
 * index:    borrowed pointer to the owning revlog index; the caller is
 *           responsible for its lifetime / reference count.
 * capacity: expected number of revisions, used to size the initial
 *           node array.
 *
 * Returns 0 on success, -1 on error with a Python exception set.  Even
 * on failure the structure is safe to hand to nt_dealloc().
 */
static int nt_init(nodetree *self, indexObject *index, unsigned capacity)
{
	/* Initialize before overflow-checking to avoid nt_dealloc() crash. */
	self->nodes = NULL;

	self->index = index;
	/* The input capacity is in terms of revisions, while the field is in
	 * terms of nodetree nodes. */
	self->capacity = (capacity < 4 ? 4 : capacity / 2);
	self->nodelen = index->nodelen;
	self->depth = 0;
	self->splits = 0;
	/* Guard the multiplication inside calloc() below. */
	if (self->capacity > SIZE_MAX / sizeof(nodetreenode)) {
		PyErr_SetString(PyExc_ValueError, "overflow in init_nt");
		return -1;
	}
	self->nodes = calloc(self->capacity, sizeof(nodetreenode));
	if (self->nodes == NULL) {
		PyErr_NoMemory();
		return -1;
	}
	/* Slot 0 is the (zeroed) root node, hence length starts at 1. */
	self->length = 1;
	return 0;
}
1779
1787
/*
 * tp_init for the Python-visible parsers.nodetree type.
 *
 * Arguments are (index, capacity).  The reference taken on the index
 * here is released in ntobj_dealloc via Py_XDECREF(self->nt.index).
 */
static int ntobj_init(nodetreeObject *self, PyObject *args)
{
	PyObject *index;
	unsigned capacity;
	if (!PyArg_ParseTuple(args, "O!I", &HgRevlogIndex_Type, &index,
	                      &capacity))
		return -1;
	Py_INCREF(index);
	return nt_init(&self->nt, (indexObject *)index, capacity);
}
1790
1798
/*
 * Look up a (possibly partial) hex nodeid of length nodelen in the
 * tree.  Thin wrapper around nt_find with hex=1; return values follow
 * nt_find's convention.
 */
static int nt_partialmatch(nodetree *self, const char *node, Py_ssize_t nodelen)
{
	return nt_find(self, node, nodelen, 1);
}
1795
1803
/*
 * Find the length of the shortest unique prefix of node.
 *
 * Return values:
 *
 * -3: error (exception set)
 * -2: not found (no exception set)
 * rest: length of shortest prefix
 */
static int nt_shortest(nodetree *self, const char *node)
{
	int level, off;

	/* Walk one hex nibble of the node per tree level. */
	for (level = off = 0; level < 2 * self->nodelen; level++) {
		int k, v;
		nodetreenode *n = &self->nodes[off];
		k = nt_level(node, level);
		v = n->children[k];
		if (v < 0) {
			/* Negative child: a leaf encoding rev -(v + 2).
			 * NB: this inner `n` deliberately shadows the
			 * nodetreenode pointer above. */
			const char *n;
			v = -(v + 2);
			n = index_node_existing(self->index, v);
			if (n == NULL)
				return -3;
			if (memcmp(node, n, self->nodelen) != 0)
				/*
				 * Found a unique prefix, but it wasn't for the
				 * requested node (i.e the requested node does
				 * not exist).
				 */
				return -2;
			return level + 1;
		}
		if (v == 0)
			return -2;
		/* Internal node: descend. */
		off = v;
	}
	/*
	 * The node was still not unique after 40 hex digits, so this won't
	 * happen. Also, if we get here, then there's a programming error in
	 * this file that made us insert a node longer than 40 hex digits.
	 */
	PyErr_SetString(PyExc_Exception, "broken node tree");
	return -3;
}
1841
1849
1842 static PyObject *ntobj_shortest(nodetreeObject *self, PyObject *args)
1850 static PyObject *ntobj_shortest(nodetreeObject *self, PyObject *args)
1843 {
1851 {
1844 PyObject *val;
1852 PyObject *val;
1845 char *node;
1853 char *node;
1846 int length;
1854 int length;
1847
1855
1848 if (!PyArg_ParseTuple(args, "O", &val))
1856 if (!PyArg_ParseTuple(args, "O", &val))
1849 return NULL;
1857 return NULL;
1850 if (node_check(self->nt.nodelen, val, &node) == -1)
1858 if (node_check(self->nt.nodelen, val, &node) == -1)
1851 return NULL;
1859 return NULL;
1852
1860
1853 length = nt_shortest(&self->nt, node);
1861 length = nt_shortest(&self->nt, node);
1854 if (length == -3)
1862 if (length == -3)
1855 return NULL;
1863 return NULL;
1856 if (length == -2) {
1864 if (length == -2) {
1857 raise_revlog_error();
1865 raise_revlog_error();
1858 return NULL;
1866 return NULL;
1859 }
1867 }
1860 return PyInt_FromLong(length);
1868 return PyInt_FromLong(length);
1861 }
1869 }
1862
1870
/*
 * Release the node array owned by a nodetree.  Safe to call multiple
 * times (nodes is reset to NULL); does NOT touch the borrowed/owned
 * index pointer -- that is the caller's job (see ntobj_dealloc).
 */
static void nt_dealloc(nodetree *self)
{
	free(self->nodes);
	self->nodes = NULL;
}
1868
1876
/*
 * tp_dealloc for parsers.nodetree: drop the index reference taken in
 * ntobj_init, free the node array, then free the object itself.
 */
static void ntobj_dealloc(nodetreeObject *self)
{
	Py_XDECREF(self->nt.index);
	nt_dealloc(&self->nt);
	PyObject_Del(self);
}
1875
1883
/* Method table for the Python-visible parsers.nodetree type. */
static PyMethodDef ntobj_methods[] = {
    {"insert", (PyCFunction)ntobj_insert, METH_VARARGS,
     "insert an index entry"},
    {"shortest", (PyCFunction)ntobj_shortest, METH_VARARGS,
     "find length of shortest hex nodeid of a binary ID"},
    {NULL} /* Sentinel */
};
1883
1891
/* Type object for parsers.nodetree.  Uses default allocation; only
 * dealloc, init and the method table are customized. */
static PyTypeObject nodetreeType = {
    PyVarObject_HEAD_INIT(NULL, 0) /* header */
    "parsers.nodetree",            /* tp_name */
    sizeof(nodetreeObject),        /* tp_basicsize */
    0,                             /* tp_itemsize */
    (destructor)ntobj_dealloc,     /* tp_dealloc */
    0,                             /* tp_print */
    0,                             /* tp_getattr */
    0,                             /* tp_setattr */
    0,                             /* tp_compare */
    0,                             /* tp_repr */
    0,                             /* tp_as_number */
    0,                             /* tp_as_sequence */
    0,                             /* tp_as_mapping */
    0,                             /* tp_hash */
    0,                             /* tp_call */
    0,                             /* tp_str */
    0,                             /* tp_getattro */
    0,                             /* tp_setattro */
    0,                             /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT,            /* tp_flags */
    "nodetree",                    /* tp_doc */
    0,                             /* tp_traverse */
    0,                             /* tp_clear */
    0,                             /* tp_richcompare */
    0,                             /* tp_weaklistoffset */
    0,                             /* tp_iter */
    0,                             /* tp_iternext */
    ntobj_methods,                 /* tp_methods */
    0,                             /* tp_members */
    0,                             /* tp_getset */
    0,                             /* tp_base */
    0,                             /* tp_dict */
    0,                             /* tp_descr_get */
    0,                             /* tp_descr_set */
    0,                             /* tp_dictoffset */
    (initproc)ntobj_init,          /* tp_init */
    0,                             /* tp_alloc */
};
1923
1931
/*
 * Lazily create the index's embedded radix tree and seed it with the
 * null node.  Idempotent: does nothing once ntinitialized is set.
 *
 * Returns 0 on success, -1 on error (exception set).
 */
static int index_init_nt(indexObject *self)
{
	if (!self->ntinitialized) {
		if (nt_init(&self->nt, self, (int)self->length) == -1) {
			nt_dealloc(&self->nt);
			return -1;
		}
		/* nullid maps to the virtual rev -1 */
		if (nt_insert(&self->nt, nullid, -1) == -1) {
			nt_dealloc(&self->nt);
			return -1;
		}
		self->ntinitialized = 1;
		/* ntrev tracks the lowest rev not yet inserted in the tree;
		 * everything is still pending at this point. */
		self->ntrev = (int)index_length(self);
		self->ntlookups = 1;
		self->ntmisses = 0;
	}
	return 0;
}
1942
1950
/*
 * Resolve a binary nodeid to its revision number, filling the radix
 * tree lazily as a side effect.
 *
 * Return values:
 *
 * -3: error (exception set)
 * -2: not found (no exception set)
 * rest: valid rev
 */
static int index_find_node(indexObject *self, const char *node)
{
	int rev;

	if (index_init_nt(self) == -1)
		return -3;

	self->ntlookups++;
	/* Fast path: already cached in the tree. */
	rev = nt_find(&self->nt, node, self->nodelen, 0);
	if (rev >= -1)
		return rev;

	/*
	 * For the first handful of lookups, we scan the entire index,
	 * and cache only the matching nodes. This optimizes for cases
	 * like "hg tip", where only a few nodes are accessed.
	 *
	 * After that, we cache every node we visit, using a single
	 * scan amortized over multiple lookups. This gives the best
	 * bulk performance, e.g. for "hg log".
	 */
	if (self->ntmisses++ < 4) {
		for (rev = self->ntrev - 1; rev >= 0; rev--) {
			const char *n = index_node_existing(self, rev);
			if (n == NULL)
				return -3;
			if (memcmp(node, n, self->nodelen) == 0) {
				/* cache only the node we were asked for */
				if (nt_insert(&self->nt, n, rev) == -1)
					return -3;
				break;
			}
		}
	} else {
		/* cache every node visited during the scan; ntrev remembers
		 * how far down we got so later scans can resume there */
		for (rev = self->ntrev - 1; rev >= 0; rev--) {
			const char *n = index_node_existing(self, rev);
			if (n == NULL)
				return -3;
			if (nt_insert(&self->nt, n, rev) == -1) {
				self->ntrev = rev + 1;
				return -3;
			}
			if (memcmp(node, n, self->nodelen) == 0) {
				break;
			}
		}
		self->ntrev = rev;
	}

	if (rev >= 0)
		return rev;
	return -2;
}
2002
2010
2003 static PyObject *index_getitem(indexObject *self, PyObject *value)
2011 static PyObject *index_getitem(indexObject *self, PyObject *value)
2004 {
2012 {
2005 char *node;
2013 char *node;
2006 int rev;
2014 int rev;
2007
2015
2008 if (PyInt_Check(value)) {
2016 if (PyInt_Check(value)) {
2009 long idx;
2017 long idx;
2010 if (!pylong_to_long(value, &idx)) {
2018 if (!pylong_to_long(value, &idx)) {
2011 return NULL;
2019 return NULL;
2012 }
2020 }
2013 return index_get(self, idx);
2021 return index_get(self, idx);
2014 }
2022 }
2015
2023
2016 if (node_check(self->nodelen, value, &node) == -1)
2024 if (node_check(self->nodelen, value, &node) == -1)
2017 return NULL;
2025 return NULL;
2018 rev = index_find_node(self, node);
2026 rev = index_find_node(self, node);
2019 if (rev >= -1)
2027 if (rev >= -1)
2020 return PyInt_FromLong(rev);
2028 return PyInt_FromLong(rev);
2021 if (rev == -2)
2029 if (rev == -2)
2022 raise_revlog_error();
2030 raise_revlog_error();
2023 return NULL;
2031 return NULL;
2024 }
2032 }
2025
2033
/*
 * Fully populate the radix tree.
 *
 * Inserts every revision from ntrev-1 down to 0, then marks the tree
 * complete by setting ntrev to -1.  Returns 0 on success, -1 on error
 * (exception set).
 */
static int index_populate_nt(indexObject *self)
{
	int rev;
	if (self->ntrev > 0) {
		for (rev = self->ntrev - 1; rev >= 0; rev--) {
			const char *n = index_node_existing(self, rev);
			if (n == NULL)
				return -1;
			if (nt_insert(&self->nt, n, rev) == -1)
				return -1;
		}
		/* -1 means "everything inserted" (see index_find_node) */
		self->ntrev = -1;
	}
	return 0;
}
2044
2052
/*
 * Python method: resolve a hex nodeid prefix to the full binary nodeid.
 *
 * Returns:
 *   bytes  - the full binary node (nullid for the virtual rev -1)
 *   None   - no match, or the input contains non-hex characters
 *   NULL   - error with exception set; the -4 case from
 *            nt_partialmatch raises RevlogError (presumably an
 *            ambiguous prefix -- confirm against nt_find)
 */
static PyObject *index_partialmatch(indexObject *self, PyObject *args)
{
	const char *fullnode;
	Py_ssize_t nodelen;
	char *node;
	int rev, i;

	if (!PyArg_ParseTuple(args, PY23("s#", "y#"), &node, &nodelen))
		return NULL;

	if (nodelen < 1) {
		PyErr_SetString(PyExc_ValueError, "key too short");
		return NULL;
	}

	if (nodelen > 2 * self->nodelen) {
		PyErr_SetString(PyExc_ValueError, "key too long");
		return NULL;
	}

	/* hexdigit() sets an exception on invalid characters */
	for (i = 0; i < nodelen; i++)
		hexdigit(node, i);
	if (PyErr_Occurred()) {
		/* input contains non-hex characters */
		PyErr_Clear();
		Py_RETURN_NONE;
	}

	/* Prefix search needs the complete tree, not the lazy subset. */
	if (index_init_nt(self) == -1)
		return NULL;
	if (index_populate_nt(self) == -1)
		return NULL;
	rev = nt_partialmatch(&self->nt, node, nodelen);

	switch (rev) {
	case -4:
		raise_revlog_error();
		return NULL;
	case -2:
		Py_RETURN_NONE;
	case -1:
		return PyBytes_FromStringAndSize(nullid, self->nodelen);
	}

	fullnode = index_node_existing(self, rev);
	if (fullnode == NULL) {
		return NULL;
	}
	return PyBytes_FromStringAndSize(fullnode, self->nodelen);
}
2095
2103
/*
 * Python method: length of the shortest unique hex prefix of a binary
 * nodeid.  Requires a fully populated tree, since uniqueness must be
 * decided against every revision.  Raises RevlogError if the node is
 * not found.
 */
static PyObject *index_shortest(indexObject *self, PyObject *args)
{
	PyObject *val;
	char *node;
	int length;

	if (!PyArg_ParseTuple(args, "O", &val))
		return NULL;
	if (node_check(self->nodelen, val, &node) == -1)
		return NULL;

	self->ntlookups++;
	if (index_init_nt(self) == -1)
		return NULL;
	if (index_populate_nt(self) == -1)
		return NULL;
	length = nt_shortest(&self->nt, node);
	if (length == -3)
		return NULL;
	if (length == -2) {
		raise_revlog_error();
		return NULL;
	}
	return PyInt_FromLong(length);
}
2121
2129
2122 static PyObject *index_m_get(indexObject *self, PyObject *args)
2130 static PyObject *index_m_get(indexObject *self, PyObject *args)
2123 {
2131 {
2124 PyObject *val;
2132 PyObject *val;
2125 char *node;
2133 char *node;
2126 int rev;
2134 int rev;
2127
2135
2128 if (!PyArg_ParseTuple(args, "O", &val))
2136 if (!PyArg_ParseTuple(args, "O", &val))
2129 return NULL;
2137 return NULL;
2130 if (node_check(self->nodelen, val, &node) == -1)
2138 if (node_check(self->nodelen, val, &node) == -1)
2131 return NULL;
2139 return NULL;
2132 rev = index_find_node(self, node);
2140 rev = index_find_node(self, node);
2133 if (rev == -3)
2141 if (rev == -3)
2134 return NULL;
2142 return NULL;
2135 if (rev == -2)
2143 if (rev == -2)
2136 Py_RETURN_NONE;
2144 Py_RETURN_NONE;
2137 return PyInt_FromLong(rev);
2145 return PyInt_FromLong(rev);
2138 }
2146 }
2139
2147
/*
 * sq_contains for the index: membership test by revision number or by
 * binary nodeid.
 *
 * Returns 1 if present, 0 if absent, -1 on error (exception set).
 */
static int index_contains(indexObject *self, PyObject *value)
{
	char *node;

	if (PyInt_Check(value)) {
		long rev;
		if (!pylong_to_long(value, &rev)) {
			return -1;
		}
		/* -1 (nullrev) is always considered present */
		return rev >= -1 && rev < index_length(self);
	}

	if (node_check(self->nodelen, value, &node) == -1)
		return -1;

	switch (index_find_node(self, node)) {
	case -3:
		return -1;
	case -2:
		return 0;
	default:
		return 1;
	}
}
2164
2172
/*
 * Python method index.has_node(rev_or_node): boolean wrapper around
 * index_contains.
 */
static PyObject *index_m_has_node(indexObject *self, PyObject *args)
{
	int ret = index_contains(self, args);
	if (ret < 0)
		return NULL;
	return PyBool_FromLong((long)ret);
}
2172
2180
2173 static PyObject *index_m_rev(indexObject *self, PyObject *val)
2181 static PyObject *index_m_rev(indexObject *self, PyObject *val)
2174 {
2182 {
2175 char *node;
2183 char *node;
2176 int rev;
2184 int rev;
2177
2185
2178 if (node_check(self->nodelen, val, &node) == -1)
2186 if (node_check(self->nodelen, val, &node) == -1)
2179 return NULL;
2187 return NULL;
2180 rev = index_find_node(self, node);
2188 rev = index_find_node(self, node);
2181 if (rev >= -1)
2189 if (rev >= -1)
2182 return PyInt_FromLong(rev);
2190 return PyInt_FromLong(rev);
2183 if (rev == -2)
2191 if (rev == -2)
2184 raise_revlog_error();
2192 raise_revlog_error();
2185 return NULL;
2193 return NULL;
2186 }
2194 }
2187
2195
2188 typedef uint64_t bitmask;
2196 typedef uint64_t bitmask;
2189
2197
2190 /*
2198 /*
2191 * Given a disjoint set of revs, return all candidates for the
2199 * Given a disjoint set of revs, return all candidates for the
2192 * greatest common ancestor. In revset notation, this is the set
2200 * greatest common ancestor. In revset notation, this is the set
2193 * "heads(::a and ::b and ...)"
2201 * "heads(::a and ::b and ...)"
2194 */
2202 */
2195 static PyObject *find_gca_candidates(indexObject *self, const int *revs,
2203 static PyObject *find_gca_candidates(indexObject *self, const int *revs,
2196 int revcount)
2204 int revcount)
2197 {
2205 {
2198 const bitmask allseen = (1ull << revcount) - 1;
2206 const bitmask allseen = (1ull << revcount) - 1;
2199 const bitmask poison = 1ull << revcount;
2207 const bitmask poison = 1ull << revcount;
2200 PyObject *gca = PyList_New(0);
2208 PyObject *gca = PyList_New(0);
2201 int i, v, interesting;
2209 int i, v, interesting;
2202 int maxrev = -1;
2210 int maxrev = -1;
2203 bitmask sp;
2211 bitmask sp;
2204 bitmask *seen;
2212 bitmask *seen;
2205
2213
2206 if (gca == NULL)
2214 if (gca == NULL)
2207 return PyErr_NoMemory();
2215 return PyErr_NoMemory();
2208
2216
2209 for (i = 0; i < revcount; i++) {
2217 for (i = 0; i < revcount; i++) {
2210 if (revs[i] > maxrev)
2218 if (revs[i] > maxrev)
2211 maxrev = revs[i];
2219 maxrev = revs[i];
2212 }
2220 }
2213
2221
2214 seen = calloc(sizeof(*seen), maxrev + 1);
2222 seen = calloc(sizeof(*seen), maxrev + 1);
2215 if (seen == NULL) {
2223 if (seen == NULL) {
2216 Py_DECREF(gca);
2224 Py_DECREF(gca);
2217 return PyErr_NoMemory();
2225 return PyErr_NoMemory();
2218 }
2226 }
2219
2227
2220 for (i = 0; i < revcount; i++)
2228 for (i = 0; i < revcount; i++)
2221 seen[revs[i]] = 1ull << i;
2229 seen[revs[i]] = 1ull << i;
2222
2230
2223 interesting = revcount;
2231 interesting = revcount;
2224
2232
2225 for (v = maxrev; v >= 0 && interesting; v--) {
2233 for (v = maxrev; v >= 0 && interesting; v--) {
2226 bitmask sv = seen[v];
2234 bitmask sv = seen[v];
2227 int parents[2];
2235 int parents[2];
2228
2236
2229 if (!sv)
2237 if (!sv)
2230 continue;
2238 continue;
2231
2239
2232 if (sv < poison) {
2240 if (sv < poison) {
2233 interesting -= 1;
2241 interesting -= 1;
2234 if (sv == allseen) {
2242 if (sv == allseen) {
2235 PyObject *obj = PyInt_FromLong(v);
2243 PyObject *obj = PyInt_FromLong(v);
2236 if (obj == NULL)
2244 if (obj == NULL)
2237 goto bail;
2245 goto bail;
2238 if (PyList_Append(gca, obj) == -1) {
2246 if (PyList_Append(gca, obj) == -1) {
2239 Py_DECREF(obj);
2247 Py_DECREF(obj);
2240 goto bail;
2248 goto bail;
2241 }
2249 }
2242 sv |= poison;
2250 sv |= poison;
2243 for (i = 0; i < revcount; i++) {
2251 for (i = 0; i < revcount; i++) {
2244 if (revs[i] == v)
2252 if (revs[i] == v)
2245 goto done;
2253 goto done;
2246 }
2254 }
2247 }
2255 }
2248 }
2256 }
2249 if (index_get_parents(self, v, parents, maxrev) < 0)
2257 if (index_get_parents(self, v, parents, maxrev) < 0)
2250 goto bail;
2258 goto bail;
2251
2259
2252 for (i = 0; i < 2; i++) {
2260 for (i = 0; i < 2; i++) {
2253 int p = parents[i];
2261 int p = parents[i];
2254 if (p == -1)
2262 if (p == -1)
2255 continue;
2263 continue;
2256 sp = seen[p];
2264 sp = seen[p];
2257 if (sv < poison) {
2265 if (sv < poison) {
2258 if (sp == 0) {
2266 if (sp == 0) {
2259 seen[p] = sv;
2267 seen[p] = sv;
2260 interesting++;
2268 interesting++;
2261 } else if (sp != sv)
2269 } else if (sp != sv)
2262 seen[p] |= sv;
2270 seen[p] |= sv;
2263 } else {
2271 } else {
2264 if (sp && sp < poison)
2272 if (sp && sp < poison)
2265 interesting--;
2273 interesting--;
2266 seen[p] = sv;
2274 seen[p] = sv;
2267 }
2275 }
2268 }
2276 }
2269 }
2277 }
2270
2278
2271 done:
2279 done:
2272 free(seen);
2280 free(seen);
2273 return gca;
2281 return gca;
2274 bail:
2282 bail:
2275 free(seen);
2283 free(seen);
2276 Py_XDECREF(gca);
2284 Py_XDECREF(gca);
2277 return NULL;
2285 return NULL;
2278 }
2286 }
2279
2287
2280 /*
2288 /*
2281 * Given a disjoint set of revs, return the subset with the longest
2289 * Given a disjoint set of revs, return the subset with the longest
2282 * path to the root.
2290 * path to the root.
2283 */
2291 */
2284 static PyObject *find_deepest(indexObject *self, PyObject *revs)
2292 static PyObject *find_deepest(indexObject *self, PyObject *revs)
2285 {
2293 {
2286 const Py_ssize_t revcount = PyList_GET_SIZE(revs);
2294 const Py_ssize_t revcount = PyList_GET_SIZE(revs);
2287 static const Py_ssize_t capacity = 24;
2295 static const Py_ssize_t capacity = 24;
2288 int *depth, *interesting = NULL;
2296 int *depth, *interesting = NULL;
2289 int i, j, v, ninteresting;
2297 int i, j, v, ninteresting;
2290 PyObject *dict = NULL, *keys = NULL;
2298 PyObject *dict = NULL, *keys = NULL;
2291 long *seen = NULL;
2299 long *seen = NULL;
2292 int maxrev = -1;
2300 int maxrev = -1;
2293 long final;
2301 long final;
2294
2302
2295 if (revcount > capacity) {
2303 if (revcount > capacity) {
2296 PyErr_Format(PyExc_OverflowError,
2304 PyErr_Format(PyExc_OverflowError,
2297 "bitset size (%ld) > capacity (%ld)",
2305 "bitset size (%ld) > capacity (%ld)",
2298 (long)revcount, (long)capacity);
2306 (long)revcount, (long)capacity);
2299 return NULL;
2307 return NULL;
2300 }
2308 }
2301
2309
2302 for (i = 0; i < revcount; i++) {
2310 for (i = 0; i < revcount; i++) {
2303 int n = (int)PyInt_AsLong(PyList_GET_ITEM(revs, i));
2311 int n = (int)PyInt_AsLong(PyList_GET_ITEM(revs, i));
2304 if (n > maxrev)
2312 if (n > maxrev)
2305 maxrev = n;
2313 maxrev = n;
2306 }
2314 }
2307
2315
2308 depth = calloc(sizeof(*depth), maxrev + 1);
2316 depth = calloc(sizeof(*depth), maxrev + 1);
2309 if (depth == NULL)
2317 if (depth == NULL)
2310 return PyErr_NoMemory();
2318 return PyErr_NoMemory();
2311
2319
2312 seen = calloc(sizeof(*seen), maxrev + 1);
2320 seen = calloc(sizeof(*seen), maxrev + 1);
2313 if (seen == NULL) {
2321 if (seen == NULL) {
2314 PyErr_NoMemory();
2322 PyErr_NoMemory();
2315 goto bail;
2323 goto bail;
2316 }
2324 }
2317
2325
2318 interesting = calloc(sizeof(*interesting), ((size_t)1) << revcount);
2326 interesting = calloc(sizeof(*interesting), ((size_t)1) << revcount);
2319 if (interesting == NULL) {
2327 if (interesting == NULL) {
2320 PyErr_NoMemory();
2328 PyErr_NoMemory();
2321 goto bail;
2329 goto bail;
2322 }
2330 }
2323
2331
2324 if (PyList_Sort(revs) == -1)
2332 if (PyList_Sort(revs) == -1)
2325 goto bail;
2333 goto bail;
2326
2334
2327 for (i = 0; i < revcount; i++) {
2335 for (i = 0; i < revcount; i++) {
2328 int n = (int)PyInt_AsLong(PyList_GET_ITEM(revs, i));
2336 int n = (int)PyInt_AsLong(PyList_GET_ITEM(revs, i));
2329 long b = 1l << i;
2337 long b = 1l << i;
2330 depth[n] = 1;
2338 depth[n] = 1;
2331 seen[n] = b;
2339 seen[n] = b;
2332 interesting[b] = 1;
2340 interesting[b] = 1;
2333 }
2341 }
2334
2342
2335 /* invariant: ninteresting is the number of non-zero entries in
2343 /* invariant: ninteresting is the number of non-zero entries in
2336 * interesting. */
2344 * interesting. */
2337 ninteresting = (int)revcount;
2345 ninteresting = (int)revcount;
2338
2346
2339 for (v = maxrev; v >= 0 && ninteresting > 1; v--) {
2347 for (v = maxrev; v >= 0 && ninteresting > 1; v--) {
2340 int dv = depth[v];
2348 int dv = depth[v];
2341 int parents[2];
2349 int parents[2];
2342 long sv;
2350 long sv;
2343
2351
2344 if (dv == 0)
2352 if (dv == 0)
2345 continue;
2353 continue;
2346
2354
2347 sv = seen[v];
2355 sv = seen[v];
2348 if (index_get_parents(self, v, parents, maxrev) < 0)
2356 if (index_get_parents(self, v, parents, maxrev) < 0)
2349 goto bail;
2357 goto bail;
2350
2358
2351 for (i = 0; i < 2; i++) {
2359 for (i = 0; i < 2; i++) {
2352 int p = parents[i];
2360 int p = parents[i];
2353 long sp;
2361 long sp;
2354 int dp;
2362 int dp;
2355
2363
2356 if (p == -1)
2364 if (p == -1)
2357 continue;
2365 continue;
2358
2366
2359 dp = depth[p];
2367 dp = depth[p];
2360 sp = seen[p];
2368 sp = seen[p];
2361 if (dp <= dv) {
2369 if (dp <= dv) {
2362 depth[p] = dv + 1;
2370 depth[p] = dv + 1;
2363 if (sp != sv) {
2371 if (sp != sv) {
2364 interesting[sv] += 1;
2372 interesting[sv] += 1;
2365 seen[p] = sv;
2373 seen[p] = sv;
2366 if (sp) {
2374 if (sp) {
2367 interesting[sp] -= 1;
2375 interesting[sp] -= 1;
2368 if (interesting[sp] == 0)
2376 if (interesting[sp] == 0)
2369 ninteresting -= 1;
2377 ninteresting -= 1;
2370 }
2378 }
2371 }
2379 }
2372 } else if (dv == dp - 1) {
2380 } else if (dv == dp - 1) {
2373 long nsp = sp | sv;
2381 long nsp = sp | sv;
2374 if (nsp == sp)
2382 if (nsp == sp)
2375 continue;
2383 continue;
2376 seen[p] = nsp;
2384 seen[p] = nsp;
2377 interesting[sp] -= 1;
2385 interesting[sp] -= 1;
2378 if (interesting[sp] == 0)
2386 if (interesting[sp] == 0)
2379 ninteresting -= 1;
2387 ninteresting -= 1;
2380 if (interesting[nsp] == 0)
2388 if (interesting[nsp] == 0)
2381 ninteresting += 1;
2389 ninteresting += 1;
2382 interesting[nsp] += 1;
2390 interesting[nsp] += 1;
2383 }
2391 }
2384 }
2392 }
2385 interesting[sv] -= 1;
2393 interesting[sv] -= 1;
2386 if (interesting[sv] == 0)
2394 if (interesting[sv] == 0)
2387 ninteresting -= 1;
2395 ninteresting -= 1;
2388 }
2396 }
2389
2397
2390 final = 0;
2398 final = 0;
2391 j = ninteresting;
2399 j = ninteresting;
2392 for (i = 0; i < (int)(2 << revcount) && j > 0; i++) {
2400 for (i = 0; i < (int)(2 << revcount) && j > 0; i++) {
2393 if (interesting[i] == 0)
2401 if (interesting[i] == 0)
2394 continue;
2402 continue;
2395 final |= i;
2403 final |= i;
2396 j -= 1;
2404 j -= 1;
2397 }
2405 }
2398 if (final == 0) {
2406 if (final == 0) {
2399 keys = PyList_New(0);
2407 keys = PyList_New(0);
2400 goto bail;
2408 goto bail;
2401 }
2409 }
2402
2410
2403 dict = PyDict_New();
2411 dict = PyDict_New();
2404 if (dict == NULL)
2412 if (dict == NULL)
2405 goto bail;
2413 goto bail;
2406
2414
2407 for (i = 0; i < revcount; i++) {
2415 for (i = 0; i < revcount; i++) {
2408 PyObject *key;
2416 PyObject *key;
2409
2417
2410 if ((final & (1 << i)) == 0)
2418 if ((final & (1 << i)) == 0)
2411 continue;
2419 continue;
2412
2420
2413 key = PyList_GET_ITEM(revs, i);
2421 key = PyList_GET_ITEM(revs, i);
2414 Py_INCREF(key);
2422 Py_INCREF(key);
2415 Py_INCREF(Py_None);
2423 Py_INCREF(Py_None);
2416 if (PyDict_SetItem(dict, key, Py_None) == -1) {
2424 if (PyDict_SetItem(dict, key, Py_None) == -1) {
2417 Py_DECREF(key);
2425 Py_DECREF(key);
2418 Py_DECREF(Py_None);
2426 Py_DECREF(Py_None);
2419 goto bail;
2427 goto bail;
2420 }
2428 }
2421 }
2429 }
2422
2430
2423 keys = PyDict_Keys(dict);
2431 keys = PyDict_Keys(dict);
2424
2432
2425 bail:
2433 bail:
2426 free(depth);
2434 free(depth);
2427 free(seen);
2435 free(seen);
2428 free(interesting);
2436 free(interesting);
2429 Py_XDECREF(dict);
2437 Py_XDECREF(dict);
2430
2438
2431 return keys;
2439 return keys;
2432 }
2440 }
2433
2441
2434 /*
2442 /*
2435 * Given a (possibly overlapping) set of revs, return all the
2443 * Given a (possibly overlapping) set of revs, return all the
2436 * common ancestors heads: heads(::args[0] and ::a[1] and ...)
2444 * common ancestors heads: heads(::args[0] and ::a[1] and ...)
2437 */
2445 */
2438 static PyObject *index_commonancestorsheads(indexObject *self, PyObject *args)
2446 static PyObject *index_commonancestorsheads(indexObject *self, PyObject *args)
2439 {
2447 {
2440 PyObject *ret = NULL;
2448 PyObject *ret = NULL;
2441 Py_ssize_t argcount, i, len;
2449 Py_ssize_t argcount, i, len;
2442 bitmask repeat = 0;
2450 bitmask repeat = 0;
2443 int revcount = 0;
2451 int revcount = 0;
2444 int *revs;
2452 int *revs;
2445
2453
2446 argcount = PySequence_Length(args);
2454 argcount = PySequence_Length(args);
2447 revs = PyMem_Malloc(argcount * sizeof(*revs));
2455 revs = PyMem_Malloc(argcount * sizeof(*revs));
2448 if (argcount > 0 && revs == NULL)
2456 if (argcount > 0 && revs == NULL)
2449 return PyErr_NoMemory();
2457 return PyErr_NoMemory();
2450 len = index_length(self);
2458 len = index_length(self);
2451
2459
2452 for (i = 0; i < argcount; i++) {
2460 for (i = 0; i < argcount; i++) {
2453 static const int capacity = 24;
2461 static const int capacity = 24;
2454 PyObject *obj = PySequence_GetItem(args, i);
2462 PyObject *obj = PySequence_GetItem(args, i);
2455 bitmask x;
2463 bitmask x;
2456 long val;
2464 long val;
2457
2465
2458 if (!PyInt_Check(obj)) {
2466 if (!PyInt_Check(obj)) {
2459 PyErr_SetString(PyExc_TypeError,
2467 PyErr_SetString(PyExc_TypeError,
2460 "arguments must all be ints");
2468 "arguments must all be ints");
2461 Py_DECREF(obj);
2469 Py_DECREF(obj);
2462 goto bail;
2470 goto bail;
2463 }
2471 }
2464 val = PyInt_AsLong(obj);
2472 val = PyInt_AsLong(obj);
2465 Py_DECREF(obj);
2473 Py_DECREF(obj);
2466 if (val == -1) {
2474 if (val == -1) {
2467 ret = PyList_New(0);
2475 ret = PyList_New(0);
2468 goto done;
2476 goto done;
2469 }
2477 }
2470 if (val < 0 || val >= len) {
2478 if (val < 0 || val >= len) {
2471 PyErr_SetString(PyExc_IndexError, "index out of range");
2479 PyErr_SetString(PyExc_IndexError, "index out of range");
2472 goto bail;
2480 goto bail;
2473 }
2481 }
2474 /* this cheesy bloom filter lets us avoid some more
2482 /* this cheesy bloom filter lets us avoid some more
2475 * expensive duplicate checks in the common set-is-disjoint
2483 * expensive duplicate checks in the common set-is-disjoint
2476 * case */
2484 * case */
2477 x = 1ull << (val & 0x3f);
2485 x = 1ull << (val & 0x3f);
2478 if (repeat & x) {
2486 if (repeat & x) {
2479 int k;
2487 int k;
2480 for (k = 0; k < revcount; k++) {
2488 for (k = 0; k < revcount; k++) {
2481 if (val == revs[k])
2489 if (val == revs[k])
2482 goto duplicate;
2490 goto duplicate;
2483 }
2491 }
2484 } else
2492 } else
2485 repeat |= x;
2493 repeat |= x;
2486 if (revcount >= capacity) {
2494 if (revcount >= capacity) {
2487 PyErr_Format(PyExc_OverflowError,
2495 PyErr_Format(PyExc_OverflowError,
2488 "bitset size (%d) > capacity (%d)",
2496 "bitset size (%d) > capacity (%d)",
2489 revcount, capacity);
2497 revcount, capacity);
2490 goto bail;
2498 goto bail;
2491 }
2499 }
2492 revs[revcount++] = (int)val;
2500 revs[revcount++] = (int)val;
2493 duplicate:;
2501 duplicate:;
2494 }
2502 }
2495
2503
2496 if (revcount == 0) {
2504 if (revcount == 0) {
2497 ret = PyList_New(0);
2505 ret = PyList_New(0);
2498 goto done;
2506 goto done;
2499 }
2507 }
2500 if (revcount == 1) {
2508 if (revcount == 1) {
2501 PyObject *obj;
2509 PyObject *obj;
2502 ret = PyList_New(1);
2510 ret = PyList_New(1);
2503 if (ret == NULL)
2511 if (ret == NULL)
2504 goto bail;
2512 goto bail;
2505 obj = PyInt_FromLong(revs[0]);
2513 obj = PyInt_FromLong(revs[0]);
2506 if (obj == NULL)
2514 if (obj == NULL)
2507 goto bail;
2515 goto bail;
2508 PyList_SET_ITEM(ret, 0, obj);
2516 PyList_SET_ITEM(ret, 0, obj);
2509 goto done;
2517 goto done;
2510 }
2518 }
2511
2519
2512 ret = find_gca_candidates(self, revs, revcount);
2520 ret = find_gca_candidates(self, revs, revcount);
2513 if (ret == NULL)
2521 if (ret == NULL)
2514 goto bail;
2522 goto bail;
2515
2523
2516 done:
2524 done:
2517 PyMem_Free(revs);
2525 PyMem_Free(revs);
2518 return ret;
2526 return ret;
2519
2527
2520 bail:
2528 bail:
2521 PyMem_Free(revs);
2529 PyMem_Free(revs);
2522 Py_XDECREF(ret);
2530 Py_XDECREF(ret);
2523 return NULL;
2531 return NULL;
2524 }
2532 }
2525
2533
2526 /*
2534 /*
2527 * Given a (possibly overlapping) set of revs, return the greatest
2535 * Given a (possibly overlapping) set of revs, return the greatest
2528 * common ancestors: those with the longest path to the root.
2536 * common ancestors: those with the longest path to the root.
2529 */
2537 */
2530 static PyObject *index_ancestors(indexObject *self, PyObject *args)
2538 static PyObject *index_ancestors(indexObject *self, PyObject *args)
2531 {
2539 {
2532 PyObject *ret;
2540 PyObject *ret;
2533 PyObject *gca = index_commonancestorsheads(self, args);
2541 PyObject *gca = index_commonancestorsheads(self, args);
2534 if (gca == NULL)
2542 if (gca == NULL)
2535 return NULL;
2543 return NULL;
2536
2544
2537 if (PyList_GET_SIZE(gca) <= 1) {
2545 if (PyList_GET_SIZE(gca) <= 1) {
2538 return gca;
2546 return gca;
2539 }
2547 }
2540
2548
2541 ret = find_deepest(self, gca);
2549 ret = find_deepest(self, gca);
2542 Py_DECREF(gca);
2550 Py_DECREF(gca);
2543 return ret;
2551 return ret;
2544 }
2552 }
2545
2553
2546 /*
2554 /*
2547 * Invalidate any trie entries introduced by added revs.
2555 * Invalidate any trie entries introduced by added revs.
2548 */
2556 */
2549 static void index_invalidate_added(indexObject *self, Py_ssize_t start)
2557 static void index_invalidate_added(indexObject *self, Py_ssize_t start)
2550 {
2558 {
2551 Py_ssize_t i, len;
2559 Py_ssize_t i, len;
2552
2560
2553 len = self->length + self->new_length;
2561 len = self->length + self->new_length;
2554 i = start - self->length;
2562 i = start - self->length;
2555 if (i < 0)
2563 if (i < 0)
2556 return;
2564 return;
2557
2565
2558 for (i = start; i < len; i++)
2566 for (i = start; i < len; i++)
2559 nt_delete_node(&self->nt, index_deref(self, i) + 32);
2567 nt_delete_node(&self->nt, index_deref(self, i) + 32);
2560
2568
2561 self->new_length = start - self->length;
2569 self->new_length = start - self->length;
2562 }
2570 }
2563
2571
2564 /*
2572 /*
2565 * Delete a numeric range of revs, which must be at the end of the
2573 * Delete a numeric range of revs, which must be at the end of the
2566 * range.
2574 * range.
2567 */
2575 */
2568 static int index_slice_del(indexObject *self, PyObject *item)
2576 static int index_slice_del(indexObject *self, PyObject *item)
2569 {
2577 {
2570 Py_ssize_t start, stop, step, slicelength;
2578 Py_ssize_t start, stop, step, slicelength;
2571 Py_ssize_t length = index_length(self) + 1;
2579 Py_ssize_t length = index_length(self) + 1;
2572 int ret = 0;
2580 int ret = 0;
2573
2581
2574 /* Argument changed from PySliceObject* to PyObject* in Python 3. */
2582 /* Argument changed from PySliceObject* to PyObject* in Python 3. */
2575 #ifdef IS_PY3K
2583 #ifdef IS_PY3K
2576 if (PySlice_GetIndicesEx(item, length, &start, &stop, &step,
2584 if (PySlice_GetIndicesEx(item, length, &start, &stop, &step,
2577 &slicelength) < 0)
2585 &slicelength) < 0)
2578 #else
2586 #else
2579 if (PySlice_GetIndicesEx((PySliceObject *)item, length, &start, &stop,
2587 if (PySlice_GetIndicesEx((PySliceObject *)item, length, &start, &stop,
2580 &step, &slicelength) < 0)
2588 &step, &slicelength) < 0)
2581 #endif
2589 #endif
2582 return -1;
2590 return -1;
2583
2591
2584 if (slicelength <= 0)
2592 if (slicelength <= 0)
2585 return 0;
2593 return 0;
2586
2594
2587 if ((step < 0 && start < stop) || (step > 0 && start > stop))
2595 if ((step < 0 && start < stop) || (step > 0 && start > stop))
2588 stop = start;
2596 stop = start;
2589
2597
2590 if (step < 0) {
2598 if (step < 0) {
2591 stop = start + 1;
2599 stop = start + 1;
2592 start = stop + step * (slicelength - 1) - 1;
2600 start = stop + step * (slicelength - 1) - 1;
2593 step = -step;
2601 step = -step;
2594 }
2602 }
2595
2603
2596 if (step != 1) {
2604 if (step != 1) {
2597 PyErr_SetString(PyExc_ValueError,
2605 PyErr_SetString(PyExc_ValueError,
2598 "revlog index delete requires step size of 1");
2606 "revlog index delete requires step size of 1");
2599 return -1;
2607 return -1;
2600 }
2608 }
2601
2609
2602 if (stop != length - 1) {
2610 if (stop != length - 1) {
2603 PyErr_SetString(PyExc_IndexError,
2611 PyErr_SetString(PyExc_IndexError,
2604 "revlog index deletion indices are invalid");
2612 "revlog index deletion indices are invalid");
2605 return -1;
2613 return -1;
2606 }
2614 }
2607
2615
2608 if (start < self->length) {
2616 if (start < self->length) {
2609 if (self->ntinitialized) {
2617 if (self->ntinitialized) {
2610 Py_ssize_t i;
2618 Py_ssize_t i;
2611
2619
2612 for (i = start; i < self->length; i++) {
2620 for (i = start; i < self->length; i++) {
2613 const char *node = index_node_existing(self, i);
2621 const char *node = index_node_existing(self, i);
2614 if (node == NULL)
2622 if (node == NULL)
2615 return -1;
2623 return -1;
2616
2624
2617 nt_delete_node(&self->nt, node);
2625 nt_delete_node(&self->nt, node);
2618 }
2626 }
2619 if (self->new_length)
2627 if (self->new_length)
2620 index_invalidate_added(self, self->length);
2628 index_invalidate_added(self, self->length);
2621 if (self->ntrev > start)
2629 if (self->ntrev > start)
2622 self->ntrev = (int)start;
2630 self->ntrev = (int)start;
2623 } else if (self->new_length) {
2631 } else if (self->new_length) {
2624 self->new_length = 0;
2632 self->new_length = 0;
2625 }
2633 }
2626
2634
2627 self->length = start;
2635 self->length = start;
2628 goto done;
2636 goto done;
2629 }
2637 }
2630
2638
2631 if (self->ntinitialized) {
2639 if (self->ntinitialized) {
2632 index_invalidate_added(self, start);
2640 index_invalidate_added(self, start);
2633 if (self->ntrev > start)
2641 if (self->ntrev > start)
2634 self->ntrev = (int)start;
2642 self->ntrev = (int)start;
2635 } else {
2643 } else {
2636 self->new_length = start - self->length;
2644 self->new_length = start - self->length;
2637 }
2645 }
2638 done:
2646 done:
2639 Py_CLEAR(self->headrevs);
2647 Py_CLEAR(self->headrevs);
2640 return ret;
2648 return ret;
2641 }
2649 }
2642
2650
2643 /*
2651 /*
2644 * Supported ops:
2652 * Supported ops:
2645 *
2653 *
2646 * slice deletion
2654 * slice deletion
2647 * string assignment (extend node->rev mapping)
2655 * string assignment (extend node->rev mapping)
2648 * string deletion (shrink node->rev mapping)
2656 * string deletion (shrink node->rev mapping)
2649 */
2657 */
2650 static int index_assign_subscript(indexObject *self, PyObject *item,
2658 static int index_assign_subscript(indexObject *self, PyObject *item,
2651 PyObject *value)
2659 PyObject *value)
2652 {
2660 {
2653 char *node;
2661 char *node;
2654 long rev;
2662 long rev;
2655
2663
2656 if (PySlice_Check(item) && value == NULL)
2664 if (PySlice_Check(item) && value == NULL)
2657 return index_slice_del(self, item);
2665 return index_slice_del(self, item);
2658
2666
2659 if (node_check(self->nodelen, item, &node) == -1)
2667 if (node_check(self->nodelen, item, &node) == -1)
2660 return -1;
2668 return -1;
2661
2669
2662 if (value == NULL)
2670 if (value == NULL)
2663 return self->ntinitialized ? nt_delete_node(&self->nt, node)
2671 return self->ntinitialized ? nt_delete_node(&self->nt, node)
2664 : 0;
2672 : 0;
2665 rev = PyInt_AsLong(value);
2673 rev = PyInt_AsLong(value);
2666 if (rev > INT_MAX || rev < 0) {
2674 if (rev > INT_MAX || rev < 0) {
2667 if (!PyErr_Occurred())
2675 if (!PyErr_Occurred())
2668 PyErr_SetString(PyExc_ValueError, "rev out of range");
2676 PyErr_SetString(PyExc_ValueError, "rev out of range");
2669 return -1;
2677 return -1;
2670 }
2678 }
2671
2679
2672 if (index_init_nt(self) == -1)
2680 if (index_init_nt(self) == -1)
2673 return -1;
2681 return -1;
2674 return nt_insert(&self->nt, node, (int)rev);
2682 return nt_insert(&self->nt, node, (int)rev);
2675 }
2683 }
2676
2684
2677 /*
2685 /*
2678 * Find all RevlogNG entries in an index that has inline data. Update
2686 * Find all RevlogNG entries in an index that has inline data. Update
2679 * the optional "offsets" table with those entries.
2687 * the optional "offsets" table with those entries.
2680 */
2688 */
2681 static Py_ssize_t inline_scan(indexObject *self, const char **offsets)
2689 static Py_ssize_t inline_scan(indexObject *self, const char **offsets)
2682 {
2690 {
2683 const char *data = (const char *)self->buf.buf;
2691 const char *data = (const char *)self->buf.buf;
2684 Py_ssize_t pos = 0;
2692 Py_ssize_t pos = 0;
2685 Py_ssize_t end = self->buf.len;
2693 Py_ssize_t end = self->buf.len;
2686 long incr = self->hdrsize;
2694 long incr = self->hdrsize;
2687 Py_ssize_t len = 0;
2695 Py_ssize_t len = 0;
2688
2696
2689 while (pos + self->hdrsize <= end && pos >= 0) {
2697 while (pos + self->hdrsize <= end && pos >= 0) {
2690 uint32_t comp_len, sidedata_comp_len = 0;
2698 uint32_t comp_len, sidedata_comp_len = 0;
2691 /* 3rd element of header is length of compressed inline data */
2699 /* 3rd element of header is length of compressed inline data */
2692 comp_len = getbe32(data + pos + 8);
2700 comp_len = getbe32(data + pos + 8);
2693 if (self->hdrsize == v2_hdrsize) {
2701 if (self->hdrsize == v2_hdrsize) {
2694 sidedata_comp_len = getbe32(data + pos + 72);
2702 sidedata_comp_len = getbe32(data + pos + 72);
2695 }
2703 }
2696 incr = self->hdrsize + comp_len + sidedata_comp_len;
2704 incr = self->hdrsize + comp_len + sidedata_comp_len;
2697 if (offsets)
2705 if (offsets)
2698 offsets[len] = data + pos;
2706 offsets[len] = data + pos;
2699 len++;
2707 len++;
2700 pos += incr;
2708 pos += incr;
2701 }
2709 }
2702
2710
2703 if (pos != end) {
2711 if (pos != end) {
2704 if (!PyErr_Occurred())
2712 if (!PyErr_Occurred())
2705 PyErr_SetString(PyExc_ValueError, "corrupt index file");
2713 PyErr_SetString(PyExc_ValueError, "corrupt index file");
2706 return -1;
2714 return -1;
2707 }
2715 }
2708
2716
2709 return len;
2717 return len;
2710 }
2718 }
2711
2719
2712 static int index_init(indexObject *self, PyObject *args, PyObject *kwargs)
2720 static int index_init(indexObject *self, PyObject *args, PyObject *kwargs)
2713 {
2721 {
2714 PyObject *data_obj, *inlined_obj, *revlogv2;
2722 PyObject *data_obj, *inlined_obj, *revlogv2;
2715 Py_ssize_t size;
2723 Py_ssize_t size;
2716
2724
2717 static char *kwlist[] = {"data", "inlined", "revlogv2", NULL};
2725 static char *kwlist[] = {"data", "inlined", "revlogv2", NULL};
2718
2726
2719 /* Initialize before argument-checking to avoid index_dealloc() crash.
2727 /* Initialize before argument-checking to avoid index_dealloc() crash.
2720 */
2728 */
2721 self->added = NULL;
2729 self->added = NULL;
2722 self->new_length = 0;
2730 self->new_length = 0;
2723 self->added_length = 0;
2731 self->added_length = 0;
2724 self->data = NULL;
2732 self->data = NULL;
2725 memset(&self->buf, 0, sizeof(self->buf));
2733 memset(&self->buf, 0, sizeof(self->buf));
2726 self->headrevs = NULL;
2734 self->headrevs = NULL;
2727 self->filteredrevs = Py_None;
2735 self->filteredrevs = Py_None;
2728 Py_INCREF(Py_None);
2736 Py_INCREF(Py_None);
2729 self->ntinitialized = 0;
2737 self->ntinitialized = 0;
2730 self->offsets = NULL;
2738 self->offsets = NULL;
2731 self->nodelen = 20;
2739 self->nodelen = 20;
2732 self->nullentry = NULL;
2740 self->nullentry = NULL;
2733
2741
2734 revlogv2 = NULL;
2742 revlogv2 = NULL;
2735 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "OO|O", kwlist,
2743 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "OO|O", kwlist,
2736 &data_obj, &inlined_obj, &revlogv2))
2744 &data_obj, &inlined_obj, &revlogv2))
2737 return -1;
2745 return -1;
2738 if (!PyObject_CheckBuffer(data_obj)) {
2746 if (!PyObject_CheckBuffer(data_obj)) {
2739 PyErr_SetString(PyExc_TypeError,
2747 PyErr_SetString(PyExc_TypeError,
2740 "data does not support buffer interface");
2748 "data does not support buffer interface");
2741 return -1;
2749 return -1;
2742 }
2750 }
2743 if (self->nodelen < 20 || self->nodelen > (Py_ssize_t)sizeof(nullid)) {
2751 if (self->nodelen < 20 || self->nodelen > (Py_ssize_t)sizeof(nullid)) {
2744 PyErr_SetString(PyExc_RuntimeError, "unsupported node size");
2752 PyErr_SetString(PyExc_RuntimeError, "unsupported node size");
2745 return -1;
2753 return -1;
2746 }
2754 }
2747
2755
2748 if (revlogv2 && PyObject_IsTrue(revlogv2)) {
2756 if (revlogv2 && PyObject_IsTrue(revlogv2)) {
2749 self->hdrsize = v2_hdrsize;
2757 self->hdrsize = v2_hdrsize;
2750 } else {
2758 } else {
2751 self->hdrsize = v1_hdrsize;
2759 self->hdrsize = v1_hdrsize;
2752 }
2760 }
2753
2761
2754 if (self->hdrsize == v1_hdrsize) {
2762 if (self->hdrsize == v1_hdrsize) {
2755 self->nullentry =
2763 self->nullentry =
2756 Py_BuildValue(PY23("iiiiiiis#", "iiiiiiiy#"), 0, 0, 0, -1,
2764 Py_BuildValue(PY23("iiiiiiis#", "iiiiiiiy#"), 0, 0, 0, -1,
2757 -1, -1, -1, nullid, self->nodelen);
2765 -1, -1, -1, nullid, self->nodelen);
2758 } else {
2766 } else {
2759 self->nullentry =
2767 self->nullentry =
2760 Py_BuildValue(PY23("iiiiiiis#ii", "iiiiiiiy#ii"), 0, 0, 0,
2768 Py_BuildValue(PY23("iiiiiiis#ii", "iiiiiiiy#ii"), 0, 0, 0,
2761 -1, -1, -1, -1, nullid, self->nodelen, 0, 0);
2769 -1, -1, -1, -1, nullid, self->nodelen, 0, 0);
2762 }
2770 }
2763
2771
2764 if (!self->nullentry)
2772 if (!self->nullentry)
2765 return -1;
2773 return -1;
2766 PyObject_GC_UnTrack(self->nullentry);
2774 PyObject_GC_UnTrack(self->nullentry);
2767
2775
2768 if (PyObject_GetBuffer(data_obj, &self->buf, PyBUF_SIMPLE) == -1)
2776 if (PyObject_GetBuffer(data_obj, &self->buf, PyBUF_SIMPLE) == -1)
2769 return -1;
2777 return -1;
2770 size = self->buf.len;
2778 size = self->buf.len;
2771
2779
2772 self->inlined = inlined_obj && PyObject_IsTrue(inlined_obj);
2780 self->inlined = inlined_obj && PyObject_IsTrue(inlined_obj);
2773 self->data = data_obj;
2781 self->data = data_obj;
2774
2782
2775 self->ntlookups = self->ntmisses = 0;
2783 self->ntlookups = self->ntmisses = 0;
2776 self->ntrev = -1;
2784 self->ntrev = -1;
2777 Py_INCREF(self->data);
2785 Py_INCREF(self->data);
2778
2786
2779 if (self->inlined) {
2787 if (self->inlined) {
2780 Py_ssize_t len = inline_scan(self, NULL);
2788 Py_ssize_t len = inline_scan(self, NULL);
2781 if (len == -1)
2789 if (len == -1)
2782 goto bail;
2790 goto bail;
2783 self->length = len;
2791 self->length = len;
2784 } else {
2792 } else {
2785 if (size % self->hdrsize) {
2793 if (size % self->hdrsize) {
2786 PyErr_SetString(PyExc_ValueError, "corrupt index file");
2794 PyErr_SetString(PyExc_ValueError, "corrupt index file");
2787 goto bail;
2795 goto bail;
2788 }
2796 }
2789 self->length = size / self->hdrsize;
2797 self->length = size / self->hdrsize;
2790 }
2798 }
2791
2799
2792 return 0;
2800 return 0;
2793 bail:
2801 bail:
2794 return -1;
2802 return -1;
2795 }
2803 }
2796
2804
2797 static PyObject *index_nodemap(indexObject *self)
2805 static PyObject *index_nodemap(indexObject *self)
2798 {
2806 {
2799 Py_INCREF(self);
2807 Py_INCREF(self);
2800 return (PyObject *)self;
2808 return (PyObject *)self;
2801 }
2809 }
2802
2810
2803 static void _index_clearcaches(indexObject *self)
2811 static void _index_clearcaches(indexObject *self)
2804 {
2812 {
2805 if (self->offsets) {
2813 if (self->offsets) {
2806 PyMem_Free((void *)self->offsets);
2814 PyMem_Free((void *)self->offsets);
2807 self->offsets = NULL;
2815 self->offsets = NULL;
2808 }
2816 }
2809 if (self->ntinitialized) {
2817 if (self->ntinitialized) {
2810 nt_dealloc(&self->nt);
2818 nt_dealloc(&self->nt);
2811 }
2819 }
2812 self->ntinitialized = 0;
2820 self->ntinitialized = 0;
2813 Py_CLEAR(self->headrevs);
2821 Py_CLEAR(self->headrevs);
2814 }
2822 }
2815
2823
2816 static PyObject *index_clearcaches(indexObject *self)
2824 static PyObject *index_clearcaches(indexObject *self)
2817 {
2825 {
2818 _index_clearcaches(self);
2826 _index_clearcaches(self);
2819 self->ntrev = -1;
2827 self->ntrev = -1;
2820 self->ntlookups = self->ntmisses = 0;
2828 self->ntlookups = self->ntmisses = 0;
2821 Py_RETURN_NONE;
2829 Py_RETURN_NONE;
2822 }
2830 }
2823
2831
2824 static void index_dealloc(indexObject *self)
2832 static void index_dealloc(indexObject *self)
2825 {
2833 {
2826 _index_clearcaches(self);
2834 _index_clearcaches(self);
2827 Py_XDECREF(self->filteredrevs);
2835 Py_XDECREF(self->filteredrevs);
2828 if (self->buf.buf) {
2836 if (self->buf.buf) {
2829 PyBuffer_Release(&self->buf);
2837 PyBuffer_Release(&self->buf);
2830 memset(&self->buf, 0, sizeof(self->buf));
2838 memset(&self->buf, 0, sizeof(self->buf));
2831 }
2839 }
2832 Py_XDECREF(self->data);
2840 Py_XDECREF(self->data);
2833 PyMem_Free(self->added);
2841 PyMem_Free(self->added);
2834 Py_XDECREF(self->nullentry);
2842 Py_XDECREF(self->nullentry);
2835 PyObject_Del(self);
2843 PyObject_Del(self);
2836 }
2844 }
2837
2845
2838 static PySequenceMethods index_sequence_methods = {
2846 static PySequenceMethods index_sequence_methods = {
2839 (lenfunc)index_length, /* sq_length */
2847 (lenfunc)index_length, /* sq_length */
2840 0, /* sq_concat */
2848 0, /* sq_concat */
2841 0, /* sq_repeat */
2849 0, /* sq_repeat */
2842 (ssizeargfunc)index_get, /* sq_item */
2850 (ssizeargfunc)index_get, /* sq_item */
2843 0, /* sq_slice */
2851 0, /* sq_slice */
2844 0, /* sq_ass_item */
2852 0, /* sq_ass_item */
2845 0, /* sq_ass_slice */
2853 0, /* sq_ass_slice */
2846 (objobjproc)index_contains, /* sq_contains */
2854 (objobjproc)index_contains, /* sq_contains */
2847 };
2855 };
2848
2856
2849 static PyMappingMethods index_mapping_methods = {
2857 static PyMappingMethods index_mapping_methods = {
2850 (lenfunc)index_length, /* mp_length */
2858 (lenfunc)index_length, /* mp_length */
2851 (binaryfunc)index_getitem, /* mp_subscript */
2859 (binaryfunc)index_getitem, /* mp_subscript */
2852 (objobjargproc)index_assign_subscript, /* mp_ass_subscript */
2860 (objobjargproc)index_assign_subscript, /* mp_ass_subscript */
2853 };
2861 };
2854
2862
2855 static PyMethodDef index_methods[] = {
2863 static PyMethodDef index_methods[] = {
2856 {"ancestors", (PyCFunction)index_ancestors, METH_VARARGS,
2864 {"ancestors", (PyCFunction)index_ancestors, METH_VARARGS,
2857 "return the gca set of the given revs"},
2865 "return the gca set of the given revs"},
2858 {"commonancestorsheads", (PyCFunction)index_commonancestorsheads,
2866 {"commonancestorsheads", (PyCFunction)index_commonancestorsheads,
2859 METH_VARARGS,
2867 METH_VARARGS,
2860 "return the heads of the common ancestors of the given revs"},
2868 "return the heads of the common ancestors of the given revs"},
2861 {"clearcaches", (PyCFunction)index_clearcaches, METH_NOARGS,
2869 {"clearcaches", (PyCFunction)index_clearcaches, METH_NOARGS,
2862 "clear the index caches"},
2870 "clear the index caches"},
2863 {"get", (PyCFunction)index_m_get, METH_VARARGS, "get an index entry"},
2871 {"get", (PyCFunction)index_m_get, METH_VARARGS, "get an index entry"},
2864 {"get_rev", (PyCFunction)index_m_get, METH_VARARGS,
2872 {"get_rev", (PyCFunction)index_m_get, METH_VARARGS,
2865 "return `rev` associated with a node or None"},
2873 "return `rev` associated with a node or None"},
2866 {"has_node", (PyCFunction)index_m_has_node, METH_O,
2874 {"has_node", (PyCFunction)index_m_has_node, METH_O,
2867 "return True if the node exist in the index"},
2875 "return True if the node exist in the index"},
2868 {"rev", (PyCFunction)index_m_rev, METH_O,
2876 {"rev", (PyCFunction)index_m_rev, METH_O,
2869 "return `rev` associated with a node or raise RevlogError"},
2877 "return `rev` associated with a node or raise RevlogError"},
2870 {"computephasesmapsets", (PyCFunction)compute_phases_map_sets, METH_VARARGS,
2878 {"computephasesmapsets", (PyCFunction)compute_phases_map_sets, METH_VARARGS,
2871 "compute phases"},
2879 "compute phases"},
2872 {"reachableroots2", (PyCFunction)reachableroots2, METH_VARARGS,
2880 {"reachableroots2", (PyCFunction)reachableroots2, METH_VARARGS,
2873 "reachableroots"},
2881 "reachableroots"},
2874 {"replace_sidedata_info", (PyCFunction)index_replace_sidedata_info,
2882 {"replace_sidedata_info", (PyCFunction)index_replace_sidedata_info,
2875 METH_VARARGS, "replace an existing index entry with a new value"},
2883 METH_VARARGS, "replace an existing index entry with a new value"},
2876 {"headrevs", (PyCFunction)index_headrevs, METH_VARARGS,
2884 {"headrevs", (PyCFunction)index_headrevs, METH_VARARGS,
2877 "get head revisions"}, /* Can do filtering since 3.2 */
2885 "get head revisions"}, /* Can do filtering since 3.2 */
2878 {"headrevsfiltered", (PyCFunction)index_headrevs, METH_VARARGS,
2886 {"headrevsfiltered", (PyCFunction)index_headrevs, METH_VARARGS,
2879 "get filtered head revisions"}, /* Can always do filtering */
2887 "get filtered head revisions"}, /* Can always do filtering */
2880 {"issnapshot", (PyCFunction)index_issnapshot, METH_O,
2888 {"issnapshot", (PyCFunction)index_issnapshot, METH_O,
2881 "True if the object is a snapshot"},
2889 "True if the object is a snapshot"},
2882 {"findsnapshots", (PyCFunction)index_findsnapshots, METH_VARARGS,
2890 {"findsnapshots", (PyCFunction)index_findsnapshots, METH_VARARGS,
2883 "Gather snapshot data in a cache dict"},
2891 "Gather snapshot data in a cache dict"},
2884 {"deltachain", (PyCFunction)index_deltachain, METH_VARARGS,
2892 {"deltachain", (PyCFunction)index_deltachain, METH_VARARGS,
2885 "determine revisions with deltas to reconstruct fulltext"},
2893 "determine revisions with deltas to reconstruct fulltext"},
2886 {"slicechunktodensity", (PyCFunction)index_slicechunktodensity,
2894 {"slicechunktodensity", (PyCFunction)index_slicechunktodensity,
2887 METH_VARARGS, "determine revisions with deltas to reconstruct fulltext"},
2895 METH_VARARGS, "determine revisions with deltas to reconstruct fulltext"},
2888 {"append", (PyCFunction)index_append, METH_O, "append an index entry"},
2896 {"append", (PyCFunction)index_append, METH_O, "append an index entry"},
2889 {"partialmatch", (PyCFunction)index_partialmatch, METH_VARARGS,
2897 {"partialmatch", (PyCFunction)index_partialmatch, METH_VARARGS,
2890 "match a potentially ambiguous node ID"},
2898 "match a potentially ambiguous node ID"},
2891 {"shortest", (PyCFunction)index_shortest, METH_VARARGS,
2899 {"shortest", (PyCFunction)index_shortest, METH_VARARGS,
2892 "find length of shortest hex nodeid of a binary ID"},
2900 "find length of shortest hex nodeid of a binary ID"},
2893 {"stats", (PyCFunction)index_stats, METH_NOARGS, "stats for the index"},
2901 {"stats", (PyCFunction)index_stats, METH_NOARGS, "stats for the index"},
2894 {"entry_binary", (PyCFunction)index_entry_binary, METH_VARARGS,
2902 {"entry_binary", (PyCFunction)index_entry_binary, METH_O,
2895 "return an entry in binary form"},
2903 "return an entry in binary form"},
2904 {"pack_header", (PyCFunction)index_pack_header, METH_VARARGS,
2905 "pack the revlog header information into binary"},
2896 {NULL} /* Sentinel */
2906 {NULL} /* Sentinel */
2897 };
2907 };
2898
2908
2899 static PyGetSetDef index_getset[] = {
2909 static PyGetSetDef index_getset[] = {
2900 {"nodemap", (getter)index_nodemap, NULL, "nodemap", NULL},
2910 {"nodemap", (getter)index_nodemap, NULL, "nodemap", NULL},
2901 {NULL} /* Sentinel */
2911 {NULL} /* Sentinel */
2902 };
2912 };
2903
2913
2904 static PyMemberDef index_members[] = {
2914 static PyMemberDef index_members[] = {
2905 {"entry_size", T_LONG, offsetof(indexObject, hdrsize), 0,
2915 {"entry_size", T_LONG, offsetof(indexObject, hdrsize), 0,
2906 "size of an index entry"},
2916 "size of an index entry"},
2907 {NULL} /* Sentinel */
2917 {NULL} /* Sentinel */
2908 };
2918 };
2909
2919
2910 PyTypeObject HgRevlogIndex_Type = {
2920 PyTypeObject HgRevlogIndex_Type = {
2911 PyVarObject_HEAD_INIT(NULL, 0) /* header */
2921 PyVarObject_HEAD_INIT(NULL, 0) /* header */
2912 "parsers.index", /* tp_name */
2922 "parsers.index", /* tp_name */
2913 sizeof(indexObject), /* tp_basicsize */
2923 sizeof(indexObject), /* tp_basicsize */
2914 0, /* tp_itemsize */
2924 0, /* tp_itemsize */
2915 (destructor)index_dealloc, /* tp_dealloc */
2925 (destructor)index_dealloc, /* tp_dealloc */
2916 0, /* tp_print */
2926 0, /* tp_print */
2917 0, /* tp_getattr */
2927 0, /* tp_getattr */
2918 0, /* tp_setattr */
2928 0, /* tp_setattr */
2919 0, /* tp_compare */
2929 0, /* tp_compare */
2920 0, /* tp_repr */
2930 0, /* tp_repr */
2921 0, /* tp_as_number */
2931 0, /* tp_as_number */
2922 &index_sequence_methods, /* tp_as_sequence */
2932 &index_sequence_methods, /* tp_as_sequence */
2923 &index_mapping_methods, /* tp_as_mapping */
2933 &index_mapping_methods, /* tp_as_mapping */
2924 0, /* tp_hash */
2934 0, /* tp_hash */
2925 0, /* tp_call */
2935 0, /* tp_call */
2926 0, /* tp_str */
2936 0, /* tp_str */
2927 0, /* tp_getattro */
2937 0, /* tp_getattro */
2928 0, /* tp_setattro */
2938 0, /* tp_setattro */
2929 0, /* tp_as_buffer */
2939 0, /* tp_as_buffer */
2930 Py_TPFLAGS_DEFAULT, /* tp_flags */
2940 Py_TPFLAGS_DEFAULT, /* tp_flags */
2931 "revlog index", /* tp_doc */
2941 "revlog index", /* tp_doc */
2932 0, /* tp_traverse */
2942 0, /* tp_traverse */
2933 0, /* tp_clear */
2943 0, /* tp_clear */
2934 0, /* tp_richcompare */
2944 0, /* tp_richcompare */
2935 0, /* tp_weaklistoffset */
2945 0, /* tp_weaklistoffset */
2936 0, /* tp_iter */
2946 0, /* tp_iter */
2937 0, /* tp_iternext */
2947 0, /* tp_iternext */
2938 index_methods, /* tp_methods */
2948 index_methods, /* tp_methods */
2939 index_members, /* tp_members */
2949 index_members, /* tp_members */
2940 index_getset, /* tp_getset */
2950 index_getset, /* tp_getset */
2941 0, /* tp_base */
2951 0, /* tp_base */
2942 0, /* tp_dict */
2952 0, /* tp_dict */
2943 0, /* tp_descr_get */
2953 0, /* tp_descr_get */
2944 0, /* tp_descr_set */
2954 0, /* tp_descr_set */
2945 0, /* tp_dictoffset */
2955 0, /* tp_dictoffset */
2946 (initproc)index_init, /* tp_init */
2956 (initproc)index_init, /* tp_init */
2947 0, /* tp_alloc */
2957 0, /* tp_alloc */
2948 };
2958 };
2949
2959
2950 /*
2960 /*
2951 * returns a tuple of the form (index, cache) with elements as
2961 * returns a tuple of the form (index, cache) with elements as
2952 * follows:
2962 * follows:
2953 *
2963 *
2954 * index: an index object that lazily parses Revlog (v1 or v2) records
2964 * index: an index object that lazily parses Revlog (v1 or v2) records
2955 * cache: if data is inlined, a tuple (0, index_file_content), else None
2965 * cache: if data is inlined, a tuple (0, index_file_content), else None
2956 * index_file_content could be a string, or a buffer
2966 * index_file_content could be a string, or a buffer
2957 *
2967 *
2958 * added complications are for backwards compatibility
2968 * added complications are for backwards compatibility
2959 */
2969 */
2960 PyObject *parse_index2(PyObject *self, PyObject *args, PyObject *kwargs)
2970 PyObject *parse_index2(PyObject *self, PyObject *args, PyObject *kwargs)
2961 {
2971 {
2962 PyObject *cache = NULL;
2972 PyObject *cache = NULL;
2963 indexObject *idx;
2973 indexObject *idx;
2964 int ret;
2974 int ret;
2965
2975
2966 idx = PyObject_New(indexObject, &HgRevlogIndex_Type);
2976 idx = PyObject_New(indexObject, &HgRevlogIndex_Type);
2967 if (idx == NULL)
2977 if (idx == NULL)
2968 goto bail;
2978 goto bail;
2969
2979
2970 ret = index_init(idx, args, kwargs);
2980 ret = index_init(idx, args, kwargs);
2971 if (ret == -1)
2981 if (ret == -1)
2972 goto bail;
2982 goto bail;
2973
2983
2974 if (idx->inlined) {
2984 if (idx->inlined) {
2975 cache = Py_BuildValue("iO", 0, idx->data);
2985 cache = Py_BuildValue("iO", 0, idx->data);
2976 if (cache == NULL)
2986 if (cache == NULL)
2977 goto bail;
2987 goto bail;
2978 } else {
2988 } else {
2979 cache = Py_None;
2989 cache = Py_None;
2980 Py_INCREF(cache);
2990 Py_INCREF(cache);
2981 }
2991 }
2982
2992
2983 return Py_BuildValue("NN", idx, cache);
2993 return Py_BuildValue("NN", idx, cache);
2984
2994
2985 bail:
2995 bail:
2986 Py_XDECREF(idx);
2996 Py_XDECREF(idx);
2987 Py_XDECREF(cache);
2997 Py_XDECREF(cache);
2988 return NULL;
2998 return NULL;
2989 }
2999 }
2990
3000
2991 static Revlog_CAPI CAPI = {
3001 static Revlog_CAPI CAPI = {
2992 /* increment the abi_version field upon each change in the Revlog_CAPI
3002 /* increment the abi_version field upon each change in the Revlog_CAPI
2993 struct or in the ABI of the listed functions */
3003 struct or in the ABI of the listed functions */
2994 2,
3004 2,
2995 index_length,
3005 index_length,
2996 index_node,
3006 index_node,
2997 HgRevlogIndex_GetParents,
3007 HgRevlogIndex_GetParents,
2998 };
3008 };
2999
3009
3000 void revlog_module_init(PyObject *mod)
3010 void revlog_module_init(PyObject *mod)
3001 {
3011 {
3002 PyObject *caps = NULL;
3012 PyObject *caps = NULL;
3003 HgRevlogIndex_Type.tp_new = PyType_GenericNew;
3013 HgRevlogIndex_Type.tp_new = PyType_GenericNew;
3004 if (PyType_Ready(&HgRevlogIndex_Type) < 0)
3014 if (PyType_Ready(&HgRevlogIndex_Type) < 0)
3005 return;
3015 return;
3006 Py_INCREF(&HgRevlogIndex_Type);
3016 Py_INCREF(&HgRevlogIndex_Type);
3007 PyModule_AddObject(mod, "index", (PyObject *)&HgRevlogIndex_Type);
3017 PyModule_AddObject(mod, "index", (PyObject *)&HgRevlogIndex_Type);
3008
3018
3009 nodetreeType.tp_new = PyType_GenericNew;
3019 nodetreeType.tp_new = PyType_GenericNew;
3010 if (PyType_Ready(&nodetreeType) < 0)
3020 if (PyType_Ready(&nodetreeType) < 0)
3011 return;
3021 return;
3012 Py_INCREF(&nodetreeType);
3022 Py_INCREF(&nodetreeType);
3013 PyModule_AddObject(mod, "nodetree", (PyObject *)&nodetreeType);
3023 PyModule_AddObject(mod, "nodetree", (PyObject *)&nodetreeType);
3014
3024
3015 caps = PyCapsule_New(&CAPI, "mercurial.cext.parsers.revlog_CAPI", NULL);
3025 caps = PyCapsule_New(&CAPI, "mercurial.cext.parsers.revlog_CAPI", NULL);
3016 if (caps != NULL)
3026 if (caps != NULL)
3017 PyModule_AddObject(mod, "revlog_CAPI", caps);
3027 PyModule_AddObject(mod, "revlog_CAPI", caps);
3018 }
3028 }
@@ -1,380 +1,381 b''
1 # parsers.py - Python implementation of parsers.c
1 # parsers.py - Python implementation of parsers.c
2 #
2 #
3 # Copyright 2009 Olivia Mackall <olivia@selenic.com> and others
3 # Copyright 2009 Olivia Mackall <olivia@selenic.com> and others
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import absolute_import
8 from __future__ import absolute_import
9
9
10 import struct
10 import struct
11 import zlib
11 import zlib
12
12
13 from ..node import (
13 from ..node import (
14 nullrev,
14 nullrev,
15 sha1nodeconstants,
15 sha1nodeconstants,
16 )
16 )
17 from .. import (
17 from .. import (
18 pycompat,
18 pycompat,
19 util,
19 util,
20 )
20 )
21
21
22 from ..revlogutils import nodemap as nodemaputil
22 from ..revlogutils import nodemap as nodemaputil
23 from ..revlogutils import constants as revlog_constants
23 from ..revlogutils import constants as revlog_constants
24
24
25 stringio = pycompat.bytesio
25 stringio = pycompat.bytesio
26
26
27
27
28 _pack = struct.pack
28 _pack = struct.pack
29 _unpack = struct.unpack
29 _unpack = struct.unpack
30 _compress = zlib.compress
30 _compress = zlib.compress
31 _decompress = zlib.decompress
31 _decompress = zlib.decompress
32
32
33 # Some code below makes tuples directly because it's more convenient. However,
33 # Some code below makes tuples directly because it's more convenient. However,
34 # code outside this module should always use dirstatetuple.
34 # code outside this module should always use dirstatetuple.
35 def dirstatetuple(*x):
35 def dirstatetuple(*x):
36 # x is a tuple
36 # x is a tuple
37 return x
37 return x
38
38
39
39
40 def gettype(q):
40 def gettype(q):
41 return int(q & 0xFFFF)
41 return int(q & 0xFFFF)
42
42
43
43
44 def offset_type(offset, type):
44 def offset_type(offset, type):
45 return int(int(offset) << 16 | type)
45 return int(int(offset) << 16 | type)
46
46
47
47
48 class BaseIndexObject(object):
48 class BaseIndexObject(object):
49 # Format of an index entry according to Python's `struct` language
49 # Format of an index entry according to Python's `struct` language
50 index_format = revlog_constants.INDEX_ENTRY_V1
50 index_format = revlog_constants.INDEX_ENTRY_V1
51 # Size of a C unsigned long long int, platform independent
51 # Size of a C unsigned long long int, platform independent
52 big_int_size = struct.calcsize(b'>Q')
52 big_int_size = struct.calcsize(b'>Q')
53 # Size of a C long int, platform independent
53 # Size of a C long int, platform independent
54 int_size = struct.calcsize(b'>i')
54 int_size = struct.calcsize(b'>i')
55 # An empty index entry, used as a default value to be overridden, or nullrev
55 # An empty index entry, used as a default value to be overridden, or nullrev
56 null_item = (0, 0, 0, -1, -1, -1, -1, sha1nodeconstants.nullid)
56 null_item = (0, 0, 0, -1, -1, -1, -1, sha1nodeconstants.nullid)
57
57
58 @util.propertycache
58 @util.propertycache
59 def entry_size(self):
59 def entry_size(self):
60 return self.index_format.size
60 return self.index_format.size
61
61
62 @property
62 @property
63 def nodemap(self):
63 def nodemap(self):
64 msg = b"index.nodemap is deprecated, use index.[has_node|rev|get_rev]"
64 msg = b"index.nodemap is deprecated, use index.[has_node|rev|get_rev]"
65 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
65 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
66 return self._nodemap
66 return self._nodemap
67
67
68 @util.propertycache
68 @util.propertycache
69 def _nodemap(self):
69 def _nodemap(self):
70 nodemap = nodemaputil.NodeMap({sha1nodeconstants.nullid: nullrev})
70 nodemap = nodemaputil.NodeMap({sha1nodeconstants.nullid: nullrev})
71 for r in range(0, len(self)):
71 for r in range(0, len(self)):
72 n = self[r][7]
72 n = self[r][7]
73 nodemap[n] = r
73 nodemap[n] = r
74 return nodemap
74 return nodemap
75
75
76 def has_node(self, node):
76 def has_node(self, node):
77 """return True if the node exist in the index"""
77 """return True if the node exist in the index"""
78 return node in self._nodemap
78 return node in self._nodemap
79
79
80 def rev(self, node):
80 def rev(self, node):
81 """return a revision for a node
81 """return a revision for a node
82
82
83 If the node is unknown, raise a RevlogError"""
83 If the node is unknown, raise a RevlogError"""
84 return self._nodemap[node]
84 return self._nodemap[node]
85
85
86 def get_rev(self, node):
86 def get_rev(self, node):
87 """return a revision for a node
87 """return a revision for a node
88
88
89 If the node is unknown, return None"""
89 If the node is unknown, return None"""
90 return self._nodemap.get(node)
90 return self._nodemap.get(node)
91
91
92 def _stripnodes(self, start):
92 def _stripnodes(self, start):
93 if '_nodemap' in vars(self):
93 if '_nodemap' in vars(self):
94 for r in range(start, len(self)):
94 for r in range(start, len(self)):
95 n = self[r][7]
95 n = self[r][7]
96 del self._nodemap[n]
96 del self._nodemap[n]
97
97
98 def clearcaches(self):
98 def clearcaches(self):
99 self.__dict__.pop('_nodemap', None)
99 self.__dict__.pop('_nodemap', None)
100
100
101 def __len__(self):
101 def __len__(self):
102 return self._lgt + len(self._extra)
102 return self._lgt + len(self._extra)
103
103
104 def append(self, tup):
104 def append(self, tup):
105 if '_nodemap' in vars(self):
105 if '_nodemap' in vars(self):
106 self._nodemap[tup[7]] = len(self)
106 self._nodemap[tup[7]] = len(self)
107 data = self.index_format.pack(*tup)
107 data = self.index_format.pack(*tup)
108 self._extra.append(data)
108 self._extra.append(data)
109
109
110 def _check_index(self, i):
110 def _check_index(self, i):
111 if not isinstance(i, int):
111 if not isinstance(i, int):
112 raise TypeError(b"expecting int indexes")
112 raise TypeError(b"expecting int indexes")
113 if i < 0 or i >= len(self):
113 if i < 0 or i >= len(self):
114 raise IndexError
114 raise IndexError
115
115
116 def __getitem__(self, i):
116 def __getitem__(self, i):
117 if i == -1:
117 if i == -1:
118 return self.null_item
118 return self.null_item
119 self._check_index(i)
119 self._check_index(i)
120 if i >= self._lgt:
120 if i >= self._lgt:
121 data = self._extra[i - self._lgt]
121 data = self._extra[i - self._lgt]
122 else:
122 else:
123 index = self._calculate_index(i)
123 index = self._calculate_index(i)
124 data = self._data[index : index + self.entry_size]
124 data = self._data[index : index + self.entry_size]
125 r = self.index_format.unpack(data)
125 r = self.index_format.unpack(data)
126 if self._lgt and i == 0:
126 if self._lgt and i == 0:
127 r = (offset_type(0, gettype(r[0])),) + r[1:]
127 r = (offset_type(0, gettype(r[0])),) + r[1:]
128 return r
128 return r
129
129
130 def entry_binary(self, rev, header):
130 def pack_header(self, header):
131 """pack header information as binary"""
132 v_fmt = revlog_constants.INDEX_HEADER
133 return v_fmt.pack(header)
134
135 def entry_binary(self, rev):
131 """return the raw binary string representing a revision"""
136 """return the raw binary string representing a revision"""
132 entry = self[rev]
137 entry = self[rev]
133 p = revlog_constants.INDEX_ENTRY_V1.pack(*entry)
138 p = revlog_constants.INDEX_ENTRY_V1.pack(*entry)
134 if rev == 0:
139 if rev == 0:
135 v_fmt = revlog_constants.INDEX_HEADER
140 p = p[revlog_constants.INDEX_HEADER.size :]
136 v_bin = v_fmt.pack(header)
137 p = v_bin + p[v_fmt.size :]
138 return p
141 return p
139
142
140
143
141 class IndexObject(BaseIndexObject):
144 class IndexObject(BaseIndexObject):
142 def __init__(self, data):
145 def __init__(self, data):
143 assert len(data) % self.entry_size == 0, (
146 assert len(data) % self.entry_size == 0, (
144 len(data),
147 len(data),
145 self.entry_size,
148 self.entry_size,
146 len(data) % self.entry_size,
149 len(data) % self.entry_size,
147 )
150 )
148 self._data = data
151 self._data = data
149 self._lgt = len(data) // self.entry_size
152 self._lgt = len(data) // self.entry_size
150 self._extra = []
153 self._extra = []
151
154
152 def _calculate_index(self, i):
155 def _calculate_index(self, i):
153 return i * self.entry_size
156 return i * self.entry_size
154
157
155 def __delitem__(self, i):
158 def __delitem__(self, i):
156 if not isinstance(i, slice) or not i.stop == -1 or i.step is not None:
159 if not isinstance(i, slice) or not i.stop == -1 or i.step is not None:
157 raise ValueError(b"deleting slices only supports a:-1 with step 1")
160 raise ValueError(b"deleting slices only supports a:-1 with step 1")
158 i = i.start
161 i = i.start
159 self._check_index(i)
162 self._check_index(i)
160 self._stripnodes(i)
163 self._stripnodes(i)
161 if i < self._lgt:
164 if i < self._lgt:
162 self._data = self._data[: i * self.entry_size]
165 self._data = self._data[: i * self.entry_size]
163 self._lgt = i
166 self._lgt = i
164 self._extra = []
167 self._extra = []
165 else:
168 else:
166 self._extra = self._extra[: i - self._lgt]
169 self._extra = self._extra[: i - self._lgt]
167
170
168
171
169 class PersistentNodeMapIndexObject(IndexObject):
172 class PersistentNodeMapIndexObject(IndexObject):
170 """a Debug oriented class to test persistent nodemap
173 """a Debug oriented class to test persistent nodemap
171
174
172 We need a simple python object to test API and higher level behavior. See
175 We need a simple python object to test API and higher level behavior. See
173 the Rust implementation for more serious usage. This should be used only
176 the Rust implementation for more serious usage. This should be used only
174 through the dedicated `devel.persistent-nodemap` config.
177 through the dedicated `devel.persistent-nodemap` config.
175 """
178 """
176
179
177 def nodemap_data_all(self):
180 def nodemap_data_all(self):
178 """Return bytes containing a full serialization of a nodemap
181 """Return bytes containing a full serialization of a nodemap
179
182
180 The nodemap should be valid for the full set of revisions in the
183 The nodemap should be valid for the full set of revisions in the
181 index."""
184 index."""
182 return nodemaputil.persistent_data(self)
185 return nodemaputil.persistent_data(self)
183
186
184 def nodemap_data_incremental(self):
187 def nodemap_data_incremental(self):
185 """Return bytes containing a incremental update to persistent nodemap
188 """Return bytes containing a incremental update to persistent nodemap
186
189
187 This containst the data for an append-only update of the data provided
190 This containst the data for an append-only update of the data provided
188 in the last call to `update_nodemap_data`.
191 in the last call to `update_nodemap_data`.
189 """
192 """
190 if self._nm_root is None:
193 if self._nm_root is None:
191 return None
194 return None
192 docket = self._nm_docket
195 docket = self._nm_docket
193 changed, data = nodemaputil.update_persistent_data(
196 changed, data = nodemaputil.update_persistent_data(
194 self, self._nm_root, self._nm_max_idx, self._nm_docket.tip_rev
197 self, self._nm_root, self._nm_max_idx, self._nm_docket.tip_rev
195 )
198 )
196
199
197 self._nm_root = self._nm_max_idx = self._nm_docket = None
200 self._nm_root = self._nm_max_idx = self._nm_docket = None
198 return docket, changed, data
201 return docket, changed, data
199
202
200 def update_nodemap_data(self, docket, nm_data):
203 def update_nodemap_data(self, docket, nm_data):
201 """provide full block of persisted binary data for a nodemap
204 """provide full block of persisted binary data for a nodemap
202
205
203 The data are expected to come from disk. See `nodemap_data_all` for a
206 The data are expected to come from disk. See `nodemap_data_all` for a
204 produceur of such data."""
207 produceur of such data."""
205 if nm_data is not None:
208 if nm_data is not None:
206 self._nm_root, self._nm_max_idx = nodemaputil.parse_data(nm_data)
209 self._nm_root, self._nm_max_idx = nodemaputil.parse_data(nm_data)
207 if self._nm_root:
210 if self._nm_root:
208 self._nm_docket = docket
211 self._nm_docket = docket
209 else:
212 else:
210 self._nm_root = self._nm_max_idx = self._nm_docket = None
213 self._nm_root = self._nm_max_idx = self._nm_docket = None
211
214
212
215
213 class InlinedIndexObject(BaseIndexObject):
216 class InlinedIndexObject(BaseIndexObject):
214 def __init__(self, data, inline=0):
217 def __init__(self, data, inline=0):
215 self._data = data
218 self._data = data
216 self._lgt = self._inline_scan(None)
219 self._lgt = self._inline_scan(None)
217 self._inline_scan(self._lgt)
220 self._inline_scan(self._lgt)
218 self._extra = []
221 self._extra = []
219
222
220 def _inline_scan(self, lgt):
223 def _inline_scan(self, lgt):
221 off = 0
224 off = 0
222 if lgt is not None:
225 if lgt is not None:
223 self._offsets = [0] * lgt
226 self._offsets = [0] * lgt
224 count = 0
227 count = 0
225 while off <= len(self._data) - self.entry_size:
228 while off <= len(self._data) - self.entry_size:
226 start = off + self.big_int_size
229 start = off + self.big_int_size
227 (s,) = struct.unpack(
230 (s,) = struct.unpack(
228 b'>i',
231 b'>i',
229 self._data[start : start + self.int_size],
232 self._data[start : start + self.int_size],
230 )
233 )
231 if lgt is not None:
234 if lgt is not None:
232 self._offsets[count] = off
235 self._offsets[count] = off
233 count += 1
236 count += 1
234 off += self.entry_size + s
237 off += self.entry_size + s
235 if off != len(self._data):
238 if off != len(self._data):
236 raise ValueError(b"corrupted data")
239 raise ValueError(b"corrupted data")
237 return count
240 return count
238
241
239 def __delitem__(self, i):
242 def __delitem__(self, i):
240 if not isinstance(i, slice) or not i.stop == -1 or i.step is not None:
243 if not isinstance(i, slice) or not i.stop == -1 or i.step is not None:
241 raise ValueError(b"deleting slices only supports a:-1 with step 1")
244 raise ValueError(b"deleting slices only supports a:-1 with step 1")
242 i = i.start
245 i = i.start
243 self._check_index(i)
246 self._check_index(i)
244 self._stripnodes(i)
247 self._stripnodes(i)
245 if i < self._lgt:
248 if i < self._lgt:
246 self._offsets = self._offsets[:i]
249 self._offsets = self._offsets[:i]
247 self._lgt = i
250 self._lgt = i
248 self._extra = []
251 self._extra = []
249 else:
252 else:
250 self._extra = self._extra[: i - self._lgt]
253 self._extra = self._extra[: i - self._lgt]
251
254
252 def _calculate_index(self, i):
255 def _calculate_index(self, i):
253 return self._offsets[i]
256 return self._offsets[i]
254
257
255
258
256 def parse_index2(data, inline, revlogv2=False):
259 def parse_index2(data, inline, revlogv2=False):
257 if not inline:
260 if not inline:
258 cls = IndexObject2 if revlogv2 else IndexObject
261 cls = IndexObject2 if revlogv2 else IndexObject
259 return cls(data), None
262 return cls(data), None
260 cls = InlinedIndexObject2 if revlogv2 else InlinedIndexObject
263 cls = InlinedIndexObject2 if revlogv2 else InlinedIndexObject
261 return cls(data, inline), (0, data)
264 return cls(data, inline), (0, data)
262
265
263
266
264 class Index2Mixin(object):
267 class Index2Mixin(object):
265 index_format = revlog_constants.INDEX_ENTRY_V2
268 index_format = revlog_constants.INDEX_ENTRY_V2
266 null_item = (0, 0, 0, -1, -1, -1, -1, sha1nodeconstants.nullid, 0, 0)
269 null_item = (0, 0, 0, -1, -1, -1, -1, sha1nodeconstants.nullid, 0, 0)
267
270
268 def replace_sidedata_info(self, i, sidedata_offset, sidedata_length):
271 def replace_sidedata_info(self, i, sidedata_offset, sidedata_length):
269 """
272 """
270 Replace an existing index entry's sidedata offset and length with new
273 Replace an existing index entry's sidedata offset and length with new
271 ones.
274 ones.
272 This cannot be used outside of the context of sidedata rewriting,
275 This cannot be used outside of the context of sidedata rewriting,
273 inside the transaction that creates the revision `i`.
276 inside the transaction that creates the revision `i`.
274 """
277 """
275 if i < 0:
278 if i < 0:
276 raise KeyError
279 raise KeyError
277 self._check_index(i)
280 self._check_index(i)
278 sidedata_format = b">Qi"
281 sidedata_format = b">Qi"
279 packed_size = struct.calcsize(sidedata_format)
282 packed_size = struct.calcsize(sidedata_format)
280 if i >= self._lgt:
283 if i >= self._lgt:
281 packed = _pack(sidedata_format, sidedata_offset, sidedata_length)
284 packed = _pack(sidedata_format, sidedata_offset, sidedata_length)
282 old = self._extra[i - self._lgt]
285 old = self._extra[i - self._lgt]
283 new = old[:64] + packed + old[64 + packed_size :]
286 new = old[:64] + packed + old[64 + packed_size :]
284 self._extra[i - self._lgt] = new
287 self._extra[i - self._lgt] = new
285 else:
288 else:
286 msg = b"cannot rewrite entries outside of this transaction"
289 msg = b"cannot rewrite entries outside of this transaction"
287 raise KeyError(msg)
290 raise KeyError(msg)
288
291
289 def entry_binary(self, rev, header):
292 def entry_binary(self, rev):
290 """return the raw binary string representing a revision"""
293 """return the raw binary string representing a revision"""
291 entry = self[rev]
294 entry = self[rev]
292 p = revlog_constants.INDEX_ENTRY_V2.pack(*entry)
295 p = revlog_constants.INDEX_ENTRY_V2.pack(*entry)
293 if rev == 0:
296 if rev == 0:
294 v_fmt = revlog_constants.INDEX_HEADER
297 p = p[revlog_constants.INDEX_HEADER.size :]
295 v_bin = v_fmt.pack(header)
296 p = v_bin + p[v_fmt.size :]
297 return p
298 return p
298
299
299
300
300 class IndexObject2(Index2Mixin, IndexObject):
301 class IndexObject2(Index2Mixin, IndexObject):
301 pass
302 pass
302
303
303
304
304 class InlinedIndexObject2(Index2Mixin, InlinedIndexObject):
305 class InlinedIndexObject2(Index2Mixin, InlinedIndexObject):
305 def _inline_scan(self, lgt):
306 def _inline_scan(self, lgt):
306 sidedata_length_pos = 72
307 sidedata_length_pos = 72
307 off = 0
308 off = 0
308 if lgt is not None:
309 if lgt is not None:
309 self._offsets = [0] * lgt
310 self._offsets = [0] * lgt
310 count = 0
311 count = 0
311 while off <= len(self._data) - self.entry_size:
312 while off <= len(self._data) - self.entry_size:
312 start = off + self.big_int_size
313 start = off + self.big_int_size
313 (data_size,) = struct.unpack(
314 (data_size,) = struct.unpack(
314 b'>i',
315 b'>i',
315 self._data[start : start + self.int_size],
316 self._data[start : start + self.int_size],
316 )
317 )
317 start = off + sidedata_length_pos
318 start = off + sidedata_length_pos
318 (side_data_size,) = struct.unpack(
319 (side_data_size,) = struct.unpack(
319 b'>i', self._data[start : start + self.int_size]
320 b'>i', self._data[start : start + self.int_size]
320 )
321 )
321 if lgt is not None:
322 if lgt is not None:
322 self._offsets[count] = off
323 self._offsets[count] = off
323 count += 1
324 count += 1
324 off += self.entry_size + data_size + side_data_size
325 off += self.entry_size + data_size + side_data_size
325 if off != len(self._data):
326 if off != len(self._data):
326 raise ValueError(b"corrupted data")
327 raise ValueError(b"corrupted data")
327 return count
328 return count
328
329
329
330
330 def parse_index_devel_nodemap(data, inline):
331 def parse_index_devel_nodemap(data, inline):
331 """like parse_index2, but alway return a PersistentNodeMapIndexObject"""
332 """like parse_index2, but alway return a PersistentNodeMapIndexObject"""
332 return PersistentNodeMapIndexObject(data), None
333 return PersistentNodeMapIndexObject(data), None
333
334
334
335
335 def parse_dirstate(dmap, copymap, st):
336 def parse_dirstate(dmap, copymap, st):
336 parents = [st[:20], st[20:40]]
337 parents = [st[:20], st[20:40]]
337 # dereference fields so they will be local in loop
338 # dereference fields so they will be local in loop
338 format = b">cllll"
339 format = b">cllll"
339 e_size = struct.calcsize(format)
340 e_size = struct.calcsize(format)
340 pos1 = 40
341 pos1 = 40
341 l = len(st)
342 l = len(st)
342
343
343 # the inner loop
344 # the inner loop
344 while pos1 < l:
345 while pos1 < l:
345 pos2 = pos1 + e_size
346 pos2 = pos1 + e_size
346 e = _unpack(b">cllll", st[pos1:pos2]) # a literal here is faster
347 e = _unpack(b">cllll", st[pos1:pos2]) # a literal here is faster
347 pos1 = pos2 + e[4]
348 pos1 = pos2 + e[4]
348 f = st[pos2:pos1]
349 f = st[pos2:pos1]
349 if b'\0' in f:
350 if b'\0' in f:
350 f, c = f.split(b'\0')
351 f, c = f.split(b'\0')
351 copymap[f] = c
352 copymap[f] = c
352 dmap[f] = e[:4]
353 dmap[f] = e[:4]
353 return parents
354 return parents
354
355
355
356
356 def pack_dirstate(dmap, copymap, pl, now):
357 def pack_dirstate(dmap, copymap, pl, now):
357 now = int(now)
358 now = int(now)
358 cs = stringio()
359 cs = stringio()
359 write = cs.write
360 write = cs.write
360 write(b"".join(pl))
361 write(b"".join(pl))
361 for f, e in pycompat.iteritems(dmap):
362 for f, e in pycompat.iteritems(dmap):
362 if e[0] == b'n' and e[3] == now:
363 if e[0] == b'n' and e[3] == now:
363 # The file was last modified "simultaneously" with the current
364 # The file was last modified "simultaneously" with the current
364 # write to dirstate (i.e. within the same second for file-
365 # write to dirstate (i.e. within the same second for file-
365 # systems with a granularity of 1 sec). This commonly happens
366 # systems with a granularity of 1 sec). This commonly happens
366 # for at least a couple of files on 'update'.
367 # for at least a couple of files on 'update'.
367 # The user could change the file without changing its size
368 # The user could change the file without changing its size
368 # within the same second. Invalidate the file's mtime in
369 # within the same second. Invalidate the file's mtime in
369 # dirstate, forcing future 'status' calls to compare the
370 # dirstate, forcing future 'status' calls to compare the
370 # contents of the file if the size is the same. This prevents
371 # contents of the file if the size is the same. This prevents
371 # mistakenly treating such files as clean.
372 # mistakenly treating such files as clean.
372 e = dirstatetuple(e[0], e[1], e[2], -1)
373 e = dirstatetuple(e[0], e[1], e[2], -1)
373 dmap[f] = e
374 dmap[f] = e
374
375
375 if f in copymap:
376 if f in copymap:
376 f = b"%s\0%s" % (f, copymap[f])
377 f = b"%s\0%s" % (f, copymap[f])
377 e = _pack(b">cllll", e[0], e[1], e[2], e[3], len(f))
378 e = _pack(b">cllll", e[0], e[1], e[2], e[3], len(f))
378 write(e)
379 write(e)
379 write(f)
380 write(f)
380 return cs.getvalue()
381 return cs.getvalue()
@@ -1,3220 +1,3233 b''
1 # revlog.py - storage back-end for mercurial
1 # revlog.py - storage back-end for mercurial
2 #
2 #
3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 """Storage back-end for Mercurial.
8 """Storage back-end for Mercurial.
9
9
10 This provides efficient delta storage with O(1) retrieve and append
10 This provides efficient delta storage with O(1) retrieve and append
11 and O(changes) merge between branches.
11 and O(changes) merge between branches.
12 """
12 """
13
13
14 from __future__ import absolute_import
14 from __future__ import absolute_import
15
15
16 import binascii
16 import binascii
17 import collections
17 import collections
18 import contextlib
18 import contextlib
19 import errno
19 import errno
20 import io
20 import io
21 import os
21 import os
22 import struct
22 import struct
23 import zlib
23 import zlib
24
24
25 # import stuff from node for others to import from revlog
25 # import stuff from node for others to import from revlog
26 from .node import (
26 from .node import (
27 bin,
27 bin,
28 hex,
28 hex,
29 nullrev,
29 nullrev,
30 sha1nodeconstants,
30 sha1nodeconstants,
31 short,
31 short,
32 wdirrev,
32 wdirrev,
33 )
33 )
34 from .i18n import _
34 from .i18n import _
35 from .pycompat import getattr
35 from .pycompat import getattr
36 from .revlogutils.constants import (
36 from .revlogutils.constants import (
37 FLAG_GENERALDELTA,
37 FLAG_GENERALDELTA,
38 FLAG_INLINE_DATA,
38 FLAG_INLINE_DATA,
39 INDEX_ENTRY_V0,
39 INDEX_ENTRY_V0,
40 INDEX_HEADER,
40 INDEX_HEADER,
41 REVLOGV0,
41 REVLOGV0,
42 REVLOGV1,
42 REVLOGV1,
43 REVLOGV1_FLAGS,
43 REVLOGV1_FLAGS,
44 REVLOGV2,
44 REVLOGV2,
45 REVLOGV2_FLAGS,
45 REVLOGV2_FLAGS,
46 REVLOG_DEFAULT_FLAGS,
46 REVLOG_DEFAULT_FLAGS,
47 REVLOG_DEFAULT_FORMAT,
47 REVLOG_DEFAULT_FORMAT,
48 REVLOG_DEFAULT_VERSION,
48 REVLOG_DEFAULT_VERSION,
49 )
49 )
50 from .revlogutils.flagutil import (
50 from .revlogutils.flagutil import (
51 REVIDX_DEFAULT_FLAGS,
51 REVIDX_DEFAULT_FLAGS,
52 REVIDX_ELLIPSIS,
52 REVIDX_ELLIPSIS,
53 REVIDX_EXTSTORED,
53 REVIDX_EXTSTORED,
54 REVIDX_FLAGS_ORDER,
54 REVIDX_FLAGS_ORDER,
55 REVIDX_HASCOPIESINFO,
55 REVIDX_HASCOPIESINFO,
56 REVIDX_ISCENSORED,
56 REVIDX_ISCENSORED,
57 REVIDX_RAWTEXT_CHANGING_FLAGS,
57 REVIDX_RAWTEXT_CHANGING_FLAGS,
58 REVIDX_SIDEDATA,
58 REVIDX_SIDEDATA,
59 )
59 )
60 from .thirdparty import attr
60 from .thirdparty import attr
61 from . import (
61 from . import (
62 ancestor,
62 ancestor,
63 dagop,
63 dagop,
64 error,
64 error,
65 mdiff,
65 mdiff,
66 policy,
66 policy,
67 pycompat,
67 pycompat,
68 templatefilters,
68 templatefilters,
69 util,
69 util,
70 )
70 )
71 from .interfaces import (
71 from .interfaces import (
72 repository,
72 repository,
73 util as interfaceutil,
73 util as interfaceutil,
74 )
74 )
75 from .revlogutils import (
75 from .revlogutils import (
76 deltas as deltautil,
76 deltas as deltautil,
77 flagutil,
77 flagutil,
78 nodemap as nodemaputil,
78 nodemap as nodemaputil,
79 sidedata as sidedatautil,
79 sidedata as sidedatautil,
80 )
80 )
81 from .utils import (
81 from .utils import (
82 storageutil,
82 storageutil,
83 stringutil,
83 stringutil,
84 )
84 )
85
85
86 # blanked usage of all the name to prevent pyflakes constraints
86 # blanked usage of all the name to prevent pyflakes constraints
87 # We need these name available in the module for extensions.
87 # We need these name available in the module for extensions.
88 REVLOGV0
88 REVLOGV0
89 REVLOGV1
89 REVLOGV1
90 REVLOGV2
90 REVLOGV2
91 FLAG_INLINE_DATA
91 FLAG_INLINE_DATA
92 FLAG_GENERALDELTA
92 FLAG_GENERALDELTA
93 REVLOG_DEFAULT_FLAGS
93 REVLOG_DEFAULT_FLAGS
94 REVLOG_DEFAULT_FORMAT
94 REVLOG_DEFAULT_FORMAT
95 REVLOG_DEFAULT_VERSION
95 REVLOG_DEFAULT_VERSION
96 REVLOGV1_FLAGS
96 REVLOGV1_FLAGS
97 REVLOGV2_FLAGS
97 REVLOGV2_FLAGS
98 REVIDX_ISCENSORED
98 REVIDX_ISCENSORED
99 REVIDX_ELLIPSIS
99 REVIDX_ELLIPSIS
100 REVIDX_SIDEDATA
100 REVIDX_SIDEDATA
101 REVIDX_HASCOPIESINFO
101 REVIDX_HASCOPIESINFO
102 REVIDX_EXTSTORED
102 REVIDX_EXTSTORED
103 REVIDX_DEFAULT_FLAGS
103 REVIDX_DEFAULT_FLAGS
104 REVIDX_FLAGS_ORDER
104 REVIDX_FLAGS_ORDER
105 REVIDX_RAWTEXT_CHANGING_FLAGS
105 REVIDX_RAWTEXT_CHANGING_FLAGS
106
106
107 parsers = policy.importmod('parsers')
107 parsers = policy.importmod('parsers')
108 rustancestor = policy.importrust('ancestor')
108 rustancestor = policy.importrust('ancestor')
109 rustdagop = policy.importrust('dagop')
109 rustdagop = policy.importrust('dagop')
110 rustrevlog = policy.importrust('revlog')
110 rustrevlog = policy.importrust('revlog')
111
111
112 # Aliased for performance.
112 # Aliased for performance.
113 _zlibdecompress = zlib.decompress
113 _zlibdecompress = zlib.decompress
114
114
115 # max size of revlog with inline data
115 # max size of revlog with inline data
116 _maxinline = 131072
116 _maxinline = 131072
117 _chunksize = 1048576
117 _chunksize = 1048576
118
118
119 # Flag processors for REVIDX_ELLIPSIS.
119 # Flag processors for REVIDX_ELLIPSIS.
120 def ellipsisreadprocessor(rl, text):
120 def ellipsisreadprocessor(rl, text):
121 return text, False
121 return text, False
122
122
123
123
124 def ellipsiswriteprocessor(rl, text):
124 def ellipsiswriteprocessor(rl, text):
125 return text, False
125 return text, False
126
126
127
127
128 def ellipsisrawprocessor(rl, text):
128 def ellipsisrawprocessor(rl, text):
129 return False
129 return False
130
130
131
131
132 ellipsisprocessor = (
132 ellipsisprocessor = (
133 ellipsisreadprocessor,
133 ellipsisreadprocessor,
134 ellipsiswriteprocessor,
134 ellipsiswriteprocessor,
135 ellipsisrawprocessor,
135 ellipsisrawprocessor,
136 )
136 )
137
137
138
138
139 def getoffset(q):
139 def getoffset(q):
140 return int(q >> 16)
140 return int(q >> 16)
141
141
142
142
143 def gettype(q):
143 def gettype(q):
144 return int(q & 0xFFFF)
144 return int(q & 0xFFFF)
145
145
146
146
147 def offset_type(offset, type):
147 def offset_type(offset, type):
148 if (type & ~flagutil.REVIDX_KNOWN_FLAGS) != 0:
148 if (type & ~flagutil.REVIDX_KNOWN_FLAGS) != 0:
149 raise ValueError(b'unknown revlog index flags')
149 raise ValueError(b'unknown revlog index flags')
150 return int(int(offset) << 16 | type)
150 return int(int(offset) << 16 | type)
151
151
152
152
153 def _verify_revision(rl, skipflags, state, node):
153 def _verify_revision(rl, skipflags, state, node):
154 """Verify the integrity of the given revlog ``node`` while providing a hook
154 """Verify the integrity of the given revlog ``node`` while providing a hook
155 point for extensions to influence the operation."""
155 point for extensions to influence the operation."""
156 if skipflags:
156 if skipflags:
157 state[b'skipread'].add(node)
157 state[b'skipread'].add(node)
158 else:
158 else:
159 # Side-effect: read content and verify hash.
159 # Side-effect: read content and verify hash.
160 rl.revision(node)
160 rl.revision(node)
161
161
162
162
163 # True if a fast implementation for persistent-nodemap is available
163 # True if a fast implementation for persistent-nodemap is available
164 #
164 #
165 # We also consider we have a "fast" implementation in "pure" python because
165 # We also consider we have a "fast" implementation in "pure" python because
166 # people using pure don't really have performance consideration (and a
166 # people using pure don't really have performance consideration (and a
167 # wheelbarrow of other slowness source)
167 # wheelbarrow of other slowness source)
168 HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or util.safehasattr(
168 HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or util.safehasattr(
169 parsers, 'BaseIndexObject'
169 parsers, 'BaseIndexObject'
170 )
170 )
171
171
172
172
173 @attr.s(slots=True, frozen=True)
173 @attr.s(slots=True, frozen=True)
174 class _revisioninfo(object):
174 class _revisioninfo(object):
175 """Information about a revision that allows building its fulltext
175 """Information about a revision that allows building its fulltext
176 node: expected hash of the revision
176 node: expected hash of the revision
177 p1, p2: parent revs of the revision
177 p1, p2: parent revs of the revision
178 btext: built text cache consisting of a one-element list
178 btext: built text cache consisting of a one-element list
179 cachedelta: (baserev, uncompressed_delta) or None
179 cachedelta: (baserev, uncompressed_delta) or None
180 flags: flags associated to the revision storage
180 flags: flags associated to the revision storage
181
181
182 One of btext[0] or cachedelta must be set.
182 One of btext[0] or cachedelta must be set.
183 """
183 """
184
184
185 node = attr.ib()
185 node = attr.ib()
186 p1 = attr.ib()
186 p1 = attr.ib()
187 p2 = attr.ib()
187 p2 = attr.ib()
188 btext = attr.ib()
188 btext = attr.ib()
189 textlen = attr.ib()
189 textlen = attr.ib()
190 cachedelta = attr.ib()
190 cachedelta = attr.ib()
191 flags = attr.ib()
191 flags = attr.ib()
192
192
193
193
194 @interfaceutil.implementer(repository.irevisiondelta)
194 @interfaceutil.implementer(repository.irevisiondelta)
195 @attr.s(slots=True)
195 @attr.s(slots=True)
196 class revlogrevisiondelta(object):
196 class revlogrevisiondelta(object):
197 node = attr.ib()
197 node = attr.ib()
198 p1node = attr.ib()
198 p1node = attr.ib()
199 p2node = attr.ib()
199 p2node = attr.ib()
200 basenode = attr.ib()
200 basenode = attr.ib()
201 flags = attr.ib()
201 flags = attr.ib()
202 baserevisionsize = attr.ib()
202 baserevisionsize = attr.ib()
203 revision = attr.ib()
203 revision = attr.ib()
204 delta = attr.ib()
204 delta = attr.ib()
205 sidedata = attr.ib()
205 sidedata = attr.ib()
206 linknode = attr.ib(default=None)
206 linknode = attr.ib(default=None)
207
207
208
208
209 @interfaceutil.implementer(repository.iverifyproblem)
209 @interfaceutil.implementer(repository.iverifyproblem)
210 @attr.s(frozen=True)
210 @attr.s(frozen=True)
211 class revlogproblem(object):
211 class revlogproblem(object):
212 warning = attr.ib(default=None)
212 warning = attr.ib(default=None)
213 error = attr.ib(default=None)
213 error = attr.ib(default=None)
214 node = attr.ib(default=None)
214 node = attr.ib(default=None)
215
215
216
216
217 class revlogoldindex(list):
217 class revlogoldindex(list):
218 entry_size = INDEX_ENTRY_V0.size
218 entry_size = INDEX_ENTRY_V0.size
219
219
220 @property
220 @property
221 def nodemap(self):
221 def nodemap(self):
222 msg = b"index.nodemap is deprecated, use index.[has_node|rev|get_rev]"
222 msg = b"index.nodemap is deprecated, use index.[has_node|rev|get_rev]"
223 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
223 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
224 return self._nodemap
224 return self._nodemap
225
225
226 @util.propertycache
226 @util.propertycache
227 def _nodemap(self):
227 def _nodemap(self):
228 nodemap = nodemaputil.NodeMap({sha1nodeconstants.nullid: nullrev})
228 nodemap = nodemaputil.NodeMap({sha1nodeconstants.nullid: nullrev})
229 for r in range(0, len(self)):
229 for r in range(0, len(self)):
230 n = self[r][7]
230 n = self[r][7]
231 nodemap[n] = r
231 nodemap[n] = r
232 return nodemap
232 return nodemap
233
233
234 def has_node(self, node):
234 def has_node(self, node):
235 """return True if the node exist in the index"""
235 """return True if the node exist in the index"""
236 return node in self._nodemap
236 return node in self._nodemap
237
237
238 def rev(self, node):
238 def rev(self, node):
239 """return a revision for a node
239 """return a revision for a node
240
240
241 If the node is unknown, raise a RevlogError"""
241 If the node is unknown, raise a RevlogError"""
242 return self._nodemap[node]
242 return self._nodemap[node]
243
243
244 def get_rev(self, node):
244 def get_rev(self, node):
245 """return a revision for a node
245 """return a revision for a node
246
246
247 If the node is unknown, return None"""
247 If the node is unknown, return None"""
248 return self._nodemap.get(node)
248 return self._nodemap.get(node)
249
249
250 def append(self, tup):
250 def append(self, tup):
251 self._nodemap[tup[7]] = len(self)
251 self._nodemap[tup[7]] = len(self)
252 super(revlogoldindex, self).append(tup)
252 super(revlogoldindex, self).append(tup)
253
253
254 def __delitem__(self, i):
254 def __delitem__(self, i):
255 if not isinstance(i, slice) or not i.stop == -1 or i.step is not None:
255 if not isinstance(i, slice) or not i.stop == -1 or i.step is not None:
256 raise ValueError(b"deleting slices only supports a:-1 with step 1")
256 raise ValueError(b"deleting slices only supports a:-1 with step 1")
257 for r in pycompat.xrange(i.start, len(self)):
257 for r in pycompat.xrange(i.start, len(self)):
258 del self._nodemap[self[r][7]]
258 del self._nodemap[self[r][7]]
259 super(revlogoldindex, self).__delitem__(i)
259 super(revlogoldindex, self).__delitem__(i)
260
260
261 def clearcaches(self):
261 def clearcaches(self):
262 self.__dict__.pop('_nodemap', None)
262 self.__dict__.pop('_nodemap', None)
263
263
264 def __getitem__(self, i):
264 def __getitem__(self, i):
265 if i == -1:
265 if i == -1:
266 return (0, 0, 0, -1, -1, -1, -1, sha1nodeconstants.nullid)
266 return (0, 0, 0, -1, -1, -1, -1, sha1nodeconstants.nullid)
267 return list.__getitem__(self, i)
267 return list.__getitem__(self, i)
268
268
269 def entry_binary(self, rev, header):
269 def entry_binary(self, rev):
270 """return the raw binary string representing a revision"""
270 """return the raw binary string representing a revision"""
271 entry = self[rev]
271 entry = self[rev]
272 if gettype(entry[0]):
272 if gettype(entry[0]):
273 raise error.RevlogError(
273 raise error.RevlogError(
274 _(b'index entry flags need revlog version 1')
274 _(b'index entry flags need revlog version 1')
275 )
275 )
276 e2 = (
276 e2 = (
277 getoffset(entry[0]),
277 getoffset(entry[0]),
278 entry[1],
278 entry[1],
279 entry[3],
279 entry[3],
280 entry[4],
280 entry[4],
281 self[entry[5]][7],
281 self[entry[5]][7],
282 self[entry[6]][7],
282 self[entry[6]][7],
283 entry[7],
283 entry[7],
284 )
284 )
285 return INDEX_ENTRY_V0.pack(*e2)
285 return INDEX_ENTRY_V0.pack(*e2)
286
286
287 def pack_header(self, header):
288 """Pack header information in binary"""
289 return b''
290
287
291
288 def parse_index_v0(data, inline):
292 def parse_index_v0(data, inline):
289 s = INDEX_ENTRY_V0.size
293 s = INDEX_ENTRY_V0.size
290 index = []
294 index = []
291 nodemap = nodemaputil.NodeMap({sha1nodeconstants.nullid: nullrev})
295 nodemap = nodemaputil.NodeMap({sha1nodeconstants.nullid: nullrev})
292 n = off = 0
296 n = off = 0
293 l = len(data)
297 l = len(data)
294 while off + s <= l:
298 while off + s <= l:
295 cur = data[off : off + s]
299 cur = data[off : off + s]
296 off += s
300 off += s
297 e = INDEX_ENTRY_V0.unpack(cur)
301 e = INDEX_ENTRY_V0.unpack(cur)
298 # transform to revlogv1 format
302 # transform to revlogv1 format
299 e2 = (
303 e2 = (
300 offset_type(e[0], 0),
304 offset_type(e[0], 0),
301 e[1],
305 e[1],
302 -1,
306 -1,
303 e[2],
307 e[2],
304 e[3],
308 e[3],
305 nodemap.get(e[4], nullrev),
309 nodemap.get(e[4], nullrev),
306 nodemap.get(e[5], nullrev),
310 nodemap.get(e[5], nullrev),
307 e[6],
311 e[6],
308 )
312 )
309 index.append(e2)
313 index.append(e2)
310 nodemap[e[6]] = n
314 nodemap[e[6]] = n
311 n += 1
315 n += 1
312
316
313 index = revlogoldindex(index)
317 index = revlogoldindex(index)
314 return index, None
318 return index, None
315
319
316
320
317 def parse_index_v1(data, inline):
321 def parse_index_v1(data, inline):
318 # call the C implementation to parse the index data
322 # call the C implementation to parse the index data
319 index, cache = parsers.parse_index2(data, inline)
323 index, cache = parsers.parse_index2(data, inline)
320 return index, cache
324 return index, cache
321
325
322
326
323 def parse_index_v2(data, inline):
327 def parse_index_v2(data, inline):
324 # call the C implementation to parse the index data
328 # call the C implementation to parse the index data
325 index, cache = parsers.parse_index2(data, inline, revlogv2=True)
329 index, cache = parsers.parse_index2(data, inline, revlogv2=True)
326 return index, cache
330 return index, cache
327
331
328
332
329 if util.safehasattr(parsers, 'parse_index_devel_nodemap'):
333 if util.safehasattr(parsers, 'parse_index_devel_nodemap'):
330
334
331 def parse_index_v1_nodemap(data, inline):
335 def parse_index_v1_nodemap(data, inline):
332 index, cache = parsers.parse_index_devel_nodemap(data, inline)
336 index, cache = parsers.parse_index_devel_nodemap(data, inline)
333 return index, cache
337 return index, cache
334
338
335
339
336 else:
340 else:
337 parse_index_v1_nodemap = None
341 parse_index_v1_nodemap = None
338
342
339
343
340 def parse_index_v1_mixed(data, inline):
344 def parse_index_v1_mixed(data, inline):
341 index, cache = parse_index_v1(data, inline)
345 index, cache = parse_index_v1(data, inline)
342 return rustrevlog.MixedIndex(index), cache
346 return rustrevlog.MixedIndex(index), cache
343
347
344
348
345 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
349 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
346 # signed integer)
350 # signed integer)
347 _maxentrysize = 0x7FFFFFFF
351 _maxentrysize = 0x7FFFFFFF
348
352
349
353
350 class revlog(object):
354 class revlog(object):
351 """
355 """
352 the underlying revision storage object
356 the underlying revision storage object
353
357
354 A revlog consists of two parts, an index and the revision data.
358 A revlog consists of two parts, an index and the revision data.
355
359
356 The index is a file with a fixed record size containing
360 The index is a file with a fixed record size containing
357 information on each revision, including its nodeid (hash), the
361 information on each revision, including its nodeid (hash), the
358 nodeids of its parents, the position and offset of its data within
362 nodeids of its parents, the position and offset of its data within
359 the data file, and the revision it's based on. Finally, each entry
363 the data file, and the revision it's based on. Finally, each entry
360 contains a linkrev entry that can serve as a pointer to external
364 contains a linkrev entry that can serve as a pointer to external
361 data.
365 data.
362
366
363 The revision data itself is a linear collection of data chunks.
367 The revision data itself is a linear collection of data chunks.
364 Each chunk represents a revision and is usually represented as a
368 Each chunk represents a revision and is usually represented as a
365 delta against the previous chunk. To bound lookup time, runs of
369 delta against the previous chunk. To bound lookup time, runs of
366 deltas are limited to about 2 times the length of the original
370 deltas are limited to about 2 times the length of the original
367 version data. This makes retrieval of a version proportional to
371 version data. This makes retrieval of a version proportional to
368 its size, or O(1) relative to the number of revisions.
372 its size, or O(1) relative to the number of revisions.
369
373
370 Both pieces of the revlog are written to in an append-only
374 Both pieces of the revlog are written to in an append-only
371 fashion, which means we never need to rewrite a file to insert or
375 fashion, which means we never need to rewrite a file to insert or
372 remove data, and can use some simple techniques to avoid the need
376 remove data, and can use some simple techniques to avoid the need
373 for locking while reading.
377 for locking while reading.
374
378
375 If checkambig, indexfile is opened with checkambig=True at
379 If checkambig, indexfile is opened with checkambig=True at
376 writing, to avoid file stat ambiguity.
380 writing, to avoid file stat ambiguity.
377
381
378 If mmaplargeindex is True, and an mmapindexthreshold is set, the
382 If mmaplargeindex is True, and an mmapindexthreshold is set, the
379 index will be mmapped rather than read if it is larger than the
383 index will be mmapped rather than read if it is larger than the
380 configured threshold.
384 configured threshold.
381
385
382 If censorable is True, the revlog can have censored revisions.
386 If censorable is True, the revlog can have censored revisions.
383
387
384 If `upperboundcomp` is not None, this is the expected maximal gain from
388 If `upperboundcomp` is not None, this is the expected maximal gain from
385 compression for the data content.
389 compression for the data content.
386
390
387 `concurrencychecker` is an optional function that receives 3 arguments: a
391 `concurrencychecker` is an optional function that receives 3 arguments: a
388 file handle, a filename, and an expected position. It should check whether
392 file handle, a filename, and an expected position. It should check whether
389 the current position in the file handle is valid, and log/warn/fail (by
393 the current position in the file handle is valid, and log/warn/fail (by
390 raising).
394 raising).
391 """
395 """
392
396
393 _flagserrorclass = error.RevlogError
397 _flagserrorclass = error.RevlogError
394
398
395 def __init__(
399 def __init__(
396 self,
400 self,
397 opener,
401 opener,
398 indexfile,
402 indexfile,
399 datafile=None,
403 datafile=None,
400 checkambig=False,
404 checkambig=False,
401 mmaplargeindex=False,
405 mmaplargeindex=False,
402 censorable=False,
406 censorable=False,
403 upperboundcomp=None,
407 upperboundcomp=None,
404 persistentnodemap=False,
408 persistentnodemap=False,
405 concurrencychecker=None,
409 concurrencychecker=None,
406 ):
410 ):
407 """
411 """
408 create a revlog object
412 create a revlog object
409
413
410 opener is a function that abstracts the file opening operation
414 opener is a function that abstracts the file opening operation
411 and can be used to implement COW semantics or the like.
415 and can be used to implement COW semantics or the like.
412
416
413 """
417 """
414 self.upperboundcomp = upperboundcomp
418 self.upperboundcomp = upperboundcomp
415 self.indexfile = indexfile
419 self.indexfile = indexfile
416 self.datafile = datafile or (indexfile[:-2] + b".d")
420 self.datafile = datafile or (indexfile[:-2] + b".d")
417 self.nodemap_file = None
421 self.nodemap_file = None
418 if persistentnodemap:
422 if persistentnodemap:
419 self.nodemap_file = nodemaputil.get_nodemap_file(
423 self.nodemap_file = nodemaputil.get_nodemap_file(
420 opener, self.indexfile
424 opener, self.indexfile
421 )
425 )
422
426
423 self.opener = opener
427 self.opener = opener
424 # When True, indexfile is opened with checkambig=True at writing, to
428 # When True, indexfile is opened with checkambig=True at writing, to
425 # avoid file stat ambiguity.
429 # avoid file stat ambiguity.
426 self._checkambig = checkambig
430 self._checkambig = checkambig
427 self._mmaplargeindex = mmaplargeindex
431 self._mmaplargeindex = mmaplargeindex
428 self._censorable = censorable
432 self._censorable = censorable
429 # 3-tuple of (node, rev, text) for a raw revision.
433 # 3-tuple of (node, rev, text) for a raw revision.
430 self._revisioncache = None
434 self._revisioncache = None
431 # Maps rev to chain base rev.
435 # Maps rev to chain base rev.
432 self._chainbasecache = util.lrucachedict(100)
436 self._chainbasecache = util.lrucachedict(100)
433 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
437 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
434 self._chunkcache = (0, b'')
438 self._chunkcache = (0, b'')
435 # How much data to read and cache into the raw revlog data cache.
439 # How much data to read and cache into the raw revlog data cache.
436 self._chunkcachesize = 65536
440 self._chunkcachesize = 65536
437 self._maxchainlen = None
441 self._maxchainlen = None
438 self._deltabothparents = True
442 self._deltabothparents = True
439 self.index = None
443 self.index = None
440 self._nodemap_docket = None
444 self._nodemap_docket = None
441 # Mapping of partial identifiers to full nodes.
445 # Mapping of partial identifiers to full nodes.
442 self._pcache = {}
446 self._pcache = {}
443 # Mapping of revision integer to full node.
447 # Mapping of revision integer to full node.
444 self._compengine = b'zlib'
448 self._compengine = b'zlib'
445 self._compengineopts = {}
449 self._compengineopts = {}
446 self._maxdeltachainspan = -1
450 self._maxdeltachainspan = -1
447 self._withsparseread = False
451 self._withsparseread = False
448 self._sparserevlog = False
452 self._sparserevlog = False
449 self._srdensitythreshold = 0.50
453 self._srdensitythreshold = 0.50
450 self._srmingapsize = 262144
454 self._srmingapsize = 262144
451
455
452 # Make copy of flag processors so each revlog instance can support
456 # Make copy of flag processors so each revlog instance can support
453 # custom flags.
457 # custom flags.
454 self._flagprocessors = dict(flagutil.flagprocessors)
458 self._flagprocessors = dict(flagutil.flagprocessors)
455
459
456 # 2-tuple of file handles being used for active writing.
460 # 2-tuple of file handles being used for active writing.
457 self._writinghandles = None
461 self._writinghandles = None
458
462
459 self._loadindex()
463 self._loadindex()
460
464
461 self._concurrencychecker = concurrencychecker
465 self._concurrencychecker = concurrencychecker
462
466
463 def _loadindex(self):
467 def _loadindex(self):
464 mmapindexthreshold = None
468 mmapindexthreshold = None
465 opts = self.opener.options
469 opts = self.opener.options
466
470
467 if b'revlogv2' in opts:
471 if b'revlogv2' in opts:
468 newversionflags = REVLOGV2 | FLAG_INLINE_DATA
472 newversionflags = REVLOGV2 | FLAG_INLINE_DATA
469 elif b'revlogv1' in opts:
473 elif b'revlogv1' in opts:
470 newversionflags = REVLOGV1 | FLAG_INLINE_DATA
474 newversionflags = REVLOGV1 | FLAG_INLINE_DATA
471 if b'generaldelta' in opts:
475 if b'generaldelta' in opts:
472 newversionflags |= FLAG_GENERALDELTA
476 newversionflags |= FLAG_GENERALDELTA
473 elif b'revlogv0' in self.opener.options:
477 elif b'revlogv0' in self.opener.options:
474 newversionflags = REVLOGV0
478 newversionflags = REVLOGV0
475 else:
479 else:
476 newversionflags = REVLOG_DEFAULT_VERSION
480 newversionflags = REVLOG_DEFAULT_VERSION
477
481
478 if b'chunkcachesize' in opts:
482 if b'chunkcachesize' in opts:
479 self._chunkcachesize = opts[b'chunkcachesize']
483 self._chunkcachesize = opts[b'chunkcachesize']
480 if b'maxchainlen' in opts:
484 if b'maxchainlen' in opts:
481 self._maxchainlen = opts[b'maxchainlen']
485 self._maxchainlen = opts[b'maxchainlen']
482 if b'deltabothparents' in opts:
486 if b'deltabothparents' in opts:
483 self._deltabothparents = opts[b'deltabothparents']
487 self._deltabothparents = opts[b'deltabothparents']
484 self._lazydelta = bool(opts.get(b'lazydelta', True))
488 self._lazydelta = bool(opts.get(b'lazydelta', True))
485 self._lazydeltabase = False
489 self._lazydeltabase = False
486 if self._lazydelta:
490 if self._lazydelta:
487 self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
491 self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
488 if b'compengine' in opts:
492 if b'compengine' in opts:
489 self._compengine = opts[b'compengine']
493 self._compengine = opts[b'compengine']
490 if b'zlib.level' in opts:
494 if b'zlib.level' in opts:
491 self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
495 self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
492 if b'zstd.level' in opts:
496 if b'zstd.level' in opts:
493 self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
497 self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
494 if b'maxdeltachainspan' in opts:
498 if b'maxdeltachainspan' in opts:
495 self._maxdeltachainspan = opts[b'maxdeltachainspan']
499 self._maxdeltachainspan = opts[b'maxdeltachainspan']
496 if self._mmaplargeindex and b'mmapindexthreshold' in opts:
500 if self._mmaplargeindex and b'mmapindexthreshold' in opts:
497 mmapindexthreshold = opts[b'mmapindexthreshold']
501 mmapindexthreshold = opts[b'mmapindexthreshold']
498 self.hassidedata = bool(opts.get(b'side-data', False))
502 self.hassidedata = bool(opts.get(b'side-data', False))
499 self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
503 self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
500 withsparseread = bool(opts.get(b'with-sparse-read', False))
504 withsparseread = bool(opts.get(b'with-sparse-read', False))
501 # sparse-revlog forces sparse-read
505 # sparse-revlog forces sparse-read
502 self._withsparseread = self._sparserevlog or withsparseread
506 self._withsparseread = self._sparserevlog or withsparseread
503 if b'sparse-read-density-threshold' in opts:
507 if b'sparse-read-density-threshold' in opts:
504 self._srdensitythreshold = opts[b'sparse-read-density-threshold']
508 self._srdensitythreshold = opts[b'sparse-read-density-threshold']
505 if b'sparse-read-min-gap-size' in opts:
509 if b'sparse-read-min-gap-size' in opts:
506 self._srmingapsize = opts[b'sparse-read-min-gap-size']
510 self._srmingapsize = opts[b'sparse-read-min-gap-size']
507 if opts.get(b'enableellipsis'):
511 if opts.get(b'enableellipsis'):
508 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
512 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
509
513
510 # revlog v0 doesn't have flag processors
514 # revlog v0 doesn't have flag processors
511 for flag, processor in pycompat.iteritems(
515 for flag, processor in pycompat.iteritems(
512 opts.get(b'flagprocessors', {})
516 opts.get(b'flagprocessors', {})
513 ):
517 ):
514 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
518 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
515
519
516 if self._chunkcachesize <= 0:
520 if self._chunkcachesize <= 0:
517 raise error.RevlogError(
521 raise error.RevlogError(
518 _(b'revlog chunk cache size %r is not greater than 0')
522 _(b'revlog chunk cache size %r is not greater than 0')
519 % self._chunkcachesize
523 % self._chunkcachesize
520 )
524 )
521 elif self._chunkcachesize & (self._chunkcachesize - 1):
525 elif self._chunkcachesize & (self._chunkcachesize - 1):
522 raise error.RevlogError(
526 raise error.RevlogError(
523 _(b'revlog chunk cache size %r is not a power of 2')
527 _(b'revlog chunk cache size %r is not a power of 2')
524 % self._chunkcachesize
528 % self._chunkcachesize
525 )
529 )
526
530
527 indexdata = b''
531 indexdata = b''
528 self._initempty = True
532 self._initempty = True
529 try:
533 try:
530 with self._indexfp() as f:
534 with self._indexfp() as f:
531 if (
535 if (
532 mmapindexthreshold is not None
536 mmapindexthreshold is not None
533 and self.opener.fstat(f).st_size >= mmapindexthreshold
537 and self.opener.fstat(f).st_size >= mmapindexthreshold
534 ):
538 ):
535 # TODO: should .close() to release resources without
539 # TODO: should .close() to release resources without
536 # relying on Python GC
540 # relying on Python GC
537 indexdata = util.buffer(util.mmapread(f))
541 indexdata = util.buffer(util.mmapread(f))
538 else:
542 else:
539 indexdata = f.read()
543 indexdata = f.read()
540 if len(indexdata) > 0:
544 if len(indexdata) > 0:
541 versionflags = INDEX_HEADER.unpack(indexdata[:4])[0]
545 versionflags = INDEX_HEADER.unpack(indexdata[:4])[0]
542 self._initempty = False
546 self._initempty = False
543 else:
547 else:
544 versionflags = newversionflags
548 versionflags = newversionflags
545 except IOError as inst:
549 except IOError as inst:
546 if inst.errno != errno.ENOENT:
550 if inst.errno != errno.ENOENT:
547 raise
551 raise
548
552
549 versionflags = newversionflags
553 versionflags = newversionflags
550
554
551 self.version = versionflags
555 self.version = versionflags
552
556
553 flags = versionflags & ~0xFFFF
557 flags = versionflags & ~0xFFFF
554 fmt = versionflags & 0xFFFF
558 fmt = versionflags & 0xFFFF
555
559
556 if fmt == REVLOGV0:
560 if fmt == REVLOGV0:
557 if flags:
561 if flags:
558 raise error.RevlogError(
562 raise error.RevlogError(
559 _(b'unknown flags (%#04x) in version %d revlog %s')
563 _(b'unknown flags (%#04x) in version %d revlog %s')
560 % (flags >> 16, fmt, self.indexfile)
564 % (flags >> 16, fmt, self.indexfile)
561 )
565 )
562
566
563 self._inline = False
567 self._inline = False
564 self._generaldelta = False
568 self._generaldelta = False
565
569
566 elif fmt == REVLOGV1:
570 elif fmt == REVLOGV1:
567 if flags & ~REVLOGV1_FLAGS:
571 if flags & ~REVLOGV1_FLAGS:
568 raise error.RevlogError(
572 raise error.RevlogError(
569 _(b'unknown flags (%#04x) in version %d revlog %s')
573 _(b'unknown flags (%#04x) in version %d revlog %s')
570 % (flags >> 16, fmt, self.indexfile)
574 % (flags >> 16, fmt, self.indexfile)
571 )
575 )
572
576
573 self._inline = versionflags & FLAG_INLINE_DATA
577 self._inline = versionflags & FLAG_INLINE_DATA
574 self._generaldelta = versionflags & FLAG_GENERALDELTA
578 self._generaldelta = versionflags & FLAG_GENERALDELTA
575
579
576 elif fmt == REVLOGV2:
580 elif fmt == REVLOGV2:
577 if flags & ~REVLOGV2_FLAGS:
581 if flags & ~REVLOGV2_FLAGS:
578 raise error.RevlogError(
582 raise error.RevlogError(
579 _(b'unknown flags (%#04x) in version %d revlog %s')
583 _(b'unknown flags (%#04x) in version %d revlog %s')
580 % (flags >> 16, fmt, self.indexfile)
584 % (flags >> 16, fmt, self.indexfile)
581 )
585 )
582
586
583 # There is a bug in the transaction handling when going from an
587 # There is a bug in the transaction handling when going from an
584 # inline revlog to a separate index and data file. Turn it off until
588 # inline revlog to a separate index and data file. Turn it off until
585 # it's fixed, since v2 revlogs sometimes get rewritten on exchange.
589 # it's fixed, since v2 revlogs sometimes get rewritten on exchange.
586 # See issue6485
590 # See issue6485
587 self._inline = False
591 self._inline = False
588 # generaldelta implied by version 2 revlogs.
592 # generaldelta implied by version 2 revlogs.
589 self._generaldelta = True
593 self._generaldelta = True
590
594
591 else:
595 else:
592 raise error.RevlogError(
596 raise error.RevlogError(
593 _(b'unknown version (%d) in revlog %s') % (fmt, self.indexfile)
597 _(b'unknown version (%d) in revlog %s') % (fmt, self.indexfile)
594 )
598 )
595
599
596 self.nodeconstants = sha1nodeconstants
600 self.nodeconstants = sha1nodeconstants
597 self.nullid = self.nodeconstants.nullid
601 self.nullid = self.nodeconstants.nullid
598
602
599 # sparse-revlog can't be on without general-delta (issue6056)
603 # sparse-revlog can't be on without general-delta (issue6056)
600 if not self._generaldelta:
604 if not self._generaldelta:
601 self._sparserevlog = False
605 self._sparserevlog = False
602
606
603 self._storedeltachains = True
607 self._storedeltachains = True
604
608
605 devel_nodemap = (
609 devel_nodemap = (
606 self.nodemap_file
610 self.nodemap_file
607 and opts.get(b'devel-force-nodemap', False)
611 and opts.get(b'devel-force-nodemap', False)
608 and parse_index_v1_nodemap is not None
612 and parse_index_v1_nodemap is not None
609 )
613 )
610
614
611 use_rust_index = False
615 use_rust_index = False
612 if rustrevlog is not None:
616 if rustrevlog is not None:
613 if self.nodemap_file is not None:
617 if self.nodemap_file is not None:
614 use_rust_index = True
618 use_rust_index = True
615 else:
619 else:
616 use_rust_index = self.opener.options.get(b'rust.index')
620 use_rust_index = self.opener.options.get(b'rust.index')
617
621
618 self._parse_index = parse_index_v1
622 self._parse_index = parse_index_v1
619 if self.version == REVLOGV0:
623 if self.version == REVLOGV0:
620 self._parse_index = parse_index_v0
624 self._parse_index = parse_index_v0
621 elif fmt == REVLOGV2:
625 elif fmt == REVLOGV2:
622 self._parse_index = parse_index_v2
626 self._parse_index = parse_index_v2
623 elif devel_nodemap:
627 elif devel_nodemap:
624 self._parse_index = parse_index_v1_nodemap
628 self._parse_index = parse_index_v1_nodemap
625 elif use_rust_index:
629 elif use_rust_index:
626 self._parse_index = parse_index_v1_mixed
630 self._parse_index = parse_index_v1_mixed
627 try:
631 try:
628 d = self._parse_index(indexdata, self._inline)
632 d = self._parse_index(indexdata, self._inline)
629 index, _chunkcache = d
633 index, _chunkcache = d
630 use_nodemap = (
634 use_nodemap = (
631 not self._inline
635 not self._inline
632 and self.nodemap_file is not None
636 and self.nodemap_file is not None
633 and util.safehasattr(index, 'update_nodemap_data')
637 and util.safehasattr(index, 'update_nodemap_data')
634 )
638 )
635 if use_nodemap:
639 if use_nodemap:
636 nodemap_data = nodemaputil.persisted_data(self)
640 nodemap_data = nodemaputil.persisted_data(self)
637 if nodemap_data is not None:
641 if nodemap_data is not None:
638 docket = nodemap_data[0]
642 docket = nodemap_data[0]
639 if (
643 if (
640 len(d[0]) > docket.tip_rev
644 len(d[0]) > docket.tip_rev
641 and d[0][docket.tip_rev][7] == docket.tip_node
645 and d[0][docket.tip_rev][7] == docket.tip_node
642 ):
646 ):
643 # no changelog tampering
647 # no changelog tampering
644 self._nodemap_docket = docket
648 self._nodemap_docket = docket
645 index.update_nodemap_data(*nodemap_data)
649 index.update_nodemap_data(*nodemap_data)
646 except (ValueError, IndexError):
650 except (ValueError, IndexError):
647 raise error.RevlogError(
651 raise error.RevlogError(
648 _(b"index %s is corrupted") % self.indexfile
652 _(b"index %s is corrupted") % self.indexfile
649 )
653 )
650 self.index, self._chunkcache = d
654 self.index, self._chunkcache = d
651 if not self._chunkcache:
655 if not self._chunkcache:
652 self._chunkclear()
656 self._chunkclear()
653 # revnum -> (chain-length, sum-delta-length)
657 # revnum -> (chain-length, sum-delta-length)
654 self._chaininfocache = util.lrucachedict(500)
658 self._chaininfocache = util.lrucachedict(500)
655 # revlog header -> revlog compressor
659 # revlog header -> revlog compressor
656 self._decompressors = {}
660 self._decompressors = {}
657
661
658 @util.propertycache
662 @util.propertycache
659 def _compressor(self):
663 def _compressor(self):
660 engine = util.compengines[self._compengine]
664 engine = util.compengines[self._compengine]
661 return engine.revlogcompressor(self._compengineopts)
665 return engine.revlogcompressor(self._compengineopts)
662
666
663 def _indexfp(self, mode=b'r'):
667 def _indexfp(self, mode=b'r'):
664 """file object for the revlog's index file"""
668 """file object for the revlog's index file"""
665 args = {'mode': mode}
669 args = {'mode': mode}
666 if mode != b'r':
670 if mode != b'r':
667 args['checkambig'] = self._checkambig
671 args['checkambig'] = self._checkambig
668 if mode == b'w':
672 if mode == b'w':
669 args['atomictemp'] = True
673 args['atomictemp'] = True
670 return self.opener(self.indexfile, **args)
674 return self.opener(self.indexfile, **args)
671
675
672 def _datafp(self, mode=b'r'):
676 def _datafp(self, mode=b'r'):
673 """file object for the revlog's data file"""
677 """file object for the revlog's data file"""
674 return self.opener(self.datafile, mode=mode)
678 return self.opener(self.datafile, mode=mode)
675
679
676 @contextlib.contextmanager
680 @contextlib.contextmanager
677 def _datareadfp(self, existingfp=None):
681 def _datareadfp(self, existingfp=None):
678 """file object suitable to read data"""
682 """file object suitable to read data"""
679 # Use explicit file handle, if given.
683 # Use explicit file handle, if given.
680 if existingfp is not None:
684 if existingfp is not None:
681 yield existingfp
685 yield existingfp
682
686
683 # Use a file handle being actively used for writes, if available.
687 # Use a file handle being actively used for writes, if available.
684 # There is some danger to doing this because reads will seek the
688 # There is some danger to doing this because reads will seek the
685 # file. However, _writeentry() performs a SEEK_END before all writes,
689 # file. However, _writeentry() performs a SEEK_END before all writes,
686 # so we should be safe.
690 # so we should be safe.
687 elif self._writinghandles:
691 elif self._writinghandles:
688 if self._inline:
692 if self._inline:
689 yield self._writinghandles[0]
693 yield self._writinghandles[0]
690 else:
694 else:
691 yield self._writinghandles[1]
695 yield self._writinghandles[1]
692
696
693 # Otherwise open a new file handle.
697 # Otherwise open a new file handle.
694 else:
698 else:
695 if self._inline:
699 if self._inline:
696 func = self._indexfp
700 func = self._indexfp
697 else:
701 else:
698 func = self._datafp
702 func = self._datafp
699 with func() as fp:
703 with func() as fp:
700 yield fp
704 yield fp
701
705
702 def tiprev(self):
706 def tiprev(self):
703 return len(self.index) - 1
707 return len(self.index) - 1
704
708
705 def tip(self):
709 def tip(self):
706 return self.node(self.tiprev())
710 return self.node(self.tiprev())
707
711
708 def __contains__(self, rev):
712 def __contains__(self, rev):
709 return 0 <= rev < len(self)
713 return 0 <= rev < len(self)
710
714
711 def __len__(self):
715 def __len__(self):
712 return len(self.index)
716 return len(self.index)
713
717
714 def __iter__(self):
718 def __iter__(self):
715 return iter(pycompat.xrange(len(self)))
719 return iter(pycompat.xrange(len(self)))
716
720
717 def revs(self, start=0, stop=None):
721 def revs(self, start=0, stop=None):
718 """iterate over all rev in this revlog (from start to stop)"""
722 """iterate over all rev in this revlog (from start to stop)"""
719 return storageutil.iterrevs(len(self), start=start, stop=stop)
723 return storageutil.iterrevs(len(self), start=start, stop=stop)
720
724
721 @property
725 @property
722 def nodemap(self):
726 def nodemap(self):
723 msg = (
727 msg = (
724 b"revlog.nodemap is deprecated, "
728 b"revlog.nodemap is deprecated, "
725 b"use revlog.index.[has_node|rev|get_rev]"
729 b"use revlog.index.[has_node|rev|get_rev]"
726 )
730 )
727 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
731 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
728 return self.index.nodemap
732 return self.index.nodemap
729
733
730 @property
734 @property
731 def _nodecache(self):
735 def _nodecache(self):
732 msg = b"revlog._nodecache is deprecated, use revlog.index.nodemap"
736 msg = b"revlog._nodecache is deprecated, use revlog.index.nodemap"
733 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
737 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
734 return self.index.nodemap
738 return self.index.nodemap
735
739
736 def hasnode(self, node):
740 def hasnode(self, node):
737 try:
741 try:
738 self.rev(node)
742 self.rev(node)
739 return True
743 return True
740 except KeyError:
744 except KeyError:
741 return False
745 return False
742
746
743 def candelta(self, baserev, rev):
747 def candelta(self, baserev, rev):
744 """whether two revisions (baserev, rev) can be delta-ed or not"""
748 """whether two revisions (baserev, rev) can be delta-ed or not"""
745 # Disable delta if either rev requires a content-changing flag
749 # Disable delta if either rev requires a content-changing flag
746 # processor (ex. LFS). This is because such flag processor can alter
750 # processor (ex. LFS). This is because such flag processor can alter
747 # the rawtext content that the delta will be based on, and two clients
751 # the rawtext content that the delta will be based on, and two clients
748 # could have a same revlog node with different flags (i.e. different
752 # could have a same revlog node with different flags (i.e. different
749 # rawtext contents) and the delta could be incompatible.
753 # rawtext contents) and the delta could be incompatible.
750 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
754 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
751 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
755 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
752 ):
756 ):
753 return False
757 return False
754 return True
758 return True
755
759
756 def update_caches(self, transaction):
760 def update_caches(self, transaction):
757 if self.nodemap_file is not None:
761 if self.nodemap_file is not None:
758 if transaction is None:
762 if transaction is None:
759 nodemaputil.update_persistent_nodemap(self)
763 nodemaputil.update_persistent_nodemap(self)
760 else:
764 else:
761 nodemaputil.setup_persistent_nodemap(transaction, self)
765 nodemaputil.setup_persistent_nodemap(transaction, self)
762
766
763 def clearcaches(self):
767 def clearcaches(self):
764 self._revisioncache = None
768 self._revisioncache = None
765 self._chainbasecache.clear()
769 self._chainbasecache.clear()
766 self._chunkcache = (0, b'')
770 self._chunkcache = (0, b'')
767 self._pcache = {}
771 self._pcache = {}
768 self._nodemap_docket = None
772 self._nodemap_docket = None
769 self.index.clearcaches()
773 self.index.clearcaches()
770 # The python code is the one responsible for validating the docket, we
774 # The python code is the one responsible for validating the docket, we
771 # end up having to refresh it here.
775 # end up having to refresh it here.
772 use_nodemap = (
776 use_nodemap = (
773 not self._inline
777 not self._inline
774 and self.nodemap_file is not None
778 and self.nodemap_file is not None
775 and util.safehasattr(self.index, 'update_nodemap_data')
779 and util.safehasattr(self.index, 'update_nodemap_data')
776 )
780 )
777 if use_nodemap:
781 if use_nodemap:
778 nodemap_data = nodemaputil.persisted_data(self)
782 nodemap_data = nodemaputil.persisted_data(self)
779 if nodemap_data is not None:
783 if nodemap_data is not None:
780 self._nodemap_docket = nodemap_data[0]
784 self._nodemap_docket = nodemap_data[0]
781 self.index.update_nodemap_data(*nodemap_data)
785 self.index.update_nodemap_data(*nodemap_data)
782
786
783 def rev(self, node):
787 def rev(self, node):
784 try:
788 try:
785 return self.index.rev(node)
789 return self.index.rev(node)
786 except TypeError:
790 except TypeError:
787 raise
791 raise
788 except error.RevlogError:
792 except error.RevlogError:
789 # parsers.c radix tree lookup failed
793 # parsers.c radix tree lookup failed
790 if (
794 if (
791 node == self.nodeconstants.wdirid
795 node == self.nodeconstants.wdirid
792 or node in self.nodeconstants.wdirfilenodeids
796 or node in self.nodeconstants.wdirfilenodeids
793 ):
797 ):
794 raise error.WdirUnsupported
798 raise error.WdirUnsupported
795 raise error.LookupError(node, self.indexfile, _(b'no node'))
799 raise error.LookupError(node, self.indexfile, _(b'no node'))
796
800
797 # Accessors for index entries.
801 # Accessors for index entries.
798
802
799 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
803 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
800 # are flags.
804 # are flags.
801 def start(self, rev):
805 def start(self, rev):
802 return int(self.index[rev][0] >> 16)
806 return int(self.index[rev][0] >> 16)
803
807
804 def flags(self, rev):
808 def flags(self, rev):
805 return self.index[rev][0] & 0xFFFF
809 return self.index[rev][0] & 0xFFFF
806
810
807 def length(self, rev):
811 def length(self, rev):
808 return self.index[rev][1]
812 return self.index[rev][1]
809
813
810 def sidedata_length(self, rev):
814 def sidedata_length(self, rev):
811 if self.version & 0xFFFF != REVLOGV2:
815 if self.version & 0xFFFF != REVLOGV2:
812 return 0
816 return 0
813 return self.index[rev][9]
817 return self.index[rev][9]
814
818
815 def rawsize(self, rev):
819 def rawsize(self, rev):
816 """return the length of the uncompressed text for a given revision"""
820 """return the length of the uncompressed text for a given revision"""
817 l = self.index[rev][2]
821 l = self.index[rev][2]
818 if l >= 0:
822 if l >= 0:
819 return l
823 return l
820
824
821 t = self.rawdata(rev)
825 t = self.rawdata(rev)
822 return len(t)
826 return len(t)
823
827
824 def size(self, rev):
828 def size(self, rev):
825 """length of non-raw text (processed by a "read" flag processor)"""
829 """length of non-raw text (processed by a "read" flag processor)"""
826 # fast path: if no "read" flag processor could change the content,
830 # fast path: if no "read" flag processor could change the content,
827 # size is rawsize. note: ELLIPSIS is known to not change the content.
831 # size is rawsize. note: ELLIPSIS is known to not change the content.
828 flags = self.flags(rev)
832 flags = self.flags(rev)
829 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
833 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
830 return self.rawsize(rev)
834 return self.rawsize(rev)
831
835
832 return len(self.revision(rev, raw=False))
836 return len(self.revision(rev, raw=False))
833
837
834 def chainbase(self, rev):
838 def chainbase(self, rev):
835 base = self._chainbasecache.get(rev)
839 base = self._chainbasecache.get(rev)
836 if base is not None:
840 if base is not None:
837 return base
841 return base
838
842
839 index = self.index
843 index = self.index
840 iterrev = rev
844 iterrev = rev
841 base = index[iterrev][3]
845 base = index[iterrev][3]
842 while base != iterrev:
846 while base != iterrev:
843 iterrev = base
847 iterrev = base
844 base = index[iterrev][3]
848 base = index[iterrev][3]
845
849
846 self._chainbasecache[rev] = base
850 self._chainbasecache[rev] = base
847 return base
851 return base
848
852
849 def linkrev(self, rev):
853 def linkrev(self, rev):
850 return self.index[rev][4]
854 return self.index[rev][4]
851
855
852 def parentrevs(self, rev):
856 def parentrevs(self, rev):
853 try:
857 try:
854 entry = self.index[rev]
858 entry = self.index[rev]
855 except IndexError:
859 except IndexError:
856 if rev == wdirrev:
860 if rev == wdirrev:
857 raise error.WdirUnsupported
861 raise error.WdirUnsupported
858 raise
862 raise
859 if entry[5] == nullrev:
863 if entry[5] == nullrev:
860 return entry[6], entry[5]
864 return entry[6], entry[5]
861 else:
865 else:
862 return entry[5], entry[6]
866 return entry[5], entry[6]
863
867
864 # fast parentrevs(rev) where rev isn't filtered
868 # fast parentrevs(rev) where rev isn't filtered
865 _uncheckedparentrevs = parentrevs
869 _uncheckedparentrevs = parentrevs
866
870
867 def node(self, rev):
871 def node(self, rev):
868 try:
872 try:
869 return self.index[rev][7]
873 return self.index[rev][7]
870 except IndexError:
874 except IndexError:
871 if rev == wdirrev:
875 if rev == wdirrev:
872 raise error.WdirUnsupported
876 raise error.WdirUnsupported
873 raise
877 raise
874
878
875 # Derived from index values.
879 # Derived from index values.
876
880
877 def end(self, rev):
881 def end(self, rev):
878 return self.start(rev) + self.length(rev)
882 return self.start(rev) + self.length(rev)
879
883
880 def parents(self, node):
884 def parents(self, node):
881 i = self.index
885 i = self.index
882 d = i[self.rev(node)]
886 d = i[self.rev(node)]
883 # inline node() to avoid function call overhead
887 # inline node() to avoid function call overhead
884 if d[5] == self.nullid:
888 if d[5] == self.nullid:
885 return i[d[6]][7], i[d[5]][7]
889 return i[d[6]][7], i[d[5]][7]
886 else:
890 else:
887 return i[d[5]][7], i[d[6]][7]
891 return i[d[5]][7], i[d[6]][7]
888
892
889 def chainlen(self, rev):
893 def chainlen(self, rev):
890 return self._chaininfo(rev)[0]
894 return self._chaininfo(rev)[0]
891
895
892 def _chaininfo(self, rev):
896 def _chaininfo(self, rev):
893 chaininfocache = self._chaininfocache
897 chaininfocache = self._chaininfocache
894 if rev in chaininfocache:
898 if rev in chaininfocache:
895 return chaininfocache[rev]
899 return chaininfocache[rev]
896 index = self.index
900 index = self.index
897 generaldelta = self._generaldelta
901 generaldelta = self._generaldelta
898 iterrev = rev
902 iterrev = rev
899 e = index[iterrev]
903 e = index[iterrev]
900 clen = 0
904 clen = 0
901 compresseddeltalen = 0
905 compresseddeltalen = 0
902 while iterrev != e[3]:
906 while iterrev != e[3]:
903 clen += 1
907 clen += 1
904 compresseddeltalen += e[1]
908 compresseddeltalen += e[1]
905 if generaldelta:
909 if generaldelta:
906 iterrev = e[3]
910 iterrev = e[3]
907 else:
911 else:
908 iterrev -= 1
912 iterrev -= 1
909 if iterrev in chaininfocache:
913 if iterrev in chaininfocache:
910 t = chaininfocache[iterrev]
914 t = chaininfocache[iterrev]
911 clen += t[0]
915 clen += t[0]
912 compresseddeltalen += t[1]
916 compresseddeltalen += t[1]
913 break
917 break
914 e = index[iterrev]
918 e = index[iterrev]
915 else:
919 else:
916 # Add text length of base since decompressing that also takes
920 # Add text length of base since decompressing that also takes
917 # work. For cache hits the length is already included.
921 # work. For cache hits the length is already included.
918 compresseddeltalen += e[1]
922 compresseddeltalen += e[1]
919 r = (clen, compresseddeltalen)
923 r = (clen, compresseddeltalen)
920 chaininfocache[rev] = r
924 chaininfocache[rev] = r
921 return r
925 return r
922
926
923 def _deltachain(self, rev, stoprev=None):
927 def _deltachain(self, rev, stoprev=None):
924 """Obtain the delta chain for a revision.
928 """Obtain the delta chain for a revision.
925
929
926 ``stoprev`` specifies a revision to stop at. If not specified, we
930 ``stoprev`` specifies a revision to stop at. If not specified, we
927 stop at the base of the chain.
931 stop at the base of the chain.
928
932
929 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
933 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
930 revs in ascending order and ``stopped`` is a bool indicating whether
934 revs in ascending order and ``stopped`` is a bool indicating whether
931 ``stoprev`` was hit.
935 ``stoprev`` was hit.
932 """
936 """
933 # Try C implementation.
937 # Try C implementation.
934 try:
938 try:
935 return self.index.deltachain(rev, stoprev, self._generaldelta)
939 return self.index.deltachain(rev, stoprev, self._generaldelta)
936 except AttributeError:
940 except AttributeError:
937 pass
941 pass
938
942
939 chain = []
943 chain = []
940
944
941 # Alias to prevent attribute lookup in tight loop.
945 # Alias to prevent attribute lookup in tight loop.
942 index = self.index
946 index = self.index
943 generaldelta = self._generaldelta
947 generaldelta = self._generaldelta
944
948
945 iterrev = rev
949 iterrev = rev
946 e = index[iterrev]
950 e = index[iterrev]
947 while iterrev != e[3] and iterrev != stoprev:
951 while iterrev != e[3] and iterrev != stoprev:
948 chain.append(iterrev)
952 chain.append(iterrev)
949 if generaldelta:
953 if generaldelta:
950 iterrev = e[3]
954 iterrev = e[3]
951 else:
955 else:
952 iterrev -= 1
956 iterrev -= 1
953 e = index[iterrev]
957 e = index[iterrev]
954
958
955 if iterrev == stoprev:
959 if iterrev == stoprev:
956 stopped = True
960 stopped = True
957 else:
961 else:
958 chain.append(iterrev)
962 chain.append(iterrev)
959 stopped = False
963 stopped = False
960
964
961 chain.reverse()
965 chain.reverse()
962 return chain, stopped
966 return chain, stopped
963
967
964 def ancestors(self, revs, stoprev=0, inclusive=False):
968 def ancestors(self, revs, stoprev=0, inclusive=False):
965 """Generate the ancestors of 'revs' in reverse revision order.
969 """Generate the ancestors of 'revs' in reverse revision order.
966 Does not generate revs lower than stoprev.
970 Does not generate revs lower than stoprev.
967
971
968 See the documentation for ancestor.lazyancestors for more details."""
972 See the documentation for ancestor.lazyancestors for more details."""
969
973
970 # first, make sure start revisions aren't filtered
974 # first, make sure start revisions aren't filtered
971 revs = list(revs)
975 revs = list(revs)
972 checkrev = self.node
976 checkrev = self.node
973 for r in revs:
977 for r in revs:
974 checkrev(r)
978 checkrev(r)
975 # and we're sure ancestors aren't filtered as well
979 # and we're sure ancestors aren't filtered as well
976
980
977 if rustancestor is not None:
981 if rustancestor is not None:
978 lazyancestors = rustancestor.LazyAncestors
982 lazyancestors = rustancestor.LazyAncestors
979 arg = self.index
983 arg = self.index
980 else:
984 else:
981 lazyancestors = ancestor.lazyancestors
985 lazyancestors = ancestor.lazyancestors
982 arg = self._uncheckedparentrevs
986 arg = self._uncheckedparentrevs
983 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
987 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
984
988
985 def descendants(self, revs):
989 def descendants(self, revs):
986 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
990 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
987
991
988 def findcommonmissing(self, common=None, heads=None):
992 def findcommonmissing(self, common=None, heads=None):
989 """Return a tuple of the ancestors of common and the ancestors of heads
993 """Return a tuple of the ancestors of common and the ancestors of heads
990 that are not ancestors of common. In revset terminology, we return the
994 that are not ancestors of common. In revset terminology, we return the
991 tuple:
995 tuple:
992
996
993 ::common, (::heads) - (::common)
997 ::common, (::heads) - (::common)
994
998
995 The list is sorted by revision number, meaning it is
999 The list is sorted by revision number, meaning it is
996 topologically sorted.
1000 topologically sorted.
997
1001
998 'heads' and 'common' are both lists of node IDs. If heads is
1002 'heads' and 'common' are both lists of node IDs. If heads is
999 not supplied, uses all of the revlog's heads. If common is not
1003 not supplied, uses all of the revlog's heads. If common is not
1000 supplied, uses nullid."""
1004 supplied, uses nullid."""
1001 if common is None:
1005 if common is None:
1002 common = [self.nullid]
1006 common = [self.nullid]
1003 if heads is None:
1007 if heads is None:
1004 heads = self.heads()
1008 heads = self.heads()
1005
1009
1006 common = [self.rev(n) for n in common]
1010 common = [self.rev(n) for n in common]
1007 heads = [self.rev(n) for n in heads]
1011 heads = [self.rev(n) for n in heads]
1008
1012
1009 # we want the ancestors, but inclusive
1013 # we want the ancestors, but inclusive
1010 class lazyset(object):
1014 class lazyset(object):
1011 def __init__(self, lazyvalues):
1015 def __init__(self, lazyvalues):
1012 self.addedvalues = set()
1016 self.addedvalues = set()
1013 self.lazyvalues = lazyvalues
1017 self.lazyvalues = lazyvalues
1014
1018
1015 def __contains__(self, value):
1019 def __contains__(self, value):
1016 return value in self.addedvalues or value in self.lazyvalues
1020 return value in self.addedvalues or value in self.lazyvalues
1017
1021
1018 def __iter__(self):
1022 def __iter__(self):
1019 added = self.addedvalues
1023 added = self.addedvalues
1020 for r in added:
1024 for r in added:
1021 yield r
1025 yield r
1022 for r in self.lazyvalues:
1026 for r in self.lazyvalues:
1023 if not r in added:
1027 if not r in added:
1024 yield r
1028 yield r
1025
1029
1026 def add(self, value):
1030 def add(self, value):
1027 self.addedvalues.add(value)
1031 self.addedvalues.add(value)
1028
1032
1029 def update(self, values):
1033 def update(self, values):
1030 self.addedvalues.update(values)
1034 self.addedvalues.update(values)
1031
1035
1032 has = lazyset(self.ancestors(common))
1036 has = lazyset(self.ancestors(common))
1033 has.add(nullrev)
1037 has.add(nullrev)
1034 has.update(common)
1038 has.update(common)
1035
1039
1036 # take all ancestors from heads that aren't in has
1040 # take all ancestors from heads that aren't in has
1037 missing = set()
1041 missing = set()
1038 visit = collections.deque(r for r in heads if r not in has)
1042 visit = collections.deque(r for r in heads if r not in has)
1039 while visit:
1043 while visit:
1040 r = visit.popleft()
1044 r = visit.popleft()
1041 if r in missing:
1045 if r in missing:
1042 continue
1046 continue
1043 else:
1047 else:
1044 missing.add(r)
1048 missing.add(r)
1045 for p in self.parentrevs(r):
1049 for p in self.parentrevs(r):
1046 if p not in has:
1050 if p not in has:
1047 visit.append(p)
1051 visit.append(p)
1048 missing = list(missing)
1052 missing = list(missing)
1049 missing.sort()
1053 missing.sort()
1050 return has, [self.node(miss) for miss in missing]
1054 return has, [self.node(miss) for miss in missing]
1051
1055
1052 def incrementalmissingrevs(self, common=None):
1056 def incrementalmissingrevs(self, common=None):
1053 """Return an object that can be used to incrementally compute the
1057 """Return an object that can be used to incrementally compute the
1054 revision numbers of the ancestors of arbitrary sets that are not
1058 revision numbers of the ancestors of arbitrary sets that are not
1055 ancestors of common. This is an ancestor.incrementalmissingancestors
1059 ancestors of common. This is an ancestor.incrementalmissingancestors
1056 object.
1060 object.
1057
1061
1058 'common' is a list of revision numbers. If common is not supplied, uses
1062 'common' is a list of revision numbers. If common is not supplied, uses
1059 nullrev.
1063 nullrev.
1060 """
1064 """
1061 if common is None:
1065 if common is None:
1062 common = [nullrev]
1066 common = [nullrev]
1063
1067
1064 if rustancestor is not None:
1068 if rustancestor is not None:
1065 return rustancestor.MissingAncestors(self.index, common)
1069 return rustancestor.MissingAncestors(self.index, common)
1066 return ancestor.incrementalmissingancestors(self.parentrevs, common)
1070 return ancestor.incrementalmissingancestors(self.parentrevs, common)
1067
1071
1068 def findmissingrevs(self, common=None, heads=None):
1072 def findmissingrevs(self, common=None, heads=None):
1069 """Return the revision numbers of the ancestors of heads that
1073 """Return the revision numbers of the ancestors of heads that
1070 are not ancestors of common.
1074 are not ancestors of common.
1071
1075
1072 More specifically, return a list of revision numbers corresponding to
1076 More specifically, return a list of revision numbers corresponding to
1073 nodes N such that every N satisfies the following constraints:
1077 nodes N such that every N satisfies the following constraints:
1074
1078
1075 1. N is an ancestor of some node in 'heads'
1079 1. N is an ancestor of some node in 'heads'
1076 2. N is not an ancestor of any node in 'common'
1080 2. N is not an ancestor of any node in 'common'
1077
1081
1078 The list is sorted by revision number, meaning it is
1082 The list is sorted by revision number, meaning it is
1079 topologically sorted.
1083 topologically sorted.
1080
1084
1081 'heads' and 'common' are both lists of revision numbers. If heads is
1085 'heads' and 'common' are both lists of revision numbers. If heads is
1082 not supplied, uses all of the revlog's heads. If common is not
1086 not supplied, uses all of the revlog's heads. If common is not
1083 supplied, uses nullid."""
1087 supplied, uses nullid."""
1084 if common is None:
1088 if common is None:
1085 common = [nullrev]
1089 common = [nullrev]
1086 if heads is None:
1090 if heads is None:
1087 heads = self.headrevs()
1091 heads = self.headrevs()
1088
1092
1089 inc = self.incrementalmissingrevs(common=common)
1093 inc = self.incrementalmissingrevs(common=common)
1090 return inc.missingancestors(heads)
1094 return inc.missingancestors(heads)
1091
1095
1092 def findmissing(self, common=None, heads=None):
1096 def findmissing(self, common=None, heads=None):
1093 """Return the ancestors of heads that are not ancestors of common.
1097 """Return the ancestors of heads that are not ancestors of common.
1094
1098
1095 More specifically, return a list of nodes N such that every N
1099 More specifically, return a list of nodes N such that every N
1096 satisfies the following constraints:
1100 satisfies the following constraints:
1097
1101
1098 1. N is an ancestor of some node in 'heads'
1102 1. N is an ancestor of some node in 'heads'
1099 2. N is not an ancestor of any node in 'common'
1103 2. N is not an ancestor of any node in 'common'
1100
1104
1101 The list is sorted by revision number, meaning it is
1105 The list is sorted by revision number, meaning it is
1102 topologically sorted.
1106 topologically sorted.
1103
1107
1104 'heads' and 'common' are both lists of node IDs. If heads is
1108 'heads' and 'common' are both lists of node IDs. If heads is
1105 not supplied, uses all of the revlog's heads. If common is not
1109 not supplied, uses all of the revlog's heads. If common is not
1106 supplied, uses nullid."""
1110 supplied, uses nullid."""
1107 if common is None:
1111 if common is None:
1108 common = [self.nullid]
1112 common = [self.nullid]
1109 if heads is None:
1113 if heads is None:
1110 heads = self.heads()
1114 heads = self.heads()
1111
1115
1112 common = [self.rev(n) for n in common]
1116 common = [self.rev(n) for n in common]
1113 heads = [self.rev(n) for n in heads]
1117 heads = [self.rev(n) for n in heads]
1114
1118
1115 inc = self.incrementalmissingrevs(common=common)
1119 inc = self.incrementalmissingrevs(common=common)
1116 return [self.node(r) for r in inc.missingancestors(heads)]
1120 return [self.node(r) for r in inc.missingancestors(heads)]
1117
1121
1118 def nodesbetween(self, roots=None, heads=None):
1122 def nodesbetween(self, roots=None, heads=None):
1119 """Return a topological path from 'roots' to 'heads'.
1123 """Return a topological path from 'roots' to 'heads'.
1120
1124
1121 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1125 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1122 topologically sorted list of all nodes N that satisfy both of
1126 topologically sorted list of all nodes N that satisfy both of
1123 these constraints:
1127 these constraints:
1124
1128
1125 1. N is a descendant of some node in 'roots'
1129 1. N is a descendant of some node in 'roots'
1126 2. N is an ancestor of some node in 'heads'
1130 2. N is an ancestor of some node in 'heads'
1127
1131
1128 Every node is considered to be both a descendant and an ancestor
1132 Every node is considered to be both a descendant and an ancestor
1129 of itself, so every reachable node in 'roots' and 'heads' will be
1133 of itself, so every reachable node in 'roots' and 'heads' will be
1130 included in 'nodes'.
1134 included in 'nodes'.
1131
1135
1132 'outroots' is the list of reachable nodes in 'roots', i.e., the
1136 'outroots' is the list of reachable nodes in 'roots', i.e., the
1133 subset of 'roots' that is returned in 'nodes'. Likewise,
1137 subset of 'roots' that is returned in 'nodes'. Likewise,
1134 'outheads' is the subset of 'heads' that is also in 'nodes'.
1138 'outheads' is the subset of 'heads' that is also in 'nodes'.
1135
1139
1136 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1140 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1137 unspecified, uses nullid as the only root. If 'heads' is
1141 unspecified, uses nullid as the only root. If 'heads' is
1138 unspecified, uses list of all of the revlog's heads."""
1142 unspecified, uses list of all of the revlog's heads."""
1139 nonodes = ([], [], [])
1143 nonodes = ([], [], [])
1140 if roots is not None:
1144 if roots is not None:
1141 roots = list(roots)
1145 roots = list(roots)
1142 if not roots:
1146 if not roots:
1143 return nonodes
1147 return nonodes
1144 lowestrev = min([self.rev(n) for n in roots])
1148 lowestrev = min([self.rev(n) for n in roots])
1145 else:
1149 else:
1146 roots = [self.nullid] # Everybody's a descendant of nullid
1150 roots = [self.nullid] # Everybody's a descendant of nullid
1147 lowestrev = nullrev
1151 lowestrev = nullrev
1148 if (lowestrev == nullrev) and (heads is None):
1152 if (lowestrev == nullrev) and (heads is None):
1149 # We want _all_ the nodes!
1153 # We want _all_ the nodes!
1150 return (
1154 return (
1151 [self.node(r) for r in self],
1155 [self.node(r) for r in self],
1152 [self.nullid],
1156 [self.nullid],
1153 list(self.heads()),
1157 list(self.heads()),
1154 )
1158 )
1155 if heads is None:
1159 if heads is None:
1156 # All nodes are ancestors, so the latest ancestor is the last
1160 # All nodes are ancestors, so the latest ancestor is the last
1157 # node.
1161 # node.
1158 highestrev = len(self) - 1
1162 highestrev = len(self) - 1
1159 # Set ancestors to None to signal that every node is an ancestor.
1163 # Set ancestors to None to signal that every node is an ancestor.
1160 ancestors = None
1164 ancestors = None
1161 # Set heads to an empty dictionary for later discovery of heads
1165 # Set heads to an empty dictionary for later discovery of heads
1162 heads = {}
1166 heads = {}
1163 else:
1167 else:
1164 heads = list(heads)
1168 heads = list(heads)
1165 if not heads:
1169 if not heads:
1166 return nonodes
1170 return nonodes
1167 ancestors = set()
1171 ancestors = set()
1168 # Turn heads into a dictionary so we can remove 'fake' heads.
1172 # Turn heads into a dictionary so we can remove 'fake' heads.
1169 # Also, later we will be using it to filter out the heads we can't
1173 # Also, later we will be using it to filter out the heads we can't
1170 # find from roots.
1174 # find from roots.
1171 heads = dict.fromkeys(heads, False)
1175 heads = dict.fromkeys(heads, False)
1172 # Start at the top and keep marking parents until we're done.
1176 # Start at the top and keep marking parents until we're done.
1173 nodestotag = set(heads)
1177 nodestotag = set(heads)
1174 # Remember where the top was so we can use it as a limit later.
1178 # Remember where the top was so we can use it as a limit later.
1175 highestrev = max([self.rev(n) for n in nodestotag])
1179 highestrev = max([self.rev(n) for n in nodestotag])
1176 while nodestotag:
1180 while nodestotag:
1177 # grab a node to tag
1181 # grab a node to tag
1178 n = nodestotag.pop()
1182 n = nodestotag.pop()
1179 # Never tag nullid
1183 # Never tag nullid
1180 if n == self.nullid:
1184 if n == self.nullid:
1181 continue
1185 continue
1182 # A node's revision number represents its place in a
1186 # A node's revision number represents its place in a
1183 # topologically sorted list of nodes.
1187 # topologically sorted list of nodes.
1184 r = self.rev(n)
1188 r = self.rev(n)
1185 if r >= lowestrev:
1189 if r >= lowestrev:
1186 if n not in ancestors:
1190 if n not in ancestors:
1187 # If we are possibly a descendant of one of the roots
1191 # If we are possibly a descendant of one of the roots
1188 # and we haven't already been marked as an ancestor
1192 # and we haven't already been marked as an ancestor
1189 ancestors.add(n) # Mark as ancestor
1193 ancestors.add(n) # Mark as ancestor
1190 # Add non-nullid parents to list of nodes to tag.
1194 # Add non-nullid parents to list of nodes to tag.
1191 nodestotag.update(
1195 nodestotag.update(
1192 [p for p in self.parents(n) if p != self.nullid]
1196 [p for p in self.parents(n) if p != self.nullid]
1193 )
1197 )
1194 elif n in heads: # We've seen it before, is it a fake head?
1198 elif n in heads: # We've seen it before, is it a fake head?
1195 # So it is, real heads should not be the ancestors of
1199 # So it is, real heads should not be the ancestors of
1196 # any other heads.
1200 # any other heads.
1197 heads.pop(n)
1201 heads.pop(n)
1198 if not ancestors:
1202 if not ancestors:
1199 return nonodes
1203 return nonodes
1200 # Now that we have our set of ancestors, we want to remove any
1204 # Now that we have our set of ancestors, we want to remove any
1201 # roots that are not ancestors.
1205 # roots that are not ancestors.
1202
1206
1203 # If one of the roots was nullid, everything is included anyway.
1207 # If one of the roots was nullid, everything is included anyway.
1204 if lowestrev > nullrev:
1208 if lowestrev > nullrev:
1205 # But, since we weren't, let's recompute the lowest rev to not
1209 # But, since we weren't, let's recompute the lowest rev to not
1206 # include roots that aren't ancestors.
1210 # include roots that aren't ancestors.
1207
1211
1208 # Filter out roots that aren't ancestors of heads
1212 # Filter out roots that aren't ancestors of heads
1209 roots = [root for root in roots if root in ancestors]
1213 roots = [root for root in roots if root in ancestors]
1210 # Recompute the lowest revision
1214 # Recompute the lowest revision
1211 if roots:
1215 if roots:
1212 lowestrev = min([self.rev(root) for root in roots])
1216 lowestrev = min([self.rev(root) for root in roots])
1213 else:
1217 else:
1214 # No more roots? Return empty list
1218 # No more roots? Return empty list
1215 return nonodes
1219 return nonodes
1216 else:
1220 else:
1217 # We are descending from nullid, and don't need to care about
1221 # We are descending from nullid, and don't need to care about
1218 # any other roots.
1222 # any other roots.
1219 lowestrev = nullrev
1223 lowestrev = nullrev
1220 roots = [self.nullid]
1224 roots = [self.nullid]
1221 # Transform our roots list into a set.
1225 # Transform our roots list into a set.
1222 descendants = set(roots)
1226 descendants = set(roots)
1223 # Also, keep the original roots so we can filter out roots that aren't
1227 # Also, keep the original roots so we can filter out roots that aren't
1224 # 'real' roots (i.e. are descended from other roots).
1228 # 'real' roots (i.e. are descended from other roots).
1225 roots = descendants.copy()
1229 roots = descendants.copy()
1226 # Our topologically sorted list of output nodes.
1230 # Our topologically sorted list of output nodes.
1227 orderedout = []
1231 orderedout = []
1228 # Don't start at nullid since we don't want nullid in our output list,
1232 # Don't start at nullid since we don't want nullid in our output list,
1229 # and if nullid shows up in descendants, empty parents will look like
1233 # and if nullid shows up in descendants, empty parents will look like
1230 # they're descendants.
1234 # they're descendants.
1231 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1235 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1232 n = self.node(r)
1236 n = self.node(r)
1233 isdescendant = False
1237 isdescendant = False
1234 if lowestrev == nullrev: # Everybody is a descendant of nullid
1238 if lowestrev == nullrev: # Everybody is a descendant of nullid
1235 isdescendant = True
1239 isdescendant = True
1236 elif n in descendants:
1240 elif n in descendants:
1237 # n is already a descendant
1241 # n is already a descendant
1238 isdescendant = True
1242 isdescendant = True
1239 # This check only needs to be done here because all the roots
1243 # This check only needs to be done here because all the roots
1240 # will start being marked is descendants before the loop.
1244 # will start being marked is descendants before the loop.
1241 if n in roots:
1245 if n in roots:
1242 # If n was a root, check if it's a 'real' root.
1246 # If n was a root, check if it's a 'real' root.
1243 p = tuple(self.parents(n))
1247 p = tuple(self.parents(n))
1244 # If any of its parents are descendants, it's not a root.
1248 # If any of its parents are descendants, it's not a root.
1245 if (p[0] in descendants) or (p[1] in descendants):
1249 if (p[0] in descendants) or (p[1] in descendants):
1246 roots.remove(n)
1250 roots.remove(n)
1247 else:
1251 else:
1248 p = tuple(self.parents(n))
1252 p = tuple(self.parents(n))
1249 # A node is a descendant if either of its parents are
1253 # A node is a descendant if either of its parents are
1250 # descendants. (We seeded the dependents list with the roots
1254 # descendants. (We seeded the dependents list with the roots
1251 # up there, remember?)
1255 # up there, remember?)
1252 if (p[0] in descendants) or (p[1] in descendants):
1256 if (p[0] in descendants) or (p[1] in descendants):
1253 descendants.add(n)
1257 descendants.add(n)
1254 isdescendant = True
1258 isdescendant = True
1255 if isdescendant and ((ancestors is None) or (n in ancestors)):
1259 if isdescendant and ((ancestors is None) or (n in ancestors)):
1256 # Only include nodes that are both descendants and ancestors.
1260 # Only include nodes that are both descendants and ancestors.
1257 orderedout.append(n)
1261 orderedout.append(n)
1258 if (ancestors is not None) and (n in heads):
1262 if (ancestors is not None) and (n in heads):
1259 # We're trying to figure out which heads are reachable
1263 # We're trying to figure out which heads are reachable
1260 # from roots.
1264 # from roots.
1261 # Mark this head as having been reached
1265 # Mark this head as having been reached
1262 heads[n] = True
1266 heads[n] = True
1263 elif ancestors is None:
1267 elif ancestors is None:
1264 # Otherwise, we're trying to discover the heads.
1268 # Otherwise, we're trying to discover the heads.
1265 # Assume this is a head because if it isn't, the next step
1269 # Assume this is a head because if it isn't, the next step
1266 # will eventually remove it.
1270 # will eventually remove it.
1267 heads[n] = True
1271 heads[n] = True
1268 # But, obviously its parents aren't.
1272 # But, obviously its parents aren't.
1269 for p in self.parents(n):
1273 for p in self.parents(n):
1270 heads.pop(p, None)
1274 heads.pop(p, None)
1271 heads = [head for head, flag in pycompat.iteritems(heads) if flag]
1275 heads = [head for head, flag in pycompat.iteritems(heads) if flag]
1272 roots = list(roots)
1276 roots = list(roots)
1273 assert orderedout
1277 assert orderedout
1274 assert roots
1278 assert roots
1275 assert heads
1279 assert heads
1276 return (orderedout, roots, heads)
1280 return (orderedout, roots, heads)
1277
1281
1278 def headrevs(self, revs=None):
1282 def headrevs(self, revs=None):
1279 if revs is None:
1283 if revs is None:
1280 try:
1284 try:
1281 return self.index.headrevs()
1285 return self.index.headrevs()
1282 except AttributeError:
1286 except AttributeError:
1283 return self._headrevs()
1287 return self._headrevs()
1284 if rustdagop is not None:
1288 if rustdagop is not None:
1285 return rustdagop.headrevs(self.index, revs)
1289 return rustdagop.headrevs(self.index, revs)
1286 return dagop.headrevs(revs, self._uncheckedparentrevs)
1290 return dagop.headrevs(revs, self._uncheckedparentrevs)
1287
1291
1288 def computephases(self, roots):
1292 def computephases(self, roots):
1289 return self.index.computephasesmapsets(roots)
1293 return self.index.computephasesmapsets(roots)
1290
1294
1291 def _headrevs(self):
1295 def _headrevs(self):
1292 count = len(self)
1296 count = len(self)
1293 if not count:
1297 if not count:
1294 return [nullrev]
1298 return [nullrev]
1295 # we won't iter over filtered rev so nobody is a head at start
1299 # we won't iter over filtered rev so nobody is a head at start
1296 ishead = [0] * (count + 1)
1300 ishead = [0] * (count + 1)
1297 index = self.index
1301 index = self.index
1298 for r in self:
1302 for r in self:
1299 ishead[r] = 1 # I may be an head
1303 ishead[r] = 1 # I may be an head
1300 e = index[r]
1304 e = index[r]
1301 ishead[e[5]] = ishead[e[6]] = 0 # my parent are not
1305 ishead[e[5]] = ishead[e[6]] = 0 # my parent are not
1302 return [r for r, val in enumerate(ishead) if val]
1306 return [r for r, val in enumerate(ishead) if val]
1303
1307
1304 def heads(self, start=None, stop=None):
1308 def heads(self, start=None, stop=None):
1305 """return the list of all nodes that have no children
1309 """return the list of all nodes that have no children
1306
1310
1307 if start is specified, only heads that are descendants of
1311 if start is specified, only heads that are descendants of
1308 start will be returned
1312 start will be returned
1309 if stop is specified, it will consider all the revs from stop
1313 if stop is specified, it will consider all the revs from stop
1310 as if they had no children
1314 as if they had no children
1311 """
1315 """
1312 if start is None and stop is None:
1316 if start is None and stop is None:
1313 if not len(self):
1317 if not len(self):
1314 return [self.nullid]
1318 return [self.nullid]
1315 return [self.node(r) for r in self.headrevs()]
1319 return [self.node(r) for r in self.headrevs()]
1316
1320
1317 if start is None:
1321 if start is None:
1318 start = nullrev
1322 start = nullrev
1319 else:
1323 else:
1320 start = self.rev(start)
1324 start = self.rev(start)
1321
1325
1322 stoprevs = {self.rev(n) for n in stop or []}
1326 stoprevs = {self.rev(n) for n in stop or []}
1323
1327
1324 revs = dagop.headrevssubset(
1328 revs = dagop.headrevssubset(
1325 self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
1329 self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
1326 )
1330 )
1327
1331
1328 return [self.node(rev) for rev in revs]
1332 return [self.node(rev) for rev in revs]
1329
1333
1330 def children(self, node):
1334 def children(self, node):
1331 """find the children of a given node"""
1335 """find the children of a given node"""
1332 c = []
1336 c = []
1333 p = self.rev(node)
1337 p = self.rev(node)
1334 for r in self.revs(start=p + 1):
1338 for r in self.revs(start=p + 1):
1335 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1339 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1336 if prevs:
1340 if prevs:
1337 for pr in prevs:
1341 for pr in prevs:
1338 if pr == p:
1342 if pr == p:
1339 c.append(self.node(r))
1343 c.append(self.node(r))
1340 elif p == nullrev:
1344 elif p == nullrev:
1341 c.append(self.node(r))
1345 c.append(self.node(r))
1342 return c
1346 return c
1343
1347
1344 def commonancestorsheads(self, a, b):
1348 def commonancestorsheads(self, a, b):
1345 """calculate all the heads of the common ancestors of nodes a and b"""
1349 """calculate all the heads of the common ancestors of nodes a and b"""
1346 a, b = self.rev(a), self.rev(b)
1350 a, b = self.rev(a), self.rev(b)
1347 ancs = self._commonancestorsheads(a, b)
1351 ancs = self._commonancestorsheads(a, b)
1348 return pycompat.maplist(self.node, ancs)
1352 return pycompat.maplist(self.node, ancs)
1349
1353
1350 def _commonancestorsheads(self, *revs):
1354 def _commonancestorsheads(self, *revs):
1351 """calculate all the heads of the common ancestors of revs"""
1355 """calculate all the heads of the common ancestors of revs"""
1352 try:
1356 try:
1353 ancs = self.index.commonancestorsheads(*revs)
1357 ancs = self.index.commonancestorsheads(*revs)
1354 except (AttributeError, OverflowError): # C implementation failed
1358 except (AttributeError, OverflowError): # C implementation failed
1355 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1359 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1356 return ancs
1360 return ancs
1357
1361
1358 def isancestor(self, a, b):
1362 def isancestor(self, a, b):
1359 """return True if node a is an ancestor of node b
1363 """return True if node a is an ancestor of node b
1360
1364
1361 A revision is considered an ancestor of itself."""
1365 A revision is considered an ancestor of itself."""
1362 a, b = self.rev(a), self.rev(b)
1366 a, b = self.rev(a), self.rev(b)
1363 return self.isancestorrev(a, b)
1367 return self.isancestorrev(a, b)
1364
1368
1365 def isancestorrev(self, a, b):
1369 def isancestorrev(self, a, b):
1366 """return True if revision a is an ancestor of revision b
1370 """return True if revision a is an ancestor of revision b
1367
1371
1368 A revision is considered an ancestor of itself.
1372 A revision is considered an ancestor of itself.
1369
1373
1370 The implementation of this is trivial but the use of
1374 The implementation of this is trivial but the use of
1371 reachableroots is not."""
1375 reachableroots is not."""
1372 if a == nullrev:
1376 if a == nullrev:
1373 return True
1377 return True
1374 elif a == b:
1378 elif a == b:
1375 return True
1379 return True
1376 elif a > b:
1380 elif a > b:
1377 return False
1381 return False
1378 return bool(self.reachableroots(a, [b], [a], includepath=False))
1382 return bool(self.reachableroots(a, [b], [a], includepath=False))
1379
1383
1380 def reachableroots(self, minroot, heads, roots, includepath=False):
1384 def reachableroots(self, minroot, heads, roots, includepath=False):
1381 """return (heads(::(<roots> and <roots>::<heads>)))
1385 """return (heads(::(<roots> and <roots>::<heads>)))
1382
1386
1383 If includepath is True, return (<roots>::<heads>)."""
1387 If includepath is True, return (<roots>::<heads>)."""
1384 try:
1388 try:
1385 return self.index.reachableroots2(
1389 return self.index.reachableroots2(
1386 minroot, heads, roots, includepath
1390 minroot, heads, roots, includepath
1387 )
1391 )
1388 except AttributeError:
1392 except AttributeError:
1389 return dagop._reachablerootspure(
1393 return dagop._reachablerootspure(
1390 self.parentrevs, minroot, roots, heads, includepath
1394 self.parentrevs, minroot, roots, heads, includepath
1391 )
1395 )
1392
1396
1393 def ancestor(self, a, b):
1397 def ancestor(self, a, b):
1394 """calculate the "best" common ancestor of nodes a and b"""
1398 """calculate the "best" common ancestor of nodes a and b"""
1395
1399
1396 a, b = self.rev(a), self.rev(b)
1400 a, b = self.rev(a), self.rev(b)
1397 try:
1401 try:
1398 ancs = self.index.ancestors(a, b)
1402 ancs = self.index.ancestors(a, b)
1399 except (AttributeError, OverflowError):
1403 except (AttributeError, OverflowError):
1400 ancs = ancestor.ancestors(self.parentrevs, a, b)
1404 ancs = ancestor.ancestors(self.parentrevs, a, b)
1401 if ancs:
1405 if ancs:
1402 # choose a consistent winner when there's a tie
1406 # choose a consistent winner when there's a tie
1403 return min(map(self.node, ancs))
1407 return min(map(self.node, ancs))
1404 return self.nullid
1408 return self.nullid
1405
1409
1406 def _match(self, id):
1410 def _match(self, id):
1407 if isinstance(id, int):
1411 if isinstance(id, int):
1408 # rev
1412 # rev
1409 return self.node(id)
1413 return self.node(id)
1410 if len(id) == 20:
1414 if len(id) == 20:
1411 # possibly a binary node
1415 # possibly a binary node
1412 # odds of a binary node being all hex in ASCII are 1 in 10**25
1416 # odds of a binary node being all hex in ASCII are 1 in 10**25
1413 try:
1417 try:
1414 node = id
1418 node = id
1415 self.rev(node) # quick search the index
1419 self.rev(node) # quick search the index
1416 return node
1420 return node
1417 except error.LookupError:
1421 except error.LookupError:
1418 pass # may be partial hex id
1422 pass # may be partial hex id
1419 try:
1423 try:
1420 # str(rev)
1424 # str(rev)
1421 rev = int(id)
1425 rev = int(id)
1422 if b"%d" % rev != id:
1426 if b"%d" % rev != id:
1423 raise ValueError
1427 raise ValueError
1424 if rev < 0:
1428 if rev < 0:
1425 rev = len(self) + rev
1429 rev = len(self) + rev
1426 if rev < 0 or rev >= len(self):
1430 if rev < 0 or rev >= len(self):
1427 raise ValueError
1431 raise ValueError
1428 return self.node(rev)
1432 return self.node(rev)
1429 except (ValueError, OverflowError):
1433 except (ValueError, OverflowError):
1430 pass
1434 pass
1431 if len(id) == 40:
1435 if len(id) == 40:
1432 try:
1436 try:
1433 # a full hex nodeid?
1437 # a full hex nodeid?
1434 node = bin(id)
1438 node = bin(id)
1435 self.rev(node)
1439 self.rev(node)
1436 return node
1440 return node
1437 except (TypeError, error.LookupError):
1441 except (TypeError, error.LookupError):
1438 pass
1442 pass
1439
1443
1440 def _partialmatch(self, id):
1444 def _partialmatch(self, id):
1441 # we don't care wdirfilenodeids as they should be always full hash
1445 # we don't care wdirfilenodeids as they should be always full hash
1442 maybewdir = self.nodeconstants.wdirhex.startswith(id)
1446 maybewdir = self.nodeconstants.wdirhex.startswith(id)
1443 try:
1447 try:
1444 partial = self.index.partialmatch(id)
1448 partial = self.index.partialmatch(id)
1445 if partial and self.hasnode(partial):
1449 if partial and self.hasnode(partial):
1446 if maybewdir:
1450 if maybewdir:
1447 # single 'ff...' match in radix tree, ambiguous with wdir
1451 # single 'ff...' match in radix tree, ambiguous with wdir
1448 raise error.RevlogError
1452 raise error.RevlogError
1449 return partial
1453 return partial
1450 if maybewdir:
1454 if maybewdir:
1451 # no 'ff...' match in radix tree, wdir identified
1455 # no 'ff...' match in radix tree, wdir identified
1452 raise error.WdirUnsupported
1456 raise error.WdirUnsupported
1453 return None
1457 return None
1454 except error.RevlogError:
1458 except error.RevlogError:
1455 # parsers.c radix tree lookup gave multiple matches
1459 # parsers.c radix tree lookup gave multiple matches
1456 # fast path: for unfiltered changelog, radix tree is accurate
1460 # fast path: for unfiltered changelog, radix tree is accurate
1457 if not getattr(self, 'filteredrevs', None):
1461 if not getattr(self, 'filteredrevs', None):
1458 raise error.AmbiguousPrefixLookupError(
1462 raise error.AmbiguousPrefixLookupError(
1459 id, self.indexfile, _(b'ambiguous identifier')
1463 id, self.indexfile, _(b'ambiguous identifier')
1460 )
1464 )
1461 # fall through to slow path that filters hidden revisions
1465 # fall through to slow path that filters hidden revisions
1462 except (AttributeError, ValueError):
1466 except (AttributeError, ValueError):
1463 # we are pure python, or key was too short to search radix tree
1467 # we are pure python, or key was too short to search radix tree
1464 pass
1468 pass
1465
1469
1466 if id in self._pcache:
1470 if id in self._pcache:
1467 return self._pcache[id]
1471 return self._pcache[id]
1468
1472
1469 if len(id) <= 40:
1473 if len(id) <= 40:
1470 try:
1474 try:
1471 # hex(node)[:...]
1475 # hex(node)[:...]
1472 l = len(id) // 2 # grab an even number of digits
1476 l = len(id) // 2 # grab an even number of digits
1473 prefix = bin(id[: l * 2])
1477 prefix = bin(id[: l * 2])
1474 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1478 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1475 nl = [
1479 nl = [
1476 n for n in nl if hex(n).startswith(id) and self.hasnode(n)
1480 n for n in nl if hex(n).startswith(id) and self.hasnode(n)
1477 ]
1481 ]
1478 if self.nodeconstants.nullhex.startswith(id):
1482 if self.nodeconstants.nullhex.startswith(id):
1479 nl.append(self.nullid)
1483 nl.append(self.nullid)
1480 if len(nl) > 0:
1484 if len(nl) > 0:
1481 if len(nl) == 1 and not maybewdir:
1485 if len(nl) == 1 and not maybewdir:
1482 self._pcache[id] = nl[0]
1486 self._pcache[id] = nl[0]
1483 return nl[0]
1487 return nl[0]
1484 raise error.AmbiguousPrefixLookupError(
1488 raise error.AmbiguousPrefixLookupError(
1485 id, self.indexfile, _(b'ambiguous identifier')
1489 id, self.indexfile, _(b'ambiguous identifier')
1486 )
1490 )
1487 if maybewdir:
1491 if maybewdir:
1488 raise error.WdirUnsupported
1492 raise error.WdirUnsupported
1489 return None
1493 return None
1490 except TypeError:
1494 except TypeError:
1491 pass
1495 pass
1492
1496
1493 def lookup(self, id):
1497 def lookup(self, id):
1494 """locate a node based on:
1498 """locate a node based on:
1495 - revision number or str(revision number)
1499 - revision number or str(revision number)
1496 - nodeid or subset of hex nodeid
1500 - nodeid or subset of hex nodeid
1497 """
1501 """
1498 n = self._match(id)
1502 n = self._match(id)
1499 if n is not None:
1503 if n is not None:
1500 return n
1504 return n
1501 n = self._partialmatch(id)
1505 n = self._partialmatch(id)
1502 if n:
1506 if n:
1503 return n
1507 return n
1504
1508
1505 raise error.LookupError(id, self.indexfile, _(b'no match found'))
1509 raise error.LookupError(id, self.indexfile, _(b'no match found'))
1506
1510
1507 def shortest(self, node, minlength=1):
1511 def shortest(self, node, minlength=1):
1508 """Find the shortest unambiguous prefix that matches node."""
1512 """Find the shortest unambiguous prefix that matches node."""
1509
1513
1510 def isvalid(prefix):
1514 def isvalid(prefix):
1511 try:
1515 try:
1512 matchednode = self._partialmatch(prefix)
1516 matchednode = self._partialmatch(prefix)
1513 except error.AmbiguousPrefixLookupError:
1517 except error.AmbiguousPrefixLookupError:
1514 return False
1518 return False
1515 except error.WdirUnsupported:
1519 except error.WdirUnsupported:
1516 # single 'ff...' match
1520 # single 'ff...' match
1517 return True
1521 return True
1518 if matchednode is None:
1522 if matchednode is None:
1519 raise error.LookupError(node, self.indexfile, _(b'no node'))
1523 raise error.LookupError(node, self.indexfile, _(b'no node'))
1520 return True
1524 return True
1521
1525
1522 def maybewdir(prefix):
1526 def maybewdir(prefix):
1523 return all(c == b'f' for c in pycompat.iterbytestr(prefix))
1527 return all(c == b'f' for c in pycompat.iterbytestr(prefix))
1524
1528
1525 hexnode = hex(node)
1529 hexnode = hex(node)
1526
1530
1527 def disambiguate(hexnode, minlength):
1531 def disambiguate(hexnode, minlength):
1528 """Disambiguate against wdirid."""
1532 """Disambiguate against wdirid."""
1529 for length in range(minlength, len(hexnode) + 1):
1533 for length in range(minlength, len(hexnode) + 1):
1530 prefix = hexnode[:length]
1534 prefix = hexnode[:length]
1531 if not maybewdir(prefix):
1535 if not maybewdir(prefix):
1532 return prefix
1536 return prefix
1533
1537
1534 if not getattr(self, 'filteredrevs', None):
1538 if not getattr(self, 'filteredrevs', None):
1535 try:
1539 try:
1536 length = max(self.index.shortest(node), minlength)
1540 length = max(self.index.shortest(node), minlength)
1537 return disambiguate(hexnode, length)
1541 return disambiguate(hexnode, length)
1538 except error.RevlogError:
1542 except error.RevlogError:
1539 if node != self.nodeconstants.wdirid:
1543 if node != self.nodeconstants.wdirid:
1540 raise error.LookupError(node, self.indexfile, _(b'no node'))
1544 raise error.LookupError(node, self.indexfile, _(b'no node'))
1541 except AttributeError:
1545 except AttributeError:
1542 # Fall through to pure code
1546 # Fall through to pure code
1543 pass
1547 pass
1544
1548
1545 if node == self.nodeconstants.wdirid:
1549 if node == self.nodeconstants.wdirid:
1546 for length in range(minlength, len(hexnode) + 1):
1550 for length in range(minlength, len(hexnode) + 1):
1547 prefix = hexnode[:length]
1551 prefix = hexnode[:length]
1548 if isvalid(prefix):
1552 if isvalid(prefix):
1549 return prefix
1553 return prefix
1550
1554
1551 for length in range(minlength, len(hexnode) + 1):
1555 for length in range(minlength, len(hexnode) + 1):
1552 prefix = hexnode[:length]
1556 prefix = hexnode[:length]
1553 if isvalid(prefix):
1557 if isvalid(prefix):
1554 return disambiguate(hexnode, length)
1558 return disambiguate(hexnode, length)
1555
1559
1556 def cmp(self, node, text):
1560 def cmp(self, node, text):
1557 """compare text with a given file revision
1561 """compare text with a given file revision
1558
1562
1559 returns True if text is different than what is stored.
1563 returns True if text is different than what is stored.
1560 """
1564 """
1561 p1, p2 = self.parents(node)
1565 p1, p2 = self.parents(node)
1562 return storageutil.hashrevisionsha1(text, p1, p2) != node
1566 return storageutil.hashrevisionsha1(text, p1, p2) != node
1563
1567
1564 def _cachesegment(self, offset, data):
1568 def _cachesegment(self, offset, data):
1565 """Add a segment to the revlog cache.
1569 """Add a segment to the revlog cache.
1566
1570
1567 Accepts an absolute offset and the data that is at that location.
1571 Accepts an absolute offset and the data that is at that location.
1568 """
1572 """
1569 o, d = self._chunkcache
1573 o, d = self._chunkcache
1570 # try to add to existing cache
1574 # try to add to existing cache
1571 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1575 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1572 self._chunkcache = o, d + data
1576 self._chunkcache = o, d + data
1573 else:
1577 else:
1574 self._chunkcache = offset, data
1578 self._chunkcache = offset, data
1575
1579
1576 def _readsegment(self, offset, length, df=None):
1580 def _readsegment(self, offset, length, df=None):
1577 """Load a segment of raw data from the revlog.
1581 """Load a segment of raw data from the revlog.
1578
1582
1579 Accepts an absolute offset, length to read, and an optional existing
1583 Accepts an absolute offset, length to read, and an optional existing
1580 file handle to read from.
1584 file handle to read from.
1581
1585
1582 If an existing file handle is passed, it will be seeked and the
1586 If an existing file handle is passed, it will be seeked and the
1583 original seek position will NOT be restored.
1587 original seek position will NOT be restored.
1584
1588
1585 Returns a str or buffer of raw byte data.
1589 Returns a str or buffer of raw byte data.
1586
1590
1587 Raises if the requested number of bytes could not be read.
1591 Raises if the requested number of bytes could not be read.
1588 """
1592 """
1589 # Cache data both forward and backward around the requested
1593 # Cache data both forward and backward around the requested
1590 # data, in a fixed size window. This helps speed up operations
1594 # data, in a fixed size window. This helps speed up operations
1591 # involving reading the revlog backwards.
1595 # involving reading the revlog backwards.
1592 cachesize = self._chunkcachesize
1596 cachesize = self._chunkcachesize
1593 realoffset = offset & ~(cachesize - 1)
1597 realoffset = offset & ~(cachesize - 1)
1594 reallength = (
1598 reallength = (
1595 (offset + length + cachesize) & ~(cachesize - 1)
1599 (offset + length + cachesize) & ~(cachesize - 1)
1596 ) - realoffset
1600 ) - realoffset
1597 with self._datareadfp(df) as df:
1601 with self._datareadfp(df) as df:
1598 df.seek(realoffset)
1602 df.seek(realoffset)
1599 d = df.read(reallength)
1603 d = df.read(reallength)
1600
1604
1601 self._cachesegment(realoffset, d)
1605 self._cachesegment(realoffset, d)
1602 if offset != realoffset or reallength != length:
1606 if offset != realoffset or reallength != length:
1603 startoffset = offset - realoffset
1607 startoffset = offset - realoffset
1604 if len(d) - startoffset < length:
1608 if len(d) - startoffset < length:
1605 raise error.RevlogError(
1609 raise error.RevlogError(
1606 _(
1610 _(
1607 b'partial read of revlog %s; expected %d bytes from '
1611 b'partial read of revlog %s; expected %d bytes from '
1608 b'offset %d, got %d'
1612 b'offset %d, got %d'
1609 )
1613 )
1610 % (
1614 % (
1611 self.indexfile if self._inline else self.datafile,
1615 self.indexfile if self._inline else self.datafile,
1612 length,
1616 length,
1613 realoffset,
1617 realoffset,
1614 len(d) - startoffset,
1618 len(d) - startoffset,
1615 )
1619 )
1616 )
1620 )
1617
1621
1618 return util.buffer(d, startoffset, length)
1622 return util.buffer(d, startoffset, length)
1619
1623
1620 if len(d) < length:
1624 if len(d) < length:
1621 raise error.RevlogError(
1625 raise error.RevlogError(
1622 _(
1626 _(
1623 b'partial read of revlog %s; expected %d bytes from offset '
1627 b'partial read of revlog %s; expected %d bytes from offset '
1624 b'%d, got %d'
1628 b'%d, got %d'
1625 )
1629 )
1626 % (
1630 % (
1627 self.indexfile if self._inline else self.datafile,
1631 self.indexfile if self._inline else self.datafile,
1628 length,
1632 length,
1629 offset,
1633 offset,
1630 len(d),
1634 len(d),
1631 )
1635 )
1632 )
1636 )
1633
1637
1634 return d
1638 return d
1635
1639
1636 def _getsegment(self, offset, length, df=None):
1640 def _getsegment(self, offset, length, df=None):
1637 """Obtain a segment of raw data from the revlog.
1641 """Obtain a segment of raw data from the revlog.
1638
1642
1639 Accepts an absolute offset, length of bytes to obtain, and an
1643 Accepts an absolute offset, length of bytes to obtain, and an
1640 optional file handle to the already-opened revlog. If the file
1644 optional file handle to the already-opened revlog. If the file
1641 handle is used, it's original seek position will not be preserved.
1645 handle is used, it's original seek position will not be preserved.
1642
1646
1643 Requests for data may be returned from a cache.
1647 Requests for data may be returned from a cache.
1644
1648
1645 Returns a str or a buffer instance of raw byte data.
1649 Returns a str or a buffer instance of raw byte data.
1646 """
1650 """
1647 o, d = self._chunkcache
1651 o, d = self._chunkcache
1648 l = len(d)
1652 l = len(d)
1649
1653
1650 # is it in the cache?
1654 # is it in the cache?
1651 cachestart = offset - o
1655 cachestart = offset - o
1652 cacheend = cachestart + length
1656 cacheend = cachestart + length
1653 if cachestart >= 0 and cacheend <= l:
1657 if cachestart >= 0 and cacheend <= l:
1654 if cachestart == 0 and cacheend == l:
1658 if cachestart == 0 and cacheend == l:
1655 return d # avoid a copy
1659 return d # avoid a copy
1656 return util.buffer(d, cachestart, cacheend - cachestart)
1660 return util.buffer(d, cachestart, cacheend - cachestart)
1657
1661
1658 return self._readsegment(offset, length, df=df)
1662 return self._readsegment(offset, length, df=df)
1659
1663
1660 def _getsegmentforrevs(self, startrev, endrev, df=None):
1664 def _getsegmentforrevs(self, startrev, endrev, df=None):
1661 """Obtain a segment of raw data corresponding to a range of revisions.
1665 """Obtain a segment of raw data corresponding to a range of revisions.
1662
1666
1663 Accepts the start and end revisions and an optional already-open
1667 Accepts the start and end revisions and an optional already-open
1664 file handle to be used for reading. If the file handle is read, its
1668 file handle to be used for reading. If the file handle is read, its
1665 seek position will not be preserved.
1669 seek position will not be preserved.
1666
1670
1667 Requests for data may be satisfied by a cache.
1671 Requests for data may be satisfied by a cache.
1668
1672
1669 Returns a 2-tuple of (offset, data) for the requested range of
1673 Returns a 2-tuple of (offset, data) for the requested range of
1670 revisions. Offset is the integer offset from the beginning of the
1674 revisions. Offset is the integer offset from the beginning of the
1671 revlog and data is a str or buffer of the raw byte data.
1675 revlog and data is a str or buffer of the raw byte data.
1672
1676
1673 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1677 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1674 to determine where each revision's data begins and ends.
1678 to determine where each revision's data begins and ends.
1675 """
1679 """
1676 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1680 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1677 # (functions are expensive).
1681 # (functions are expensive).
1678 index = self.index
1682 index = self.index
1679 istart = index[startrev]
1683 istart = index[startrev]
1680 start = int(istart[0] >> 16)
1684 start = int(istart[0] >> 16)
1681 if startrev == endrev:
1685 if startrev == endrev:
1682 end = start + istart[1]
1686 end = start + istart[1]
1683 else:
1687 else:
1684 iend = index[endrev]
1688 iend = index[endrev]
1685 end = int(iend[0] >> 16) + iend[1]
1689 end = int(iend[0] >> 16) + iend[1]
1686
1690
1687 if self._inline:
1691 if self._inline:
1688 start += (startrev + 1) * self.index.entry_size
1692 start += (startrev + 1) * self.index.entry_size
1689 end += (endrev + 1) * self.index.entry_size
1693 end += (endrev + 1) * self.index.entry_size
1690 length = end - start
1694 length = end - start
1691
1695
1692 return start, self._getsegment(start, length, df=df)
1696 return start, self._getsegment(start, length, df=df)
1693
1697
1694 def _chunk(self, rev, df=None):
1698 def _chunk(self, rev, df=None):
1695 """Obtain a single decompressed chunk for a revision.
1699 """Obtain a single decompressed chunk for a revision.
1696
1700
1697 Accepts an integer revision and an optional already-open file handle
1701 Accepts an integer revision and an optional already-open file handle
1698 to be used for reading. If used, the seek position of the file will not
1702 to be used for reading. If used, the seek position of the file will not
1699 be preserved.
1703 be preserved.
1700
1704
1701 Returns a str holding uncompressed data for the requested revision.
1705 Returns a str holding uncompressed data for the requested revision.
1702 """
1706 """
1703 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
1707 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
1704
1708
1705 def _chunks(self, revs, df=None, targetsize=None):
1709 def _chunks(self, revs, df=None, targetsize=None):
1706 """Obtain decompressed chunks for the specified revisions.
1710 """Obtain decompressed chunks for the specified revisions.
1707
1711
1708 Accepts an iterable of numeric revisions that are assumed to be in
1712 Accepts an iterable of numeric revisions that are assumed to be in
1709 ascending order. Also accepts an optional already-open file handle
1713 ascending order. Also accepts an optional already-open file handle
1710 to be used for reading. If used, the seek position of the file will
1714 to be used for reading. If used, the seek position of the file will
1711 not be preserved.
1715 not be preserved.
1712
1716
1713 This function is similar to calling ``self._chunk()`` multiple times,
1717 This function is similar to calling ``self._chunk()`` multiple times,
1714 but is faster.
1718 but is faster.
1715
1719
1716 Returns a list with decompressed data for each requested revision.
1720 Returns a list with decompressed data for each requested revision.
1717 """
1721 """
1718 if not revs:
1722 if not revs:
1719 return []
1723 return []
1720 start = self.start
1724 start = self.start
1721 length = self.length
1725 length = self.length
1722 inline = self._inline
1726 inline = self._inline
1723 iosize = self.index.entry_size
1727 iosize = self.index.entry_size
1724 buffer = util.buffer
1728 buffer = util.buffer
1725
1729
1726 l = []
1730 l = []
1727 ladd = l.append
1731 ladd = l.append
1728
1732
1729 if not self._withsparseread:
1733 if not self._withsparseread:
1730 slicedchunks = (revs,)
1734 slicedchunks = (revs,)
1731 else:
1735 else:
1732 slicedchunks = deltautil.slicechunk(
1736 slicedchunks = deltautil.slicechunk(
1733 self, revs, targetsize=targetsize
1737 self, revs, targetsize=targetsize
1734 )
1738 )
1735
1739
1736 for revschunk in slicedchunks:
1740 for revschunk in slicedchunks:
1737 firstrev = revschunk[0]
1741 firstrev = revschunk[0]
1738 # Skip trailing revisions with empty diff
1742 # Skip trailing revisions with empty diff
1739 for lastrev in revschunk[::-1]:
1743 for lastrev in revschunk[::-1]:
1740 if length(lastrev) != 0:
1744 if length(lastrev) != 0:
1741 break
1745 break
1742
1746
1743 try:
1747 try:
1744 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1748 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1745 except OverflowError:
1749 except OverflowError:
1746 # issue4215 - we can't cache a run of chunks greater than
1750 # issue4215 - we can't cache a run of chunks greater than
1747 # 2G on Windows
1751 # 2G on Windows
1748 return [self._chunk(rev, df=df) for rev in revschunk]
1752 return [self._chunk(rev, df=df) for rev in revschunk]
1749
1753
1750 decomp = self.decompress
1754 decomp = self.decompress
1751 for rev in revschunk:
1755 for rev in revschunk:
1752 chunkstart = start(rev)
1756 chunkstart = start(rev)
1753 if inline:
1757 if inline:
1754 chunkstart += (rev + 1) * iosize
1758 chunkstart += (rev + 1) * iosize
1755 chunklength = length(rev)
1759 chunklength = length(rev)
1756 ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
1760 ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
1757
1761
1758 return l
1762 return l
1759
1763
1760 def _chunkclear(self):
1764 def _chunkclear(self):
1761 """Clear the raw chunk cache."""
1765 """Clear the raw chunk cache."""
1762 self._chunkcache = (0, b'')
1766 self._chunkcache = (0, b'')
1763
1767
1764 def deltaparent(self, rev):
1768 def deltaparent(self, rev):
1765 """return deltaparent of the given revision"""
1769 """return deltaparent of the given revision"""
1766 base = self.index[rev][3]
1770 base = self.index[rev][3]
1767 if base == rev:
1771 if base == rev:
1768 return nullrev
1772 return nullrev
1769 elif self._generaldelta:
1773 elif self._generaldelta:
1770 return base
1774 return base
1771 else:
1775 else:
1772 return rev - 1
1776 return rev - 1
1773
1777
1774 def issnapshot(self, rev):
1778 def issnapshot(self, rev):
1775 """tells whether rev is a snapshot"""
1779 """tells whether rev is a snapshot"""
1776 if not self._sparserevlog:
1780 if not self._sparserevlog:
1777 return self.deltaparent(rev) == nullrev
1781 return self.deltaparent(rev) == nullrev
1778 elif util.safehasattr(self.index, b'issnapshot'):
1782 elif util.safehasattr(self.index, b'issnapshot'):
1779 # directly assign the method to cache the testing and access
1783 # directly assign the method to cache the testing and access
1780 self.issnapshot = self.index.issnapshot
1784 self.issnapshot = self.index.issnapshot
1781 return self.issnapshot(rev)
1785 return self.issnapshot(rev)
1782 if rev == nullrev:
1786 if rev == nullrev:
1783 return True
1787 return True
1784 entry = self.index[rev]
1788 entry = self.index[rev]
1785 base = entry[3]
1789 base = entry[3]
1786 if base == rev:
1790 if base == rev:
1787 return True
1791 return True
1788 if base == nullrev:
1792 if base == nullrev:
1789 return True
1793 return True
1790 p1 = entry[5]
1794 p1 = entry[5]
1791 p2 = entry[6]
1795 p2 = entry[6]
1792 if base == p1 or base == p2:
1796 if base == p1 or base == p2:
1793 return False
1797 return False
1794 return self.issnapshot(base)
1798 return self.issnapshot(base)
1795
1799
1796 def snapshotdepth(self, rev):
1800 def snapshotdepth(self, rev):
1797 """number of snapshot in the chain before this one"""
1801 """number of snapshot in the chain before this one"""
1798 if not self.issnapshot(rev):
1802 if not self.issnapshot(rev):
1799 raise error.ProgrammingError(b'revision %d not a snapshot')
1803 raise error.ProgrammingError(b'revision %d not a snapshot')
1800 return len(self._deltachain(rev)[0]) - 1
1804 return len(self._deltachain(rev)[0]) - 1
1801
1805
1802 def revdiff(self, rev1, rev2):
1806 def revdiff(self, rev1, rev2):
1803 """return or calculate a delta between two revisions
1807 """return or calculate a delta between two revisions
1804
1808
1805 The delta calculated is in binary form and is intended to be written to
1809 The delta calculated is in binary form and is intended to be written to
1806 revlog data directly. So this function needs raw revision data.
1810 revlog data directly. So this function needs raw revision data.
1807 """
1811 """
1808 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1812 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1809 return bytes(self._chunk(rev2))
1813 return bytes(self._chunk(rev2))
1810
1814
1811 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
1815 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
1812
1816
1813 def _processflags(self, text, flags, operation, raw=False):
1817 def _processflags(self, text, flags, operation, raw=False):
1814 """deprecated entry point to access flag processors"""
1818 """deprecated entry point to access flag processors"""
1815 msg = b'_processflag(...) use the specialized variant'
1819 msg = b'_processflag(...) use the specialized variant'
1816 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1820 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1817 if raw:
1821 if raw:
1818 return text, flagutil.processflagsraw(self, text, flags)
1822 return text, flagutil.processflagsraw(self, text, flags)
1819 elif operation == b'read':
1823 elif operation == b'read':
1820 return flagutil.processflagsread(self, text, flags)
1824 return flagutil.processflagsread(self, text, flags)
1821 else: # write operation
1825 else: # write operation
1822 return flagutil.processflagswrite(self, text, flags)
1826 return flagutil.processflagswrite(self, text, flags)
1823
1827
1824 def revision(self, nodeorrev, _df=None, raw=False):
1828 def revision(self, nodeorrev, _df=None, raw=False):
1825 """return an uncompressed revision of a given node or revision
1829 """return an uncompressed revision of a given node or revision
1826 number.
1830 number.
1827
1831
1828 _df - an existing file handle to read from. (internal-only)
1832 _df - an existing file handle to read from. (internal-only)
1829 raw - an optional argument specifying if the revision data is to be
1833 raw - an optional argument specifying if the revision data is to be
1830 treated as raw data when applying flag transforms. 'raw' should be set
1834 treated as raw data when applying flag transforms. 'raw' should be set
1831 to True when generating changegroups or in debug commands.
1835 to True when generating changegroups or in debug commands.
1832 """
1836 """
1833 if raw:
1837 if raw:
1834 msg = (
1838 msg = (
1835 b'revlog.revision(..., raw=True) is deprecated, '
1839 b'revlog.revision(..., raw=True) is deprecated, '
1836 b'use revlog.rawdata(...)'
1840 b'use revlog.rawdata(...)'
1837 )
1841 )
1838 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1842 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1839 return self._revisiondata(nodeorrev, _df, raw=raw)[0]
1843 return self._revisiondata(nodeorrev, _df, raw=raw)[0]
1840
1844
1841 def sidedata(self, nodeorrev, _df=None):
1845 def sidedata(self, nodeorrev, _df=None):
1842 """a map of extra data related to the changeset but not part of the hash
1846 """a map of extra data related to the changeset but not part of the hash
1843
1847
1844 This function currently return a dictionary. However, more advanced
1848 This function currently return a dictionary. However, more advanced
1845 mapping object will likely be used in the future for a more
1849 mapping object will likely be used in the future for a more
1846 efficient/lazy code.
1850 efficient/lazy code.
1847 """
1851 """
1848 return self._revisiondata(nodeorrev, _df)[1]
1852 return self._revisiondata(nodeorrev, _df)[1]
1849
1853
1850 def _revisiondata(self, nodeorrev, _df=None, raw=False):
1854 def _revisiondata(self, nodeorrev, _df=None, raw=False):
1851 # deal with <nodeorrev> argument type
1855 # deal with <nodeorrev> argument type
1852 if isinstance(nodeorrev, int):
1856 if isinstance(nodeorrev, int):
1853 rev = nodeorrev
1857 rev = nodeorrev
1854 node = self.node(rev)
1858 node = self.node(rev)
1855 else:
1859 else:
1856 node = nodeorrev
1860 node = nodeorrev
1857 rev = None
1861 rev = None
1858
1862
1859 # fast path the special `nullid` rev
1863 # fast path the special `nullid` rev
1860 if node == self.nullid:
1864 if node == self.nullid:
1861 return b"", {}
1865 return b"", {}
1862
1866
1863 # ``rawtext`` is the text as stored inside the revlog. Might be the
1867 # ``rawtext`` is the text as stored inside the revlog. Might be the
1864 # revision or might need to be processed to retrieve the revision.
1868 # revision or might need to be processed to retrieve the revision.
1865 rev, rawtext, validated = self._rawtext(node, rev, _df=_df)
1869 rev, rawtext, validated = self._rawtext(node, rev, _df=_df)
1866
1870
1867 if self.version & 0xFFFF == REVLOGV2:
1871 if self.version & 0xFFFF == REVLOGV2:
1868 if rev is None:
1872 if rev is None:
1869 rev = self.rev(node)
1873 rev = self.rev(node)
1870 sidedata = self._sidedata(rev)
1874 sidedata = self._sidedata(rev)
1871 else:
1875 else:
1872 sidedata = {}
1876 sidedata = {}
1873
1877
1874 if raw and validated:
1878 if raw and validated:
1875 # if we don't want to process the raw text and that raw
1879 # if we don't want to process the raw text and that raw
1876 # text is cached, we can exit early.
1880 # text is cached, we can exit early.
1877 return rawtext, sidedata
1881 return rawtext, sidedata
1878 if rev is None:
1882 if rev is None:
1879 rev = self.rev(node)
1883 rev = self.rev(node)
1880 # the revlog's flag for this revision
1884 # the revlog's flag for this revision
1881 # (usually alter its state or content)
1885 # (usually alter its state or content)
1882 flags = self.flags(rev)
1886 flags = self.flags(rev)
1883
1887
1884 if validated and flags == REVIDX_DEFAULT_FLAGS:
1888 if validated and flags == REVIDX_DEFAULT_FLAGS:
1885 # no extra flags set, no flag processor runs, text = rawtext
1889 # no extra flags set, no flag processor runs, text = rawtext
1886 return rawtext, sidedata
1890 return rawtext, sidedata
1887
1891
1888 if raw:
1892 if raw:
1889 validatehash = flagutil.processflagsraw(self, rawtext, flags)
1893 validatehash = flagutil.processflagsraw(self, rawtext, flags)
1890 text = rawtext
1894 text = rawtext
1891 else:
1895 else:
1892 r = flagutil.processflagsread(self, rawtext, flags)
1896 r = flagutil.processflagsread(self, rawtext, flags)
1893 text, validatehash = r
1897 text, validatehash = r
1894 if validatehash:
1898 if validatehash:
1895 self.checkhash(text, node, rev=rev)
1899 self.checkhash(text, node, rev=rev)
1896 if not validated:
1900 if not validated:
1897 self._revisioncache = (node, rev, rawtext)
1901 self._revisioncache = (node, rev, rawtext)
1898
1902
1899 return text, sidedata
1903 return text, sidedata
1900
1904
1901 def _rawtext(self, node, rev, _df=None):
1905 def _rawtext(self, node, rev, _df=None):
1902 """return the possibly unvalidated rawtext for a revision
1906 """return the possibly unvalidated rawtext for a revision
1903
1907
1904 returns (rev, rawtext, validated)
1908 returns (rev, rawtext, validated)
1905 """
1909 """
1906
1910
1907 # revision in the cache (could be useful to apply delta)
1911 # revision in the cache (could be useful to apply delta)
1908 cachedrev = None
1912 cachedrev = None
1909 # An intermediate text to apply deltas to
1913 # An intermediate text to apply deltas to
1910 basetext = None
1914 basetext = None
1911
1915
1912 # Check if we have the entry in cache
1916 # Check if we have the entry in cache
1913 # The cache entry looks like (node, rev, rawtext)
1917 # The cache entry looks like (node, rev, rawtext)
1914 if self._revisioncache:
1918 if self._revisioncache:
1915 if self._revisioncache[0] == node:
1919 if self._revisioncache[0] == node:
1916 return (rev, self._revisioncache[2], True)
1920 return (rev, self._revisioncache[2], True)
1917 cachedrev = self._revisioncache[1]
1921 cachedrev = self._revisioncache[1]
1918
1922
1919 if rev is None:
1923 if rev is None:
1920 rev = self.rev(node)
1924 rev = self.rev(node)
1921
1925
1922 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1926 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1923 if stopped:
1927 if stopped:
1924 basetext = self._revisioncache[2]
1928 basetext = self._revisioncache[2]
1925
1929
1926 # drop cache to save memory, the caller is expected to
1930 # drop cache to save memory, the caller is expected to
1927 # update self._revisioncache after validating the text
1931 # update self._revisioncache after validating the text
1928 self._revisioncache = None
1932 self._revisioncache = None
1929
1933
1930 targetsize = None
1934 targetsize = None
1931 rawsize = self.index[rev][2]
1935 rawsize = self.index[rev][2]
1932 if 0 <= rawsize:
1936 if 0 <= rawsize:
1933 targetsize = 4 * rawsize
1937 targetsize = 4 * rawsize
1934
1938
1935 bins = self._chunks(chain, df=_df, targetsize=targetsize)
1939 bins = self._chunks(chain, df=_df, targetsize=targetsize)
1936 if basetext is None:
1940 if basetext is None:
1937 basetext = bytes(bins[0])
1941 basetext = bytes(bins[0])
1938 bins = bins[1:]
1942 bins = bins[1:]
1939
1943
1940 rawtext = mdiff.patches(basetext, bins)
1944 rawtext = mdiff.patches(basetext, bins)
1941 del basetext # let us have a chance to free memory early
1945 del basetext # let us have a chance to free memory early
1942 return (rev, rawtext, False)
1946 return (rev, rawtext, False)
1943
1947
1944 def _sidedata(self, rev):
1948 def _sidedata(self, rev):
1945 """Return the sidedata for a given revision number."""
1949 """Return the sidedata for a given revision number."""
1946 index_entry = self.index[rev]
1950 index_entry = self.index[rev]
1947 sidedata_offset = index_entry[8]
1951 sidedata_offset = index_entry[8]
1948 sidedata_size = index_entry[9]
1952 sidedata_size = index_entry[9]
1949
1953
1950 if self._inline:
1954 if self._inline:
1951 sidedata_offset += self.index.entry_size * (1 + rev)
1955 sidedata_offset += self.index.entry_size * (1 + rev)
1952 if sidedata_size == 0:
1956 if sidedata_size == 0:
1953 return {}
1957 return {}
1954
1958
1955 segment = self._getsegment(sidedata_offset, sidedata_size)
1959 segment = self._getsegment(sidedata_offset, sidedata_size)
1956 sidedata = sidedatautil.deserialize_sidedata(segment)
1960 sidedata = sidedatautil.deserialize_sidedata(segment)
1957 return sidedata
1961 return sidedata
1958
1962
1959 def rawdata(self, nodeorrev, _df=None):
1963 def rawdata(self, nodeorrev, _df=None):
1960 """return an uncompressed raw data of a given node or revision number.
1964 """return an uncompressed raw data of a given node or revision number.
1961
1965
1962 _df - an existing file handle to read from. (internal-only)
1966 _df - an existing file handle to read from. (internal-only)
1963 """
1967 """
1964 return self._revisiondata(nodeorrev, _df, raw=True)[0]
1968 return self._revisiondata(nodeorrev, _df, raw=True)[0]
1965
1969
1966 def hash(self, text, p1, p2):
1970 def hash(self, text, p1, p2):
1967 """Compute a node hash.
1971 """Compute a node hash.
1968
1972
1969 Available as a function so that subclasses can replace the hash
1973 Available as a function so that subclasses can replace the hash
1970 as needed.
1974 as needed.
1971 """
1975 """
1972 return storageutil.hashrevisionsha1(text, p1, p2)
1976 return storageutil.hashrevisionsha1(text, p1, p2)
1973
1977
1974 def checkhash(self, text, node, p1=None, p2=None, rev=None):
1978 def checkhash(self, text, node, p1=None, p2=None, rev=None):
1975 """Check node hash integrity.
1979 """Check node hash integrity.
1976
1980
1977 Available as a function so that subclasses can extend hash mismatch
1981 Available as a function so that subclasses can extend hash mismatch
1978 behaviors as needed.
1982 behaviors as needed.
1979 """
1983 """
1980 try:
1984 try:
1981 if p1 is None and p2 is None:
1985 if p1 is None and p2 is None:
1982 p1, p2 = self.parents(node)
1986 p1, p2 = self.parents(node)
1983 if node != self.hash(text, p1, p2):
1987 if node != self.hash(text, p1, p2):
1984 # Clear the revision cache on hash failure. The revision cache
1988 # Clear the revision cache on hash failure. The revision cache
1985 # only stores the raw revision and clearing the cache does have
1989 # only stores the raw revision and clearing the cache does have
1986 # the side-effect that we won't have a cache hit when the raw
1990 # the side-effect that we won't have a cache hit when the raw
1987 # revision data is accessed. But this case should be rare and
1991 # revision data is accessed. But this case should be rare and
1988 # it is extra work to teach the cache about the hash
1992 # it is extra work to teach the cache about the hash
1989 # verification state.
1993 # verification state.
1990 if self._revisioncache and self._revisioncache[0] == node:
1994 if self._revisioncache and self._revisioncache[0] == node:
1991 self._revisioncache = None
1995 self._revisioncache = None
1992
1996
1993 revornode = rev
1997 revornode = rev
1994 if revornode is None:
1998 if revornode is None:
1995 revornode = templatefilters.short(hex(node))
1999 revornode = templatefilters.short(hex(node))
1996 raise error.RevlogError(
2000 raise error.RevlogError(
1997 _(b"integrity check failed on %s:%s")
2001 _(b"integrity check failed on %s:%s")
1998 % (self.indexfile, pycompat.bytestr(revornode))
2002 % (self.indexfile, pycompat.bytestr(revornode))
1999 )
2003 )
2000 except error.RevlogError:
2004 except error.RevlogError:
2001 if self._censorable and storageutil.iscensoredtext(text):
2005 if self._censorable and storageutil.iscensoredtext(text):
2002 raise error.CensoredNodeError(self.indexfile, node, text)
2006 raise error.CensoredNodeError(self.indexfile, node, text)
2003 raise
2007 raise
2004
2008
2005 def _enforceinlinesize(self, tr, fp=None):
2009 def _enforceinlinesize(self, tr, fp=None):
2006 """Check if the revlog is too big for inline and convert if so.
2010 """Check if the revlog is too big for inline and convert if so.
2007
2011
2008 This should be called after revisions are added to the revlog. If the
2012 This should be called after revisions are added to the revlog. If the
2009 revlog has grown too large to be an inline revlog, it will convert it
2013 revlog has grown too large to be an inline revlog, it will convert it
2010 to use multiple index and data files.
2014 to use multiple index and data files.
2011 """
2015 """
2012 tiprev = len(self) - 1
2016 tiprev = len(self) - 1
2013 if (
2017 if (
2014 not self._inline
2018 not self._inline
2015 or (self.start(tiprev) + self.length(tiprev)) < _maxinline
2019 or (self.start(tiprev) + self.length(tiprev)) < _maxinline
2016 ):
2020 ):
2017 return
2021 return
2018
2022
2019 troffset = tr.findoffset(self.indexfile)
2023 troffset = tr.findoffset(self.indexfile)
2020 if troffset is None:
2024 if troffset is None:
2021 raise error.RevlogError(
2025 raise error.RevlogError(
2022 _(b"%s not found in the transaction") % self.indexfile
2026 _(b"%s not found in the transaction") % self.indexfile
2023 )
2027 )
2024 trindex = 0
2028 trindex = 0
2025 tr.add(self.datafile, 0)
2029 tr.add(self.datafile, 0)
2026
2030
2027 if fp:
2031 if fp:
2028 fp.flush()
2032 fp.flush()
2029 fp.close()
2033 fp.close()
2030 # We can't use the cached file handle after close(). So prevent
2034 # We can't use the cached file handle after close(). So prevent
2031 # its usage.
2035 # its usage.
2032 self._writinghandles = None
2036 self._writinghandles = None
2033
2037
2034 with self._indexfp(b'r') as ifh, self._datafp(b'w') as dfh:
2038 with self._indexfp(b'r') as ifh, self._datafp(b'w') as dfh:
2035 for r in self:
2039 for r in self:
2036 dfh.write(self._getsegmentforrevs(r, r, df=ifh)[1])
2040 dfh.write(self._getsegmentforrevs(r, r, df=ifh)[1])
2037 if troffset <= self.start(r):
2041 if troffset <= self.start(r):
2038 trindex = r
2042 trindex = r
2039
2043
2040 with self._indexfp(b'w') as fp:
2044 with self._indexfp(b'w') as fp:
2041 self.version &= ~FLAG_INLINE_DATA
2045 self.version &= ~FLAG_INLINE_DATA
2042 self._inline = False
2046 self._inline = False
2043 for i in self:
2047 for i in self:
2044 e = self.index.entry_binary(i, self.version)
2048 e = self.index.entry_binary(i)
2049 if i == 0:
2050 header = self.index.pack_header(self.version)
2051 e = header + e
2045 fp.write(e)
2052 fp.write(e)
2046
2053
2047 # the temp file replace the real index when we exit the context
2054 # the temp file replace the real index when we exit the context
2048 # manager
2055 # manager
2049
2056
2050 tr.replace(self.indexfile, trindex * self.index.entry_size)
2057 tr.replace(self.indexfile, trindex * self.index.entry_size)
2051 nodemaputil.setup_persistent_nodemap(tr, self)
2058 nodemaputil.setup_persistent_nodemap(tr, self)
2052 self._chunkclear()
2059 self._chunkclear()
2053
2060
2054 def _nodeduplicatecallback(self, transaction, node):
2061 def _nodeduplicatecallback(self, transaction, node):
2055 """called when trying to add a node already stored."""
2062 """called when trying to add a node already stored."""
2056
2063
2057 def addrevision(
2064 def addrevision(
2058 self,
2065 self,
2059 text,
2066 text,
2060 transaction,
2067 transaction,
2061 link,
2068 link,
2062 p1,
2069 p1,
2063 p2,
2070 p2,
2064 cachedelta=None,
2071 cachedelta=None,
2065 node=None,
2072 node=None,
2066 flags=REVIDX_DEFAULT_FLAGS,
2073 flags=REVIDX_DEFAULT_FLAGS,
2067 deltacomputer=None,
2074 deltacomputer=None,
2068 sidedata=None,
2075 sidedata=None,
2069 ):
2076 ):
2070 """add a revision to the log
2077 """add a revision to the log
2071
2078
2072 text - the revision data to add
2079 text - the revision data to add
2073 transaction - the transaction object used for rollback
2080 transaction - the transaction object used for rollback
2074 link - the linkrev data to add
2081 link - the linkrev data to add
2075 p1, p2 - the parent nodeids of the revision
2082 p1, p2 - the parent nodeids of the revision
2076 cachedelta - an optional precomputed delta
2083 cachedelta - an optional precomputed delta
2077 node - nodeid of revision; typically node is not specified, and it is
2084 node - nodeid of revision; typically node is not specified, and it is
2078 computed by default as hash(text, p1, p2), however subclasses might
2085 computed by default as hash(text, p1, p2), however subclasses might
2079 use different hashing method (and override checkhash() in such case)
2086 use different hashing method (and override checkhash() in such case)
2080 flags - the known flags to set on the revision
2087 flags - the known flags to set on the revision
2081 deltacomputer - an optional deltacomputer instance shared between
2088 deltacomputer - an optional deltacomputer instance shared between
2082 multiple calls
2089 multiple calls
2083 """
2090 """
2084 if link == nullrev:
2091 if link == nullrev:
2085 raise error.RevlogError(
2092 raise error.RevlogError(
2086 _(b"attempted to add linkrev -1 to %s") % self.indexfile
2093 _(b"attempted to add linkrev -1 to %s") % self.indexfile
2087 )
2094 )
2088
2095
2089 if sidedata is None:
2096 if sidedata is None:
2090 sidedata = {}
2097 sidedata = {}
2091 elif not self.hassidedata:
2098 elif not self.hassidedata:
2092 raise error.ProgrammingError(
2099 raise error.ProgrammingError(
2093 _(b"trying to add sidedata to a revlog who don't support them")
2100 _(b"trying to add sidedata to a revlog who don't support them")
2094 )
2101 )
2095
2102
2096 if flags:
2103 if flags:
2097 node = node or self.hash(text, p1, p2)
2104 node = node or self.hash(text, p1, p2)
2098
2105
2099 rawtext, validatehash = flagutil.processflagswrite(self, text, flags)
2106 rawtext, validatehash = flagutil.processflagswrite(self, text, flags)
2100
2107
2101 # If the flag processor modifies the revision data, ignore any provided
2108 # If the flag processor modifies the revision data, ignore any provided
2102 # cachedelta.
2109 # cachedelta.
2103 if rawtext != text:
2110 if rawtext != text:
2104 cachedelta = None
2111 cachedelta = None
2105
2112
2106 if len(rawtext) > _maxentrysize:
2113 if len(rawtext) > _maxentrysize:
2107 raise error.RevlogError(
2114 raise error.RevlogError(
2108 _(
2115 _(
2109 b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
2116 b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
2110 )
2117 )
2111 % (self.indexfile, len(rawtext))
2118 % (self.indexfile, len(rawtext))
2112 )
2119 )
2113
2120
2114 node = node or self.hash(rawtext, p1, p2)
2121 node = node or self.hash(rawtext, p1, p2)
2115 rev = self.index.get_rev(node)
2122 rev = self.index.get_rev(node)
2116 if rev is not None:
2123 if rev is not None:
2117 return rev
2124 return rev
2118
2125
2119 if validatehash:
2126 if validatehash:
2120 self.checkhash(rawtext, node, p1=p1, p2=p2)
2127 self.checkhash(rawtext, node, p1=p1, p2=p2)
2121
2128
2122 return self.addrawrevision(
2129 return self.addrawrevision(
2123 rawtext,
2130 rawtext,
2124 transaction,
2131 transaction,
2125 link,
2132 link,
2126 p1,
2133 p1,
2127 p2,
2134 p2,
2128 node,
2135 node,
2129 flags,
2136 flags,
2130 cachedelta=cachedelta,
2137 cachedelta=cachedelta,
2131 deltacomputer=deltacomputer,
2138 deltacomputer=deltacomputer,
2132 sidedata=sidedata,
2139 sidedata=sidedata,
2133 )
2140 )
2134
2141
2135 def addrawrevision(
2142 def addrawrevision(
2136 self,
2143 self,
2137 rawtext,
2144 rawtext,
2138 transaction,
2145 transaction,
2139 link,
2146 link,
2140 p1,
2147 p1,
2141 p2,
2148 p2,
2142 node,
2149 node,
2143 flags,
2150 flags,
2144 cachedelta=None,
2151 cachedelta=None,
2145 deltacomputer=None,
2152 deltacomputer=None,
2146 sidedata=None,
2153 sidedata=None,
2147 ):
2154 ):
2148 """add a raw revision with known flags, node and parents
2155 """add a raw revision with known flags, node and parents
2149 useful when reusing a revision not stored in this revlog (ex: received
2156 useful when reusing a revision not stored in this revlog (ex: received
2150 over wire, or read from an external bundle).
2157 over wire, or read from an external bundle).
2151 """
2158 """
2152 dfh = None
2159 dfh = None
2153 if not self._inline:
2160 if not self._inline:
2154 dfh = self._datafp(b"a+")
2161 dfh = self._datafp(b"a+")
2155 ifh = self._indexfp(b"a+")
2162 ifh = self._indexfp(b"a+")
2156 try:
2163 try:
2157 return self._addrevision(
2164 return self._addrevision(
2158 node,
2165 node,
2159 rawtext,
2166 rawtext,
2160 transaction,
2167 transaction,
2161 link,
2168 link,
2162 p1,
2169 p1,
2163 p2,
2170 p2,
2164 flags,
2171 flags,
2165 cachedelta,
2172 cachedelta,
2166 ifh,
2173 ifh,
2167 dfh,
2174 dfh,
2168 deltacomputer=deltacomputer,
2175 deltacomputer=deltacomputer,
2169 sidedata=sidedata,
2176 sidedata=sidedata,
2170 )
2177 )
2171 finally:
2178 finally:
2172 if dfh:
2179 if dfh:
2173 dfh.close()
2180 dfh.close()
2174 ifh.close()
2181 ifh.close()
2175
2182
2176 def compress(self, data):
2183 def compress(self, data):
2177 """Generate a possibly-compressed representation of data."""
2184 """Generate a possibly-compressed representation of data."""
2178 if not data:
2185 if not data:
2179 return b'', data
2186 return b'', data
2180
2187
2181 compressed = self._compressor.compress(data)
2188 compressed = self._compressor.compress(data)
2182
2189
2183 if compressed:
2190 if compressed:
2184 # The revlog compressor added the header in the returned data.
2191 # The revlog compressor added the header in the returned data.
2185 return b'', compressed
2192 return b'', compressed
2186
2193
2187 if data[0:1] == b'\0':
2194 if data[0:1] == b'\0':
2188 return b'', data
2195 return b'', data
2189 return b'u', data
2196 return b'u', data
2190
2197
2191 def decompress(self, data):
2198 def decompress(self, data):
2192 """Decompress a revlog chunk.
2199 """Decompress a revlog chunk.
2193
2200
2194 The chunk is expected to begin with a header identifying the
2201 The chunk is expected to begin with a header identifying the
2195 format type so it can be routed to an appropriate decompressor.
2202 format type so it can be routed to an appropriate decompressor.
2196 """
2203 """
2197 if not data:
2204 if not data:
2198 return data
2205 return data
2199
2206
2200 # Revlogs are read much more frequently than they are written and many
2207 # Revlogs are read much more frequently than they are written and many
2201 # chunks only take microseconds to decompress, so performance is
2208 # chunks only take microseconds to decompress, so performance is
2202 # important here.
2209 # important here.
2203 #
2210 #
2204 # We can make a few assumptions about revlogs:
2211 # We can make a few assumptions about revlogs:
2205 #
2212 #
2206 # 1) the majority of chunks will be compressed (as opposed to inline
2213 # 1) the majority of chunks will be compressed (as opposed to inline
2207 # raw data).
2214 # raw data).
2208 # 2) decompressing *any* data will likely by at least 10x slower than
2215 # 2) decompressing *any* data will likely by at least 10x slower than
2209 # returning raw inline data.
2216 # returning raw inline data.
2210 # 3) we want to prioritize common and officially supported compression
2217 # 3) we want to prioritize common and officially supported compression
2211 # engines
2218 # engines
2212 #
2219 #
2213 # It follows that we want to optimize for "decompress compressed data
2220 # It follows that we want to optimize for "decompress compressed data
2214 # when encoded with common and officially supported compression engines"
2221 # when encoded with common and officially supported compression engines"
2215 # case over "raw data" and "data encoded by less common or non-official
2222 # case over "raw data" and "data encoded by less common or non-official
2216 # compression engines." That is why we have the inline lookup first
2223 # compression engines." That is why we have the inline lookup first
2217 # followed by the compengines lookup.
2224 # followed by the compengines lookup.
2218 #
2225 #
2219 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
2226 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
2220 # compressed chunks. And this matters for changelog and manifest reads.
2227 # compressed chunks. And this matters for changelog and manifest reads.
2221 t = data[0:1]
2228 t = data[0:1]
2222
2229
2223 if t == b'x':
2230 if t == b'x':
2224 try:
2231 try:
2225 return _zlibdecompress(data)
2232 return _zlibdecompress(data)
2226 except zlib.error as e:
2233 except zlib.error as e:
2227 raise error.RevlogError(
2234 raise error.RevlogError(
2228 _(b'revlog decompress error: %s')
2235 _(b'revlog decompress error: %s')
2229 % stringutil.forcebytestr(e)
2236 % stringutil.forcebytestr(e)
2230 )
2237 )
2231 # '\0' is more common than 'u' so it goes first.
2238 # '\0' is more common than 'u' so it goes first.
2232 elif t == b'\0':
2239 elif t == b'\0':
2233 return data
2240 return data
2234 elif t == b'u':
2241 elif t == b'u':
2235 return util.buffer(data, 1)
2242 return util.buffer(data, 1)
2236
2243
2237 try:
2244 try:
2238 compressor = self._decompressors[t]
2245 compressor = self._decompressors[t]
2239 except KeyError:
2246 except KeyError:
2240 try:
2247 try:
2241 engine = util.compengines.forrevlogheader(t)
2248 engine = util.compengines.forrevlogheader(t)
2242 compressor = engine.revlogcompressor(self._compengineopts)
2249 compressor = engine.revlogcompressor(self._compengineopts)
2243 self._decompressors[t] = compressor
2250 self._decompressors[t] = compressor
2244 except KeyError:
2251 except KeyError:
2245 raise error.RevlogError(
2252 raise error.RevlogError(
2246 _(b'unknown compression type %s') % binascii.hexlify(t)
2253 _(b'unknown compression type %s') % binascii.hexlify(t)
2247 )
2254 )
2248
2255
2249 return compressor.decompress(data)
2256 return compressor.decompress(data)
2250
2257
2251 def _addrevision(
2258 def _addrevision(
2252 self,
2259 self,
2253 node,
2260 node,
2254 rawtext,
2261 rawtext,
2255 transaction,
2262 transaction,
2256 link,
2263 link,
2257 p1,
2264 p1,
2258 p2,
2265 p2,
2259 flags,
2266 flags,
2260 cachedelta,
2267 cachedelta,
2261 ifh,
2268 ifh,
2262 dfh,
2269 dfh,
2263 alwayscache=False,
2270 alwayscache=False,
2264 deltacomputer=None,
2271 deltacomputer=None,
2265 sidedata=None,
2272 sidedata=None,
2266 ):
2273 ):
2267 """internal function to add revisions to the log
2274 """internal function to add revisions to the log
2268
2275
2269 see addrevision for argument descriptions.
2276 see addrevision for argument descriptions.
2270
2277
2271 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2278 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2272
2279
2273 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2280 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2274 be used.
2281 be used.
2275
2282
2276 invariants:
2283 invariants:
2277 - rawtext is optional (can be None); if not set, cachedelta must be set.
2284 - rawtext is optional (can be None); if not set, cachedelta must be set.
2278 if both are set, they must correspond to each other.
2285 if both are set, they must correspond to each other.
2279 """
2286 """
2280 if node == self.nullid:
2287 if node == self.nullid:
2281 raise error.RevlogError(
2288 raise error.RevlogError(
2282 _(b"%s: attempt to add null revision") % self.indexfile
2289 _(b"%s: attempt to add null revision") % self.indexfile
2283 )
2290 )
2284 if (
2291 if (
2285 node == self.nodeconstants.wdirid
2292 node == self.nodeconstants.wdirid
2286 or node in self.nodeconstants.wdirfilenodeids
2293 or node in self.nodeconstants.wdirfilenodeids
2287 ):
2294 ):
2288 raise error.RevlogError(
2295 raise error.RevlogError(
2289 _(b"%s: attempt to add wdir revision") % self.indexfile
2296 _(b"%s: attempt to add wdir revision") % self.indexfile
2290 )
2297 )
2291
2298
2292 if self._inline:
2299 if self._inline:
2293 fh = ifh
2300 fh = ifh
2294 else:
2301 else:
2295 fh = dfh
2302 fh = dfh
2296
2303
2297 btext = [rawtext]
2304 btext = [rawtext]
2298
2305
2299 curr = len(self)
2306 curr = len(self)
2300 prev = curr - 1
2307 prev = curr - 1
2301
2308
2302 offset = self._get_data_offset(prev)
2309 offset = self._get_data_offset(prev)
2303
2310
2304 if self._concurrencychecker:
2311 if self._concurrencychecker:
2305 if self._inline:
2312 if self._inline:
2306 # offset is "as if" it were in the .d file, so we need to add on
2313 # offset is "as if" it were in the .d file, so we need to add on
2307 # the size of the entry metadata.
2314 # the size of the entry metadata.
2308 self._concurrencychecker(
2315 self._concurrencychecker(
2309 ifh, self.indexfile, offset + curr * self.index.entry_size
2316 ifh, self.indexfile, offset + curr * self.index.entry_size
2310 )
2317 )
2311 else:
2318 else:
2312 # Entries in the .i are a consistent size.
2319 # Entries in the .i are a consistent size.
2313 self._concurrencychecker(
2320 self._concurrencychecker(
2314 ifh, self.indexfile, curr * self.index.entry_size
2321 ifh, self.indexfile, curr * self.index.entry_size
2315 )
2322 )
2316 self._concurrencychecker(dfh, self.datafile, offset)
2323 self._concurrencychecker(dfh, self.datafile, offset)
2317
2324
2318 p1r, p2r = self.rev(p1), self.rev(p2)
2325 p1r, p2r = self.rev(p1), self.rev(p2)
2319
2326
2320 # full versions are inserted when the needed deltas
2327 # full versions are inserted when the needed deltas
2321 # become comparable to the uncompressed text
2328 # become comparable to the uncompressed text
2322 if rawtext is None:
2329 if rawtext is None:
2323 # need rawtext size, before changed by flag processors, which is
2330 # need rawtext size, before changed by flag processors, which is
2324 # the non-raw size. use revlog explicitly to avoid filelog's extra
2331 # the non-raw size. use revlog explicitly to avoid filelog's extra
2325 # logic that might remove metadata size.
2332 # logic that might remove metadata size.
2326 textlen = mdiff.patchedsize(
2333 textlen = mdiff.patchedsize(
2327 revlog.size(self, cachedelta[0]), cachedelta[1]
2334 revlog.size(self, cachedelta[0]), cachedelta[1]
2328 )
2335 )
2329 else:
2336 else:
2330 textlen = len(rawtext)
2337 textlen = len(rawtext)
2331
2338
2332 if deltacomputer is None:
2339 if deltacomputer is None:
2333 deltacomputer = deltautil.deltacomputer(self)
2340 deltacomputer = deltautil.deltacomputer(self)
2334
2341
2335 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
2342 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
2336
2343
2337 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2344 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2338
2345
2339 if sidedata:
2346 if sidedata:
2340 serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
2347 serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
2341 sidedata_offset = offset + deltainfo.deltalen
2348 sidedata_offset = offset + deltainfo.deltalen
2342 else:
2349 else:
2343 serialized_sidedata = b""
2350 serialized_sidedata = b""
2344 # Don't store the offset if the sidedata is empty, that way
2351 # Don't store the offset if the sidedata is empty, that way
2345 # we can easily detect empty sidedata and they will be no different
2352 # we can easily detect empty sidedata and they will be no different
2346 # than ones we manually add.
2353 # than ones we manually add.
2347 sidedata_offset = 0
2354 sidedata_offset = 0
2348
2355
2349 e = (
2356 e = (
2350 offset_type(offset, flags),
2357 offset_type(offset, flags),
2351 deltainfo.deltalen,
2358 deltainfo.deltalen,
2352 textlen,
2359 textlen,
2353 deltainfo.base,
2360 deltainfo.base,
2354 link,
2361 link,
2355 p1r,
2362 p1r,
2356 p2r,
2363 p2r,
2357 node,
2364 node,
2358 sidedata_offset,
2365 sidedata_offset,
2359 len(serialized_sidedata),
2366 len(serialized_sidedata),
2360 )
2367 )
2361
2368
2362 if self.version & 0xFFFF != REVLOGV2:
2369 if self.version & 0xFFFF != REVLOGV2:
2363 e = e[:8]
2370 e = e[:8]
2364
2371
2365 self.index.append(e)
2372 self.index.append(e)
2366 entry = self.index.entry_binary(curr, self.version)
2373 entry = self.index.entry_binary(curr)
2374 if curr == 0:
2375 header = self.index.pack_header(self.version)
2376 entry = header + entry
2367 self._writeentry(
2377 self._writeentry(
2368 transaction,
2378 transaction,
2369 ifh,
2379 ifh,
2370 dfh,
2380 dfh,
2371 entry,
2381 entry,
2372 deltainfo.data,
2382 deltainfo.data,
2373 link,
2383 link,
2374 offset,
2384 offset,
2375 serialized_sidedata,
2385 serialized_sidedata,
2376 )
2386 )
2377
2387
2378 rawtext = btext[0]
2388 rawtext = btext[0]
2379
2389
2380 if alwayscache and rawtext is None:
2390 if alwayscache and rawtext is None:
2381 rawtext = deltacomputer.buildtext(revinfo, fh)
2391 rawtext = deltacomputer.buildtext(revinfo, fh)
2382
2392
2383 if type(rawtext) == bytes: # only accept immutable objects
2393 if type(rawtext) == bytes: # only accept immutable objects
2384 self._revisioncache = (node, curr, rawtext)
2394 self._revisioncache = (node, curr, rawtext)
2385 self._chainbasecache[curr] = deltainfo.chainbase
2395 self._chainbasecache[curr] = deltainfo.chainbase
2386 return curr
2396 return curr
2387
2397
2388 def _get_data_offset(self, prev):
2398 def _get_data_offset(self, prev):
2389 """Returns the current offset in the (in-transaction) data file.
2399 """Returns the current offset in the (in-transaction) data file.
2390 Versions < 2 of the revlog can get this 0(1), revlog v2 needs a docket
2400 Versions < 2 of the revlog can get this 0(1), revlog v2 needs a docket
2391 file to store that information: since sidedata can be rewritten to the
2401 file to store that information: since sidedata can be rewritten to the
2392 end of the data file within a transaction, you can have cases where, for
2402 end of the data file within a transaction, you can have cases where, for
2393 example, rev `n` does not have sidedata while rev `n - 1` does, leading
2403 example, rev `n` does not have sidedata while rev `n - 1` does, leading
2394 to `n - 1`'s sidedata being written after `n`'s data.
2404 to `n - 1`'s sidedata being written after `n`'s data.
2395
2405
2396 TODO cache this in a docket file before getting out of experimental."""
2406 TODO cache this in a docket file before getting out of experimental."""
2397 if self.version & 0xFFFF != REVLOGV2:
2407 if self.version & 0xFFFF != REVLOGV2:
2398 return self.end(prev)
2408 return self.end(prev)
2399
2409
2400 offset = 0
2410 offset = 0
2401 for rev, entry in enumerate(self.index):
2411 for rev, entry in enumerate(self.index):
2402 sidedata_end = entry[8] + entry[9]
2412 sidedata_end = entry[8] + entry[9]
2403 # Sidedata for a previous rev has potentially been written after
2413 # Sidedata for a previous rev has potentially been written after
2404 # this rev's end, so take the max.
2414 # this rev's end, so take the max.
2405 offset = max(self.end(rev), offset, sidedata_end)
2415 offset = max(self.end(rev), offset, sidedata_end)
2406 return offset
2416 return offset
2407
2417
2408 def _writeentry(
2418 def _writeentry(
2409 self, transaction, ifh, dfh, entry, data, link, offset, sidedata
2419 self, transaction, ifh, dfh, entry, data, link, offset, sidedata
2410 ):
2420 ):
2411 # Files opened in a+ mode have inconsistent behavior on various
2421 # Files opened in a+ mode have inconsistent behavior on various
2412 # platforms. Windows requires that a file positioning call be made
2422 # platforms. Windows requires that a file positioning call be made
2413 # when the file handle transitions between reads and writes. See
2423 # when the file handle transitions between reads and writes. See
2414 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2424 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2415 # platforms, Python or the platform itself can be buggy. Some versions
2425 # platforms, Python or the platform itself can be buggy. Some versions
2416 # of Solaris have been observed to not append at the end of the file
2426 # of Solaris have been observed to not append at the end of the file
2417 # if the file was seeked to before the end. See issue4943 for more.
2427 # if the file was seeked to before the end. See issue4943 for more.
2418 #
2428 #
2419 # We work around this issue by inserting a seek() before writing.
2429 # We work around this issue by inserting a seek() before writing.
2420 # Note: This is likely not necessary on Python 3. However, because
2430 # Note: This is likely not necessary on Python 3. However, because
2421 # the file handle is reused for reads and may be seeked there, we need
2431 # the file handle is reused for reads and may be seeked there, we need
2422 # to be careful before changing this.
2432 # to be careful before changing this.
2423 ifh.seek(0, os.SEEK_END)
2433 ifh.seek(0, os.SEEK_END)
2424 if dfh:
2434 if dfh:
2425 dfh.seek(0, os.SEEK_END)
2435 dfh.seek(0, os.SEEK_END)
2426
2436
2427 curr = len(self) - 1
2437 curr = len(self) - 1
2428 if not self._inline:
2438 if not self._inline:
2429 transaction.add(self.datafile, offset)
2439 transaction.add(self.datafile, offset)
2430 transaction.add(self.indexfile, curr * len(entry))
2440 transaction.add(self.indexfile, curr * len(entry))
2431 if data[0]:
2441 if data[0]:
2432 dfh.write(data[0])
2442 dfh.write(data[0])
2433 dfh.write(data[1])
2443 dfh.write(data[1])
2434 if sidedata:
2444 if sidedata:
2435 dfh.write(sidedata)
2445 dfh.write(sidedata)
2436 ifh.write(entry)
2446 ifh.write(entry)
2437 else:
2447 else:
2438 offset += curr * self.index.entry_size
2448 offset += curr * self.index.entry_size
2439 transaction.add(self.indexfile, offset)
2449 transaction.add(self.indexfile, offset)
2440 ifh.write(entry)
2450 ifh.write(entry)
2441 ifh.write(data[0])
2451 ifh.write(data[0])
2442 ifh.write(data[1])
2452 ifh.write(data[1])
2443 if sidedata:
2453 if sidedata:
2444 ifh.write(sidedata)
2454 ifh.write(sidedata)
2445 self._enforceinlinesize(transaction, ifh)
2455 self._enforceinlinesize(transaction, ifh)
2446 nodemaputil.setup_persistent_nodemap(transaction, self)
2456 nodemaputil.setup_persistent_nodemap(transaction, self)
2447
2457
2448 def addgroup(
2458 def addgroup(
2449 self,
2459 self,
2450 deltas,
2460 deltas,
2451 linkmapper,
2461 linkmapper,
2452 transaction,
2462 transaction,
2453 alwayscache=False,
2463 alwayscache=False,
2454 addrevisioncb=None,
2464 addrevisioncb=None,
2455 duplicaterevisioncb=None,
2465 duplicaterevisioncb=None,
2456 ):
2466 ):
2457 """
2467 """
2458 add a delta group
2468 add a delta group
2459
2469
2460 given a set of deltas, add them to the revision log. the
2470 given a set of deltas, add them to the revision log. the
2461 first delta is against its parent, which should be in our
2471 first delta is against its parent, which should be in our
2462 log, the rest are against the previous delta.
2472 log, the rest are against the previous delta.
2463
2473
2464 If ``addrevisioncb`` is defined, it will be called with arguments of
2474 If ``addrevisioncb`` is defined, it will be called with arguments of
2465 this revlog and the node that was added.
2475 this revlog and the node that was added.
2466 """
2476 """
2467
2477
2468 if self._writinghandles:
2478 if self._writinghandles:
2469 raise error.ProgrammingError(b'cannot nest addgroup() calls')
2479 raise error.ProgrammingError(b'cannot nest addgroup() calls')
2470
2480
2471 r = len(self)
2481 r = len(self)
2472 end = 0
2482 end = 0
2473 if r:
2483 if r:
2474 end = self.end(r - 1)
2484 end = self.end(r - 1)
2475 ifh = self._indexfp(b"a+")
2485 ifh = self._indexfp(b"a+")
2476 isize = r * self.index.entry_size
2486 isize = r * self.index.entry_size
2477 if self._inline:
2487 if self._inline:
2478 transaction.add(self.indexfile, end + isize)
2488 transaction.add(self.indexfile, end + isize)
2479 dfh = None
2489 dfh = None
2480 else:
2490 else:
2481 transaction.add(self.indexfile, isize)
2491 transaction.add(self.indexfile, isize)
2482 transaction.add(self.datafile, end)
2492 transaction.add(self.datafile, end)
2483 dfh = self._datafp(b"a+")
2493 dfh = self._datafp(b"a+")
2484
2494
2485 def flush():
2495 def flush():
2486 if dfh:
2496 if dfh:
2487 dfh.flush()
2497 dfh.flush()
2488 ifh.flush()
2498 ifh.flush()
2489
2499
2490 self._writinghandles = (ifh, dfh)
2500 self._writinghandles = (ifh, dfh)
2491 empty = True
2501 empty = True
2492
2502
2493 try:
2503 try:
2494 deltacomputer = deltautil.deltacomputer(self)
2504 deltacomputer = deltautil.deltacomputer(self)
2495 # loop through our set of deltas
2505 # loop through our set of deltas
2496 for data in deltas:
2506 for data in deltas:
2497 node, p1, p2, linknode, deltabase, delta, flags, sidedata = data
2507 node, p1, p2, linknode, deltabase, delta, flags, sidedata = data
2498 link = linkmapper(linknode)
2508 link = linkmapper(linknode)
2499 flags = flags or REVIDX_DEFAULT_FLAGS
2509 flags = flags or REVIDX_DEFAULT_FLAGS
2500
2510
2501 rev = self.index.get_rev(node)
2511 rev = self.index.get_rev(node)
2502 if rev is not None:
2512 if rev is not None:
2503 # this can happen if two branches make the same change
2513 # this can happen if two branches make the same change
2504 self._nodeduplicatecallback(transaction, rev)
2514 self._nodeduplicatecallback(transaction, rev)
2505 if duplicaterevisioncb:
2515 if duplicaterevisioncb:
2506 duplicaterevisioncb(self, rev)
2516 duplicaterevisioncb(self, rev)
2507 empty = False
2517 empty = False
2508 continue
2518 continue
2509
2519
2510 for p in (p1, p2):
2520 for p in (p1, p2):
2511 if not self.index.has_node(p):
2521 if not self.index.has_node(p):
2512 raise error.LookupError(
2522 raise error.LookupError(
2513 p, self.indexfile, _(b'unknown parent')
2523 p, self.indexfile, _(b'unknown parent')
2514 )
2524 )
2515
2525
2516 if not self.index.has_node(deltabase):
2526 if not self.index.has_node(deltabase):
2517 raise error.LookupError(
2527 raise error.LookupError(
2518 deltabase, self.indexfile, _(b'unknown delta base')
2528 deltabase, self.indexfile, _(b'unknown delta base')
2519 )
2529 )
2520
2530
2521 baserev = self.rev(deltabase)
2531 baserev = self.rev(deltabase)
2522
2532
2523 if baserev != nullrev and self.iscensored(baserev):
2533 if baserev != nullrev and self.iscensored(baserev):
2524 # if base is censored, delta must be full replacement in a
2534 # if base is censored, delta must be full replacement in a
2525 # single patch operation
2535 # single patch operation
2526 hlen = struct.calcsize(b">lll")
2536 hlen = struct.calcsize(b">lll")
2527 oldlen = self.rawsize(baserev)
2537 oldlen = self.rawsize(baserev)
2528 newlen = len(delta) - hlen
2538 newlen = len(delta) - hlen
2529 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2539 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2530 raise error.CensoredBaseError(
2540 raise error.CensoredBaseError(
2531 self.indexfile, self.node(baserev)
2541 self.indexfile, self.node(baserev)
2532 )
2542 )
2533
2543
2534 if not flags and self._peek_iscensored(baserev, delta, flush):
2544 if not flags and self._peek_iscensored(baserev, delta, flush):
2535 flags |= REVIDX_ISCENSORED
2545 flags |= REVIDX_ISCENSORED
2536
2546
2537 # We assume consumers of addrevisioncb will want to retrieve
2547 # We assume consumers of addrevisioncb will want to retrieve
2538 # the added revision, which will require a call to
2548 # the added revision, which will require a call to
2539 # revision(). revision() will fast path if there is a cache
2549 # revision(). revision() will fast path if there is a cache
2540 # hit. So, we tell _addrevision() to always cache in this case.
2550 # hit. So, we tell _addrevision() to always cache in this case.
2541 # We're only using addgroup() in the context of changegroup
2551 # We're only using addgroup() in the context of changegroup
2542 # generation so the revision data can always be handled as raw
2552 # generation so the revision data can always be handled as raw
2543 # by the flagprocessor.
2553 # by the flagprocessor.
2544 rev = self._addrevision(
2554 rev = self._addrevision(
2545 node,
2555 node,
2546 None,
2556 None,
2547 transaction,
2557 transaction,
2548 link,
2558 link,
2549 p1,
2559 p1,
2550 p2,
2560 p2,
2551 flags,
2561 flags,
2552 (baserev, delta),
2562 (baserev, delta),
2553 ifh,
2563 ifh,
2554 dfh,
2564 dfh,
2555 alwayscache=alwayscache,
2565 alwayscache=alwayscache,
2556 deltacomputer=deltacomputer,
2566 deltacomputer=deltacomputer,
2557 sidedata=sidedata,
2567 sidedata=sidedata,
2558 )
2568 )
2559
2569
2560 if addrevisioncb:
2570 if addrevisioncb:
2561 addrevisioncb(self, rev)
2571 addrevisioncb(self, rev)
2562 empty = False
2572 empty = False
2563
2573
2564 if not dfh and not self._inline:
2574 if not dfh and not self._inline:
2565 # addrevision switched from inline to conventional
2575 # addrevision switched from inline to conventional
2566 # reopen the index
2576 # reopen the index
2567 ifh.close()
2577 ifh.close()
2568 dfh = self._datafp(b"a+")
2578 dfh = self._datafp(b"a+")
2569 ifh = self._indexfp(b"a+")
2579 ifh = self._indexfp(b"a+")
2570 self._writinghandles = (ifh, dfh)
2580 self._writinghandles = (ifh, dfh)
2571 finally:
2581 finally:
2572 self._writinghandles = None
2582 self._writinghandles = None
2573
2583
2574 if dfh:
2584 if dfh:
2575 dfh.close()
2585 dfh.close()
2576 ifh.close()
2586 ifh.close()
2577 return not empty
2587 return not empty
2578
2588
2579 def iscensored(self, rev):
2589 def iscensored(self, rev):
2580 """Check if a file revision is censored."""
2590 """Check if a file revision is censored."""
2581 if not self._censorable:
2591 if not self._censorable:
2582 return False
2592 return False
2583
2593
2584 return self.flags(rev) & REVIDX_ISCENSORED
2594 return self.flags(rev) & REVIDX_ISCENSORED
2585
2595
2586 def _peek_iscensored(self, baserev, delta, flush):
2596 def _peek_iscensored(self, baserev, delta, flush):
2587 """Quickly check if a delta produces a censored revision."""
2597 """Quickly check if a delta produces a censored revision."""
2588 if not self._censorable:
2598 if not self._censorable:
2589 return False
2599 return False
2590
2600
2591 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
2601 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
2592
2602
2593 def getstrippoint(self, minlink):
2603 def getstrippoint(self, minlink):
2594 """find the minimum rev that must be stripped to strip the linkrev
2604 """find the minimum rev that must be stripped to strip the linkrev
2595
2605
2596 Returns a tuple containing the minimum rev and a set of all revs that
2606 Returns a tuple containing the minimum rev and a set of all revs that
2597 have linkrevs that will be broken by this strip.
2607 have linkrevs that will be broken by this strip.
2598 """
2608 """
2599 return storageutil.resolvestripinfo(
2609 return storageutil.resolvestripinfo(
2600 minlink,
2610 minlink,
2601 len(self) - 1,
2611 len(self) - 1,
2602 self.headrevs(),
2612 self.headrevs(),
2603 self.linkrev,
2613 self.linkrev,
2604 self.parentrevs,
2614 self.parentrevs,
2605 )
2615 )
2606
2616
2607 def strip(self, minlink, transaction):
2617 def strip(self, minlink, transaction):
2608 """truncate the revlog on the first revision with a linkrev >= minlink
2618 """truncate the revlog on the first revision with a linkrev >= minlink
2609
2619
2610 This function is called when we're stripping revision minlink and
2620 This function is called when we're stripping revision minlink and
2611 its descendants from the repository.
2621 its descendants from the repository.
2612
2622
2613 We have to remove all revisions with linkrev >= minlink, because
2623 We have to remove all revisions with linkrev >= minlink, because
2614 the equivalent changelog revisions will be renumbered after the
2624 the equivalent changelog revisions will be renumbered after the
2615 strip.
2625 strip.
2616
2626
2617 So we truncate the revlog on the first of these revisions, and
2627 So we truncate the revlog on the first of these revisions, and
2618 trust that the caller has saved the revisions that shouldn't be
2628 trust that the caller has saved the revisions that shouldn't be
2619 removed and that it'll re-add them after this truncation.
2629 removed and that it'll re-add them after this truncation.
2620 """
2630 """
2621 if len(self) == 0:
2631 if len(self) == 0:
2622 return
2632 return
2623
2633
2624 rev, _ = self.getstrippoint(minlink)
2634 rev, _ = self.getstrippoint(minlink)
2625 if rev == len(self):
2635 if rev == len(self):
2626 return
2636 return
2627
2637
2628 # first truncate the files on disk
2638 # first truncate the files on disk
2629 end = self.start(rev)
2639 end = self.start(rev)
2630 if not self._inline:
2640 if not self._inline:
2631 transaction.add(self.datafile, end)
2641 transaction.add(self.datafile, end)
2632 end = rev * self.index.entry_size
2642 end = rev * self.index.entry_size
2633 else:
2643 else:
2634 end += rev * self.index.entry_size
2644 end += rev * self.index.entry_size
2635
2645
2636 transaction.add(self.indexfile, end)
2646 transaction.add(self.indexfile, end)
2637
2647
2638 # then reset internal state in memory to forget those revisions
2648 # then reset internal state in memory to forget those revisions
2639 self._revisioncache = None
2649 self._revisioncache = None
2640 self._chaininfocache = util.lrucachedict(500)
2650 self._chaininfocache = util.lrucachedict(500)
2641 self._chunkclear()
2651 self._chunkclear()
2642
2652
2643 del self.index[rev:-1]
2653 del self.index[rev:-1]
2644
2654
2645 def checksize(self):
2655 def checksize(self):
2646 """Check size of index and data files
2656 """Check size of index and data files
2647
2657
2648 return a (dd, di) tuple.
2658 return a (dd, di) tuple.
2649 - dd: extra bytes for the "data" file
2659 - dd: extra bytes for the "data" file
2650 - di: extra bytes for the "index" file
2660 - di: extra bytes for the "index" file
2651
2661
2652 A healthy revlog will return (0, 0).
2662 A healthy revlog will return (0, 0).
2653 """
2663 """
2654 expected = 0
2664 expected = 0
2655 if len(self):
2665 if len(self):
2656 expected = max(0, self.end(len(self) - 1))
2666 expected = max(0, self.end(len(self) - 1))
2657
2667
2658 try:
2668 try:
2659 with self._datafp() as f:
2669 with self._datafp() as f:
2660 f.seek(0, io.SEEK_END)
2670 f.seek(0, io.SEEK_END)
2661 actual = f.tell()
2671 actual = f.tell()
2662 dd = actual - expected
2672 dd = actual - expected
2663 except IOError as inst:
2673 except IOError as inst:
2664 if inst.errno != errno.ENOENT:
2674 if inst.errno != errno.ENOENT:
2665 raise
2675 raise
2666 dd = 0
2676 dd = 0
2667
2677
2668 try:
2678 try:
2669 f = self.opener(self.indexfile)
2679 f = self.opener(self.indexfile)
2670 f.seek(0, io.SEEK_END)
2680 f.seek(0, io.SEEK_END)
2671 actual = f.tell()
2681 actual = f.tell()
2672 f.close()
2682 f.close()
2673 s = self.index.entry_size
2683 s = self.index.entry_size
2674 i = max(0, actual // s)
2684 i = max(0, actual // s)
2675 di = actual - (i * s)
2685 di = actual - (i * s)
2676 if self._inline:
2686 if self._inline:
2677 databytes = 0
2687 databytes = 0
2678 for r in self:
2688 for r in self:
2679 databytes += max(0, self.length(r))
2689 databytes += max(0, self.length(r))
2680 dd = 0
2690 dd = 0
2681 di = actual - len(self) * s - databytes
2691 di = actual - len(self) * s - databytes
2682 except IOError as inst:
2692 except IOError as inst:
2683 if inst.errno != errno.ENOENT:
2693 if inst.errno != errno.ENOENT:
2684 raise
2694 raise
2685 di = 0
2695 di = 0
2686
2696
2687 return (dd, di)
2697 return (dd, di)
2688
2698
2689 def files(self):
2699 def files(self):
2690 res = [self.indexfile]
2700 res = [self.indexfile]
2691 if not self._inline:
2701 if not self._inline:
2692 res.append(self.datafile)
2702 res.append(self.datafile)
2693 return res
2703 return res
2694
2704
2695 def emitrevisions(
2705 def emitrevisions(
2696 self,
2706 self,
2697 nodes,
2707 nodes,
2698 nodesorder=None,
2708 nodesorder=None,
2699 revisiondata=False,
2709 revisiondata=False,
2700 assumehaveparentrevisions=False,
2710 assumehaveparentrevisions=False,
2701 deltamode=repository.CG_DELTAMODE_STD,
2711 deltamode=repository.CG_DELTAMODE_STD,
2702 sidedata_helpers=None,
2712 sidedata_helpers=None,
2703 ):
2713 ):
2704 if nodesorder not in (b'nodes', b'storage', b'linear', None):
2714 if nodesorder not in (b'nodes', b'storage', b'linear', None):
2705 raise error.ProgrammingError(
2715 raise error.ProgrammingError(
2706 b'unhandled value for nodesorder: %s' % nodesorder
2716 b'unhandled value for nodesorder: %s' % nodesorder
2707 )
2717 )
2708
2718
2709 if nodesorder is None and not self._generaldelta:
2719 if nodesorder is None and not self._generaldelta:
2710 nodesorder = b'storage'
2720 nodesorder = b'storage'
2711
2721
2712 if (
2722 if (
2713 not self._storedeltachains
2723 not self._storedeltachains
2714 and deltamode != repository.CG_DELTAMODE_PREV
2724 and deltamode != repository.CG_DELTAMODE_PREV
2715 ):
2725 ):
2716 deltamode = repository.CG_DELTAMODE_FULL
2726 deltamode = repository.CG_DELTAMODE_FULL
2717
2727
2718 return storageutil.emitrevisions(
2728 return storageutil.emitrevisions(
2719 self,
2729 self,
2720 nodes,
2730 nodes,
2721 nodesorder,
2731 nodesorder,
2722 revlogrevisiondelta,
2732 revlogrevisiondelta,
2723 deltaparentfn=self.deltaparent,
2733 deltaparentfn=self.deltaparent,
2724 candeltafn=self.candelta,
2734 candeltafn=self.candelta,
2725 rawsizefn=self.rawsize,
2735 rawsizefn=self.rawsize,
2726 revdifffn=self.revdiff,
2736 revdifffn=self.revdiff,
2727 flagsfn=self.flags,
2737 flagsfn=self.flags,
2728 deltamode=deltamode,
2738 deltamode=deltamode,
2729 revisiondata=revisiondata,
2739 revisiondata=revisiondata,
2730 assumehaveparentrevisions=assumehaveparentrevisions,
2740 assumehaveparentrevisions=assumehaveparentrevisions,
2731 sidedata_helpers=sidedata_helpers,
2741 sidedata_helpers=sidedata_helpers,
2732 )
2742 )
2733
2743
2734 DELTAREUSEALWAYS = b'always'
2744 DELTAREUSEALWAYS = b'always'
2735 DELTAREUSESAMEREVS = b'samerevs'
2745 DELTAREUSESAMEREVS = b'samerevs'
2736 DELTAREUSENEVER = b'never'
2746 DELTAREUSENEVER = b'never'
2737
2747
2738 DELTAREUSEFULLADD = b'fulladd'
2748 DELTAREUSEFULLADD = b'fulladd'
2739
2749
2740 DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}
2750 DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}
2741
2751
2742 def clone(
2752 def clone(
2743 self,
2753 self,
2744 tr,
2754 tr,
2745 destrevlog,
2755 destrevlog,
2746 addrevisioncb=None,
2756 addrevisioncb=None,
2747 deltareuse=DELTAREUSESAMEREVS,
2757 deltareuse=DELTAREUSESAMEREVS,
2748 forcedeltabothparents=None,
2758 forcedeltabothparents=None,
2749 sidedatacompanion=None,
2759 sidedatacompanion=None,
2750 ):
2760 ):
2751 """Copy this revlog to another, possibly with format changes.
2761 """Copy this revlog to another, possibly with format changes.
2752
2762
2753 The destination revlog will contain the same revisions and nodes.
2763 The destination revlog will contain the same revisions and nodes.
2754 However, it may not be bit-for-bit identical due to e.g. delta encoding
2764 However, it may not be bit-for-bit identical due to e.g. delta encoding
2755 differences.
2765 differences.
2756
2766
2757 The ``deltareuse`` argument control how deltas from the existing revlog
2767 The ``deltareuse`` argument control how deltas from the existing revlog
2758 are preserved in the destination revlog. The argument can have the
2768 are preserved in the destination revlog. The argument can have the
2759 following values:
2769 following values:
2760
2770
2761 DELTAREUSEALWAYS
2771 DELTAREUSEALWAYS
2762 Deltas will always be reused (if possible), even if the destination
2772 Deltas will always be reused (if possible), even if the destination
2763 revlog would not select the same revisions for the delta. This is the
2773 revlog would not select the same revisions for the delta. This is the
2764 fastest mode of operation.
2774 fastest mode of operation.
2765 DELTAREUSESAMEREVS
2775 DELTAREUSESAMEREVS
2766 Deltas will be reused if the destination revlog would pick the same
2776 Deltas will be reused if the destination revlog would pick the same
2767 revisions for the delta. This mode strikes a balance between speed
2777 revisions for the delta. This mode strikes a balance between speed
2768 and optimization.
2778 and optimization.
2769 DELTAREUSENEVER
2779 DELTAREUSENEVER
2770 Deltas will never be reused. This is the slowest mode of execution.
2780 Deltas will never be reused. This is the slowest mode of execution.
2771 This mode can be used to recompute deltas (e.g. if the diff/delta
2781 This mode can be used to recompute deltas (e.g. if the diff/delta
2772 algorithm changes).
2782 algorithm changes).
2773 DELTAREUSEFULLADD
2783 DELTAREUSEFULLADD
2774 Revision will be re-added as if their were new content. This is
2784 Revision will be re-added as if their were new content. This is
2775 slower than DELTAREUSEALWAYS but allow more mechanism to kicks in.
2785 slower than DELTAREUSEALWAYS but allow more mechanism to kicks in.
2776 eg: large file detection and handling.
2786 eg: large file detection and handling.
2777
2787
2778 Delta computation can be slow, so the choice of delta reuse policy can
2788 Delta computation can be slow, so the choice of delta reuse policy can
2779 significantly affect run time.
2789 significantly affect run time.
2780
2790
2781 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2791 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2782 two extremes. Deltas will be reused if they are appropriate. But if the
2792 two extremes. Deltas will be reused if they are appropriate. But if the
2783 delta could choose a better revision, it will do so. This means if you
2793 delta could choose a better revision, it will do so. This means if you
2784 are converting a non-generaldelta revlog to a generaldelta revlog,
2794 are converting a non-generaldelta revlog to a generaldelta revlog,
2785 deltas will be recomputed if the delta's parent isn't a parent of the
2795 deltas will be recomputed if the delta's parent isn't a parent of the
2786 revision.
2796 revision.
2787
2797
2788 In addition to the delta policy, the ``forcedeltabothparents``
2798 In addition to the delta policy, the ``forcedeltabothparents``
2789 argument controls whether to force compute deltas against both parents
2799 argument controls whether to force compute deltas against both parents
2790 for merges. By default, the current default is used.
2800 for merges. By default, the current default is used.
2791
2801
2792 If not None, the `sidedatacompanion` is callable that accept two
2802 If not None, the `sidedatacompanion` is callable that accept two
2793 arguments:
2803 arguments:
2794
2804
2795 (srcrevlog, rev)
2805 (srcrevlog, rev)
2796
2806
2797 and return a quintet that control changes to sidedata content from the
2807 and return a quintet that control changes to sidedata content from the
2798 old revision to the new clone result:
2808 old revision to the new clone result:
2799
2809
2800 (dropall, filterout, update, new_flags, dropped_flags)
2810 (dropall, filterout, update, new_flags, dropped_flags)
2801
2811
2802 * if `dropall` is True, all sidedata should be dropped
2812 * if `dropall` is True, all sidedata should be dropped
2803 * `filterout` is a set of sidedata keys that should be dropped
2813 * `filterout` is a set of sidedata keys that should be dropped
2804 * `update` is a mapping of additionnal/new key -> value
2814 * `update` is a mapping of additionnal/new key -> value
2805 * new_flags is a bitfields of new flags that the revision should get
2815 * new_flags is a bitfields of new flags that the revision should get
2806 * dropped_flags is a bitfields of new flags that the revision shoudl not longer have
2816 * dropped_flags is a bitfields of new flags that the revision shoudl not longer have
2807 """
2817 """
2808 if deltareuse not in self.DELTAREUSEALL:
2818 if deltareuse not in self.DELTAREUSEALL:
2809 raise ValueError(
2819 raise ValueError(
2810 _(b'value for deltareuse invalid: %s') % deltareuse
2820 _(b'value for deltareuse invalid: %s') % deltareuse
2811 )
2821 )
2812
2822
2813 if len(destrevlog):
2823 if len(destrevlog):
2814 raise ValueError(_(b'destination revlog is not empty'))
2824 raise ValueError(_(b'destination revlog is not empty'))
2815
2825
2816 if getattr(self, 'filteredrevs', None):
2826 if getattr(self, 'filteredrevs', None):
2817 raise ValueError(_(b'source revlog has filtered revisions'))
2827 raise ValueError(_(b'source revlog has filtered revisions'))
2818 if getattr(destrevlog, 'filteredrevs', None):
2828 if getattr(destrevlog, 'filteredrevs', None):
2819 raise ValueError(_(b'destination revlog has filtered revisions'))
2829 raise ValueError(_(b'destination revlog has filtered revisions'))
2820
2830
2821 # lazydelta and lazydeltabase controls whether to reuse a cached delta,
2831 # lazydelta and lazydeltabase controls whether to reuse a cached delta,
2822 # if possible.
2832 # if possible.
2823 oldlazydelta = destrevlog._lazydelta
2833 oldlazydelta = destrevlog._lazydelta
2824 oldlazydeltabase = destrevlog._lazydeltabase
2834 oldlazydeltabase = destrevlog._lazydeltabase
2825 oldamd = destrevlog._deltabothparents
2835 oldamd = destrevlog._deltabothparents
2826
2836
2827 try:
2837 try:
2828 if deltareuse == self.DELTAREUSEALWAYS:
2838 if deltareuse == self.DELTAREUSEALWAYS:
2829 destrevlog._lazydeltabase = True
2839 destrevlog._lazydeltabase = True
2830 destrevlog._lazydelta = True
2840 destrevlog._lazydelta = True
2831 elif deltareuse == self.DELTAREUSESAMEREVS:
2841 elif deltareuse == self.DELTAREUSESAMEREVS:
2832 destrevlog._lazydeltabase = False
2842 destrevlog._lazydeltabase = False
2833 destrevlog._lazydelta = True
2843 destrevlog._lazydelta = True
2834 elif deltareuse == self.DELTAREUSENEVER:
2844 elif deltareuse == self.DELTAREUSENEVER:
2835 destrevlog._lazydeltabase = False
2845 destrevlog._lazydeltabase = False
2836 destrevlog._lazydelta = False
2846 destrevlog._lazydelta = False
2837
2847
2838 destrevlog._deltabothparents = forcedeltabothparents or oldamd
2848 destrevlog._deltabothparents = forcedeltabothparents or oldamd
2839
2849
2840 self._clone(
2850 self._clone(
2841 tr,
2851 tr,
2842 destrevlog,
2852 destrevlog,
2843 addrevisioncb,
2853 addrevisioncb,
2844 deltareuse,
2854 deltareuse,
2845 forcedeltabothparents,
2855 forcedeltabothparents,
2846 sidedatacompanion,
2856 sidedatacompanion,
2847 )
2857 )
2848
2858
2849 finally:
2859 finally:
2850 destrevlog._lazydelta = oldlazydelta
2860 destrevlog._lazydelta = oldlazydelta
2851 destrevlog._lazydeltabase = oldlazydeltabase
2861 destrevlog._lazydeltabase = oldlazydeltabase
2852 destrevlog._deltabothparents = oldamd
2862 destrevlog._deltabothparents = oldamd
2853
2863
2854 def _clone(
2864 def _clone(
2855 self,
2865 self,
2856 tr,
2866 tr,
2857 destrevlog,
2867 destrevlog,
2858 addrevisioncb,
2868 addrevisioncb,
2859 deltareuse,
2869 deltareuse,
2860 forcedeltabothparents,
2870 forcedeltabothparents,
2861 sidedatacompanion,
2871 sidedatacompanion,
2862 ):
2872 ):
2863 """perform the core duty of `revlog.clone` after parameter processing"""
2873 """perform the core duty of `revlog.clone` after parameter processing"""
2864 deltacomputer = deltautil.deltacomputer(destrevlog)
2874 deltacomputer = deltautil.deltacomputer(destrevlog)
2865 index = self.index
2875 index = self.index
2866 for rev in self:
2876 for rev in self:
2867 entry = index[rev]
2877 entry = index[rev]
2868
2878
2869 # Some classes override linkrev to take filtered revs into
2879 # Some classes override linkrev to take filtered revs into
2870 # account. Use raw entry from index.
2880 # account. Use raw entry from index.
2871 flags = entry[0] & 0xFFFF
2881 flags = entry[0] & 0xFFFF
2872 linkrev = entry[4]
2882 linkrev = entry[4]
2873 p1 = index[entry[5]][7]
2883 p1 = index[entry[5]][7]
2874 p2 = index[entry[6]][7]
2884 p2 = index[entry[6]][7]
2875 node = entry[7]
2885 node = entry[7]
2876
2886
2877 sidedataactions = (False, [], {}, 0, 0)
2887 sidedataactions = (False, [], {}, 0, 0)
2878 if sidedatacompanion is not None:
2888 if sidedatacompanion is not None:
2879 sidedataactions = sidedatacompanion(self, rev)
2889 sidedataactions = sidedatacompanion(self, rev)
2880
2890
2881 # (Possibly) reuse the delta from the revlog if allowed and
2891 # (Possibly) reuse the delta from the revlog if allowed and
2882 # the revlog chunk is a delta.
2892 # the revlog chunk is a delta.
2883 cachedelta = None
2893 cachedelta = None
2884 rawtext = None
2894 rawtext = None
2885 if any(sidedataactions) or deltareuse == self.DELTAREUSEFULLADD:
2895 if any(sidedataactions) or deltareuse == self.DELTAREUSEFULLADD:
2886 dropall = sidedataactions[0]
2896 dropall = sidedataactions[0]
2887 filterout = sidedataactions[1]
2897 filterout = sidedataactions[1]
2888 update = sidedataactions[2]
2898 update = sidedataactions[2]
2889 new_flags = sidedataactions[3]
2899 new_flags = sidedataactions[3]
2890 dropped_flags = sidedataactions[4]
2900 dropped_flags = sidedataactions[4]
2891 text, sidedata = self._revisiondata(rev)
2901 text, sidedata = self._revisiondata(rev)
2892 if dropall:
2902 if dropall:
2893 sidedata = {}
2903 sidedata = {}
2894 for key in filterout:
2904 for key in filterout:
2895 sidedata.pop(key, None)
2905 sidedata.pop(key, None)
2896 sidedata.update(update)
2906 sidedata.update(update)
2897 if not sidedata:
2907 if not sidedata:
2898 sidedata = None
2908 sidedata = None
2899
2909
2900 flags |= new_flags
2910 flags |= new_flags
2901 flags &= ~dropped_flags
2911 flags &= ~dropped_flags
2902
2912
2903 destrevlog.addrevision(
2913 destrevlog.addrevision(
2904 text,
2914 text,
2905 tr,
2915 tr,
2906 linkrev,
2916 linkrev,
2907 p1,
2917 p1,
2908 p2,
2918 p2,
2909 cachedelta=cachedelta,
2919 cachedelta=cachedelta,
2910 node=node,
2920 node=node,
2911 flags=flags,
2921 flags=flags,
2912 deltacomputer=deltacomputer,
2922 deltacomputer=deltacomputer,
2913 sidedata=sidedata,
2923 sidedata=sidedata,
2914 )
2924 )
2915 else:
2925 else:
2916 if destrevlog._lazydelta:
2926 if destrevlog._lazydelta:
2917 dp = self.deltaparent(rev)
2927 dp = self.deltaparent(rev)
2918 if dp != nullrev:
2928 if dp != nullrev:
2919 cachedelta = (dp, bytes(self._chunk(rev)))
2929 cachedelta = (dp, bytes(self._chunk(rev)))
2920
2930
2921 if not cachedelta:
2931 if not cachedelta:
2922 rawtext = self.rawdata(rev)
2932 rawtext = self.rawdata(rev)
2923
2933
2924 ifh = destrevlog.opener(
2934 ifh = destrevlog.opener(
2925 destrevlog.indexfile, b'a+', checkambig=False
2935 destrevlog.indexfile, b'a+', checkambig=False
2926 )
2936 )
2927 dfh = None
2937 dfh = None
2928 if not destrevlog._inline:
2938 if not destrevlog._inline:
2929 dfh = destrevlog.opener(destrevlog.datafile, b'a+')
2939 dfh = destrevlog.opener(destrevlog.datafile, b'a+')
2930 try:
2940 try:
2931 destrevlog._addrevision(
2941 destrevlog._addrevision(
2932 node,
2942 node,
2933 rawtext,
2943 rawtext,
2934 tr,
2944 tr,
2935 linkrev,
2945 linkrev,
2936 p1,
2946 p1,
2937 p2,
2947 p2,
2938 flags,
2948 flags,
2939 cachedelta,
2949 cachedelta,
2940 ifh,
2950 ifh,
2941 dfh,
2951 dfh,
2942 deltacomputer=deltacomputer,
2952 deltacomputer=deltacomputer,
2943 )
2953 )
2944 finally:
2954 finally:
2945 if dfh:
2955 if dfh:
2946 dfh.close()
2956 dfh.close()
2947 ifh.close()
2957 ifh.close()
2948
2958
2949 if addrevisioncb:
2959 if addrevisioncb:
2950 addrevisioncb(self, rev, node)
2960 addrevisioncb(self, rev, node)
2951
2961
2952 def censorrevision(self, tr, censornode, tombstone=b''):
2962 def censorrevision(self, tr, censornode, tombstone=b''):
2953 if (self.version & 0xFFFF) == REVLOGV0:
2963 if (self.version & 0xFFFF) == REVLOGV0:
2954 raise error.RevlogError(
2964 raise error.RevlogError(
2955 _(b'cannot censor with version %d revlogs') % self.version
2965 _(b'cannot censor with version %d revlogs') % self.version
2956 )
2966 )
2957
2967
2958 censorrev = self.rev(censornode)
2968 censorrev = self.rev(censornode)
2959 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
2969 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
2960
2970
2961 if len(tombstone) > self.rawsize(censorrev):
2971 if len(tombstone) > self.rawsize(censorrev):
2962 raise error.Abort(
2972 raise error.Abort(
2963 _(b'censor tombstone must be no longer than censored data')
2973 _(b'censor tombstone must be no longer than censored data')
2964 )
2974 )
2965
2975
2966 # Rewriting the revlog in place is hard. Our strategy for censoring is
2976 # Rewriting the revlog in place is hard. Our strategy for censoring is
2967 # to create a new revlog, copy all revisions to it, then replace the
2977 # to create a new revlog, copy all revisions to it, then replace the
2968 # revlogs on transaction close.
2978 # revlogs on transaction close.
2969
2979
2970 newindexfile = self.indexfile + b'.tmpcensored'
2980 newindexfile = self.indexfile + b'.tmpcensored'
2971 newdatafile = self.datafile + b'.tmpcensored'
2981 newdatafile = self.datafile + b'.tmpcensored'
2972
2982
2973 # This is a bit dangerous. We could easily have a mismatch of state.
2983 # This is a bit dangerous. We could easily have a mismatch of state.
2974 newrl = revlog(self.opener, newindexfile, newdatafile, censorable=True)
2984 newrl = revlog(self.opener, newindexfile, newdatafile, censorable=True)
2975 newrl.version = self.version
2985 newrl.version = self.version
2976 newrl._generaldelta = self._generaldelta
2986 newrl._generaldelta = self._generaldelta
2977 newrl._parse_index = self._parse_index
2987 newrl._parse_index = self._parse_index
2978
2988
2979 for rev in self.revs():
2989 for rev in self.revs():
2980 node = self.node(rev)
2990 node = self.node(rev)
2981 p1, p2 = self.parents(node)
2991 p1, p2 = self.parents(node)
2982
2992
2983 if rev == censorrev:
2993 if rev == censorrev:
2984 newrl.addrawrevision(
2994 newrl.addrawrevision(
2985 tombstone,
2995 tombstone,
2986 tr,
2996 tr,
2987 self.linkrev(censorrev),
2997 self.linkrev(censorrev),
2988 p1,
2998 p1,
2989 p2,
2999 p2,
2990 censornode,
3000 censornode,
2991 REVIDX_ISCENSORED,
3001 REVIDX_ISCENSORED,
2992 )
3002 )
2993
3003
2994 if newrl.deltaparent(rev) != nullrev:
3004 if newrl.deltaparent(rev) != nullrev:
2995 raise error.Abort(
3005 raise error.Abort(
2996 _(
3006 _(
2997 b'censored revision stored as delta; '
3007 b'censored revision stored as delta; '
2998 b'cannot censor'
3008 b'cannot censor'
2999 ),
3009 ),
3000 hint=_(
3010 hint=_(
3001 b'censoring of revlogs is not '
3011 b'censoring of revlogs is not '
3002 b'fully implemented; please report '
3012 b'fully implemented; please report '
3003 b'this bug'
3013 b'this bug'
3004 ),
3014 ),
3005 )
3015 )
3006 continue
3016 continue
3007
3017
3008 if self.iscensored(rev):
3018 if self.iscensored(rev):
3009 if self.deltaparent(rev) != nullrev:
3019 if self.deltaparent(rev) != nullrev:
3010 raise error.Abort(
3020 raise error.Abort(
3011 _(
3021 _(
3012 b'cannot censor due to censored '
3022 b'cannot censor due to censored '
3013 b'revision having delta stored'
3023 b'revision having delta stored'
3014 )
3024 )
3015 )
3025 )
3016 rawtext = self._chunk(rev)
3026 rawtext = self._chunk(rev)
3017 else:
3027 else:
3018 rawtext = self.rawdata(rev)
3028 rawtext = self.rawdata(rev)
3019
3029
3020 newrl.addrawrevision(
3030 newrl.addrawrevision(
3021 rawtext, tr, self.linkrev(rev), p1, p2, node, self.flags(rev)
3031 rawtext, tr, self.linkrev(rev), p1, p2, node, self.flags(rev)
3022 )
3032 )
3023
3033
3024 tr.addbackup(self.indexfile, location=b'store')
3034 tr.addbackup(self.indexfile, location=b'store')
3025 if not self._inline:
3035 if not self._inline:
3026 tr.addbackup(self.datafile, location=b'store')
3036 tr.addbackup(self.datafile, location=b'store')
3027
3037
3028 self.opener.rename(newrl.indexfile, self.indexfile)
3038 self.opener.rename(newrl.indexfile, self.indexfile)
3029 if not self._inline:
3039 if not self._inline:
3030 self.opener.rename(newrl.datafile, self.datafile)
3040 self.opener.rename(newrl.datafile, self.datafile)
3031
3041
3032 self.clearcaches()
3042 self.clearcaches()
3033 self._loadindex()
3043 self._loadindex()
3034
3044
3035 def verifyintegrity(self, state):
3045 def verifyintegrity(self, state):
3036 """Verifies the integrity of the revlog.
3046 """Verifies the integrity of the revlog.
3037
3047
3038 Yields ``revlogproblem`` instances describing problems that are
3048 Yields ``revlogproblem`` instances describing problems that are
3039 found.
3049 found.
3040 """
3050 """
3041 dd, di = self.checksize()
3051 dd, di = self.checksize()
3042 if dd:
3052 if dd:
3043 yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
3053 yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
3044 if di:
3054 if di:
3045 yield revlogproblem(error=_(b'index contains %d extra bytes') % di)
3055 yield revlogproblem(error=_(b'index contains %d extra bytes') % di)
3046
3056
3047 version = self.version & 0xFFFF
3057 version = self.version & 0xFFFF
3048
3058
3049 # The verifier tells us what version revlog we should be.
3059 # The verifier tells us what version revlog we should be.
3050 if version != state[b'expectedversion']:
3060 if version != state[b'expectedversion']:
3051 yield revlogproblem(
3061 yield revlogproblem(
3052 warning=_(b"warning: '%s' uses revlog format %d; expected %d")
3062 warning=_(b"warning: '%s' uses revlog format %d; expected %d")
3053 % (self.indexfile, version, state[b'expectedversion'])
3063 % (self.indexfile, version, state[b'expectedversion'])
3054 )
3064 )
3055
3065
3056 state[b'skipread'] = set()
3066 state[b'skipread'] = set()
3057 state[b'safe_renamed'] = set()
3067 state[b'safe_renamed'] = set()
3058
3068
3059 for rev in self:
3069 for rev in self:
3060 node = self.node(rev)
3070 node = self.node(rev)
3061
3071
3062 # Verify contents. 4 cases to care about:
3072 # Verify contents. 4 cases to care about:
3063 #
3073 #
3064 # common: the most common case
3074 # common: the most common case
3065 # rename: with a rename
3075 # rename: with a rename
3066 # meta: file content starts with b'\1\n', the metadata
3076 # meta: file content starts with b'\1\n', the metadata
3067 # header defined in filelog.py, but without a rename
3077 # header defined in filelog.py, but without a rename
3068 # ext: content stored externally
3078 # ext: content stored externally
3069 #
3079 #
3070 # More formally, their differences are shown below:
3080 # More formally, their differences are shown below:
3071 #
3081 #
3072 # | common | rename | meta | ext
3082 # | common | rename | meta | ext
3073 # -------------------------------------------------------
3083 # -------------------------------------------------------
3074 # flags() | 0 | 0 | 0 | not 0
3084 # flags() | 0 | 0 | 0 | not 0
3075 # renamed() | False | True | False | ?
3085 # renamed() | False | True | False | ?
3076 # rawtext[0:2]=='\1\n'| False | True | True | ?
3086 # rawtext[0:2]=='\1\n'| False | True | True | ?
3077 #
3087 #
3078 # "rawtext" means the raw text stored in revlog data, which
3088 # "rawtext" means the raw text stored in revlog data, which
3079 # could be retrieved by "rawdata(rev)". "text"
3089 # could be retrieved by "rawdata(rev)". "text"
3080 # mentioned below is "revision(rev)".
3090 # mentioned below is "revision(rev)".
3081 #
3091 #
3082 # There are 3 different lengths stored physically:
3092 # There are 3 different lengths stored physically:
3083 # 1. L1: rawsize, stored in revlog index
3093 # 1. L1: rawsize, stored in revlog index
3084 # 2. L2: len(rawtext), stored in revlog data
3094 # 2. L2: len(rawtext), stored in revlog data
3085 # 3. L3: len(text), stored in revlog data if flags==0, or
3095 # 3. L3: len(text), stored in revlog data if flags==0, or
3086 # possibly somewhere else if flags!=0
3096 # possibly somewhere else if flags!=0
3087 #
3097 #
3088 # L1 should be equal to L2. L3 could be different from them.
3098 # L1 should be equal to L2. L3 could be different from them.
3089 # "text" may or may not affect commit hash depending on flag
3099 # "text" may or may not affect commit hash depending on flag
3090 # processors (see flagutil.addflagprocessor).
3100 # processors (see flagutil.addflagprocessor).
3091 #
3101 #
3092 # | common | rename | meta | ext
3102 # | common | rename | meta | ext
3093 # -------------------------------------------------
3103 # -------------------------------------------------
3094 # rawsize() | L1 | L1 | L1 | L1
3104 # rawsize() | L1 | L1 | L1 | L1
3095 # size() | L1 | L2-LM | L1(*) | L1 (?)
3105 # size() | L1 | L2-LM | L1(*) | L1 (?)
3096 # len(rawtext) | L2 | L2 | L2 | L2
3106 # len(rawtext) | L2 | L2 | L2 | L2
3097 # len(text) | L2 | L2 | L2 | L3
3107 # len(text) | L2 | L2 | L2 | L3
3098 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
3108 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
3099 #
3109 #
3100 # LM: length of metadata, depending on rawtext
3110 # LM: length of metadata, depending on rawtext
3101 # (*): not ideal, see comment in filelog.size
3111 # (*): not ideal, see comment in filelog.size
3102 # (?): could be "- len(meta)" if the resolved content has
3112 # (?): could be "- len(meta)" if the resolved content has
3103 # rename metadata
3113 # rename metadata
3104 #
3114 #
3105 # Checks needed to be done:
3115 # Checks needed to be done:
3106 # 1. length check: L1 == L2, in all cases.
3116 # 1. length check: L1 == L2, in all cases.
3107 # 2. hash check: depending on flag processor, we may need to
3117 # 2. hash check: depending on flag processor, we may need to
3108 # use either "text" (external), or "rawtext" (in revlog).
3118 # use either "text" (external), or "rawtext" (in revlog).
3109
3119
3110 try:
3120 try:
3111 skipflags = state.get(b'skipflags', 0)
3121 skipflags = state.get(b'skipflags', 0)
3112 if skipflags:
3122 if skipflags:
3113 skipflags &= self.flags(rev)
3123 skipflags &= self.flags(rev)
3114
3124
3115 _verify_revision(self, skipflags, state, node)
3125 _verify_revision(self, skipflags, state, node)
3116
3126
3117 l1 = self.rawsize(rev)
3127 l1 = self.rawsize(rev)
3118 l2 = len(self.rawdata(node))
3128 l2 = len(self.rawdata(node))
3119
3129
3120 if l1 != l2:
3130 if l1 != l2:
3121 yield revlogproblem(
3131 yield revlogproblem(
3122 error=_(b'unpacked size is %d, %d expected') % (l2, l1),
3132 error=_(b'unpacked size is %d, %d expected') % (l2, l1),
3123 node=node,
3133 node=node,
3124 )
3134 )
3125
3135
3126 except error.CensoredNodeError:
3136 except error.CensoredNodeError:
3127 if state[b'erroroncensored']:
3137 if state[b'erroroncensored']:
3128 yield revlogproblem(
3138 yield revlogproblem(
3129 error=_(b'censored file data'), node=node
3139 error=_(b'censored file data'), node=node
3130 )
3140 )
3131 state[b'skipread'].add(node)
3141 state[b'skipread'].add(node)
3132 except Exception as e:
3142 except Exception as e:
3133 yield revlogproblem(
3143 yield revlogproblem(
3134 error=_(b'unpacking %s: %s')
3144 error=_(b'unpacking %s: %s')
3135 % (short(node), stringutil.forcebytestr(e)),
3145 % (short(node), stringutil.forcebytestr(e)),
3136 node=node,
3146 node=node,
3137 )
3147 )
3138 state[b'skipread'].add(node)
3148 state[b'skipread'].add(node)
3139
3149
3140 def storageinfo(
3150 def storageinfo(
3141 self,
3151 self,
3142 exclusivefiles=False,
3152 exclusivefiles=False,
3143 sharedfiles=False,
3153 sharedfiles=False,
3144 revisionscount=False,
3154 revisionscount=False,
3145 trackedsize=False,
3155 trackedsize=False,
3146 storedsize=False,
3156 storedsize=False,
3147 ):
3157 ):
3148 d = {}
3158 d = {}
3149
3159
3150 if exclusivefiles:
3160 if exclusivefiles:
3151 d[b'exclusivefiles'] = [(self.opener, self.indexfile)]
3161 d[b'exclusivefiles'] = [(self.opener, self.indexfile)]
3152 if not self._inline:
3162 if not self._inline:
3153 d[b'exclusivefiles'].append((self.opener, self.datafile))
3163 d[b'exclusivefiles'].append((self.opener, self.datafile))
3154
3164
3155 if sharedfiles:
3165 if sharedfiles:
3156 d[b'sharedfiles'] = []
3166 d[b'sharedfiles'] = []
3157
3167
3158 if revisionscount:
3168 if revisionscount:
3159 d[b'revisionscount'] = len(self)
3169 d[b'revisionscount'] = len(self)
3160
3170
3161 if trackedsize:
3171 if trackedsize:
3162 d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))
3172 d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))
3163
3173
3164 if storedsize:
3174 if storedsize:
3165 d[b'storedsize'] = sum(
3175 d[b'storedsize'] = sum(
3166 self.opener.stat(path).st_size for path in self.files()
3176 self.opener.stat(path).st_size for path in self.files()
3167 )
3177 )
3168
3178
3169 return d
3179 return d
3170
3180
3171 def rewrite_sidedata(self, helpers, startrev, endrev):
3181 def rewrite_sidedata(self, helpers, startrev, endrev):
3172 if self.version & 0xFFFF != REVLOGV2:
3182 if self.version & 0xFFFF != REVLOGV2:
3173 return
3183 return
3174 # inline are not yet supported because they suffer from an issue when
3184 # inline are not yet supported because they suffer from an issue when
3175 # rewriting them (since it's not an append-only operation).
3185 # rewriting them (since it's not an append-only operation).
3176 # See issue6485.
3186 # See issue6485.
3177 assert not self._inline
3187 assert not self._inline
3178 if not helpers[1] and not helpers[2]:
3188 if not helpers[1] and not helpers[2]:
3179 # Nothing to generate or remove
3189 # Nothing to generate or remove
3180 return
3190 return
3181
3191
3182 new_entries = []
3192 new_entries = []
3183 # append the new sidedata
3193 # append the new sidedata
3184 with self._datafp(b'a+') as fp:
3194 with self._datafp(b'a+') as fp:
3185 # Maybe this bug still exists, see revlog._writeentry
3195 # Maybe this bug still exists, see revlog._writeentry
3186 fp.seek(0, os.SEEK_END)
3196 fp.seek(0, os.SEEK_END)
3187 current_offset = fp.tell()
3197 current_offset = fp.tell()
3188 for rev in range(startrev, endrev + 1):
3198 for rev in range(startrev, endrev + 1):
3189 entry = self.index[rev]
3199 entry = self.index[rev]
3190 new_sidedata = storageutil.run_sidedata_helpers(
3200 new_sidedata = storageutil.run_sidedata_helpers(
3191 store=self,
3201 store=self,
3192 sidedata_helpers=helpers,
3202 sidedata_helpers=helpers,
3193 sidedata={},
3203 sidedata={},
3194 rev=rev,
3204 rev=rev,
3195 )
3205 )
3196
3206
3197 serialized_sidedata = sidedatautil.serialize_sidedata(
3207 serialized_sidedata = sidedatautil.serialize_sidedata(
3198 new_sidedata
3208 new_sidedata
3199 )
3209 )
3200 if entry[8] != 0 or entry[9] != 0:
3210 if entry[8] != 0 or entry[9] != 0:
3201 # rewriting entries that already have sidedata is not
3211 # rewriting entries that already have sidedata is not
3202 # supported yet, because it introduces garbage data in the
3212 # supported yet, because it introduces garbage data in the
3203 # revlog.
3213 # revlog.
3204 msg = b"Rewriting existing sidedata is not supported yet"
3214 msg = b"Rewriting existing sidedata is not supported yet"
3205 raise error.Abort(msg)
3215 raise error.Abort(msg)
3206 entry = entry[:8]
3216 entry = entry[:8]
3207 entry += (current_offset, len(serialized_sidedata))
3217 entry += (current_offset, len(serialized_sidedata))
3208
3218
3209 fp.write(serialized_sidedata)
3219 fp.write(serialized_sidedata)
3210 new_entries.append(entry)
3220 new_entries.append(entry)
3211 current_offset += len(serialized_sidedata)
3221 current_offset += len(serialized_sidedata)
3212
3222
3213 # rewrite the new index entries
3223 # rewrite the new index entries
3214 with self._indexfp(b'w+') as fp:
3224 with self._indexfp(b'w+') as fp:
3215 fp.seek(startrev * self.index.entry_size)
3225 fp.seek(startrev * self.index.entry_size)
3216 for i, entry in enumerate(new_entries):
3226 for i, entry in enumerate(new_entries):
3217 rev = startrev + i
3227 rev = startrev + i
3218 self.index.replace_sidedata_info(rev, entry[8], entry[9])
3228 self.index.replace_sidedata_info(rev, entry[8], entry[9])
3219 packed = self.index.entry_binary(rev, self.version)
3229 packed = self.index.entry_binary(rev)
3230 if rev == 0:
3231 header = self.index.pack_header(self.version)
3232 packed = header + packed
3220 fp.write(packed)
3233 fp.write(packed)
@@ -1,499 +1,504 b''
1 // revlog.rs
1 // revlog.rs
2 //
2 //
3 // Copyright 2019-2020 Georges Racinet <georges.racinet@octobus.net>
3 // Copyright 2019-2020 Georges Racinet <georges.racinet@octobus.net>
4 //
4 //
5 // This software may be used and distributed according to the terms of the
5 // This software may be used and distributed according to the terms of the
6 // GNU General Public License version 2 or any later version.
6 // GNU General Public License version 2 or any later version.
7
7
8 use crate::{
8 use crate::{
9 cindex,
9 cindex,
10 utils::{node_from_py_bytes, node_from_py_object},
10 utils::{node_from_py_bytes, node_from_py_object},
11 };
11 };
12 use cpython::{
12 use cpython::{
13 buffer::{Element, PyBuffer},
13 buffer::{Element, PyBuffer},
14 exc::{IndexError, ValueError},
14 exc::{IndexError, ValueError},
15 ObjectProtocol, PyBytes, PyClone, PyDict, PyErr, PyInt, PyModule,
15 ObjectProtocol, PyBytes, PyClone, PyDict, PyErr, PyInt, PyModule,
16 PyObject, PyResult, PyString, PyTuple, Python, PythonObject, ToPyObject,
16 PyObject, PyResult, PyString, PyTuple, Python, PythonObject, ToPyObject,
17 };
17 };
18 use hg::{
18 use hg::{
19 nodemap::{Block, NodeMapError, NodeTree},
19 nodemap::{Block, NodeMapError, NodeTree},
20 revlog::{nodemap::NodeMap, NodePrefix, RevlogIndex},
20 revlog::{nodemap::NodeMap, NodePrefix, RevlogIndex},
21 Revision,
21 Revision,
22 };
22 };
23 use std::cell::RefCell;
23 use std::cell::RefCell;
24
24
25 /// Return a Struct implementing the Graph trait
25 /// Return a Struct implementing the Graph trait
26 pub(crate) fn pyindex_to_graph(
26 pub(crate) fn pyindex_to_graph(
27 py: Python,
27 py: Python,
28 index: PyObject,
28 index: PyObject,
29 ) -> PyResult<cindex::Index> {
29 ) -> PyResult<cindex::Index> {
30 match index.extract::<MixedIndex>(py) {
30 match index.extract::<MixedIndex>(py) {
31 Ok(midx) => Ok(midx.clone_cindex(py)),
31 Ok(midx) => Ok(midx.clone_cindex(py)),
32 Err(_) => cindex::Index::new(py, index),
32 Err(_) => cindex::Index::new(py, index),
33 }
33 }
34 }
34 }
35
35
36 py_class!(pub class MixedIndex |py| {
36 py_class!(pub class MixedIndex |py| {
37 data cindex: RefCell<cindex::Index>;
37 data cindex: RefCell<cindex::Index>;
38 data nt: RefCell<Option<NodeTree>>;
38 data nt: RefCell<Option<NodeTree>>;
39 data docket: RefCell<Option<PyObject>>;
39 data docket: RefCell<Option<PyObject>>;
40 // Holds a reference to the mmap'ed persistent nodemap data
40 // Holds a reference to the mmap'ed persistent nodemap data
41 data mmap: RefCell<Option<PyBuffer>>;
41 data mmap: RefCell<Option<PyBuffer>>;
42
42
43 def __new__(_cls, cindex: PyObject) -> PyResult<MixedIndex> {
43 def __new__(_cls, cindex: PyObject) -> PyResult<MixedIndex> {
44 Self::new(py, cindex)
44 Self::new(py, cindex)
45 }
45 }
46
46
47 /// Compatibility layer used for Python consumers needing access to the C index
47 /// Compatibility layer used for Python consumers needing access to the C index
48 ///
48 ///
49 /// Only use case so far is `scmutil.shortesthexnodeidprefix`,
49 /// Only use case so far is `scmutil.shortesthexnodeidprefix`,
50 /// that may need to build a custom `nodetree`, based on a specified revset.
50 /// that may need to build a custom `nodetree`, based on a specified revset.
51 /// With a Rust implementation of the nodemap, we will be able to get rid of
51 /// With a Rust implementation of the nodemap, we will be able to get rid of
52 /// this, by exposing our own standalone nodemap class,
52 /// this, by exposing our own standalone nodemap class,
53 /// ready to accept `MixedIndex`.
53 /// ready to accept `MixedIndex`.
54 def get_cindex(&self) -> PyResult<PyObject> {
54 def get_cindex(&self) -> PyResult<PyObject> {
55 Ok(self.cindex(py).borrow().inner().clone_ref(py))
55 Ok(self.cindex(py).borrow().inner().clone_ref(py))
56 }
56 }
57
57
58 // Index API involving nodemap, as defined in mercurial/pure/parsers.py
58 // Index API involving nodemap, as defined in mercurial/pure/parsers.py
59
59
60 /// Return Revision if found, raises a bare `error.RevlogError`
60 /// Return Revision if found, raises a bare `error.RevlogError`
61 /// in case of ambiguity, same as C version does
61 /// in case of ambiguity, same as C version does
62 def get_rev(&self, node: PyBytes) -> PyResult<Option<Revision>> {
62 def get_rev(&self, node: PyBytes) -> PyResult<Option<Revision>> {
63 let opt = self.get_nodetree(py)?.borrow();
63 let opt = self.get_nodetree(py)?.borrow();
64 let nt = opt.as_ref().unwrap();
64 let nt = opt.as_ref().unwrap();
65 let idx = &*self.cindex(py).borrow();
65 let idx = &*self.cindex(py).borrow();
66 let node = node_from_py_bytes(py, &node)?;
66 let node = node_from_py_bytes(py, &node)?;
67 nt.find_bin(idx, node.into()).map_err(|e| nodemap_error(py, e))
67 nt.find_bin(idx, node.into()).map_err(|e| nodemap_error(py, e))
68 }
68 }
69
69
70 /// same as `get_rev()` but raises a bare `error.RevlogError` if node
70 /// same as `get_rev()` but raises a bare `error.RevlogError` if node
71 /// is not found.
71 /// is not found.
72 ///
72 ///
73 /// No need to repeat `node` in the exception, `mercurial/revlog.py`
73 /// No need to repeat `node` in the exception, `mercurial/revlog.py`
74 /// will catch and rewrap with it
74 /// will catch and rewrap with it
75 def rev(&self, node: PyBytes) -> PyResult<Revision> {
75 def rev(&self, node: PyBytes) -> PyResult<Revision> {
76 self.get_rev(py, node)?.ok_or_else(|| revlog_error(py))
76 self.get_rev(py, node)?.ok_or_else(|| revlog_error(py))
77 }
77 }
78
78
79 /// return True if the node exist in the index
79 /// return True if the node exist in the index
80 def has_node(&self, node: PyBytes) -> PyResult<bool> {
80 def has_node(&self, node: PyBytes) -> PyResult<bool> {
81 self.get_rev(py, node).map(|opt| opt.is_some())
81 self.get_rev(py, node).map(|opt| opt.is_some())
82 }
82 }
83
83
84 /// find length of shortest hex nodeid of a binary ID
84 /// find length of shortest hex nodeid of a binary ID
85 def shortest(&self, node: PyBytes) -> PyResult<usize> {
85 def shortest(&self, node: PyBytes) -> PyResult<usize> {
86 let opt = self.get_nodetree(py)?.borrow();
86 let opt = self.get_nodetree(py)?.borrow();
87 let nt = opt.as_ref().unwrap();
87 let nt = opt.as_ref().unwrap();
88 let idx = &*self.cindex(py).borrow();
88 let idx = &*self.cindex(py).borrow();
89 match nt.unique_prefix_len_node(idx, &node_from_py_bytes(py, &node)?)
89 match nt.unique_prefix_len_node(idx, &node_from_py_bytes(py, &node)?)
90 {
90 {
91 Ok(Some(l)) => Ok(l),
91 Ok(Some(l)) => Ok(l),
92 Ok(None) => Err(revlog_error(py)),
92 Ok(None) => Err(revlog_error(py)),
93 Err(e) => Err(nodemap_error(py, e)),
93 Err(e) => Err(nodemap_error(py, e)),
94 }
94 }
95 }
95 }
96
96
97 def partialmatch(&self, node: PyObject) -> PyResult<Option<PyBytes>> {
97 def partialmatch(&self, node: PyObject) -> PyResult<Option<PyBytes>> {
98 let opt = self.get_nodetree(py)?.borrow();
98 let opt = self.get_nodetree(py)?.borrow();
99 let nt = opt.as_ref().unwrap();
99 let nt = opt.as_ref().unwrap();
100 let idx = &*self.cindex(py).borrow();
100 let idx = &*self.cindex(py).borrow();
101
101
102 let node_as_string = if cfg!(feature = "python3-sys") {
102 let node_as_string = if cfg!(feature = "python3-sys") {
103 node.cast_as::<PyString>(py)?.to_string(py)?.to_string()
103 node.cast_as::<PyString>(py)?.to_string(py)?.to_string()
104 }
104 }
105 else {
105 else {
106 let node = node.extract::<PyBytes>(py)?;
106 let node = node.extract::<PyBytes>(py)?;
107 String::from_utf8_lossy(node.data(py)).to_string()
107 String::from_utf8_lossy(node.data(py)).to_string()
108 };
108 };
109
109
110 let prefix = NodePrefix::from_hex(&node_as_string).map_err(|_| PyErr::new::<ValueError, _>(py, "Invalid node or prefix"))?;
110 let prefix = NodePrefix::from_hex(&node_as_string).map_err(|_| PyErr::new::<ValueError, _>(py, "Invalid node or prefix"))?;
111
111
112 nt.find_bin(idx, prefix)
112 nt.find_bin(idx, prefix)
113 // TODO make an inner API returning the node directly
113 // TODO make an inner API returning the node directly
114 .map(|opt| opt.map(
114 .map(|opt| opt.map(
115 |rev| PyBytes::new(py, idx.node(rev).unwrap().as_bytes())))
115 |rev| PyBytes::new(py, idx.node(rev).unwrap().as_bytes())))
116 .map_err(|e| nodemap_error(py, e))
116 .map_err(|e| nodemap_error(py, e))
117
117
118 }
118 }
119
119
120 /// append an index entry
120 /// append an index entry
121 def append(&self, tup: PyTuple) -> PyResult<PyObject> {
121 def append(&self, tup: PyTuple) -> PyResult<PyObject> {
122 if tup.len(py) < 8 {
122 if tup.len(py) < 8 {
123 // this is better than the panic promised by tup.get_item()
123 // this is better than the panic promised by tup.get_item()
124 return Err(
124 return Err(
125 PyErr::new::<IndexError, _>(py, "tuple index out of range"))
125 PyErr::new::<IndexError, _>(py, "tuple index out of range"))
126 }
126 }
127 let node_bytes = tup.get_item(py, 7).extract(py)?;
127 let node_bytes = tup.get_item(py, 7).extract(py)?;
128 let node = node_from_py_object(py, &node_bytes)?;
128 let node = node_from_py_object(py, &node_bytes)?;
129
129
130 let mut idx = self.cindex(py).borrow_mut();
130 let mut idx = self.cindex(py).borrow_mut();
131 let rev = idx.len() as Revision;
131 let rev = idx.len() as Revision;
132
132
133 idx.append(py, tup)?;
133 idx.append(py, tup)?;
134 self.get_nodetree(py)?.borrow_mut().as_mut().unwrap()
134 self.get_nodetree(py)?.borrow_mut().as_mut().unwrap()
135 .insert(&*idx, &node, rev)
135 .insert(&*idx, &node, rev)
136 .map_err(|e| nodemap_error(py, e))?;
136 .map_err(|e| nodemap_error(py, e))?;
137 Ok(py.None())
137 Ok(py.None())
138 }
138 }
139
139
140 def __delitem__(&self, key: PyObject) -> PyResult<()> {
140 def __delitem__(&self, key: PyObject) -> PyResult<()> {
141 // __delitem__ is both for `del idx[r]` and `del idx[r1:r2]`
141 // __delitem__ is both for `del idx[r]` and `del idx[r1:r2]`
142 self.cindex(py).borrow().inner().del_item(py, key)?;
142 self.cindex(py).borrow().inner().del_item(py, key)?;
143 let mut opt = self.get_nodetree(py)?.borrow_mut();
143 let mut opt = self.get_nodetree(py)?.borrow_mut();
144 let mut nt = opt.as_mut().unwrap();
144 let mut nt = opt.as_mut().unwrap();
145 nt.invalidate_all();
145 nt.invalidate_all();
146 self.fill_nodemap(py, &mut nt)?;
146 self.fill_nodemap(py, &mut nt)?;
147 Ok(())
147 Ok(())
148 }
148 }
149
149
150 //
150 //
151 // Reforwarded C index API
151 // Reforwarded C index API
152 //
152 //
153
153
154 // index_methods (tp_methods). Same ordering as in revlog.c
154 // index_methods (tp_methods). Same ordering as in revlog.c
155
155
156 /// return the gca set of the given revs
156 /// return the gca set of the given revs
157 def ancestors(&self, *args, **kw) -> PyResult<PyObject> {
157 def ancestors(&self, *args, **kw) -> PyResult<PyObject> {
158 self.call_cindex(py, "ancestors", args, kw)
158 self.call_cindex(py, "ancestors", args, kw)
159 }
159 }
160
160
161 /// return the heads of the common ancestors of the given revs
161 /// return the heads of the common ancestors of the given revs
162 def commonancestorsheads(&self, *args, **kw) -> PyResult<PyObject> {
162 def commonancestorsheads(&self, *args, **kw) -> PyResult<PyObject> {
163 self.call_cindex(py, "commonancestorsheads", args, kw)
163 self.call_cindex(py, "commonancestorsheads", args, kw)
164 }
164 }
165
165
166 /// Clear the index caches and inner py_class data.
166 /// Clear the index caches and inner py_class data.
167 /// It is Python's responsibility to call `update_nodemap_data` again.
167 /// It is Python's responsibility to call `update_nodemap_data` again.
168 def clearcaches(&self, *args, **kw) -> PyResult<PyObject> {
168 def clearcaches(&self, *args, **kw) -> PyResult<PyObject> {
169 self.nt(py).borrow_mut().take();
169 self.nt(py).borrow_mut().take();
170 self.docket(py).borrow_mut().take();
170 self.docket(py).borrow_mut().take();
171 self.mmap(py).borrow_mut().take();
171 self.mmap(py).borrow_mut().take();
172 self.call_cindex(py, "clearcaches", args, kw)
172 self.call_cindex(py, "clearcaches", args, kw)
173 }
173 }
174
174
175 /// return the raw binary string representing a revision
175 /// return the raw binary string representing a revision
176 def entry_binary(&self, *args, **kw) -> PyResult<PyObject> {
176 def entry_binary(&self, *args, **kw) -> PyResult<PyObject> {
177 self.call_cindex(py, "entry_binary", args, kw)
177 self.call_cindex(py, "entry_binary", args, kw)
178 }
178 }
179
179
180 /// return a binary packed version of the header
181 def pack_header(&self, *args, **kw) -> PyResult<PyObject> {
182 self.call_cindex(py, "pack_header", args, kw)
183 }
184
180 /// get an index entry
185 /// get an index entry
181 def get(&self, *args, **kw) -> PyResult<PyObject> {
186 def get(&self, *args, **kw) -> PyResult<PyObject> {
182 self.call_cindex(py, "get", args, kw)
187 self.call_cindex(py, "get", args, kw)
183 }
188 }
184
189
185 /// compute phases
190 /// compute phases
186 def computephasesmapsets(&self, *args, **kw) -> PyResult<PyObject> {
191 def computephasesmapsets(&self, *args, **kw) -> PyResult<PyObject> {
187 self.call_cindex(py, "computephasesmapsets", args, kw)
192 self.call_cindex(py, "computephasesmapsets", args, kw)
188 }
193 }
189
194
190 /// reachableroots
195 /// reachableroots
191 def reachableroots2(&self, *args, **kw) -> PyResult<PyObject> {
196 def reachableroots2(&self, *args, **kw) -> PyResult<PyObject> {
192 self.call_cindex(py, "reachableroots2", args, kw)
197 self.call_cindex(py, "reachableroots2", args, kw)
193 }
198 }
194
199
195 /// get head revisions
200 /// get head revisions
196 def headrevs(&self, *args, **kw) -> PyResult<PyObject> {
201 def headrevs(&self, *args, **kw) -> PyResult<PyObject> {
197 self.call_cindex(py, "headrevs", args, kw)
202 self.call_cindex(py, "headrevs", args, kw)
198 }
203 }
199
204
200 /// get filtered head revisions
205 /// get filtered head revisions
201 def headrevsfiltered(&self, *args, **kw) -> PyResult<PyObject> {
206 def headrevsfiltered(&self, *args, **kw) -> PyResult<PyObject> {
202 self.call_cindex(py, "headrevsfiltered", args, kw)
207 self.call_cindex(py, "headrevsfiltered", args, kw)
203 }
208 }
204
209
205 /// True if the object is a snapshot
210 /// True if the object is a snapshot
206 def issnapshot(&self, *args, **kw) -> PyResult<PyObject> {
211 def issnapshot(&self, *args, **kw) -> PyResult<PyObject> {
207 self.call_cindex(py, "issnapshot", args, kw)
212 self.call_cindex(py, "issnapshot", args, kw)
208 }
213 }
209
214
210 /// Gather snapshot data in a cache dict
215 /// Gather snapshot data in a cache dict
211 def findsnapshots(&self, *args, **kw) -> PyResult<PyObject> {
216 def findsnapshots(&self, *args, **kw) -> PyResult<PyObject> {
212 self.call_cindex(py, "findsnapshots", args, kw)
217 self.call_cindex(py, "findsnapshots", args, kw)
213 }
218 }
214
219
215 /// determine revisions with deltas to reconstruct fulltext
220 /// determine revisions with deltas to reconstruct fulltext
216 def deltachain(&self, *args, **kw) -> PyResult<PyObject> {
221 def deltachain(&self, *args, **kw) -> PyResult<PyObject> {
217 self.call_cindex(py, "deltachain", args, kw)
222 self.call_cindex(py, "deltachain", args, kw)
218 }
223 }
219
224
220 /// slice planned chunk read to reach a density threshold
225 /// slice planned chunk read to reach a density threshold
221 def slicechunktodensity(&self, *args, **kw) -> PyResult<PyObject> {
226 def slicechunktodensity(&self, *args, **kw) -> PyResult<PyObject> {
222 self.call_cindex(py, "slicechunktodensity", args, kw)
227 self.call_cindex(py, "slicechunktodensity", args, kw)
223 }
228 }
224
229
225 /// stats for the index
230 /// stats for the index
226 def stats(&self, *args, **kw) -> PyResult<PyObject> {
231 def stats(&self, *args, **kw) -> PyResult<PyObject> {
227 self.call_cindex(py, "stats", args, kw)
232 self.call_cindex(py, "stats", args, kw)
228 }
233 }
229
234
230 // index_sequence_methods and index_mapping_methods.
235 // index_sequence_methods and index_mapping_methods.
231 //
236 //
232 // Since we call back through the high level Python API,
237 // Since we call back through the high level Python API,
233 // there's no point making a distinction between index_get
238 // there's no point making a distinction between index_get
234 // and index_getitem.
239 // and index_getitem.
235
240
236 def __len__(&self) -> PyResult<usize> {
241 def __len__(&self) -> PyResult<usize> {
237 self.cindex(py).borrow().inner().len(py)
242 self.cindex(py).borrow().inner().len(py)
238 }
243 }
239
244
240 def __getitem__(&self, key: PyObject) -> PyResult<PyObject> {
245 def __getitem__(&self, key: PyObject) -> PyResult<PyObject> {
241 // this conversion seems needless, but that's actually because
246 // this conversion seems needless, but that's actually because
242 // `index_getitem` does not handle conversion from PyLong,
247 // `index_getitem` does not handle conversion from PyLong,
243 // which expressions such as [e for e in index] internally use.
248 // which expressions such as [e for e in index] internally use.
244 // Note that we don't seem to have a direct way to call
249 // Note that we don't seem to have a direct way to call
245 // PySequence_GetItem (does the job), which would possibly be better
250 // PySequence_GetItem (does the job), which would possibly be better
246 // for performance
251 // for performance
247 let key = match key.extract::<Revision>(py) {
252 let key = match key.extract::<Revision>(py) {
248 Ok(rev) => rev.to_py_object(py).into_object(),
253 Ok(rev) => rev.to_py_object(py).into_object(),
249 Err(_) => key,
254 Err(_) => key,
250 };
255 };
251 self.cindex(py).borrow().inner().get_item(py, key)
256 self.cindex(py).borrow().inner().get_item(py, key)
252 }
257 }
253
258
254 def __setitem__(&self, key: PyObject, value: PyObject) -> PyResult<()> {
259 def __setitem__(&self, key: PyObject, value: PyObject) -> PyResult<()> {
255 self.cindex(py).borrow().inner().set_item(py, key, value)
260 self.cindex(py).borrow().inner().set_item(py, key, value)
256 }
261 }
257
262
258 def __contains__(&self, item: PyObject) -> PyResult<bool> {
263 def __contains__(&self, item: PyObject) -> PyResult<bool> {
259 // ObjectProtocol does not seem to provide contains(), so
264 // ObjectProtocol does not seem to provide contains(), so
260 // this is an equivalent implementation of the index_contains()
265 // this is an equivalent implementation of the index_contains()
261 // defined in revlog.c
266 // defined in revlog.c
262 let cindex = self.cindex(py).borrow();
267 let cindex = self.cindex(py).borrow();
263 match item.extract::<Revision>(py) {
268 match item.extract::<Revision>(py) {
264 Ok(rev) => {
269 Ok(rev) => {
265 Ok(rev >= -1 && rev < cindex.inner().len(py)? as Revision)
270 Ok(rev >= -1 && rev < cindex.inner().len(py)? as Revision)
266 }
271 }
267 Err(_) => {
272 Err(_) => {
268 cindex.inner().call_method(
273 cindex.inner().call_method(
269 py,
274 py,
270 "has_node",
275 "has_node",
271 PyTuple::new(py, &[item]),
276 PyTuple::new(py, &[item]),
272 None)?
277 None)?
273 .extract(py)
278 .extract(py)
274 }
279 }
275 }
280 }
276 }
281 }
277
282
278 def nodemap_data_all(&self) -> PyResult<PyBytes> {
283 def nodemap_data_all(&self) -> PyResult<PyBytes> {
279 self.inner_nodemap_data_all(py)
284 self.inner_nodemap_data_all(py)
280 }
285 }
281
286
282 def nodemap_data_incremental(&self) -> PyResult<PyObject> {
287 def nodemap_data_incremental(&self) -> PyResult<PyObject> {
283 self.inner_nodemap_data_incremental(py)
288 self.inner_nodemap_data_incremental(py)
284 }
289 }
285 def update_nodemap_data(
290 def update_nodemap_data(
286 &self,
291 &self,
287 docket: PyObject,
292 docket: PyObject,
288 nm_data: PyObject
293 nm_data: PyObject
289 ) -> PyResult<PyObject> {
294 ) -> PyResult<PyObject> {
290 self.inner_update_nodemap_data(py, docket, nm_data)
295 self.inner_update_nodemap_data(py, docket, nm_data)
291 }
296 }
292
297
293 @property
298 @property
294 def entry_size(&self) -> PyResult<PyInt> {
299 def entry_size(&self) -> PyResult<PyInt> {
295 self.cindex(py).borrow().inner().getattr(py, "entry_size")?.extract::<PyInt>(py)
300 self.cindex(py).borrow().inner().getattr(py, "entry_size")?.extract::<PyInt>(py)
296 }
301 }
297
302
298 });
303 });
299
304
300 impl MixedIndex {
305 impl MixedIndex {
301 fn new(py: Python, cindex: PyObject) -> PyResult<MixedIndex> {
306 fn new(py: Python, cindex: PyObject) -> PyResult<MixedIndex> {
302 Self::create_instance(
307 Self::create_instance(
303 py,
308 py,
304 RefCell::new(cindex::Index::new(py, cindex)?),
309 RefCell::new(cindex::Index::new(py, cindex)?),
305 RefCell::new(None),
310 RefCell::new(None),
306 RefCell::new(None),
311 RefCell::new(None),
307 RefCell::new(None),
312 RefCell::new(None),
308 )
313 )
309 }
314 }
310
315
311 /// This is scaffolding at this point, but it could also become
316 /// This is scaffolding at this point, but it could also become
312 /// a way to start a persistent nodemap or perform a
317 /// a way to start a persistent nodemap or perform a
313 /// vacuum / repack operation
318 /// vacuum / repack operation
314 fn fill_nodemap(
319 fn fill_nodemap(
315 &self,
320 &self,
316 py: Python,
321 py: Python,
317 nt: &mut NodeTree,
322 nt: &mut NodeTree,
318 ) -> PyResult<PyObject> {
323 ) -> PyResult<PyObject> {
319 let index = self.cindex(py).borrow();
324 let index = self.cindex(py).borrow();
320 for r in 0..index.len() {
325 for r in 0..index.len() {
321 let rev = r as Revision;
326 let rev = r as Revision;
322 // in this case node() won't ever return None
327 // in this case node() won't ever return None
323 nt.insert(&*index, index.node(rev).unwrap(), rev)
328 nt.insert(&*index, index.node(rev).unwrap(), rev)
324 .map_err(|e| nodemap_error(py, e))?
329 .map_err(|e| nodemap_error(py, e))?
325 }
330 }
326 Ok(py.None())
331 Ok(py.None())
327 }
332 }
328
333
329 fn get_nodetree<'a>(
334 fn get_nodetree<'a>(
330 &'a self,
335 &'a self,
331 py: Python<'a>,
336 py: Python<'a>,
332 ) -> PyResult<&'a RefCell<Option<NodeTree>>> {
337 ) -> PyResult<&'a RefCell<Option<NodeTree>>> {
333 if self.nt(py).borrow().is_none() {
338 if self.nt(py).borrow().is_none() {
334 let readonly = Box::new(Vec::new());
339 let readonly = Box::new(Vec::new());
335 let mut nt = NodeTree::load_bytes(readonly, 0);
340 let mut nt = NodeTree::load_bytes(readonly, 0);
336 self.fill_nodemap(py, &mut nt)?;
341 self.fill_nodemap(py, &mut nt)?;
337 self.nt(py).borrow_mut().replace(nt);
342 self.nt(py).borrow_mut().replace(nt);
338 }
343 }
339 Ok(self.nt(py))
344 Ok(self.nt(py))
340 }
345 }
341
346
342 /// forward a method call to the underlying C index
347 /// forward a method call to the underlying C index
343 fn call_cindex(
348 fn call_cindex(
344 &self,
349 &self,
345 py: Python,
350 py: Python,
346 name: &str,
351 name: &str,
347 args: &PyTuple,
352 args: &PyTuple,
348 kwargs: Option<&PyDict>,
353 kwargs: Option<&PyDict>,
349 ) -> PyResult<PyObject> {
354 ) -> PyResult<PyObject> {
350 self.cindex(py)
355 self.cindex(py)
351 .borrow()
356 .borrow()
352 .inner()
357 .inner()
353 .call_method(py, name, args, kwargs)
358 .call_method(py, name, args, kwargs)
354 }
359 }
355
360
356 pub fn clone_cindex(&self, py: Python) -> cindex::Index {
361 pub fn clone_cindex(&self, py: Python) -> cindex::Index {
357 self.cindex(py).borrow().clone_ref(py)
362 self.cindex(py).borrow().clone_ref(py)
358 }
363 }
359
364
360 /// Returns the full nodemap bytes to be written as-is to disk
365 /// Returns the full nodemap bytes to be written as-is to disk
361 fn inner_nodemap_data_all(&self, py: Python) -> PyResult<PyBytes> {
366 fn inner_nodemap_data_all(&self, py: Python) -> PyResult<PyBytes> {
362 let nodemap = self.get_nodetree(py)?.borrow_mut().take().unwrap();
367 let nodemap = self.get_nodetree(py)?.borrow_mut().take().unwrap();
363 let (readonly, bytes) = nodemap.into_readonly_and_added_bytes();
368 let (readonly, bytes) = nodemap.into_readonly_and_added_bytes();
364
369
365 // If there's anything readonly, we need to build the data again from
370 // If there's anything readonly, we need to build the data again from
366 // scratch
371 // scratch
367 let bytes = if readonly.len() > 0 {
372 let bytes = if readonly.len() > 0 {
368 let mut nt = NodeTree::load_bytes(Box::new(vec![]), 0);
373 let mut nt = NodeTree::load_bytes(Box::new(vec![]), 0);
369 self.fill_nodemap(py, &mut nt)?;
374 self.fill_nodemap(py, &mut nt)?;
370
375
371 let (readonly, bytes) = nt.into_readonly_and_added_bytes();
376 let (readonly, bytes) = nt.into_readonly_and_added_bytes();
372 assert_eq!(readonly.len(), 0);
377 assert_eq!(readonly.len(), 0);
373
378
374 bytes
379 bytes
375 } else {
380 } else {
376 bytes
381 bytes
377 };
382 };
378
383
379 let bytes = PyBytes::new(py, &bytes);
384 let bytes = PyBytes::new(py, &bytes);
380 Ok(bytes)
385 Ok(bytes)
381 }
386 }
382
387
383 /// Returns the last saved docket along with the size of any changed data
388 /// Returns the last saved docket along with the size of any changed data
384 /// (in number of blocks), and said data as bytes.
389 /// (in number of blocks), and said data as bytes.
385 fn inner_nodemap_data_incremental(
390 fn inner_nodemap_data_incremental(
386 &self,
391 &self,
387 py: Python,
392 py: Python,
388 ) -> PyResult<PyObject> {
393 ) -> PyResult<PyObject> {
389 let docket = self.docket(py).borrow();
394 let docket = self.docket(py).borrow();
390 let docket = match docket.as_ref() {
395 let docket = match docket.as_ref() {
391 Some(d) => d,
396 Some(d) => d,
392 None => return Ok(py.None()),
397 None => return Ok(py.None()),
393 };
398 };
394
399
395 let node_tree = self.get_nodetree(py)?.borrow_mut().take().unwrap();
400 let node_tree = self.get_nodetree(py)?.borrow_mut().take().unwrap();
396 let masked_blocks = node_tree.masked_readonly_blocks();
401 let masked_blocks = node_tree.masked_readonly_blocks();
397 let (_, data) = node_tree.into_readonly_and_added_bytes();
402 let (_, data) = node_tree.into_readonly_and_added_bytes();
398 let changed = masked_blocks * std::mem::size_of::<Block>();
403 let changed = masked_blocks * std::mem::size_of::<Block>();
399
404
400 Ok((docket, changed, PyBytes::new(py, &data))
405 Ok((docket, changed, PyBytes::new(py, &data))
401 .to_py_object(py)
406 .to_py_object(py)
402 .into_object())
407 .into_object())
403 }
408 }
404
409
405 /// Update the nodemap from the new (mmaped) data.
410 /// Update the nodemap from the new (mmaped) data.
406 /// The docket is kept as a reference for later incremental calls.
411 /// The docket is kept as a reference for later incremental calls.
407 fn inner_update_nodemap_data(
412 fn inner_update_nodemap_data(
408 &self,
413 &self,
409 py: Python,
414 py: Python,
410 docket: PyObject,
415 docket: PyObject,
411 nm_data: PyObject,
416 nm_data: PyObject,
412 ) -> PyResult<PyObject> {
417 ) -> PyResult<PyObject> {
413 let buf = PyBuffer::get(py, &nm_data)?;
418 let buf = PyBuffer::get(py, &nm_data)?;
414 let len = buf.item_count();
419 let len = buf.item_count();
415
420
416 // Build a slice from the mmap'ed buffer data
421 // Build a slice from the mmap'ed buffer data
417 let cbuf = buf.buf_ptr();
422 let cbuf = buf.buf_ptr();
418 let bytes = if std::mem::size_of::<u8>() == buf.item_size()
423 let bytes = if std::mem::size_of::<u8>() == buf.item_size()
419 && buf.is_c_contiguous()
424 && buf.is_c_contiguous()
420 && u8::is_compatible_format(buf.format())
425 && u8::is_compatible_format(buf.format())
421 {
426 {
422 unsafe { std::slice::from_raw_parts(cbuf as *const u8, len) }
427 unsafe { std::slice::from_raw_parts(cbuf as *const u8, len) }
423 } else {
428 } else {
424 return Err(PyErr::new::<ValueError, _>(
429 return Err(PyErr::new::<ValueError, _>(
425 py,
430 py,
426 "Nodemap data buffer has an invalid memory representation"
431 "Nodemap data buffer has an invalid memory representation"
427 .to_string(),
432 .to_string(),
428 ));
433 ));
429 };
434 };
430
435
431 // Keep a reference to the mmap'ed buffer, otherwise we get a dangling
436 // Keep a reference to the mmap'ed buffer, otherwise we get a dangling
432 // pointer.
437 // pointer.
433 self.mmap(py).borrow_mut().replace(buf);
438 self.mmap(py).borrow_mut().replace(buf);
434
439
435 let mut nt = NodeTree::load_bytes(Box::new(bytes), len);
440 let mut nt = NodeTree::load_bytes(Box::new(bytes), len);
436
441
437 let data_tip =
442 let data_tip =
438 docket.getattr(py, "tip_rev")?.extract::<Revision>(py)?;
443 docket.getattr(py, "tip_rev")?.extract::<Revision>(py)?;
439 self.docket(py).borrow_mut().replace(docket.clone_ref(py));
444 self.docket(py).borrow_mut().replace(docket.clone_ref(py));
440 let idx = self.cindex(py).borrow();
445 let idx = self.cindex(py).borrow();
441 let current_tip = idx.len();
446 let current_tip = idx.len();
442
447
443 for r in (data_tip + 1)..current_tip as Revision {
448 for r in (data_tip + 1)..current_tip as Revision {
444 let rev = r as Revision;
449 let rev = r as Revision;
445 // in this case node() won't ever return None
450 // in this case node() won't ever return None
446 nt.insert(&*idx, idx.node(rev).unwrap(), rev)
451 nt.insert(&*idx, idx.node(rev).unwrap(), rev)
447 .map_err(|e| nodemap_error(py, e))?
452 .map_err(|e| nodemap_error(py, e))?
448 }
453 }
449
454
450 *self.nt(py).borrow_mut() = Some(nt);
455 *self.nt(py).borrow_mut() = Some(nt);
451
456
452 Ok(py.None())
457 Ok(py.None())
453 }
458 }
454 }
459 }
455
460
456 fn revlog_error(py: Python) -> PyErr {
461 fn revlog_error(py: Python) -> PyErr {
457 match py
462 match py
458 .import("mercurial.error")
463 .import("mercurial.error")
459 .and_then(|m| m.get(py, "RevlogError"))
464 .and_then(|m| m.get(py, "RevlogError"))
460 {
465 {
461 Err(e) => e,
466 Err(e) => e,
462 Ok(cls) => PyErr::from_instance(py, cls),
467 Ok(cls) => PyErr::from_instance(py, cls),
463 }
468 }
464 }
469 }
465
470
466 fn rev_not_in_index(py: Python, rev: Revision) -> PyErr {
471 fn rev_not_in_index(py: Python, rev: Revision) -> PyErr {
467 PyErr::new::<ValueError, _>(
472 PyErr::new::<ValueError, _>(
468 py,
473 py,
469 format!(
474 format!(
470 "Inconsistency: Revision {} found in nodemap \
475 "Inconsistency: Revision {} found in nodemap \
471 is not in revlog index",
476 is not in revlog index",
472 rev
477 rev
473 ),
478 ),
474 )
479 )
475 }
480 }
476
481
477 /// Standard treatment of NodeMapError
482 /// Standard treatment of NodeMapError
478 fn nodemap_error(py: Python, err: NodeMapError) -> PyErr {
483 fn nodemap_error(py: Python, err: NodeMapError) -> PyErr {
479 match err {
484 match err {
480 NodeMapError::MultipleResults => revlog_error(py),
485 NodeMapError::MultipleResults => revlog_error(py),
481 NodeMapError::RevisionNotInIndex(r) => rev_not_in_index(py, r),
486 NodeMapError::RevisionNotInIndex(r) => rev_not_in_index(py, r),
482 }
487 }
483 }
488 }
484
489
485 /// Create the module, with __package__ given from parent
490 /// Create the module, with __package__ given from parent
486 pub fn init_module(py: Python, package: &str) -> PyResult<PyModule> {
491 pub fn init_module(py: Python, package: &str) -> PyResult<PyModule> {
487 let dotted_name = &format!("{}.revlog", package);
492 let dotted_name = &format!("{}.revlog", package);
488 let m = PyModule::new(py, dotted_name)?;
493 let m = PyModule::new(py, dotted_name)?;
489 m.add(py, "__package__", package)?;
494 m.add(py, "__package__", package)?;
490 m.add(py, "__doc__", "RevLog - Rust implementations")?;
495 m.add(py, "__doc__", "RevLog - Rust implementations")?;
491
496
492 m.add_class::<MixedIndex>(py)?;
497 m.add_class::<MixedIndex>(py)?;
493
498
494 let sys = PyModule::import(py, "sys")?;
499 let sys = PyModule::import(py, "sys")?;
495 let sys_modules: PyDict = sys.get(py, "modules")?.extract(py)?;
500 let sys_modules: PyDict = sys.get(py, "modules")?.extract(py)?;
496 sys_modules.set_item(py, dotted_name, &m)?;
501 sys_modules.set_item(py, dotted_name, &m)?;
497
502
498 Ok(m)
503 Ok(m)
499 }
504 }
General Comments 0
You need to be logged in to leave comments. Login now