##// END OF EJS Templates
revlog: add a `entry_binary` method on index...
marmoute -
r47808:0d8ff1f4 default
parent child Browse files
Show More
@@ -1,2984 +1,3018 b''
1 /*
1 /*
2 parsers.c - efficient content parsing
2 parsers.c - efficient content parsing
3
3
4 Copyright 2008 Olivia Mackall <olivia@selenic.com> and others
4 Copyright 2008 Olivia Mackall <olivia@selenic.com> and others
5
5
6 This software may be used and distributed according to the terms of
6 This software may be used and distributed according to the terms of
7 the GNU General Public License, incorporated herein by reference.
7 the GNU General Public License, incorporated herein by reference.
8 */
8 */
9
9
10 #define PY_SSIZE_T_CLEAN
10 #define PY_SSIZE_T_CLEAN
11 #include <Python.h>
11 #include <Python.h>
12 #include <assert.h>
12 #include <assert.h>
13 #include <ctype.h>
13 #include <ctype.h>
14 #include <limits.h>
14 #include <limits.h>
15 #include <stddef.h>
15 #include <stddef.h>
16 #include <stdlib.h>
16 #include <stdlib.h>
17 #include <string.h>
17 #include <string.h>
18 #include <structmember.h>
18 #include <structmember.h>
19
19
20 #include "bitmanipulation.h"
20 #include "bitmanipulation.h"
21 #include "charencode.h"
21 #include "charencode.h"
22 #include "compat.h"
22 #include "compat.h"
23 #include "revlog.h"
23 #include "revlog.h"
24 #include "util.h"
24 #include "util.h"
25
25
26 #ifdef IS_PY3K
26 #ifdef IS_PY3K
27 /* The mapping of Python types is meant to be temporary to get Python
27 /* The mapping of Python types is meant to be temporary to get Python
28 * 3 to compile. We should remove this once Python 3 support is fully
28 * 3 to compile. We should remove this once Python 3 support is fully
29 * supported and proper types are used in the extensions themselves. */
29 * supported and proper types are used in the extensions themselves. */
30 #define PyInt_Check PyLong_Check
30 #define PyInt_Check PyLong_Check
31 #define PyInt_FromLong PyLong_FromLong
31 #define PyInt_FromLong PyLong_FromLong
32 #define PyInt_FromSsize_t PyLong_FromSsize_t
32 #define PyInt_FromSsize_t PyLong_FromSsize_t
33 #define PyInt_AsLong PyLong_AsLong
33 #define PyInt_AsLong PyLong_AsLong
34 #endif
34 #endif
35
35
36 typedef struct indexObjectStruct indexObject;
36 typedef struct indexObjectStruct indexObject;
37
37
38 typedef struct {
38 typedef struct {
39 int children[16];
39 int children[16];
40 } nodetreenode;
40 } nodetreenode;
41
41
42 typedef struct {
42 typedef struct {
43 int abi_version;
43 int abi_version;
44 Py_ssize_t (*index_length)(const indexObject *);
44 Py_ssize_t (*index_length)(const indexObject *);
45 const char *(*index_node)(indexObject *, Py_ssize_t);
45 const char *(*index_node)(indexObject *, Py_ssize_t);
46 int (*index_parents)(PyObject *, int, int *);
46 int (*index_parents)(PyObject *, int, int *);
47 } Revlog_CAPI;
47 } Revlog_CAPI;
48
48
49 /*
49 /*
50 * A base-16 trie for fast node->rev mapping.
50 * A base-16 trie for fast node->rev mapping.
51 *
51 *
52 * Positive value is index of the next node in the trie
52 * Positive value is index of the next node in the trie
53 * Negative value is a leaf: -(rev + 2)
53 * Negative value is a leaf: -(rev + 2)
54 * Zero is empty
54 * Zero is empty
55 */
55 */
56 typedef struct {
56 typedef struct {
57 indexObject *index;
57 indexObject *index;
58 nodetreenode *nodes;
58 nodetreenode *nodes;
59 Py_ssize_t nodelen;
59 Py_ssize_t nodelen;
60 size_t length; /* # nodes in use */
60 size_t length; /* # nodes in use */
61 size_t capacity; /* # nodes allocated */
61 size_t capacity; /* # nodes allocated */
62 int depth; /* maximum depth of tree */
62 int depth; /* maximum depth of tree */
63 int splits; /* # splits performed */
63 int splits; /* # splits performed */
64 } nodetree;
64 } nodetree;
65
65
66 typedef struct {
66 typedef struct {
67 PyObject_HEAD /* ; */
67 PyObject_HEAD /* ; */
68 nodetree nt;
68 nodetree nt;
69 } nodetreeObject;
69 } nodetreeObject;
70
70
71 /*
71 /*
72 * This class has two behaviors.
72 * This class has two behaviors.
73 *
73 *
74 * When used in a list-like way (with integer keys), we decode an
74 * When used in a list-like way (with integer keys), we decode an
75 * entry in a RevlogNG index file on demand. We have limited support for
75 * entry in a RevlogNG index file on demand. We have limited support for
76 * integer-keyed insert and delete, only at elements right before the
76 * integer-keyed insert and delete, only at elements right before the
77 * end.
77 * end.
78 *
78 *
79 * With string keys, we lazily perform a reverse mapping from node to
79 * With string keys, we lazily perform a reverse mapping from node to
80 * rev, using a base-16 trie.
80 * rev, using a base-16 trie.
81 */
81 */
82 struct indexObjectStruct {
82 struct indexObjectStruct {
83 PyObject_HEAD
83 PyObject_HEAD
84 /* Type-specific fields go here. */
84 /* Type-specific fields go here. */
85 PyObject *data; /* raw bytes of index */
85 PyObject *data; /* raw bytes of index */
86 Py_ssize_t nodelen; /* digest size of the hash, 20 for SHA-1 */
86 Py_ssize_t nodelen; /* digest size of the hash, 20 for SHA-1 */
87 PyObject *nullentry; /* fast path for references to null */
87 PyObject *nullentry; /* fast path for references to null */
88 Py_buffer buf; /* buffer of data */
88 Py_buffer buf; /* buffer of data */
89 const char **offsets; /* populated on demand */
89 const char **offsets; /* populated on demand */
90 Py_ssize_t length; /* current on-disk number of elements */
90 Py_ssize_t length; /* current on-disk number of elements */
91 unsigned new_length; /* number of added elements */
91 unsigned new_length; /* number of added elements */
92 unsigned added_length; /* space reserved for added elements */
92 unsigned added_length; /* space reserved for added elements */
93 char *added; /* populated on demand */
93 char *added; /* populated on demand */
94 PyObject *headrevs; /* cache, invalidated on changes */
94 PyObject *headrevs; /* cache, invalidated on changes */
95 PyObject *filteredrevs; /* filtered revs set */
95 PyObject *filteredrevs; /* filtered revs set */
96 nodetree nt; /* base-16 trie */
96 nodetree nt; /* base-16 trie */
97 int ntinitialized; /* 0 or 1 */
97 int ntinitialized; /* 0 or 1 */
98 int ntrev; /* last rev scanned */
98 int ntrev; /* last rev scanned */
99 int ntlookups; /* # lookups */
99 int ntlookups; /* # lookups */
100 int ntmisses; /* # lookups that miss the cache */
100 int ntmisses; /* # lookups that miss the cache */
101 int inlined;
101 int inlined;
102 long hdrsize; /* size of index headers. Differs in v1 v.s. v2 format */
102 long hdrsize; /* size of index headers. Differs in v1 v.s. v2 format */
103 };
103 };
104
104
105 static Py_ssize_t index_length(const indexObject *self)
105 static Py_ssize_t index_length(const indexObject *self)
106 {
106 {
107 return self->length + self->new_length;
107 return self->length + self->new_length;
108 }
108 }
109
109
110 static const char nullid[32] = {0};
110 static const char nullid[32] = {0};
111 static const Py_ssize_t nullrev = -1;
111 static const Py_ssize_t nullrev = -1;
112
112
113 static Py_ssize_t inline_scan(indexObject *self, const char **offsets);
113 static Py_ssize_t inline_scan(indexObject *self, const char **offsets);
114
114
115 static int index_find_node(indexObject *self, const char *node);
115 static int index_find_node(indexObject *self, const char *node);
116
116
117 #if LONG_MAX == 0x7fffffffL
117 #if LONG_MAX == 0x7fffffffL
118 static const char *const v1_tuple_format = PY23("Kiiiiiis#", "Kiiiiiiy#");
118 static const char *const v1_tuple_format = PY23("Kiiiiiis#", "Kiiiiiiy#");
119 static const char *const v2_tuple_format = PY23("Kiiiiiis#Ki", "Kiiiiiiy#Ki");
119 static const char *const v2_tuple_format = PY23("Kiiiiiis#Ki", "Kiiiiiiy#Ki");
120 #else
120 #else
121 static const char *const v1_tuple_format = PY23("kiiiiiis#", "kiiiiiiy#");
121 static const char *const v1_tuple_format = PY23("kiiiiiis#", "kiiiiiiy#");
122 static const char *const v2_tuple_format = PY23("kiiiiiis#ki", "kiiiiiiy#ki");
122 static const char *const v2_tuple_format = PY23("kiiiiiis#ki", "kiiiiiiy#ki");
123 #endif
123 #endif
124
124
125 /* A RevlogNG v1 index entry is 64 bytes long. */
125 /* A RevlogNG v1 index entry is 64 bytes long. */
126 static const long v1_hdrsize = 64;
126 static const long v1_hdrsize = 64;
127
127
128 /* A Revlogv2 index entry is 96 bytes long. */
128 /* A Revlogv2 index entry is 96 bytes long. */
129 static const long v2_hdrsize = 96;
129 static const long v2_hdrsize = 96;
130
130
131 static void raise_revlog_error(void)
131 static void raise_revlog_error(void)
132 {
132 {
133 PyObject *mod = NULL, *dict = NULL, *errclass = NULL;
133 PyObject *mod = NULL, *dict = NULL, *errclass = NULL;
134
134
135 mod = PyImport_ImportModule("mercurial.error");
135 mod = PyImport_ImportModule("mercurial.error");
136 if (mod == NULL) {
136 if (mod == NULL) {
137 goto cleanup;
137 goto cleanup;
138 }
138 }
139
139
140 dict = PyModule_GetDict(mod);
140 dict = PyModule_GetDict(mod);
141 if (dict == NULL) {
141 if (dict == NULL) {
142 goto cleanup;
142 goto cleanup;
143 }
143 }
144 Py_INCREF(dict);
144 Py_INCREF(dict);
145
145
146 errclass = PyDict_GetItemString(dict, "RevlogError");
146 errclass = PyDict_GetItemString(dict, "RevlogError");
147 if (errclass == NULL) {
147 if (errclass == NULL) {
148 PyErr_SetString(PyExc_SystemError,
148 PyErr_SetString(PyExc_SystemError,
149 "could not find RevlogError");
149 "could not find RevlogError");
150 goto cleanup;
150 goto cleanup;
151 }
151 }
152
152
153 /* value of exception is ignored by callers */
153 /* value of exception is ignored by callers */
154 PyErr_SetString(errclass, "RevlogError");
154 PyErr_SetString(errclass, "RevlogError");
155
155
156 cleanup:
156 cleanup:
157 Py_XDECREF(dict);
157 Py_XDECREF(dict);
158 Py_XDECREF(mod);
158 Py_XDECREF(mod);
159 }
159 }
160
160
161 /*
161 /*
162 * Return a pointer to the beginning of a RevlogNG record.
162 * Return a pointer to the beginning of a RevlogNG record.
163 */
163 */
164 static const char *index_deref(indexObject *self, Py_ssize_t pos)
164 static const char *index_deref(indexObject *self, Py_ssize_t pos)
165 {
165 {
166 if (pos >= self->length)
166 if (pos >= self->length)
167 return self->added + (pos - self->length) * self->hdrsize;
167 return self->added + (pos - self->length) * self->hdrsize;
168
168
169 if (self->inlined && pos > 0) {
169 if (self->inlined && pos > 0) {
170 if (self->offsets == NULL) {
170 if (self->offsets == NULL) {
171 Py_ssize_t ret;
171 Py_ssize_t ret;
172 self->offsets =
172 self->offsets =
173 PyMem_Malloc(self->length * sizeof(*self->offsets));
173 PyMem_Malloc(self->length * sizeof(*self->offsets));
174 if (self->offsets == NULL)
174 if (self->offsets == NULL)
175 return (const char *)PyErr_NoMemory();
175 return (const char *)PyErr_NoMemory();
176 ret = inline_scan(self, self->offsets);
176 ret = inline_scan(self, self->offsets);
177 if (ret == -1) {
177 if (ret == -1) {
178 return NULL;
178 return NULL;
179 };
179 };
180 }
180 }
181 return self->offsets[pos];
181 return self->offsets[pos];
182 }
182 }
183
183
184 return (const char *)(self->buf.buf) + pos * self->hdrsize;
184 return (const char *)(self->buf.buf) + pos * self->hdrsize;
185 }
185 }
186
186
187 /*
187 /*
188 * Get parents of the given rev.
188 * Get parents of the given rev.
189 *
189 *
190 * The specified rev must be valid and must not be nullrev. A returned
190 * The specified rev must be valid and must not be nullrev. A returned
191 * parent revision may be nullrev, but is guaranteed to be in valid range.
191 * parent revision may be nullrev, but is guaranteed to be in valid range.
192 */
192 */
193 static inline int index_get_parents(indexObject *self, Py_ssize_t rev, int *ps,
193 static inline int index_get_parents(indexObject *self, Py_ssize_t rev, int *ps,
194 int maxrev)
194 int maxrev)
195 {
195 {
196 const char *data = index_deref(self, rev);
196 const char *data = index_deref(self, rev);
197
197
198 ps[0] = getbe32(data + 24);
198 ps[0] = getbe32(data + 24);
199 ps[1] = getbe32(data + 28);
199 ps[1] = getbe32(data + 28);
200
200
201 /* If index file is corrupted, ps[] may point to invalid revisions. So
201 /* If index file is corrupted, ps[] may point to invalid revisions. So
202 * there is a risk of buffer overflow to trust them unconditionally. */
202 * there is a risk of buffer overflow to trust them unconditionally. */
203 if (ps[0] < -1 || ps[0] > maxrev || ps[1] < -1 || ps[1] > maxrev) {
203 if (ps[0] < -1 || ps[0] > maxrev || ps[1] < -1 || ps[1] > maxrev) {
204 PyErr_SetString(PyExc_ValueError, "parent out of range");
204 PyErr_SetString(PyExc_ValueError, "parent out of range");
205 return -1;
205 return -1;
206 }
206 }
207 return 0;
207 return 0;
208 }
208 }
209
209
210 /*
210 /*
211 * Get parents of the given rev.
211 * Get parents of the given rev.
212 *
212 *
213 * If the specified rev is out of range, IndexError will be raised. If the
213 * If the specified rev is out of range, IndexError will be raised. If the
214 * revlog entry is corrupted, ValueError may be raised.
214 * revlog entry is corrupted, ValueError may be raised.
215 *
215 *
216 * Returns 0 on success or -1 on failure.
216 * Returns 0 on success or -1 on failure.
217 */
217 */
218 static int HgRevlogIndex_GetParents(PyObject *op, int rev, int *ps)
218 static int HgRevlogIndex_GetParents(PyObject *op, int rev, int *ps)
219 {
219 {
220 int tiprev;
220 int tiprev;
221 if (!op || !HgRevlogIndex_Check(op) || !ps) {
221 if (!op || !HgRevlogIndex_Check(op) || !ps) {
222 PyErr_BadInternalCall();
222 PyErr_BadInternalCall();
223 return -1;
223 return -1;
224 }
224 }
225 tiprev = (int)index_length((indexObject *)op) - 1;
225 tiprev = (int)index_length((indexObject *)op) - 1;
226 if (rev < -1 || rev > tiprev) {
226 if (rev < -1 || rev > tiprev) {
227 PyErr_Format(PyExc_IndexError, "rev out of range: %d", rev);
227 PyErr_Format(PyExc_IndexError, "rev out of range: %d", rev);
228 return -1;
228 return -1;
229 } else if (rev == -1) {
229 } else if (rev == -1) {
230 ps[0] = ps[1] = -1;
230 ps[0] = ps[1] = -1;
231 return 0;
231 return 0;
232 } else {
232 } else {
233 return index_get_parents((indexObject *)op, rev, ps, tiprev);
233 return index_get_parents((indexObject *)op, rev, ps, tiprev);
234 }
234 }
235 }
235 }
236
236
237 static inline int64_t index_get_start(indexObject *self, Py_ssize_t rev)
237 static inline int64_t index_get_start(indexObject *self, Py_ssize_t rev)
238 {
238 {
239 const char *data;
239 const char *data;
240 uint64_t offset;
240 uint64_t offset;
241
241
242 if (rev == nullrev)
242 if (rev == nullrev)
243 return 0;
243 return 0;
244
244
245 data = index_deref(self, rev);
245 data = index_deref(self, rev);
246 offset = getbe32(data + 4);
246 offset = getbe32(data + 4);
247 if (rev == 0) {
247 if (rev == 0) {
248 /* mask out version number for the first entry */
248 /* mask out version number for the first entry */
249 offset &= 0xFFFF;
249 offset &= 0xFFFF;
250 } else {
250 } else {
251 uint32_t offset_high = getbe32(data);
251 uint32_t offset_high = getbe32(data);
252 offset |= ((uint64_t)offset_high) << 32;
252 offset |= ((uint64_t)offset_high) << 32;
253 }
253 }
254 return (int64_t)(offset >> 16);
254 return (int64_t)(offset >> 16);
255 }
255 }
256
256
257 static inline int index_get_length(indexObject *self, Py_ssize_t rev)
257 static inline int index_get_length(indexObject *self, Py_ssize_t rev)
258 {
258 {
259 const char *data;
259 const char *data;
260 int tmp;
260 int tmp;
261
261
262 if (rev == nullrev)
262 if (rev == nullrev)
263 return 0;
263 return 0;
264
264
265 data = index_deref(self, rev);
265 data = index_deref(self, rev);
266
266
267 tmp = (int)getbe32(data + 8);
267 tmp = (int)getbe32(data + 8);
268 if (tmp < 0) {
268 if (tmp < 0) {
269 PyErr_Format(PyExc_OverflowError,
269 PyErr_Format(PyExc_OverflowError,
270 "revlog entry size out of bound (%d)", tmp);
270 "revlog entry size out of bound (%d)", tmp);
271 return -1;
271 return -1;
272 }
272 }
273 return tmp;
273 return tmp;
274 }
274 }
275
275
276 /*
276 /*
277 * RevlogNG format (all in big endian, data may be inlined):
277 * RevlogNG format (all in big endian, data may be inlined):
278 * 6 bytes: offset
278 * 6 bytes: offset
279 * 2 bytes: flags
279 * 2 bytes: flags
280 * 4 bytes: compressed length
280 * 4 bytes: compressed length
281 * 4 bytes: uncompressed length
281 * 4 bytes: uncompressed length
282 * 4 bytes: base revision
282 * 4 bytes: base revision
283 * 4 bytes: link revision
283 * 4 bytes: link revision
284 * 4 bytes: parent 1 revision
284 * 4 bytes: parent 1 revision
285 * 4 bytes: parent 2 revision
285 * 4 bytes: parent 2 revision
286 * 32 bytes: nodeid (only 20 bytes used with SHA-1)
286 * 32 bytes: nodeid (only 20 bytes used with SHA-1)
287 */
287 */
288 static PyObject *index_get(indexObject *self, Py_ssize_t pos)
288 static PyObject *index_get(indexObject *self, Py_ssize_t pos)
289 {
289 {
290 uint64_t offset_flags, sidedata_offset;
290 uint64_t offset_flags, sidedata_offset;
291 int comp_len, uncomp_len, base_rev, link_rev, parent_1, parent_2,
291 int comp_len, uncomp_len, base_rev, link_rev, parent_1, parent_2,
292 sidedata_comp_len;
292 sidedata_comp_len;
293 const char *c_node_id;
293 const char *c_node_id;
294 const char *data;
294 const char *data;
295 Py_ssize_t length = index_length(self);
295 Py_ssize_t length = index_length(self);
296
296
297 if (pos == nullrev) {
297 if (pos == nullrev) {
298 Py_INCREF(self->nullentry);
298 Py_INCREF(self->nullentry);
299 return self->nullentry;
299 return self->nullentry;
300 }
300 }
301
301
302 if (pos < 0 || pos >= length) {
302 if (pos < 0 || pos >= length) {
303 PyErr_SetString(PyExc_IndexError, "revlog index out of range");
303 PyErr_SetString(PyExc_IndexError, "revlog index out of range");
304 return NULL;
304 return NULL;
305 }
305 }
306
306
307 data = index_deref(self, pos);
307 data = index_deref(self, pos);
308 if (data == NULL)
308 if (data == NULL)
309 return NULL;
309 return NULL;
310
310
311 offset_flags = getbe32(data + 4);
311 offset_flags = getbe32(data + 4);
312 /*
312 /*
313 * The first entry on-disk needs the version number masked out,
313 * The first entry on-disk needs the version number masked out,
314 * but this doesn't apply if entries are added to an empty index.
314 * but this doesn't apply if entries are added to an empty index.
315 */
315 */
316 if (self->length && pos == 0)
316 if (self->length && pos == 0)
317 offset_flags &= 0xFFFF;
317 offset_flags &= 0xFFFF;
318 else {
318 else {
319 uint32_t offset_high = getbe32(data);
319 uint32_t offset_high = getbe32(data);
320 offset_flags |= ((uint64_t)offset_high) << 32;
320 offset_flags |= ((uint64_t)offset_high) << 32;
321 }
321 }
322
322
323 comp_len = getbe32(data + 8);
323 comp_len = getbe32(data + 8);
324 uncomp_len = getbe32(data + 12);
324 uncomp_len = getbe32(data + 12);
325 base_rev = getbe32(data + 16);
325 base_rev = getbe32(data + 16);
326 link_rev = getbe32(data + 20);
326 link_rev = getbe32(data + 20);
327 parent_1 = getbe32(data + 24);
327 parent_1 = getbe32(data + 24);
328 parent_2 = getbe32(data + 28);
328 parent_2 = getbe32(data + 28);
329 c_node_id = data + 32;
329 c_node_id = data + 32;
330
330
331 if (self->hdrsize == v1_hdrsize) {
331 if (self->hdrsize == v1_hdrsize) {
332 return Py_BuildValue(v1_tuple_format, offset_flags, comp_len,
332 return Py_BuildValue(v1_tuple_format, offset_flags, comp_len,
333 uncomp_len, base_rev, link_rev, parent_1,
333 uncomp_len, base_rev, link_rev, parent_1,
334 parent_2, c_node_id, self->nodelen);
334 parent_2, c_node_id, self->nodelen);
335 } else {
335 } else {
336 sidedata_offset = getbe64(data + 64);
336 sidedata_offset = getbe64(data + 64);
337 sidedata_comp_len = getbe32(data + 72);
337 sidedata_comp_len = getbe32(data + 72);
338
338
339 return Py_BuildValue(v2_tuple_format, offset_flags, comp_len,
339 return Py_BuildValue(v2_tuple_format, offset_flags, comp_len,
340 uncomp_len, base_rev, link_rev, parent_1,
340 uncomp_len, base_rev, link_rev, parent_1,
341 parent_2, c_node_id, self->nodelen,
341 parent_2, c_node_id, self->nodelen,
342 sidedata_offset, sidedata_comp_len);
342 sidedata_offset, sidedata_comp_len);
343 }
343 }
344 }
344 }
345 /*
346 * Return the raw binary string representing a revision
347 */
348 static PyObject *index_entry_binary(indexObject *self, PyObject *args)
349 {
350 long rev;
351 int header;
352 const char *data;
353 char entry[v2_hdrsize];
354
355 Py_ssize_t length = index_length(self);
356
357 if (!PyArg_ParseTuple(args, "lI", &rev, &header)) {
358 return NULL;
359 }
360 if (rev < 0 || rev >= length) {
361 PyErr_Format(PyExc_ValueError, "revlog index out of range: %ld",
362 rev);
363 return NULL;
364 };
365
366 data = index_deref(self, rev);
367 if (data == NULL)
368 return NULL;
369 if (rev == 0) {
370 // put the header at the start of the first entry
371 memcpy(entry, data, self->hdrsize);
372 putbe32(header, entry);
373 return PyBytes_FromStringAndSize(entry, self->hdrsize);
374 }
375 return PyBytes_FromStringAndSize(data, self->hdrsize);
376 }
345
377
346 /*
378 /*
347 * Return the hash of node corresponding to the given rev.
379 * Return the hash of node corresponding to the given rev.
348 */
380 */
349 static const char *index_node(indexObject *self, Py_ssize_t pos)
381 static const char *index_node(indexObject *self, Py_ssize_t pos)
350 {
382 {
351 Py_ssize_t length = index_length(self);
383 Py_ssize_t length = index_length(self);
352 const char *data;
384 const char *data;
353
385
354 if (pos == nullrev)
386 if (pos == nullrev)
355 return nullid;
387 return nullid;
356
388
357 if (pos >= length)
389 if (pos >= length)
358 return NULL;
390 return NULL;
359
391
360 data = index_deref(self, pos);
392 data = index_deref(self, pos);
361 return data ? data + 32 : NULL;
393 return data ? data + 32 : NULL;
362 }
394 }
363
395
364 /*
396 /*
365 * Return the hash of the node corresponding to the given rev. The
397 * Return the hash of the node corresponding to the given rev. The
366 * rev is assumed to be existing. If not, an exception is set.
398 * rev is assumed to be existing. If not, an exception is set.
367 */
399 */
368 static const char *index_node_existing(indexObject *self, Py_ssize_t pos)
400 static const char *index_node_existing(indexObject *self, Py_ssize_t pos)
369 {
401 {
370 const char *node = index_node(self, pos);
402 const char *node = index_node(self, pos);
371 if (node == NULL) {
403 if (node == NULL) {
372 PyErr_Format(PyExc_IndexError, "could not access rev %d",
404 PyErr_Format(PyExc_IndexError, "could not access rev %d",
373 (int)pos);
405 (int)pos);
374 }
406 }
375 return node;
407 return node;
376 }
408 }
377
409
378 static int nt_insert(nodetree *self, const char *node, int rev);
410 static int nt_insert(nodetree *self, const char *node, int rev);
379
411
380 static int node_check(Py_ssize_t nodelen, PyObject *obj, char **node)
412 static int node_check(Py_ssize_t nodelen, PyObject *obj, char **node)
381 {
413 {
382 Py_ssize_t thisnodelen;
414 Py_ssize_t thisnodelen;
383 if (PyBytes_AsStringAndSize(obj, node, &thisnodelen) == -1)
415 if (PyBytes_AsStringAndSize(obj, node, &thisnodelen) == -1)
384 return -1;
416 return -1;
385 if (nodelen == thisnodelen)
417 if (nodelen == thisnodelen)
386 return 0;
418 return 0;
387 PyErr_Format(PyExc_ValueError, "node len %zd != expected node len %zd",
419 PyErr_Format(PyExc_ValueError, "node len %zd != expected node len %zd",
388 thisnodelen, nodelen);
420 thisnodelen, nodelen);
389 return -1;
421 return -1;
390 }
422 }
391
423
392 static PyObject *index_append(indexObject *self, PyObject *obj)
424 static PyObject *index_append(indexObject *self, PyObject *obj)
393 {
425 {
394 uint64_t offset_flags, sidedata_offset;
426 uint64_t offset_flags, sidedata_offset;
395 int rev, comp_len, uncomp_len, base_rev, link_rev, parent_1, parent_2;
427 int rev, comp_len, uncomp_len, base_rev, link_rev, parent_1, parent_2;
396 Py_ssize_t c_node_id_len, sidedata_comp_len;
428 Py_ssize_t c_node_id_len, sidedata_comp_len;
397 const char *c_node_id;
429 const char *c_node_id;
398 char *data;
430 char *data;
399
431
400 if (self->hdrsize == v1_hdrsize) {
432 if (self->hdrsize == v1_hdrsize) {
401 if (!PyArg_ParseTuple(obj, v1_tuple_format, &offset_flags,
433 if (!PyArg_ParseTuple(obj, v1_tuple_format, &offset_flags,
402 &comp_len, &uncomp_len, &base_rev,
434 &comp_len, &uncomp_len, &base_rev,
403 &link_rev, &parent_1, &parent_2,
435 &link_rev, &parent_1, &parent_2,
404 &c_node_id, &c_node_id_len)) {
436 &c_node_id, &c_node_id_len)) {
405 PyErr_SetString(PyExc_TypeError, "8-tuple required");
437 PyErr_SetString(PyExc_TypeError, "8-tuple required");
406 return NULL;
438 return NULL;
407 }
439 }
408 } else {
440 } else {
409 if (!PyArg_ParseTuple(obj, v2_tuple_format, &offset_flags,
441 if (!PyArg_ParseTuple(obj, v2_tuple_format, &offset_flags,
410 &comp_len, &uncomp_len, &base_rev,
442 &comp_len, &uncomp_len, &base_rev,
411 &link_rev, &parent_1, &parent_2,
443 &link_rev, &parent_1, &parent_2,
412 &c_node_id, &c_node_id_len,
444 &c_node_id, &c_node_id_len,
413 &sidedata_offset, &sidedata_comp_len)) {
445 &sidedata_offset, &sidedata_comp_len)) {
414 PyErr_SetString(PyExc_TypeError, "10-tuple required");
446 PyErr_SetString(PyExc_TypeError, "10-tuple required");
415 return NULL;
447 return NULL;
416 }
448 }
417 }
449 }
418
450
419 if (c_node_id_len != self->nodelen) {
451 if (c_node_id_len != self->nodelen) {
420 PyErr_SetString(PyExc_TypeError, "invalid node");
452 PyErr_SetString(PyExc_TypeError, "invalid node");
421 return NULL;
453 return NULL;
422 }
454 }
423
455
424 if (self->new_length == self->added_length) {
456 if (self->new_length == self->added_length) {
425 size_t new_added_length =
457 size_t new_added_length =
426 self->added_length ? self->added_length * 2 : 4096;
458 self->added_length ? self->added_length * 2 : 4096;
427 void *new_added = PyMem_Realloc(self->added, new_added_length *
459 void *new_added = PyMem_Realloc(self->added, new_added_length *
428 self->hdrsize);
460 self->hdrsize);
429 if (!new_added)
461 if (!new_added)
430 return PyErr_NoMemory();
462 return PyErr_NoMemory();
431 self->added = new_added;
463 self->added = new_added;
432 self->added_length = new_added_length;
464 self->added_length = new_added_length;
433 }
465 }
434 rev = self->length + self->new_length;
466 rev = self->length + self->new_length;
435 data = self->added + self->hdrsize * self->new_length++;
467 data = self->added + self->hdrsize * self->new_length++;
436 putbe32(offset_flags >> 32, data);
468 putbe32(offset_flags >> 32, data);
437 putbe32(offset_flags & 0xffffffffU, data + 4);
469 putbe32(offset_flags & 0xffffffffU, data + 4);
438 putbe32(comp_len, data + 8);
470 putbe32(comp_len, data + 8);
439 putbe32(uncomp_len, data + 12);
471 putbe32(uncomp_len, data + 12);
440 putbe32(base_rev, data + 16);
472 putbe32(base_rev, data + 16);
441 putbe32(link_rev, data + 20);
473 putbe32(link_rev, data + 20);
442 putbe32(parent_1, data + 24);
474 putbe32(parent_1, data + 24);
443 putbe32(parent_2, data + 28);
475 putbe32(parent_2, data + 28);
444 memcpy(data + 32, c_node_id, c_node_id_len);
476 memcpy(data + 32, c_node_id, c_node_id_len);
445 /* Padding since SHA-1 is only 20 bytes for now */
477 /* Padding since SHA-1 is only 20 bytes for now */
446 memset(data + 32 + c_node_id_len, 0, 32 - c_node_id_len);
478 memset(data + 32 + c_node_id_len, 0, 32 - c_node_id_len);
447 if (self->hdrsize != v1_hdrsize) {
479 if (self->hdrsize != v1_hdrsize) {
448 putbe64(sidedata_offset, data + 64);
480 putbe64(sidedata_offset, data + 64);
449 putbe32(sidedata_comp_len, data + 72);
481 putbe32(sidedata_comp_len, data + 72);
450 /* Padding for 96 bytes alignment */
482 /* Padding for 96 bytes alignment */
451 memset(data + 76, 0, self->hdrsize - 76);
483 memset(data + 76, 0, self->hdrsize - 76);
452 }
484 }
453
485
454 if (self->ntinitialized)
486 if (self->ntinitialized)
455 nt_insert(&self->nt, c_node_id, rev);
487 nt_insert(&self->nt, c_node_id, rev);
456
488
457 Py_CLEAR(self->headrevs);
489 Py_CLEAR(self->headrevs);
458 Py_RETURN_NONE;
490 Py_RETURN_NONE;
459 }
491 }
460
492
461 /* Replace an existing index entry's sidedata offset and length with new ones.
493 /* Replace an existing index entry's sidedata offset and length with new ones.
462 This cannot be used outside of the context of sidedata rewriting,
494 This cannot be used outside of the context of sidedata rewriting,
463 inside the transaction that creates the given revision. */
495 inside the transaction that creates the given revision. */
464 static PyObject *index_replace_sidedata_info(indexObject *self, PyObject *args)
496 static PyObject *index_replace_sidedata_info(indexObject *self, PyObject *args)
465 {
497 {
466 uint64_t sidedata_offset;
498 uint64_t sidedata_offset;
467 int rev;
499 int rev;
468 Py_ssize_t sidedata_comp_len;
500 Py_ssize_t sidedata_comp_len;
469 char *data;
501 char *data;
470 #if LONG_MAX == 0x7fffffffL
502 #if LONG_MAX == 0x7fffffffL
471 const char *const sidedata_format = PY23("nKi", "nKi");
503 const char *const sidedata_format = PY23("nKi", "nKi");
472 #else
504 #else
473 const char *const sidedata_format = PY23("nki", "nki");
505 const char *const sidedata_format = PY23("nki", "nki");
474 #endif
506 #endif
475
507
476 if (self->hdrsize == v1_hdrsize || self->inlined) {
508 if (self->hdrsize == v1_hdrsize || self->inlined) {
477 /*
509 /*
478 There is a bug in the transaction handling when going from an
510 There is a bug in the transaction handling when going from an
479 inline revlog to a separate index and data file. Turn it off until
511 inline revlog to a separate index and data file. Turn it off until
480 it's fixed, since v2 revlogs sometimes get rewritten on exchange.
512 it's fixed, since v2 revlogs sometimes get rewritten on exchange.
481 See issue6485.
513 See issue6485.
482 */
514 */
483 raise_revlog_error();
515 raise_revlog_error();
484 return NULL;
516 return NULL;
485 }
517 }
486
518
487 if (!PyArg_ParseTuple(args, sidedata_format, &rev, &sidedata_offset,
519 if (!PyArg_ParseTuple(args, sidedata_format, &rev, &sidedata_offset,
488 &sidedata_comp_len))
520 &sidedata_comp_len))
489 return NULL;
521 return NULL;
490
522
491 if (rev < 0 || rev >= index_length(self)) {
523 if (rev < 0 || rev >= index_length(self)) {
492 PyErr_SetString(PyExc_IndexError, "revision outside index");
524 PyErr_SetString(PyExc_IndexError, "revision outside index");
493 return NULL;
525 return NULL;
494 }
526 }
495 if (rev < self->length) {
527 if (rev < self->length) {
496 PyErr_SetString(
528 PyErr_SetString(
497 PyExc_IndexError,
529 PyExc_IndexError,
498 "cannot rewrite entries outside of this transaction");
530 "cannot rewrite entries outside of this transaction");
499 return NULL;
531 return NULL;
500 }
532 }
501
533
502 /* Find the newly added node, offset from the "already on-disk" length
534 /* Find the newly added node, offset from the "already on-disk" length
503 */
535 */
504 data = self->added + self->hdrsize * (rev - self->length);
536 data = self->added + self->hdrsize * (rev - self->length);
505 putbe64(sidedata_offset, data + 64);
537 putbe64(sidedata_offset, data + 64);
506 putbe32(sidedata_comp_len, data + 72);
538 putbe32(sidedata_comp_len, data + 72);
507
539
508 Py_RETURN_NONE;
540 Py_RETURN_NONE;
509 }
541 }
510
542
511 static PyObject *index_stats(indexObject *self)
543 static PyObject *index_stats(indexObject *self)
512 {
544 {
513 PyObject *obj = PyDict_New();
545 PyObject *obj = PyDict_New();
514 PyObject *s = NULL;
546 PyObject *s = NULL;
515 PyObject *t = NULL;
547 PyObject *t = NULL;
516
548
517 if (obj == NULL)
549 if (obj == NULL)
518 return NULL;
550 return NULL;
519
551
520 #define istat(__n, __d) \
552 #define istat(__n, __d) \
521 do { \
553 do { \
522 s = PyBytes_FromString(__d); \
554 s = PyBytes_FromString(__d); \
523 t = PyInt_FromSsize_t(self->__n); \
555 t = PyInt_FromSsize_t(self->__n); \
524 if (!s || !t) \
556 if (!s || !t) \
525 goto bail; \
557 goto bail; \
526 if (PyDict_SetItem(obj, s, t) == -1) \
558 if (PyDict_SetItem(obj, s, t) == -1) \
527 goto bail; \
559 goto bail; \
528 Py_CLEAR(s); \
560 Py_CLEAR(s); \
529 Py_CLEAR(t); \
561 Py_CLEAR(t); \
530 } while (0)
562 } while (0)
531
563
532 if (self->added_length)
564 if (self->added_length)
533 istat(new_length, "index entries added");
565 istat(new_length, "index entries added");
534 istat(length, "revs in memory");
566 istat(length, "revs in memory");
535 istat(ntlookups, "node trie lookups");
567 istat(ntlookups, "node trie lookups");
536 istat(ntmisses, "node trie misses");
568 istat(ntmisses, "node trie misses");
537 istat(ntrev, "node trie last rev scanned");
569 istat(ntrev, "node trie last rev scanned");
538 if (self->ntinitialized) {
570 if (self->ntinitialized) {
539 istat(nt.capacity, "node trie capacity");
571 istat(nt.capacity, "node trie capacity");
540 istat(nt.depth, "node trie depth");
572 istat(nt.depth, "node trie depth");
541 istat(nt.length, "node trie count");
573 istat(nt.length, "node trie count");
542 istat(nt.splits, "node trie splits");
574 istat(nt.splits, "node trie splits");
543 }
575 }
544
576
545 #undef istat
577 #undef istat
546
578
547 return obj;
579 return obj;
548
580
549 bail:
581 bail:
550 Py_XDECREF(obj);
582 Py_XDECREF(obj);
551 Py_XDECREF(s);
583 Py_XDECREF(s);
552 Py_XDECREF(t);
584 Py_XDECREF(t);
553 return NULL;
585 return NULL;
554 }
586 }
555
587
556 /*
588 /*
557 * When we cache a list, we want to be sure the caller can't mutate
589 * When we cache a list, we want to be sure the caller can't mutate
558 * the cached copy.
590 * the cached copy.
559 */
591 */
560 static PyObject *list_copy(PyObject *list)
592 static PyObject *list_copy(PyObject *list)
561 {
593 {
562 Py_ssize_t len = PyList_GET_SIZE(list);
594 Py_ssize_t len = PyList_GET_SIZE(list);
563 PyObject *newlist = PyList_New(len);
595 PyObject *newlist = PyList_New(len);
564 Py_ssize_t i;
596 Py_ssize_t i;
565
597
566 if (newlist == NULL)
598 if (newlist == NULL)
567 return NULL;
599 return NULL;
568
600
569 for (i = 0; i < len; i++) {
601 for (i = 0; i < len; i++) {
570 PyObject *obj = PyList_GET_ITEM(list, i);
602 PyObject *obj = PyList_GET_ITEM(list, i);
571 Py_INCREF(obj);
603 Py_INCREF(obj);
572 PyList_SET_ITEM(newlist, i, obj);
604 PyList_SET_ITEM(newlist, i, obj);
573 }
605 }
574
606
575 return newlist;
607 return newlist;
576 }
608 }
577
609
578 static int check_filter(PyObject *filter, Py_ssize_t arg)
610 static int check_filter(PyObject *filter, Py_ssize_t arg)
579 {
611 {
580 if (filter) {
612 if (filter) {
581 PyObject *arglist, *result;
613 PyObject *arglist, *result;
582 int isfiltered;
614 int isfiltered;
583
615
584 arglist = Py_BuildValue("(n)", arg);
616 arglist = Py_BuildValue("(n)", arg);
585 if (!arglist) {
617 if (!arglist) {
586 return -1;
618 return -1;
587 }
619 }
588
620
589 result = PyObject_Call(filter, arglist, NULL);
621 result = PyObject_Call(filter, arglist, NULL);
590 Py_DECREF(arglist);
622 Py_DECREF(arglist);
591 if (!result) {
623 if (!result) {
592 return -1;
624 return -1;
593 }
625 }
594
626
595 /* PyObject_IsTrue returns 1 if true, 0 if false, -1 if error,
627 /* PyObject_IsTrue returns 1 if true, 0 if false, -1 if error,
596 * same as this function, so we can just return it directly.*/
628 * same as this function, so we can just return it directly.*/
597 isfiltered = PyObject_IsTrue(result);
629 isfiltered = PyObject_IsTrue(result);
598 Py_DECREF(result);
630 Py_DECREF(result);
599 return isfiltered;
631 return isfiltered;
600 } else {
632 } else {
601 return 0;
633 return 0;
602 }
634 }
603 }
635 }
604
636
605 static inline void set_phase_from_parents(char *phases, int parent_1,
637 static inline void set_phase_from_parents(char *phases, int parent_1,
606 int parent_2, Py_ssize_t i)
638 int parent_2, Py_ssize_t i)
607 {
639 {
608 if (parent_1 >= 0 && phases[parent_1] > phases[i])
640 if (parent_1 >= 0 && phases[parent_1] > phases[i])
609 phases[i] = phases[parent_1];
641 phases[i] = phases[parent_1];
610 if (parent_2 >= 0 && phases[parent_2] > phases[i])
642 if (parent_2 >= 0 && phases[parent_2] > phases[i])
611 phases[i] = phases[parent_2];
643 phases[i] = phases[parent_2];
612 }
644 }
613
645
/*
 * Compute which members of `roots` are reachable from `heads` by walking
 * parent links, considering only revisions >= minroot. When includepath
 * is true, additionally return every revision on a path between a
 * reachable root and a head.
 *
 * args: (minroot, heads list, roots list, includepath bool)
 * Returns a new list of revision numbers, or NULL with an exception set.
 *
 * Both state arrays are indexed by rev+1 so that nullrev (-1) has a slot.
 */
static PyObject *reachableroots2(indexObject *self, PyObject *args)
{

	/* Input */
	long minroot;
	PyObject *includepatharg = NULL;
	int includepath = 0;
	/* heads and roots are lists */
	PyObject *heads = NULL;
	PyObject *roots = NULL;
	PyObject *reachable = NULL;

	PyObject *val;
	Py_ssize_t len = index_length(self);
	long revnum;
	Py_ssize_t k;
	Py_ssize_t i;
	Py_ssize_t l;
	int r;
	int parents[2];

	/* Internal data structure:
	 * tovisit: array of length len+1 (all revs + nullrev), filled upto
	 * lentovisit
	 *
	 * revstates: array of length len+1 (all revs + nullrev) */
	int *tovisit = NULL;
	long lentovisit = 0;
	enum { RS_SEEN = 1, RS_ROOT = 2, RS_REACHABLE = 4 };
	char *revstates = NULL;

	/* Get arguments */
	if (!PyArg_ParseTuple(args, "lO!O!O!", &minroot, &PyList_Type, &heads,
	                      &PyList_Type, &roots, &PyBool_Type,
	                      &includepatharg))
		goto bail;

	if (includepatharg == Py_True)
		includepath = 1;

	/* Initialize return set */
	reachable = PyList_New(0);
	if (reachable == NULL)
		goto bail;

	/* Initialize internal datastructures */
	tovisit = (int *)malloc((len + 1) * sizeof(int));
	if (tovisit == NULL) {
		PyErr_NoMemory();
		goto bail;
	}

	revstates = (char *)calloc(len + 1, 1);
	if (revstates == NULL) {
		PyErr_NoMemory();
		goto bail;
	}

	/* Mark every requested root in the state array */
	l = PyList_GET_SIZE(roots);
	for (i = 0; i < l; i++) {
		revnum = PyInt_AsLong(PyList_GET_ITEM(roots, i));
		if (revnum == -1 && PyErr_Occurred())
			goto bail;
		/* If root is out of range, e.g. wdir(), it must be unreachable
		 * from heads. So we can just ignore it. */
		if (revnum + 1 < 0 || revnum + 1 >= len + 1)
			continue;
		revstates[revnum + 1] |= RS_ROOT;
	}

	/* Populate tovisit with all the heads */
	l = PyList_GET_SIZE(heads);
	for (i = 0; i < l; i++) {
		revnum = PyInt_AsLong(PyList_GET_ITEM(heads, i));
		if (revnum == -1 && PyErr_Occurred())
			goto bail;
		/* unlike roots, an out-of-range head is a hard error */
		if (revnum + 1 < 0 || revnum + 1 >= len + 1) {
			PyErr_SetString(PyExc_IndexError, "head out of range");
			goto bail;
		}
		if (!(revstates[revnum + 1] & RS_SEEN)) {
			tovisit[lentovisit++] = (int)revnum;
			revstates[revnum + 1] |= RS_SEEN;
		}
	}

	/* Visit the tovisit list and find the reachable roots */
	k = 0;
	while (k < lentovisit) {
		/* Add the node to reachable if it is a root*/
		revnum = tovisit[k++];
		if (revstates[revnum + 1] & RS_ROOT) {
			revstates[revnum + 1] |= RS_REACHABLE;
			val = PyInt_FromLong(revnum);
			if (val == NULL)
				goto bail;
			r = PyList_Append(reachable, val);
			Py_DECREF(val);
			if (r < 0)
				goto bail;
			/* without includepath we can stop below a root */
			if (includepath == 0)
				continue;
		}

		/* Add its parents to the list of nodes to visit */
		if (revnum == nullrev)
			continue;
		r = index_get_parents(self, revnum, parents, (int)len - 1);
		if (r < 0)
			goto bail;
		for (i = 0; i < 2; i++) {
			/* parents below minroot are out of scope */
			if (!(revstates[parents[i] + 1] & RS_SEEN) &&
			    parents[i] >= minroot) {
				tovisit[lentovisit++] = parents[i];
				revstates[parents[i] + 1] |= RS_SEEN;
			}
		}
	}

	/* Find all the nodes in between the roots we found and the heads
	 * and add them to the reachable set */
	if (includepath == 1) {
		long minidx = minroot;
		if (minidx < 0)
			minidx = 0;
		/* forward sweep: a seen rev with a reachable parent is
		 * itself on a root-to-head path */
		for (i = minidx; i < len; i++) {
			if (!(revstates[i + 1] & RS_SEEN))
				continue;
			r = index_get_parents(self, i, parents, (int)len - 1);
			/* Corrupted index file, error is set from
			 * index_get_parents */
			if (r < 0)
				goto bail;
			if (((revstates[parents[0] + 1] |
			      revstates[parents[1] + 1]) &
			     RS_REACHABLE) &&
			    !(revstates[i + 1] & RS_REACHABLE)) {
				revstates[i + 1] |= RS_REACHABLE;
				val = PyInt_FromSsize_t(i);
				if (val == NULL)
					goto bail;
				r = PyList_Append(reachable, val);
				Py_DECREF(val);
				if (r < 0)
					goto bail;
			}
		}
	}

	free(revstates);
	free(tovisit);
	return reachable;
bail:
	Py_XDECREF(reachable);
	free(revstates);
	free(tovisit);
	return NULL;
}
772
804
773 static int add_roots_get_min(indexObject *self, PyObject *roots, char *phases,
805 static int add_roots_get_min(indexObject *self, PyObject *roots, char *phases,
774 char phase)
806 char phase)
775 {
807 {
776 Py_ssize_t len = index_length(self);
808 Py_ssize_t len = index_length(self);
777 PyObject *item;
809 PyObject *item;
778 PyObject *iterator;
810 PyObject *iterator;
779 int rev, minrev = -1;
811 int rev, minrev = -1;
780 char *node;
812 char *node;
781
813
782 if (!PySet_Check(roots)) {
814 if (!PySet_Check(roots)) {
783 PyErr_SetString(PyExc_TypeError,
815 PyErr_SetString(PyExc_TypeError,
784 "roots must be a set of nodes");
816 "roots must be a set of nodes");
785 return -2;
817 return -2;
786 }
818 }
787 iterator = PyObject_GetIter(roots);
819 iterator = PyObject_GetIter(roots);
788 if (iterator == NULL)
820 if (iterator == NULL)
789 return -2;
821 return -2;
790 while ((item = PyIter_Next(iterator))) {
822 while ((item = PyIter_Next(iterator))) {
791 if (node_check(self->nodelen, item, &node) == -1)
823 if (node_check(self->nodelen, item, &node) == -1)
792 goto failed;
824 goto failed;
793 rev = index_find_node(self, node);
825 rev = index_find_node(self, node);
794 /* null is implicitly public, so negative is invalid */
826 /* null is implicitly public, so negative is invalid */
795 if (rev < 0 || rev >= len)
827 if (rev < 0 || rev >= len)
796 goto failed;
828 goto failed;
797 phases[rev] = phase;
829 phases[rev] = phase;
798 if (minrev == -1 || minrev > rev)
830 if (minrev == -1 || minrev > rev)
799 minrev = rev;
831 minrev = rev;
800 Py_DECREF(item);
832 Py_DECREF(item);
801 }
833 }
802 Py_DECREF(iterator);
834 Py_DECREF(iterator);
803 return minrev;
835 return minrev;
804 failed:
836 failed:
805 Py_DECREF(iterator);
837 Py_DECREF(iterator);
806 Py_DECREF(item);
838 Py_DECREF(item);
807 return -2;
839 return -2;
808 }
840 }
809
841
810 static PyObject *compute_phases_map_sets(indexObject *self, PyObject *args)
842 static PyObject *compute_phases_map_sets(indexObject *self, PyObject *args)
811 {
843 {
812 /* 0: public (untracked), 1: draft, 2: secret, 32: archive,
844 /* 0: public (untracked), 1: draft, 2: secret, 32: archive,
813 96: internal */
845 96: internal */
814 static const char trackedphases[] = {1, 2, 32, 96};
846 static const char trackedphases[] = {1, 2, 32, 96};
815 PyObject *roots = Py_None;
847 PyObject *roots = Py_None;
816 PyObject *phasesetsdict = NULL;
848 PyObject *phasesetsdict = NULL;
817 PyObject *phasesets[4] = {NULL, NULL, NULL, NULL};
849 PyObject *phasesets[4] = {NULL, NULL, NULL, NULL};
818 Py_ssize_t len = index_length(self);
850 Py_ssize_t len = index_length(self);
819 char *phases = NULL;
851 char *phases = NULL;
820 int minphaserev = -1, rev, i;
852 int minphaserev = -1, rev, i;
821 const int numphases = (int)(sizeof(phasesets) / sizeof(phasesets[0]));
853 const int numphases = (int)(sizeof(phasesets) / sizeof(phasesets[0]));
822
854
823 if (!PyArg_ParseTuple(args, "O", &roots))
855 if (!PyArg_ParseTuple(args, "O", &roots))
824 return NULL;
856 return NULL;
825 if (roots == NULL || !PyDict_Check(roots)) {
857 if (roots == NULL || !PyDict_Check(roots)) {
826 PyErr_SetString(PyExc_TypeError, "roots must be a dictionary");
858 PyErr_SetString(PyExc_TypeError, "roots must be a dictionary");
827 return NULL;
859 return NULL;
828 }
860 }
829
861
830 phases = calloc(len, 1);
862 phases = calloc(len, 1);
831 if (phases == NULL) {
863 if (phases == NULL) {
832 PyErr_NoMemory();
864 PyErr_NoMemory();
833 return NULL;
865 return NULL;
834 }
866 }
835
867
836 for (i = 0; i < numphases; ++i) {
868 for (i = 0; i < numphases; ++i) {
837 PyObject *pyphase = PyInt_FromLong(trackedphases[i]);
869 PyObject *pyphase = PyInt_FromLong(trackedphases[i]);
838 PyObject *phaseroots = NULL;
870 PyObject *phaseroots = NULL;
839 if (pyphase == NULL)
871 if (pyphase == NULL)
840 goto release;
872 goto release;
841 phaseroots = PyDict_GetItem(roots, pyphase);
873 phaseroots = PyDict_GetItem(roots, pyphase);
842 Py_DECREF(pyphase);
874 Py_DECREF(pyphase);
843 if (phaseroots == NULL)
875 if (phaseroots == NULL)
844 continue;
876 continue;
845 rev = add_roots_get_min(self, phaseroots, phases,
877 rev = add_roots_get_min(self, phaseroots, phases,
846 trackedphases[i]);
878 trackedphases[i]);
847 if (rev == -2)
879 if (rev == -2)
848 goto release;
880 goto release;
849 if (rev != -1 && (minphaserev == -1 || rev < minphaserev))
881 if (rev != -1 && (minphaserev == -1 || rev < minphaserev))
850 minphaserev = rev;
882 minphaserev = rev;
851 }
883 }
852
884
853 for (i = 0; i < numphases; ++i) {
885 for (i = 0; i < numphases; ++i) {
854 phasesets[i] = PySet_New(NULL);
886 phasesets[i] = PySet_New(NULL);
855 if (phasesets[i] == NULL)
887 if (phasesets[i] == NULL)
856 goto release;
888 goto release;
857 }
889 }
858
890
859 if (minphaserev == -1)
891 if (minphaserev == -1)
860 minphaserev = len;
892 minphaserev = len;
861 for (rev = minphaserev; rev < len; ++rev) {
893 for (rev = minphaserev; rev < len; ++rev) {
862 PyObject *pyphase = NULL;
894 PyObject *pyphase = NULL;
863 PyObject *pyrev = NULL;
895 PyObject *pyrev = NULL;
864 int parents[2];
896 int parents[2];
865 /*
897 /*
866 * The parent lookup could be skipped for phaseroots, but
898 * The parent lookup could be skipped for phaseroots, but
867 * phase --force would historically not recompute them
899 * phase --force would historically not recompute them
868 * correctly, leaving descendents with a lower phase around.
900 * correctly, leaving descendents with a lower phase around.
869 * As such, unconditionally recompute the phase.
901 * As such, unconditionally recompute the phase.
870 */
902 */
871 if (index_get_parents(self, rev, parents, (int)len - 1) < 0)
903 if (index_get_parents(self, rev, parents, (int)len - 1) < 0)
872 goto release;
904 goto release;
873 set_phase_from_parents(phases, parents[0], parents[1], rev);
905 set_phase_from_parents(phases, parents[0], parents[1], rev);
874 switch (phases[rev]) {
906 switch (phases[rev]) {
875 case 0:
907 case 0:
876 continue;
908 continue;
877 case 1:
909 case 1:
878 pyphase = phasesets[0];
910 pyphase = phasesets[0];
879 break;
911 break;
880 case 2:
912 case 2:
881 pyphase = phasesets[1];
913 pyphase = phasesets[1];
882 break;
914 break;
883 case 32:
915 case 32:
884 pyphase = phasesets[2];
916 pyphase = phasesets[2];
885 break;
917 break;
886 case 96:
918 case 96:
887 pyphase = phasesets[3];
919 pyphase = phasesets[3];
888 break;
920 break;
889 default:
921 default:
890 /* this should never happen since the phase number is
922 /* this should never happen since the phase number is
891 * specified by this function. */
923 * specified by this function. */
892 PyErr_SetString(PyExc_SystemError,
924 PyErr_SetString(PyExc_SystemError,
893 "bad phase number in internal list");
925 "bad phase number in internal list");
894 goto release;
926 goto release;
895 }
927 }
896 pyrev = PyInt_FromLong(rev);
928 pyrev = PyInt_FromLong(rev);
897 if (pyrev == NULL)
929 if (pyrev == NULL)
898 goto release;
930 goto release;
899 if (PySet_Add(pyphase, pyrev) == -1) {
931 if (PySet_Add(pyphase, pyrev) == -1) {
900 Py_DECREF(pyrev);
932 Py_DECREF(pyrev);
901 goto release;
933 goto release;
902 }
934 }
903 Py_DECREF(pyrev);
935 Py_DECREF(pyrev);
904 }
936 }
905
937
906 phasesetsdict = _dict_new_presized(numphases);
938 phasesetsdict = _dict_new_presized(numphases);
907 if (phasesetsdict == NULL)
939 if (phasesetsdict == NULL)
908 goto release;
940 goto release;
909 for (i = 0; i < numphases; ++i) {
941 for (i = 0; i < numphases; ++i) {
910 PyObject *pyphase = PyInt_FromLong(trackedphases[i]);
942 PyObject *pyphase = PyInt_FromLong(trackedphases[i]);
911 if (pyphase == NULL)
943 if (pyphase == NULL)
912 goto release;
944 goto release;
913 if (PyDict_SetItem(phasesetsdict, pyphase, phasesets[i]) ==
945 if (PyDict_SetItem(phasesetsdict, pyphase, phasesets[i]) ==
914 -1) {
946 -1) {
915 Py_DECREF(pyphase);
947 Py_DECREF(pyphase);
916 goto release;
948 goto release;
917 }
949 }
918 Py_DECREF(phasesets[i]);
950 Py_DECREF(phasesets[i]);
919 phasesets[i] = NULL;
951 phasesets[i] = NULL;
920 }
952 }
921
953
922 return Py_BuildValue("nN", len, phasesetsdict);
954 return Py_BuildValue("nN", len, phasesetsdict);
923
955
924 release:
956 release:
925 for (i = 0; i < numphases; ++i)
957 for (i = 0; i < numphases; ++i)
926 Py_XDECREF(phasesets[i]);
958 Py_XDECREF(phasesets[i]);
927 Py_XDECREF(phasesetsdict);
959 Py_XDECREF(phasesetsdict);
928
960
929 free(phases);
961 free(phases);
930 return NULL;
962 return NULL;
931 }
963 }
932
964
/*
 * Return the list of head revisions (revisions with no unfiltered
 * children), honoring an optional `filteredrevs` container; the result
 * is cached on the index object and a copy is returned so callers
 * cannot mutate the cache.
 *
 * args: optional filteredrevs (any object supporting __contains__).
 * Returns a new list, or NULL with an exception set.
 */
static PyObject *index_headrevs(indexObject *self, PyObject *args)
{
	Py_ssize_t i, j, len;
	/* nothead[i] != 0 once rev i is known not to be a head */
	char *nothead = NULL;
	PyObject *heads = NULL;
	PyObject *filter = NULL;
	PyObject *filteredrevs = Py_None;

	if (!PyArg_ParseTuple(args, "|O", &filteredrevs)) {
		return NULL;
	}

	/* cache hit: same filter object as last time */
	if (self->headrevs && filteredrevs == self->filteredrevs)
		return list_copy(self->headrevs);

	Py_DECREF(self->filteredrevs);
	self->filteredrevs = filteredrevs;
	Py_INCREF(filteredrevs);

	if (filteredrevs != Py_None) {
		filter = PyObject_GetAttrString(filteredrevs, "__contains__");
		if (!filter) {
			PyErr_SetString(
			    PyExc_TypeError,
			    "filteredrevs has no attribute __contains__");
			goto bail;
		}
	}

	len = index_length(self);
	heads = PyList_New(0);
	if (heads == NULL)
		goto bail;
	/* empty repository: nullrev is the only head */
	if (len == 0) {
		PyObject *nullid = PyInt_FromLong(-1);
		if (nullid == NULL || PyList_Append(heads, nullid) == -1) {
			Py_XDECREF(nullid);
			goto bail;
		}
		goto done;
	}

	nothead = calloc(len, 1);
	if (nothead == NULL) {
		PyErr_NoMemory();
		goto bail;
	}

	/* walk newest-to-oldest, marking each parent as a non-head */
	for (i = len - 1; i >= 0; i--) {
		int isfiltered;
		int parents[2];

		/* If nothead[i] == 1, it means we've seen an unfiltered child
		 * of this node already, and therefore this node is not
		 * filtered. So we can skip the expensive check_filter step.
		 */
		if (nothead[i] != 1) {
			isfiltered = check_filter(filter, i);
			if (isfiltered == -1) {
				PyErr_SetString(PyExc_TypeError,
				                "unable to check filter");
				goto bail;
			}

			if (isfiltered) {
				nothead[i] = 1;
				continue;
			}
		}

		if (index_get_parents(self, i, parents, (int)len - 1) < 0)
			goto bail;
		for (j = 0; j < 2; j++) {
			if (parents[j] >= 0)
				nothead[parents[j]] = 1;
		}
	}

	/* everything not marked is a head */
	for (i = 0; i < len; i++) {
		PyObject *head;

		if (nothead[i])
			continue;
		head = PyInt_FromSsize_t(i);
		if (head == NULL || PyList_Append(heads, head) == -1) {
			Py_XDECREF(head);
			goto bail;
		}
	}

done:
	/* NOTE(review): a previously cached self->headrevs (from a
	 * different filteredrevs) looks like it is overwritten here
	 * without a decref — possible reference leak; confirm whether
	 * another code path clears it first. */
	self->headrevs = heads;
	Py_XDECREF(filter);
	free(nothead);
	return list_copy(self->headrevs);
bail:
	Py_XDECREF(filter);
	Py_XDECREF(heads);
	free(nothead);
	return NULL;
}
1034
1066
1035 /**
1067 /**
1036 * Obtain the base revision index entry.
1068 * Obtain the base revision index entry.
1037 *
1069 *
1038 * Callers must ensure that rev >= 0 or illegal memory access may occur.
1070 * Callers must ensure that rev >= 0 or illegal memory access may occur.
1039 */
1071 */
1040 static inline int index_baserev(indexObject *self, int rev)
1072 static inline int index_baserev(indexObject *self, int rev)
1041 {
1073 {
1042 const char *data;
1074 const char *data;
1043 int result;
1075 int result;
1044
1076
1045 data = index_deref(self, rev);
1077 data = index_deref(self, rev);
1046 if (data == NULL)
1078 if (data == NULL)
1047 return -2;
1079 return -2;
1048 result = getbe32(data + 16);
1080 result = getbe32(data + 16);
1049
1081
1050 if (result > rev) {
1082 if (result > rev) {
1051 PyErr_Format(
1083 PyErr_Format(
1052 PyExc_ValueError,
1084 PyExc_ValueError,
1053 "corrupted revlog, revision base above revision: %d, %d",
1085 "corrupted revlog, revision base above revision: %d, %d",
1054 rev, result);
1086 rev, result);
1055 return -2;
1087 return -2;
1056 }
1088 }
1057 if (result < -1) {
1089 if (result < -1) {
1058 PyErr_Format(
1090 PyErr_Format(
1059 PyExc_ValueError,
1091 PyExc_ValueError,
1060 "corrupted revlog, revision base out of range: %d, %d", rev,
1092 "corrupted revlog, revision base out of range: %d, %d", rev,
1061 result);
1093 result);
1062 return -2;
1094 return -2;
1063 }
1095 }
1064 return result;
1096 return result;
1065 }
1097 }
1066
1098
1067 /**
1099 /**
1068 * Find if a revision is a snapshot or not
1100 * Find if a revision is a snapshot or not
1069 *
1101 *
1070 * Only relevant for sparse-revlog case.
1102 * Only relevant for sparse-revlog case.
1071 * Callers must ensure that rev is in a valid range.
1103 * Callers must ensure that rev is in a valid range.
1072 */
1104 */
1073 static int index_issnapshotrev(indexObject *self, Py_ssize_t rev)
1105 static int index_issnapshotrev(indexObject *self, Py_ssize_t rev)
1074 {
1106 {
1075 int ps[2];
1107 int ps[2];
1076 Py_ssize_t base;
1108 Py_ssize_t base;
1077 while (rev >= 0) {
1109 while (rev >= 0) {
1078 base = (Py_ssize_t)index_baserev(self, rev);
1110 base = (Py_ssize_t)index_baserev(self, rev);
1079 if (base == rev) {
1111 if (base == rev) {
1080 base = -1;
1112 base = -1;
1081 }
1113 }
1082 if (base == -2) {
1114 if (base == -2) {
1083 assert(PyErr_Occurred());
1115 assert(PyErr_Occurred());
1084 return -1;
1116 return -1;
1085 }
1117 }
1086 if (base == -1) {
1118 if (base == -1) {
1087 return 1;
1119 return 1;
1088 }
1120 }
1089 if (index_get_parents(self, rev, ps, (int)rev) < 0) {
1121 if (index_get_parents(self, rev, ps, (int)rev) < 0) {
1090 assert(PyErr_Occurred());
1122 assert(PyErr_Occurred());
1091 return -1;
1123 return -1;
1092 };
1124 };
1093 if (base == ps[0] || base == ps[1]) {
1125 if (base == ps[0] || base == ps[1]) {
1094 return 0;
1126 return 0;
1095 }
1127 }
1096 rev = base;
1128 rev = base;
1097 }
1129 }
1098 return rev == -1;
1130 return rev == -1;
1099 }
1131 }
1100
1132
1101 static PyObject *index_issnapshot(indexObject *self, PyObject *value)
1133 static PyObject *index_issnapshot(indexObject *self, PyObject *value)
1102 {
1134 {
1103 long rev;
1135 long rev;
1104 int issnap;
1136 int issnap;
1105 Py_ssize_t length = index_length(self);
1137 Py_ssize_t length = index_length(self);
1106
1138
1107 if (!pylong_to_long(value, &rev)) {
1139 if (!pylong_to_long(value, &rev)) {
1108 return NULL;
1140 return NULL;
1109 }
1141 }
1110 if (rev < -1 || rev >= length) {
1142 if (rev < -1 || rev >= length) {
1111 PyErr_Format(PyExc_ValueError, "revlog index out of range: %ld",
1143 PyErr_Format(PyExc_ValueError, "revlog index out of range: %ld",
1112 rev);
1144 rev);
1113 return NULL;
1145 return NULL;
1114 };
1146 };
1115 issnap = index_issnapshotrev(self, (Py_ssize_t)rev);
1147 issnap = index_issnapshotrev(self, (Py_ssize_t)rev);
1116 if (issnap < 0) {
1148 if (issnap < 0) {
1117 return NULL;
1149 return NULL;
1118 };
1150 };
1119 return PyBool_FromLong((long)issnap);
1151 return PyBool_FromLong((long)issnap);
1120 }
1152 }
1121
1153
1122 static PyObject *index_findsnapshots(indexObject *self, PyObject *args)
1154 static PyObject *index_findsnapshots(indexObject *self, PyObject *args)
1123 {
1155 {
1124 Py_ssize_t start_rev;
1156 Py_ssize_t start_rev;
1125 PyObject *cache;
1157 PyObject *cache;
1126 Py_ssize_t base;
1158 Py_ssize_t base;
1127 Py_ssize_t rev;
1159 Py_ssize_t rev;
1128 PyObject *key = NULL;
1160 PyObject *key = NULL;
1129 PyObject *value = NULL;
1161 PyObject *value = NULL;
1130 const Py_ssize_t length = index_length(self);
1162 const Py_ssize_t length = index_length(self);
1131 if (!PyArg_ParseTuple(args, "O!n", &PyDict_Type, &cache, &start_rev)) {
1163 if (!PyArg_ParseTuple(args, "O!n", &PyDict_Type, &cache, &start_rev)) {
1132 return NULL;
1164 return NULL;
1133 }
1165 }
1134 for (rev = start_rev; rev < length; rev++) {
1166 for (rev = start_rev; rev < length; rev++) {
1135 int issnap;
1167 int issnap;
1136 PyObject *allvalues = NULL;
1168 PyObject *allvalues = NULL;
1137 issnap = index_issnapshotrev(self, rev);
1169 issnap = index_issnapshotrev(self, rev);
1138 if (issnap < 0) {
1170 if (issnap < 0) {
1139 goto bail;
1171 goto bail;
1140 }
1172 }
1141 if (issnap == 0) {
1173 if (issnap == 0) {
1142 continue;
1174 continue;
1143 }
1175 }
1144 base = (Py_ssize_t)index_baserev(self, rev);
1176 base = (Py_ssize_t)index_baserev(self, rev);
1145 if (base == rev) {
1177 if (base == rev) {
1146 base = -1;
1178 base = -1;
1147 }
1179 }
1148 if (base == -2) {
1180 if (base == -2) {
1149 assert(PyErr_Occurred());
1181 assert(PyErr_Occurred());
1150 goto bail;
1182 goto bail;
1151 }
1183 }
1152 key = PyInt_FromSsize_t(base);
1184 key = PyInt_FromSsize_t(base);
1153 allvalues = PyDict_GetItem(cache, key);
1185 allvalues = PyDict_GetItem(cache, key);
1154 if (allvalues == NULL && PyErr_Occurred()) {
1186 if (allvalues == NULL && PyErr_Occurred()) {
1155 goto bail;
1187 goto bail;
1156 }
1188 }
1157 if (allvalues == NULL) {
1189 if (allvalues == NULL) {
1158 int r;
1190 int r;
1159 allvalues = PyList_New(0);
1191 allvalues = PyList_New(0);
1160 if (!allvalues) {
1192 if (!allvalues) {
1161 goto bail;
1193 goto bail;
1162 }
1194 }
1163 r = PyDict_SetItem(cache, key, allvalues);
1195 r = PyDict_SetItem(cache, key, allvalues);
1164 Py_DECREF(allvalues);
1196 Py_DECREF(allvalues);
1165 if (r < 0) {
1197 if (r < 0) {
1166 goto bail;
1198 goto bail;
1167 }
1199 }
1168 }
1200 }
1169 value = PyInt_FromSsize_t(rev);
1201 value = PyInt_FromSsize_t(rev);
1170 if (PyList_Append(allvalues, value)) {
1202 if (PyList_Append(allvalues, value)) {
1171 goto bail;
1203 goto bail;
1172 }
1204 }
1173 Py_CLEAR(key);
1205 Py_CLEAR(key);
1174 Py_CLEAR(value);
1206 Py_CLEAR(value);
1175 }
1207 }
1176 Py_RETURN_NONE;
1208 Py_RETURN_NONE;
1177 bail:
1209 bail:
1178 Py_XDECREF(key);
1210 Py_XDECREF(key);
1179 Py_XDECREF(value);
1211 Py_XDECREF(value);
1180 return NULL;
1212 return NULL;
1181 }
1213 }
1182
1214
1183 static PyObject *index_deltachain(indexObject *self, PyObject *args)
1215 static PyObject *index_deltachain(indexObject *self, PyObject *args)
1184 {
1216 {
1185 int rev, generaldelta;
1217 int rev, generaldelta;
1186 PyObject *stoparg;
1218 PyObject *stoparg;
1187 int stoprev, iterrev, baserev = -1;
1219 int stoprev, iterrev, baserev = -1;
1188 int stopped;
1220 int stopped;
1189 PyObject *chain = NULL, *result = NULL;
1221 PyObject *chain = NULL, *result = NULL;
1190 const Py_ssize_t length = index_length(self);
1222 const Py_ssize_t length = index_length(self);
1191
1223
1192 if (!PyArg_ParseTuple(args, "iOi", &rev, &stoparg, &generaldelta)) {
1224 if (!PyArg_ParseTuple(args, "iOi", &rev, &stoparg, &generaldelta)) {
1193 return NULL;
1225 return NULL;
1194 }
1226 }
1195
1227
1196 if (PyInt_Check(stoparg)) {
1228 if (PyInt_Check(stoparg)) {
1197 stoprev = (int)PyInt_AsLong(stoparg);
1229 stoprev = (int)PyInt_AsLong(stoparg);
1198 if (stoprev == -1 && PyErr_Occurred()) {
1230 if (stoprev == -1 && PyErr_Occurred()) {
1199 return NULL;
1231 return NULL;
1200 }
1232 }
1201 } else if (stoparg == Py_None) {
1233 } else if (stoparg == Py_None) {
1202 stoprev = -2;
1234 stoprev = -2;
1203 } else {
1235 } else {
1204 PyErr_SetString(PyExc_ValueError,
1236 PyErr_SetString(PyExc_ValueError,
1205 "stoprev must be integer or None");
1237 "stoprev must be integer or None");
1206 return NULL;
1238 return NULL;
1207 }
1239 }
1208
1240
1209 if (rev < 0 || rev >= length) {
1241 if (rev < 0 || rev >= length) {
1210 PyErr_SetString(PyExc_ValueError, "revlog index out of range");
1242 PyErr_SetString(PyExc_ValueError, "revlog index out of range");
1211 return NULL;
1243 return NULL;
1212 }
1244 }
1213
1245
1214 chain = PyList_New(0);
1246 chain = PyList_New(0);
1215 if (chain == NULL) {
1247 if (chain == NULL) {
1216 return NULL;
1248 return NULL;
1217 }
1249 }
1218
1250
1219 baserev = index_baserev(self, rev);
1251 baserev = index_baserev(self, rev);
1220
1252
1221 /* This should never happen. */
1253 /* This should never happen. */
1222 if (baserev <= -2) {
1254 if (baserev <= -2) {
1223 /* Error should be set by index_deref() */
1255 /* Error should be set by index_deref() */
1224 assert(PyErr_Occurred());
1256 assert(PyErr_Occurred());
1225 goto bail;
1257 goto bail;
1226 }
1258 }
1227
1259
1228 iterrev = rev;
1260 iterrev = rev;
1229
1261
1230 while (iterrev != baserev && iterrev != stoprev) {
1262 while (iterrev != baserev && iterrev != stoprev) {
1231 PyObject *value = PyInt_FromLong(iterrev);
1263 PyObject *value = PyInt_FromLong(iterrev);
1232 if (value == NULL) {
1264 if (value == NULL) {
1233 goto bail;
1265 goto bail;
1234 }
1266 }
1235 if (PyList_Append(chain, value)) {
1267 if (PyList_Append(chain, value)) {
1236 Py_DECREF(value);
1268 Py_DECREF(value);
1237 goto bail;
1269 goto bail;
1238 }
1270 }
1239 Py_DECREF(value);
1271 Py_DECREF(value);
1240
1272
1241 if (generaldelta) {
1273 if (generaldelta) {
1242 iterrev = baserev;
1274 iterrev = baserev;
1243 } else {
1275 } else {
1244 iterrev--;
1276 iterrev--;
1245 }
1277 }
1246
1278
1247 if (iterrev < 0) {
1279 if (iterrev < 0) {
1248 break;
1280 break;
1249 }
1281 }
1250
1282
1251 if (iterrev >= length) {
1283 if (iterrev >= length) {
1252 PyErr_SetString(PyExc_IndexError,
1284 PyErr_SetString(PyExc_IndexError,
1253 "revision outside index");
1285 "revision outside index");
1254 return NULL;
1286 return NULL;
1255 }
1287 }
1256
1288
1257 baserev = index_baserev(self, iterrev);
1289 baserev = index_baserev(self, iterrev);
1258
1290
1259 /* This should never happen. */
1291 /* This should never happen. */
1260 if (baserev <= -2) {
1292 if (baserev <= -2) {
1261 /* Error should be set by index_deref() */
1293 /* Error should be set by index_deref() */
1262 assert(PyErr_Occurred());
1294 assert(PyErr_Occurred());
1263 goto bail;
1295 goto bail;
1264 }
1296 }
1265 }
1297 }
1266
1298
1267 if (iterrev == stoprev) {
1299 if (iterrev == stoprev) {
1268 stopped = 1;
1300 stopped = 1;
1269 } else {
1301 } else {
1270 PyObject *value = PyInt_FromLong(iterrev);
1302 PyObject *value = PyInt_FromLong(iterrev);
1271 if (value == NULL) {
1303 if (value == NULL) {
1272 goto bail;
1304 goto bail;
1273 }
1305 }
1274 if (PyList_Append(chain, value)) {
1306 if (PyList_Append(chain, value)) {
1275 Py_DECREF(value);
1307 Py_DECREF(value);
1276 goto bail;
1308 goto bail;
1277 }
1309 }
1278 Py_DECREF(value);
1310 Py_DECREF(value);
1279
1311
1280 stopped = 0;
1312 stopped = 0;
1281 }
1313 }
1282
1314
1283 if (PyList_Reverse(chain)) {
1315 if (PyList_Reverse(chain)) {
1284 goto bail;
1316 goto bail;
1285 }
1317 }
1286
1318
1287 result = Py_BuildValue("OO", chain, stopped ? Py_True : Py_False);
1319 result = Py_BuildValue("OO", chain, stopped ? Py_True : Py_False);
1288 Py_DECREF(chain);
1320 Py_DECREF(chain);
1289 return result;
1321 return result;
1290
1322
1291 bail:
1323 bail:
1292 Py_DECREF(chain);
1324 Py_DECREF(chain);
1293 return NULL;
1325 return NULL;
1294 }
1326 }
1295
1327
1296 static inline int64_t
1328 static inline int64_t
1297 index_segment_span(indexObject *self, Py_ssize_t start_rev, Py_ssize_t end_rev)
1329 index_segment_span(indexObject *self, Py_ssize_t start_rev, Py_ssize_t end_rev)
1298 {
1330 {
1299 int64_t start_offset;
1331 int64_t start_offset;
1300 int64_t end_offset;
1332 int64_t end_offset;
1301 int end_size;
1333 int end_size;
1302 start_offset = index_get_start(self, start_rev);
1334 start_offset = index_get_start(self, start_rev);
1303 if (start_offset < 0) {
1335 if (start_offset < 0) {
1304 return -1;
1336 return -1;
1305 }
1337 }
1306 end_offset = index_get_start(self, end_rev);
1338 end_offset = index_get_start(self, end_rev);
1307 if (end_offset < 0) {
1339 if (end_offset < 0) {
1308 return -1;
1340 return -1;
1309 }
1341 }
1310 end_size = index_get_length(self, end_rev);
1342 end_size = index_get_length(self, end_rev);
1311 if (end_size < 0) {
1343 if (end_size < 0) {
1312 return -1;
1344 return -1;
1313 }
1345 }
1314 if (end_offset < start_offset) {
1346 if (end_offset < start_offset) {
1315 PyErr_Format(PyExc_ValueError,
1347 PyErr_Format(PyExc_ValueError,
1316 "corrupted revlog index: inconsistent offset "
1348 "corrupted revlog index: inconsistent offset "
1317 "between revisions (%zd) and (%zd)",
1349 "between revisions (%zd) and (%zd)",
1318 start_rev, end_rev);
1350 start_rev, end_rev);
1319 return -1;
1351 return -1;
1320 }
1352 }
1321 return (end_offset - start_offset) + (int64_t)end_size;
1353 return (end_offset - start_offset) + (int64_t)end_size;
1322 }
1354 }
1323
1355
1324 /* returns endidx so that revs[startidx:endidx] has no empty trailing revs */
1356 /* returns endidx so that revs[startidx:endidx] has no empty trailing revs */
1325 static Py_ssize_t trim_endidx(indexObject *self, const Py_ssize_t *revs,
1357 static Py_ssize_t trim_endidx(indexObject *self, const Py_ssize_t *revs,
1326 Py_ssize_t startidx, Py_ssize_t endidx)
1358 Py_ssize_t startidx, Py_ssize_t endidx)
1327 {
1359 {
1328 int length;
1360 int length;
1329 while (endidx > 1 && endidx > startidx) {
1361 while (endidx > 1 && endidx > startidx) {
1330 length = index_get_length(self, revs[endidx - 1]);
1362 length = index_get_length(self, revs[endidx - 1]);
1331 if (length < 0) {
1363 if (length < 0) {
1332 return -1;
1364 return -1;
1333 }
1365 }
1334 if (length != 0) {
1366 if (length != 0) {
1335 break;
1367 break;
1336 }
1368 }
1337 endidx -= 1;
1369 endidx -= 1;
1338 }
1370 }
1339 return endidx;
1371 return endidx;
1340 }
1372 }
1341
1373
1342 struct Gap {
1374 struct Gap {
1343 int64_t size;
1375 int64_t size;
1344 Py_ssize_t idx;
1376 Py_ssize_t idx;
1345 };
1377 };
1346
1378
1347 static int gap_compare(const void *left, const void *right)
1379 static int gap_compare(const void *left, const void *right)
1348 {
1380 {
1349 const struct Gap *l_left = ((const struct Gap *)left);
1381 const struct Gap *l_left = ((const struct Gap *)left);
1350 const struct Gap *l_right = ((const struct Gap *)right);
1382 const struct Gap *l_right = ((const struct Gap *)right);
1351 if (l_left->size < l_right->size) {
1383 if (l_left->size < l_right->size) {
1352 return -1;
1384 return -1;
1353 } else if (l_left->size > l_right->size) {
1385 } else if (l_left->size > l_right->size) {
1354 return 1;
1386 return 1;
1355 }
1387 }
1356 return 0;
1388 return 0;
1357 }
1389 }
1358 static int Py_ssize_t_compare(const void *left, const void *right)
1390 static int Py_ssize_t_compare(const void *left, const void *right)
1359 {
1391 {
1360 const Py_ssize_t l_left = *(const Py_ssize_t *)left;
1392 const Py_ssize_t l_left = *(const Py_ssize_t *)left;
1361 const Py_ssize_t l_right = *(const Py_ssize_t *)right;
1393 const Py_ssize_t l_right = *(const Py_ssize_t *)right;
1362 if (l_left < l_right) {
1394 if (l_left < l_right) {
1363 return -1;
1395 return -1;
1364 } else if (l_left > l_right) {
1396 } else if (l_left > l_right) {
1365 return 1;
1397 return 1;
1366 }
1398 }
1367 return 0;
1399 return 0;
1368 }
1400 }
1369
1401
1370 static PyObject *index_slicechunktodensity(indexObject *self, PyObject *args)
1402 static PyObject *index_slicechunktodensity(indexObject *self, PyObject *args)
1371 {
1403 {
1372 /* method arguments */
1404 /* method arguments */
1373 PyObject *list_revs = NULL; /* revisions in the chain */
1405 PyObject *list_revs = NULL; /* revisions in the chain */
1374 double targetdensity = 0; /* min density to achieve */
1406 double targetdensity = 0; /* min density to achieve */
1375 Py_ssize_t mingapsize = 0; /* threshold to ignore gaps */
1407 Py_ssize_t mingapsize = 0; /* threshold to ignore gaps */
1376
1408
1377 /* other core variables */
1409 /* other core variables */
1378 Py_ssize_t idxlen = index_length(self);
1410 Py_ssize_t idxlen = index_length(self);
1379 Py_ssize_t i; /* used for various iteration */
1411 Py_ssize_t i; /* used for various iteration */
1380 PyObject *result = NULL; /* the final return of the function */
1412 PyObject *result = NULL; /* the final return of the function */
1381
1413
1382 /* generic information about the delta chain being slice */
1414 /* generic information about the delta chain being slice */
1383 Py_ssize_t num_revs = 0; /* size of the full delta chain */
1415 Py_ssize_t num_revs = 0; /* size of the full delta chain */
1384 Py_ssize_t *revs = NULL; /* native array of revision in the chain */
1416 Py_ssize_t *revs = NULL; /* native array of revision in the chain */
1385 int64_t chainpayload = 0; /* sum of all delta in the chain */
1417 int64_t chainpayload = 0; /* sum of all delta in the chain */
1386 int64_t deltachainspan = 0; /* distance from first byte to last byte */
1418 int64_t deltachainspan = 0; /* distance from first byte to last byte */
1387
1419
1388 /* variable used for slicing the delta chain */
1420 /* variable used for slicing the delta chain */
1389 int64_t readdata = 0; /* amount of data currently planned to be read */
1421 int64_t readdata = 0; /* amount of data currently planned to be read */
1390 double density = 0; /* ration of payload data compared to read ones */
1422 double density = 0; /* ration of payload data compared to read ones */
1391 int64_t previous_end;
1423 int64_t previous_end;
1392 struct Gap *gaps = NULL; /* array of notable gap in the chain */
1424 struct Gap *gaps = NULL; /* array of notable gap in the chain */
1393 Py_ssize_t num_gaps =
1425 Py_ssize_t num_gaps =
1394 0; /* total number of notable gap recorded so far */
1426 0; /* total number of notable gap recorded so far */
1395 Py_ssize_t *selected_indices = NULL; /* indices of gap skipped over */
1427 Py_ssize_t *selected_indices = NULL; /* indices of gap skipped over */
1396 Py_ssize_t num_selected = 0; /* number of gaps skipped */
1428 Py_ssize_t num_selected = 0; /* number of gaps skipped */
1397 PyObject *chunk = NULL; /* individual slice */
1429 PyObject *chunk = NULL; /* individual slice */
1398 PyObject *allchunks = NULL; /* all slices */
1430 PyObject *allchunks = NULL; /* all slices */
1399 Py_ssize_t previdx;
1431 Py_ssize_t previdx;
1400
1432
1401 /* parsing argument */
1433 /* parsing argument */
1402 if (!PyArg_ParseTuple(args, "O!dn", &PyList_Type, &list_revs,
1434 if (!PyArg_ParseTuple(args, "O!dn", &PyList_Type, &list_revs,
1403 &targetdensity, &mingapsize)) {
1435 &targetdensity, &mingapsize)) {
1404 goto bail;
1436 goto bail;
1405 }
1437 }
1406
1438
1407 /* If the delta chain contains a single element, we do not need slicing
1439 /* If the delta chain contains a single element, we do not need slicing
1408 */
1440 */
1409 num_revs = PyList_GET_SIZE(list_revs);
1441 num_revs = PyList_GET_SIZE(list_revs);
1410 if (num_revs <= 1) {
1442 if (num_revs <= 1) {
1411 result = PyTuple_Pack(1, list_revs);
1443 result = PyTuple_Pack(1, list_revs);
1412 goto done;
1444 goto done;
1413 }
1445 }
1414
1446
1415 /* Turn the python list into a native integer array (for efficiency) */
1447 /* Turn the python list into a native integer array (for efficiency) */
1416 revs = (Py_ssize_t *)calloc(num_revs, sizeof(Py_ssize_t));
1448 revs = (Py_ssize_t *)calloc(num_revs, sizeof(Py_ssize_t));
1417 if (revs == NULL) {
1449 if (revs == NULL) {
1418 PyErr_NoMemory();
1450 PyErr_NoMemory();
1419 goto bail;
1451 goto bail;
1420 }
1452 }
1421 for (i = 0; i < num_revs; i++) {
1453 for (i = 0; i < num_revs; i++) {
1422 Py_ssize_t revnum = PyInt_AsLong(PyList_GET_ITEM(list_revs, i));
1454 Py_ssize_t revnum = PyInt_AsLong(PyList_GET_ITEM(list_revs, i));
1423 if (revnum == -1 && PyErr_Occurred()) {
1455 if (revnum == -1 && PyErr_Occurred()) {
1424 goto bail;
1456 goto bail;
1425 }
1457 }
1426 if (revnum < nullrev || revnum >= idxlen) {
1458 if (revnum < nullrev || revnum >= idxlen) {
1427 PyErr_Format(PyExc_IndexError,
1459 PyErr_Format(PyExc_IndexError,
1428 "index out of range: %zd", revnum);
1460 "index out of range: %zd", revnum);
1429 goto bail;
1461 goto bail;
1430 }
1462 }
1431 revs[i] = revnum;
1463 revs[i] = revnum;
1432 }
1464 }
1433
1465
1434 /* Compute and check various property of the unsliced delta chain */
1466 /* Compute and check various property of the unsliced delta chain */
1435 deltachainspan = index_segment_span(self, revs[0], revs[num_revs - 1]);
1467 deltachainspan = index_segment_span(self, revs[0], revs[num_revs - 1]);
1436 if (deltachainspan < 0) {
1468 if (deltachainspan < 0) {
1437 goto bail;
1469 goto bail;
1438 }
1470 }
1439
1471
1440 if (deltachainspan <= mingapsize) {
1472 if (deltachainspan <= mingapsize) {
1441 result = PyTuple_Pack(1, list_revs);
1473 result = PyTuple_Pack(1, list_revs);
1442 goto done;
1474 goto done;
1443 }
1475 }
1444 chainpayload = 0;
1476 chainpayload = 0;
1445 for (i = 0; i < num_revs; i++) {
1477 for (i = 0; i < num_revs; i++) {
1446 int tmp = index_get_length(self, revs[i]);
1478 int tmp = index_get_length(self, revs[i]);
1447 if (tmp < 0) {
1479 if (tmp < 0) {
1448 goto bail;
1480 goto bail;
1449 }
1481 }
1450 chainpayload += tmp;
1482 chainpayload += tmp;
1451 }
1483 }
1452
1484
1453 readdata = deltachainspan;
1485 readdata = deltachainspan;
1454 density = 1.0;
1486 density = 1.0;
1455
1487
1456 if (0 < deltachainspan) {
1488 if (0 < deltachainspan) {
1457 density = (double)chainpayload / (double)deltachainspan;
1489 density = (double)chainpayload / (double)deltachainspan;
1458 }
1490 }
1459
1491
1460 if (density >= targetdensity) {
1492 if (density >= targetdensity) {
1461 result = PyTuple_Pack(1, list_revs);
1493 result = PyTuple_Pack(1, list_revs);
1462 goto done;
1494 goto done;
1463 }
1495 }
1464
1496
1465 /* if chain is too sparse, look for relevant gaps */
1497 /* if chain is too sparse, look for relevant gaps */
1466 gaps = (struct Gap *)calloc(num_revs, sizeof(struct Gap));
1498 gaps = (struct Gap *)calloc(num_revs, sizeof(struct Gap));
1467 if (gaps == NULL) {
1499 if (gaps == NULL) {
1468 PyErr_NoMemory();
1500 PyErr_NoMemory();
1469 goto bail;
1501 goto bail;
1470 }
1502 }
1471
1503
1472 previous_end = -1;
1504 previous_end = -1;
1473 for (i = 0; i < num_revs; i++) {
1505 for (i = 0; i < num_revs; i++) {
1474 int64_t revstart;
1506 int64_t revstart;
1475 int revsize;
1507 int revsize;
1476 revstart = index_get_start(self, revs[i]);
1508 revstart = index_get_start(self, revs[i]);
1477 if (revstart < 0) {
1509 if (revstart < 0) {
1478 goto bail;
1510 goto bail;
1479 };
1511 };
1480 revsize = index_get_length(self, revs[i]);
1512 revsize = index_get_length(self, revs[i]);
1481 if (revsize < 0) {
1513 if (revsize < 0) {
1482 goto bail;
1514 goto bail;
1483 };
1515 };
1484 if (revsize == 0) {
1516 if (revsize == 0) {
1485 continue;
1517 continue;
1486 }
1518 }
1487 if (previous_end >= 0) {
1519 if (previous_end >= 0) {
1488 int64_t gapsize = revstart - previous_end;
1520 int64_t gapsize = revstart - previous_end;
1489 if (gapsize > mingapsize) {
1521 if (gapsize > mingapsize) {
1490 gaps[num_gaps].size = gapsize;
1522 gaps[num_gaps].size = gapsize;
1491 gaps[num_gaps].idx = i;
1523 gaps[num_gaps].idx = i;
1492 num_gaps += 1;
1524 num_gaps += 1;
1493 }
1525 }
1494 }
1526 }
1495 previous_end = revstart + revsize;
1527 previous_end = revstart + revsize;
1496 }
1528 }
1497 if (num_gaps == 0) {
1529 if (num_gaps == 0) {
1498 result = PyTuple_Pack(1, list_revs);
1530 result = PyTuple_Pack(1, list_revs);
1499 goto done;
1531 goto done;
1500 }
1532 }
1501 qsort(gaps, num_gaps, sizeof(struct Gap), &gap_compare);
1533 qsort(gaps, num_gaps, sizeof(struct Gap), &gap_compare);
1502
1534
1503 /* Slice the largest gap first, they improve the density the most */
1535 /* Slice the largest gap first, they improve the density the most */
1504 selected_indices =
1536 selected_indices =
1505 (Py_ssize_t *)malloc((num_gaps + 1) * sizeof(Py_ssize_t));
1537 (Py_ssize_t *)malloc((num_gaps + 1) * sizeof(Py_ssize_t));
1506 if (selected_indices == NULL) {
1538 if (selected_indices == NULL) {
1507 PyErr_NoMemory();
1539 PyErr_NoMemory();
1508 goto bail;
1540 goto bail;
1509 }
1541 }
1510
1542
1511 for (i = num_gaps - 1; i >= 0; i--) {
1543 for (i = num_gaps - 1; i >= 0; i--) {
1512 selected_indices[num_selected] = gaps[i].idx;
1544 selected_indices[num_selected] = gaps[i].idx;
1513 readdata -= gaps[i].size;
1545 readdata -= gaps[i].size;
1514 num_selected += 1;
1546 num_selected += 1;
1515 if (readdata <= 0) {
1547 if (readdata <= 0) {
1516 density = 1.0;
1548 density = 1.0;
1517 } else {
1549 } else {
1518 density = (double)chainpayload / (double)readdata;
1550 density = (double)chainpayload / (double)readdata;
1519 }
1551 }
1520 if (density >= targetdensity) {
1552 if (density >= targetdensity) {
1521 break;
1553 break;
1522 }
1554 }
1523 }
1555 }
1524 qsort(selected_indices, num_selected, sizeof(Py_ssize_t),
1556 qsort(selected_indices, num_selected, sizeof(Py_ssize_t),
1525 &Py_ssize_t_compare);
1557 &Py_ssize_t_compare);
1526
1558
1527 /* create the resulting slice */
1559 /* create the resulting slice */
1528 allchunks = PyList_New(0);
1560 allchunks = PyList_New(0);
1529 if (allchunks == NULL) {
1561 if (allchunks == NULL) {
1530 goto bail;
1562 goto bail;
1531 }
1563 }
1532 previdx = 0;
1564 previdx = 0;
1533 selected_indices[num_selected] = num_revs;
1565 selected_indices[num_selected] = num_revs;
1534 for (i = 0; i <= num_selected; i++) {
1566 for (i = 0; i <= num_selected; i++) {
1535 Py_ssize_t idx = selected_indices[i];
1567 Py_ssize_t idx = selected_indices[i];
1536 Py_ssize_t endidx = trim_endidx(self, revs, previdx, idx);
1568 Py_ssize_t endidx = trim_endidx(self, revs, previdx, idx);
1537 if (endidx < 0) {
1569 if (endidx < 0) {
1538 goto bail;
1570 goto bail;
1539 }
1571 }
1540 if (previdx < endidx) {
1572 if (previdx < endidx) {
1541 chunk = PyList_GetSlice(list_revs, previdx, endidx);
1573 chunk = PyList_GetSlice(list_revs, previdx, endidx);
1542 if (chunk == NULL) {
1574 if (chunk == NULL) {
1543 goto bail;
1575 goto bail;
1544 }
1576 }
1545 if (PyList_Append(allchunks, chunk) == -1) {
1577 if (PyList_Append(allchunks, chunk) == -1) {
1546 goto bail;
1578 goto bail;
1547 }
1579 }
1548 Py_DECREF(chunk);
1580 Py_DECREF(chunk);
1549 chunk = NULL;
1581 chunk = NULL;
1550 }
1582 }
1551 previdx = idx;
1583 previdx = idx;
1552 }
1584 }
1553 result = allchunks;
1585 result = allchunks;
1554 goto done;
1586 goto done;
1555
1587
1556 bail:
1588 bail:
1557 Py_XDECREF(allchunks);
1589 Py_XDECREF(allchunks);
1558 Py_XDECREF(chunk);
1590 Py_XDECREF(chunk);
1559 done:
1591 done:
1560 free(revs);
1592 free(revs);
1561 free(gaps);
1593 free(gaps);
1562 free(selected_indices);
1594 free(selected_indices);
1563 return result;
1595 return result;
1564 }
1596 }
1565
1597
1566 static inline int nt_level(const char *node, Py_ssize_t level)
1598 static inline int nt_level(const char *node, Py_ssize_t level)
1567 {
1599 {
1568 int v = node[level >> 1];
1600 int v = node[level >> 1];
1569 if (!(level & 1))
1601 if (!(level & 1))
1570 v >>= 4;
1602 v >>= 4;
1571 return v & 0xf;
1603 return v & 0xf;
1572 }
1604 }
1573
1605
1574 /*
1606 /*
1575 * Return values:
1607 * Return values:
1576 *
1608 *
1577 * -4: match is ambiguous (multiple candidates)
1609 * -4: match is ambiguous (multiple candidates)
1578 * -2: not found
1610 * -2: not found
1579 * rest: valid rev
1611 * rest: valid rev
1580 */
1612 */
1581 static int nt_find(nodetree *self, const char *node, Py_ssize_t nodelen,
1613 static int nt_find(nodetree *self, const char *node, Py_ssize_t nodelen,
1582 int hex)
1614 int hex)
1583 {
1615 {
1584 int (*getnybble)(const char *, Py_ssize_t) = hex ? hexdigit : nt_level;
1616 int (*getnybble)(const char *, Py_ssize_t) = hex ? hexdigit : nt_level;
1585 int level, maxlevel, off;
1617 int level, maxlevel, off;
1586
1618
1587 /* If the input is binary, do a fast check for the nullid first. */
1619 /* If the input is binary, do a fast check for the nullid first. */
1588 if (!hex && nodelen == self->nodelen && node[0] == '\0' &&
1620 if (!hex && nodelen == self->nodelen && node[0] == '\0' &&
1589 node[1] == '\0' && memcmp(node, nullid, self->nodelen) == 0)
1621 node[1] == '\0' && memcmp(node, nullid, self->nodelen) == 0)
1590 return -1;
1622 return -1;
1591
1623
1592 if (hex)
1624 if (hex)
1593 maxlevel = nodelen;
1625 maxlevel = nodelen;
1594 else
1626 else
1595 maxlevel = 2 * nodelen;
1627 maxlevel = 2 * nodelen;
1596 if (maxlevel > 2 * self->nodelen)
1628 if (maxlevel > 2 * self->nodelen)
1597 maxlevel = 2 * self->nodelen;
1629 maxlevel = 2 * self->nodelen;
1598
1630
1599 for (level = off = 0; level < maxlevel; level++) {
1631 for (level = off = 0; level < maxlevel; level++) {
1600 int k = getnybble(node, level);
1632 int k = getnybble(node, level);
1601 nodetreenode *n = &self->nodes[off];
1633 nodetreenode *n = &self->nodes[off];
1602 int v = n->children[k];
1634 int v = n->children[k];
1603
1635
1604 if (v < 0) {
1636 if (v < 0) {
1605 const char *n;
1637 const char *n;
1606 Py_ssize_t i;
1638 Py_ssize_t i;
1607
1639
1608 v = -(v + 2);
1640 v = -(v + 2);
1609 n = index_node(self->index, v);
1641 n = index_node(self->index, v);
1610 if (n == NULL)
1642 if (n == NULL)
1611 return -2;
1643 return -2;
1612 for (i = level; i < maxlevel; i++)
1644 for (i = level; i < maxlevel; i++)
1613 if (getnybble(node, i) != nt_level(n, i))
1645 if (getnybble(node, i) != nt_level(n, i))
1614 return -2;
1646 return -2;
1615 return v;
1647 return v;
1616 }
1648 }
1617 if (v == 0)
1649 if (v == 0)
1618 return -2;
1650 return -2;
1619 off = v;
1651 off = v;
1620 }
1652 }
1621 /* multiple matches against an ambiguous prefix */
1653 /* multiple matches against an ambiguous prefix */
1622 return -4;
1654 return -4;
1623 }
1655 }
1624
1656
1625 static int nt_new(nodetree *self)
1657 static int nt_new(nodetree *self)
1626 {
1658 {
1627 if (self->length == self->capacity) {
1659 if (self->length == self->capacity) {
1628 size_t newcapacity;
1660 size_t newcapacity;
1629 nodetreenode *newnodes;
1661 nodetreenode *newnodes;
1630 newcapacity = self->capacity * 2;
1662 newcapacity = self->capacity * 2;
1631 if (newcapacity >= SIZE_MAX / sizeof(nodetreenode)) {
1663 if (newcapacity >= SIZE_MAX / sizeof(nodetreenode)) {
1632 PyErr_SetString(PyExc_MemoryError,
1664 PyErr_SetString(PyExc_MemoryError,
1633 "overflow in nt_new");
1665 "overflow in nt_new");
1634 return -1;
1666 return -1;
1635 }
1667 }
1636 newnodes =
1668 newnodes =
1637 realloc(self->nodes, newcapacity * sizeof(nodetreenode));
1669 realloc(self->nodes, newcapacity * sizeof(nodetreenode));
1638 if (newnodes == NULL) {
1670 if (newnodes == NULL) {
1639 PyErr_SetString(PyExc_MemoryError, "out of memory");
1671 PyErr_SetString(PyExc_MemoryError, "out of memory");
1640 return -1;
1672 return -1;
1641 }
1673 }
1642 self->capacity = newcapacity;
1674 self->capacity = newcapacity;
1643 self->nodes = newnodes;
1675 self->nodes = newnodes;
1644 memset(&self->nodes[self->length], 0,
1676 memset(&self->nodes[self->length], 0,
1645 sizeof(nodetreenode) * (self->capacity - self->length));
1677 sizeof(nodetreenode) * (self->capacity - self->length));
1646 }
1678 }
1647 return self->length++;
1679 return self->length++;
1648 }
1680 }
1649
1681
1650 static int nt_insert(nodetree *self, const char *node, int rev)
1682 static int nt_insert(nodetree *self, const char *node, int rev)
1651 {
1683 {
1652 int level = 0;
1684 int level = 0;
1653 int off = 0;
1685 int off = 0;
1654
1686
1655 while (level < 2 * self->nodelen) {
1687 while (level < 2 * self->nodelen) {
1656 int k = nt_level(node, level);
1688 int k = nt_level(node, level);
1657 nodetreenode *n;
1689 nodetreenode *n;
1658 int v;
1690 int v;
1659
1691
1660 n = &self->nodes[off];
1692 n = &self->nodes[off];
1661 v = n->children[k];
1693 v = n->children[k];
1662
1694
1663 if (v == 0) {
1695 if (v == 0) {
1664 n->children[k] = -rev - 2;
1696 n->children[k] = -rev - 2;
1665 return 0;
1697 return 0;
1666 }
1698 }
1667 if (v < 0) {
1699 if (v < 0) {
1668 const char *oldnode =
1700 const char *oldnode =
1669 index_node_existing(self->index, -(v + 2));
1701 index_node_existing(self->index, -(v + 2));
1670 int noff;
1702 int noff;
1671
1703
1672 if (oldnode == NULL)
1704 if (oldnode == NULL)
1673 return -1;
1705 return -1;
1674 if (!memcmp(oldnode, node, self->nodelen)) {
1706 if (!memcmp(oldnode, node, self->nodelen)) {
1675 n->children[k] = -rev - 2;
1707 n->children[k] = -rev - 2;
1676 return 0;
1708 return 0;
1677 }
1709 }
1678 noff = nt_new(self);
1710 noff = nt_new(self);
1679 if (noff == -1)
1711 if (noff == -1)
1680 return -1;
1712 return -1;
1681 /* self->nodes may have been changed by realloc */
1713 /* self->nodes may have been changed by realloc */
1682 self->nodes[off].children[k] = noff;
1714 self->nodes[off].children[k] = noff;
1683 off = noff;
1715 off = noff;
1684 n = &self->nodes[off];
1716 n = &self->nodes[off];
1685 n->children[nt_level(oldnode, ++level)] = v;
1717 n->children[nt_level(oldnode, ++level)] = v;
1686 if (level > self->depth)
1718 if (level > self->depth)
1687 self->depth = level;
1719 self->depth = level;
1688 self->splits += 1;
1720 self->splits += 1;
1689 } else {
1721 } else {
1690 level += 1;
1722 level += 1;
1691 off = v;
1723 off = v;
1692 }
1724 }
1693 }
1725 }
1694
1726
1695 return -1;
1727 return -1;
1696 }
1728 }
1697
1729
1698 static PyObject *ntobj_insert(nodetreeObject *self, PyObject *args)
1730 static PyObject *ntobj_insert(nodetreeObject *self, PyObject *args)
1699 {
1731 {
1700 Py_ssize_t rev;
1732 Py_ssize_t rev;
1701 const char *node;
1733 const char *node;
1702 Py_ssize_t length;
1734 Py_ssize_t length;
1703 if (!PyArg_ParseTuple(args, "n", &rev))
1735 if (!PyArg_ParseTuple(args, "n", &rev))
1704 return NULL;
1736 return NULL;
1705 length = index_length(self->nt.index);
1737 length = index_length(self->nt.index);
1706 if (rev < 0 || rev >= length) {
1738 if (rev < 0 || rev >= length) {
1707 PyErr_SetString(PyExc_ValueError, "revlog index out of range");
1739 PyErr_SetString(PyExc_ValueError, "revlog index out of range");
1708 return NULL;
1740 return NULL;
1709 }
1741 }
1710 node = index_node_existing(self->nt.index, rev);
1742 node = index_node_existing(self->nt.index, rev);
1711 if (nt_insert(&self->nt, node, (int)rev) == -1)
1743 if (nt_insert(&self->nt, node, (int)rev) == -1)
1712 return NULL;
1744 return NULL;
1713 Py_RETURN_NONE;
1745 Py_RETURN_NONE;
1714 }
1746 }
1715
1747
1716 static int nt_delete_node(nodetree *self, const char *node)
1748 static int nt_delete_node(nodetree *self, const char *node)
1717 {
1749 {
1718 /* rev==-2 happens to get encoded as 0, which is interpreted as not set
1750 /* rev==-2 happens to get encoded as 0, which is interpreted as not set
1719 */
1751 */
1720 return nt_insert(self, node, -2);
1752 return nt_insert(self, node, -2);
1721 }
1753 }
1722
1754
1723 static int nt_init(nodetree *self, indexObject *index, unsigned capacity)
1755 static int nt_init(nodetree *self, indexObject *index, unsigned capacity)
1724 {
1756 {
1725 /* Initialize before overflow-checking to avoid nt_dealloc() crash. */
1757 /* Initialize before overflow-checking to avoid nt_dealloc() crash. */
1726 self->nodes = NULL;
1758 self->nodes = NULL;
1727
1759
1728 self->index = index;
1760 self->index = index;
1729 /* The input capacity is in terms of revisions, while the field is in
1761 /* The input capacity is in terms of revisions, while the field is in
1730 * terms of nodetree nodes. */
1762 * terms of nodetree nodes. */
1731 self->capacity = (capacity < 4 ? 4 : capacity / 2);
1763 self->capacity = (capacity < 4 ? 4 : capacity / 2);
1732 self->nodelen = index->nodelen;
1764 self->nodelen = index->nodelen;
1733 self->depth = 0;
1765 self->depth = 0;
1734 self->splits = 0;
1766 self->splits = 0;
1735 if (self->capacity > SIZE_MAX / sizeof(nodetreenode)) {
1767 if (self->capacity > SIZE_MAX / sizeof(nodetreenode)) {
1736 PyErr_SetString(PyExc_ValueError, "overflow in init_nt");
1768 PyErr_SetString(PyExc_ValueError, "overflow in init_nt");
1737 return -1;
1769 return -1;
1738 }
1770 }
1739 self->nodes = calloc(self->capacity, sizeof(nodetreenode));
1771 self->nodes = calloc(self->capacity, sizeof(nodetreenode));
1740 if (self->nodes == NULL) {
1772 if (self->nodes == NULL) {
1741 PyErr_NoMemory();
1773 PyErr_NoMemory();
1742 return -1;
1774 return -1;
1743 }
1775 }
1744 self->length = 1;
1776 self->length = 1;
1745 return 0;
1777 return 0;
1746 }
1778 }
1747
1779
1748 static int ntobj_init(nodetreeObject *self, PyObject *args)
1780 static int ntobj_init(nodetreeObject *self, PyObject *args)
1749 {
1781 {
1750 PyObject *index;
1782 PyObject *index;
1751 unsigned capacity;
1783 unsigned capacity;
1752 if (!PyArg_ParseTuple(args, "O!I", &HgRevlogIndex_Type, &index,
1784 if (!PyArg_ParseTuple(args, "O!I", &HgRevlogIndex_Type, &index,
1753 &capacity))
1785 &capacity))
1754 return -1;
1786 return -1;
1755 Py_INCREF(index);
1787 Py_INCREF(index);
1756 return nt_init(&self->nt, (indexObject *)index, capacity);
1788 return nt_init(&self->nt, (indexObject *)index, capacity);
1757 }
1789 }
1758
1790
1759 static int nt_partialmatch(nodetree *self, const char *node, Py_ssize_t nodelen)
1791 static int nt_partialmatch(nodetree *self, const char *node, Py_ssize_t nodelen)
1760 {
1792 {
1761 return nt_find(self, node, nodelen, 1);
1793 return nt_find(self, node, nodelen, 1);
1762 }
1794 }
1763
1795
1764 /*
1796 /*
1765 * Find the length of the shortest unique prefix of node.
1797 * Find the length of the shortest unique prefix of node.
1766 *
1798 *
1767 * Return values:
1799 * Return values:
1768 *
1800 *
1769 * -3: error (exception set)
1801 * -3: error (exception set)
1770 * -2: not found (no exception set)
1802 * -2: not found (no exception set)
1771 * rest: length of shortest prefix
1803 * rest: length of shortest prefix
1772 */
1804 */
1773 static int nt_shortest(nodetree *self, const char *node)
1805 static int nt_shortest(nodetree *self, const char *node)
1774 {
1806 {
1775 int level, off;
1807 int level, off;
1776
1808
1777 for (level = off = 0; level < 2 * self->nodelen; level++) {
1809 for (level = off = 0; level < 2 * self->nodelen; level++) {
1778 int k, v;
1810 int k, v;
1779 nodetreenode *n = &self->nodes[off];
1811 nodetreenode *n = &self->nodes[off];
1780 k = nt_level(node, level);
1812 k = nt_level(node, level);
1781 v = n->children[k];
1813 v = n->children[k];
1782 if (v < 0) {
1814 if (v < 0) {
1783 const char *n;
1815 const char *n;
1784 v = -(v + 2);
1816 v = -(v + 2);
1785 n = index_node_existing(self->index, v);
1817 n = index_node_existing(self->index, v);
1786 if (n == NULL)
1818 if (n == NULL)
1787 return -3;
1819 return -3;
1788 if (memcmp(node, n, self->nodelen) != 0)
1820 if (memcmp(node, n, self->nodelen) != 0)
1789 /*
1821 /*
1790 * Found a unique prefix, but it wasn't for the
1822 * Found a unique prefix, but it wasn't for the
1791 * requested node (i.e the requested node does
1823 * requested node (i.e the requested node does
1792 * not exist).
1824 * not exist).
1793 */
1825 */
1794 return -2;
1826 return -2;
1795 return level + 1;
1827 return level + 1;
1796 }
1828 }
1797 if (v == 0)
1829 if (v == 0)
1798 return -2;
1830 return -2;
1799 off = v;
1831 off = v;
1800 }
1832 }
1801 /*
1833 /*
1802 * The node was still not unique after 40 hex digits, so this won't
1834 * The node was still not unique after 40 hex digits, so this won't
1803 * happen. Also, if we get here, then there's a programming error in
1835 * happen. Also, if we get here, then there's a programming error in
1804 * this file that made us insert a node longer than 40 hex digits.
1836 * this file that made us insert a node longer than 40 hex digits.
1805 */
1837 */
1806 PyErr_SetString(PyExc_Exception, "broken node tree");
1838 PyErr_SetString(PyExc_Exception, "broken node tree");
1807 return -3;
1839 return -3;
1808 }
1840 }
1809
1841
1810 static PyObject *ntobj_shortest(nodetreeObject *self, PyObject *args)
1842 static PyObject *ntobj_shortest(nodetreeObject *self, PyObject *args)
1811 {
1843 {
1812 PyObject *val;
1844 PyObject *val;
1813 char *node;
1845 char *node;
1814 int length;
1846 int length;
1815
1847
1816 if (!PyArg_ParseTuple(args, "O", &val))
1848 if (!PyArg_ParseTuple(args, "O", &val))
1817 return NULL;
1849 return NULL;
1818 if (node_check(self->nt.nodelen, val, &node) == -1)
1850 if (node_check(self->nt.nodelen, val, &node) == -1)
1819 return NULL;
1851 return NULL;
1820
1852
1821 length = nt_shortest(&self->nt, node);
1853 length = nt_shortest(&self->nt, node);
1822 if (length == -3)
1854 if (length == -3)
1823 return NULL;
1855 return NULL;
1824 if (length == -2) {
1856 if (length == -2) {
1825 raise_revlog_error();
1857 raise_revlog_error();
1826 return NULL;
1858 return NULL;
1827 }
1859 }
1828 return PyInt_FromLong(length);
1860 return PyInt_FromLong(length);
1829 }
1861 }
1830
1862
1831 static void nt_dealloc(nodetree *self)
1863 static void nt_dealloc(nodetree *self)
1832 {
1864 {
1833 free(self->nodes);
1865 free(self->nodes);
1834 self->nodes = NULL;
1866 self->nodes = NULL;
1835 }
1867 }
1836
1868
1837 static void ntobj_dealloc(nodetreeObject *self)
1869 static void ntobj_dealloc(nodetreeObject *self)
1838 {
1870 {
1839 Py_XDECREF(self->nt.index);
1871 Py_XDECREF(self->nt.index);
1840 nt_dealloc(&self->nt);
1872 nt_dealloc(&self->nt);
1841 PyObject_Del(self);
1873 PyObject_Del(self);
1842 }
1874 }
1843
1875
1844 static PyMethodDef ntobj_methods[] = {
1876 static PyMethodDef ntobj_methods[] = {
1845 {"insert", (PyCFunction)ntobj_insert, METH_VARARGS,
1877 {"insert", (PyCFunction)ntobj_insert, METH_VARARGS,
1846 "insert an index entry"},
1878 "insert an index entry"},
1847 {"shortest", (PyCFunction)ntobj_shortest, METH_VARARGS,
1879 {"shortest", (PyCFunction)ntobj_shortest, METH_VARARGS,
1848 "find length of shortest hex nodeid of a binary ID"},
1880 "find length of shortest hex nodeid of a binary ID"},
1849 {NULL} /* Sentinel */
1881 {NULL} /* Sentinel */
1850 };
1882 };
1851
1883
1852 static PyTypeObject nodetreeType = {
1884 static PyTypeObject nodetreeType = {
1853 PyVarObject_HEAD_INIT(NULL, 0) /* header */
1885 PyVarObject_HEAD_INIT(NULL, 0) /* header */
1854 "parsers.nodetree", /* tp_name */
1886 "parsers.nodetree", /* tp_name */
1855 sizeof(nodetreeObject), /* tp_basicsize */
1887 sizeof(nodetreeObject), /* tp_basicsize */
1856 0, /* tp_itemsize */
1888 0, /* tp_itemsize */
1857 (destructor)ntobj_dealloc, /* tp_dealloc */
1889 (destructor)ntobj_dealloc, /* tp_dealloc */
1858 0, /* tp_print */
1890 0, /* tp_print */
1859 0, /* tp_getattr */
1891 0, /* tp_getattr */
1860 0, /* tp_setattr */
1892 0, /* tp_setattr */
1861 0, /* tp_compare */
1893 0, /* tp_compare */
1862 0, /* tp_repr */
1894 0, /* tp_repr */
1863 0, /* tp_as_number */
1895 0, /* tp_as_number */
1864 0, /* tp_as_sequence */
1896 0, /* tp_as_sequence */
1865 0, /* tp_as_mapping */
1897 0, /* tp_as_mapping */
1866 0, /* tp_hash */
1898 0, /* tp_hash */
1867 0, /* tp_call */
1899 0, /* tp_call */
1868 0, /* tp_str */
1900 0, /* tp_str */
1869 0, /* tp_getattro */
1901 0, /* tp_getattro */
1870 0, /* tp_setattro */
1902 0, /* tp_setattro */
1871 0, /* tp_as_buffer */
1903 0, /* tp_as_buffer */
1872 Py_TPFLAGS_DEFAULT, /* tp_flags */
1904 Py_TPFLAGS_DEFAULT, /* tp_flags */
1873 "nodetree", /* tp_doc */
1905 "nodetree", /* tp_doc */
1874 0, /* tp_traverse */
1906 0, /* tp_traverse */
1875 0, /* tp_clear */
1907 0, /* tp_clear */
1876 0, /* tp_richcompare */
1908 0, /* tp_richcompare */
1877 0, /* tp_weaklistoffset */
1909 0, /* tp_weaklistoffset */
1878 0, /* tp_iter */
1910 0, /* tp_iter */
1879 0, /* tp_iternext */
1911 0, /* tp_iternext */
1880 ntobj_methods, /* tp_methods */
1912 ntobj_methods, /* tp_methods */
1881 0, /* tp_members */
1913 0, /* tp_members */
1882 0, /* tp_getset */
1914 0, /* tp_getset */
1883 0, /* tp_base */
1915 0, /* tp_base */
1884 0, /* tp_dict */
1916 0, /* tp_dict */
1885 0, /* tp_descr_get */
1917 0, /* tp_descr_get */
1886 0, /* tp_descr_set */
1918 0, /* tp_descr_set */
1887 0, /* tp_dictoffset */
1919 0, /* tp_dictoffset */
1888 (initproc)ntobj_init, /* tp_init */
1920 (initproc)ntobj_init, /* tp_init */
1889 0, /* tp_alloc */
1921 0, /* tp_alloc */
1890 };
1922 };
1891
1923
1892 static int index_init_nt(indexObject *self)
1924 static int index_init_nt(indexObject *self)
1893 {
1925 {
1894 if (!self->ntinitialized) {
1926 if (!self->ntinitialized) {
1895 if (nt_init(&self->nt, self, (int)self->length) == -1) {
1927 if (nt_init(&self->nt, self, (int)self->length) == -1) {
1896 nt_dealloc(&self->nt);
1928 nt_dealloc(&self->nt);
1897 return -1;
1929 return -1;
1898 }
1930 }
1899 if (nt_insert(&self->nt, nullid, -1) == -1) {
1931 if (nt_insert(&self->nt, nullid, -1) == -1) {
1900 nt_dealloc(&self->nt);
1932 nt_dealloc(&self->nt);
1901 return -1;
1933 return -1;
1902 }
1934 }
1903 self->ntinitialized = 1;
1935 self->ntinitialized = 1;
1904 self->ntrev = (int)index_length(self);
1936 self->ntrev = (int)index_length(self);
1905 self->ntlookups = 1;
1937 self->ntlookups = 1;
1906 self->ntmisses = 0;
1938 self->ntmisses = 0;
1907 }
1939 }
1908 return 0;
1940 return 0;
1909 }
1941 }
1910
1942
1911 /*
1943 /*
1912 * Return values:
1944 * Return values:
1913 *
1945 *
1914 * -3: error (exception set)
1946 * -3: error (exception set)
1915 * -2: not found (no exception set)
1947 * -2: not found (no exception set)
1916 * rest: valid rev
1948 * rest: valid rev
1917 */
1949 */
1918 static int index_find_node(indexObject *self, const char *node)
1950 static int index_find_node(indexObject *self, const char *node)
1919 {
1951 {
1920 int rev;
1952 int rev;
1921
1953
1922 if (index_init_nt(self) == -1)
1954 if (index_init_nt(self) == -1)
1923 return -3;
1955 return -3;
1924
1956
1925 self->ntlookups++;
1957 self->ntlookups++;
1926 rev = nt_find(&self->nt, node, self->nodelen, 0);
1958 rev = nt_find(&self->nt, node, self->nodelen, 0);
1927 if (rev >= -1)
1959 if (rev >= -1)
1928 return rev;
1960 return rev;
1929
1961
1930 /*
1962 /*
1931 * For the first handful of lookups, we scan the entire index,
1963 * For the first handful of lookups, we scan the entire index,
1932 * and cache only the matching nodes. This optimizes for cases
1964 * and cache only the matching nodes. This optimizes for cases
1933 * like "hg tip", where only a few nodes are accessed.
1965 * like "hg tip", where only a few nodes are accessed.
1934 *
1966 *
1935 * After that, we cache every node we visit, using a single
1967 * After that, we cache every node we visit, using a single
1936 * scan amortized over multiple lookups. This gives the best
1968 * scan amortized over multiple lookups. This gives the best
1937 * bulk performance, e.g. for "hg log".
1969 * bulk performance, e.g. for "hg log".
1938 */
1970 */
1939 if (self->ntmisses++ < 4) {
1971 if (self->ntmisses++ < 4) {
1940 for (rev = self->ntrev - 1; rev >= 0; rev--) {
1972 for (rev = self->ntrev - 1; rev >= 0; rev--) {
1941 const char *n = index_node_existing(self, rev);
1973 const char *n = index_node_existing(self, rev);
1942 if (n == NULL)
1974 if (n == NULL)
1943 return -3;
1975 return -3;
1944 if (memcmp(node, n, self->nodelen) == 0) {
1976 if (memcmp(node, n, self->nodelen) == 0) {
1945 if (nt_insert(&self->nt, n, rev) == -1)
1977 if (nt_insert(&self->nt, n, rev) == -1)
1946 return -3;
1978 return -3;
1947 break;
1979 break;
1948 }
1980 }
1949 }
1981 }
1950 } else {
1982 } else {
1951 for (rev = self->ntrev - 1; rev >= 0; rev--) {
1983 for (rev = self->ntrev - 1; rev >= 0; rev--) {
1952 const char *n = index_node_existing(self, rev);
1984 const char *n = index_node_existing(self, rev);
1953 if (n == NULL)
1985 if (n == NULL)
1954 return -3;
1986 return -3;
1955 if (nt_insert(&self->nt, n, rev) == -1) {
1987 if (nt_insert(&self->nt, n, rev) == -1) {
1956 self->ntrev = rev + 1;
1988 self->ntrev = rev + 1;
1957 return -3;
1989 return -3;
1958 }
1990 }
1959 if (memcmp(node, n, self->nodelen) == 0) {
1991 if (memcmp(node, n, self->nodelen) == 0) {
1960 break;
1992 break;
1961 }
1993 }
1962 }
1994 }
1963 self->ntrev = rev;
1995 self->ntrev = rev;
1964 }
1996 }
1965
1997
1966 if (rev >= 0)
1998 if (rev >= 0)
1967 return rev;
1999 return rev;
1968 return -2;
2000 return -2;
1969 }
2001 }
1970
2002
1971 static PyObject *index_getitem(indexObject *self, PyObject *value)
2003 static PyObject *index_getitem(indexObject *self, PyObject *value)
1972 {
2004 {
1973 char *node;
2005 char *node;
1974 int rev;
2006 int rev;
1975
2007
1976 if (PyInt_Check(value)) {
2008 if (PyInt_Check(value)) {
1977 long idx;
2009 long idx;
1978 if (!pylong_to_long(value, &idx)) {
2010 if (!pylong_to_long(value, &idx)) {
1979 return NULL;
2011 return NULL;
1980 }
2012 }
1981 return index_get(self, idx);
2013 return index_get(self, idx);
1982 }
2014 }
1983
2015
1984 if (node_check(self->nodelen, value, &node) == -1)
2016 if (node_check(self->nodelen, value, &node) == -1)
1985 return NULL;
2017 return NULL;
1986 rev = index_find_node(self, node);
2018 rev = index_find_node(self, node);
1987 if (rev >= -1)
2019 if (rev >= -1)
1988 return PyInt_FromLong(rev);
2020 return PyInt_FromLong(rev);
1989 if (rev == -2)
2021 if (rev == -2)
1990 raise_revlog_error();
2022 raise_revlog_error();
1991 return NULL;
2023 return NULL;
1992 }
2024 }
1993
2025
1994 /*
2026 /*
1995 * Fully populate the radix tree.
2027 * Fully populate the radix tree.
1996 */
2028 */
1997 static int index_populate_nt(indexObject *self)
2029 static int index_populate_nt(indexObject *self)
1998 {
2030 {
1999 int rev;
2031 int rev;
2000 if (self->ntrev > 0) {
2032 if (self->ntrev > 0) {
2001 for (rev = self->ntrev - 1; rev >= 0; rev--) {
2033 for (rev = self->ntrev - 1; rev >= 0; rev--) {
2002 const char *n = index_node_existing(self, rev);
2034 const char *n = index_node_existing(self, rev);
2003 if (n == NULL)
2035 if (n == NULL)
2004 return -1;
2036 return -1;
2005 if (nt_insert(&self->nt, n, rev) == -1)
2037 if (nt_insert(&self->nt, n, rev) == -1)
2006 return -1;
2038 return -1;
2007 }
2039 }
2008 self->ntrev = -1;
2040 self->ntrev = -1;
2009 }
2041 }
2010 return 0;
2042 return 0;
2011 }
2043 }
2012
2044
2013 static PyObject *index_partialmatch(indexObject *self, PyObject *args)
2045 static PyObject *index_partialmatch(indexObject *self, PyObject *args)
2014 {
2046 {
2015 const char *fullnode;
2047 const char *fullnode;
2016 Py_ssize_t nodelen;
2048 Py_ssize_t nodelen;
2017 char *node;
2049 char *node;
2018 int rev, i;
2050 int rev, i;
2019
2051
2020 if (!PyArg_ParseTuple(args, PY23("s#", "y#"), &node, &nodelen))
2052 if (!PyArg_ParseTuple(args, PY23("s#", "y#"), &node, &nodelen))
2021 return NULL;
2053 return NULL;
2022
2054
2023 if (nodelen < 1) {
2055 if (nodelen < 1) {
2024 PyErr_SetString(PyExc_ValueError, "key too short");
2056 PyErr_SetString(PyExc_ValueError, "key too short");
2025 return NULL;
2057 return NULL;
2026 }
2058 }
2027
2059
2028 if (nodelen > 2 * self->nodelen) {
2060 if (nodelen > 2 * self->nodelen) {
2029 PyErr_SetString(PyExc_ValueError, "key too long");
2061 PyErr_SetString(PyExc_ValueError, "key too long");
2030 return NULL;
2062 return NULL;
2031 }
2063 }
2032
2064
2033 for (i = 0; i < nodelen; i++)
2065 for (i = 0; i < nodelen; i++)
2034 hexdigit(node, i);
2066 hexdigit(node, i);
2035 if (PyErr_Occurred()) {
2067 if (PyErr_Occurred()) {
2036 /* input contains non-hex characters */
2068 /* input contains non-hex characters */
2037 PyErr_Clear();
2069 PyErr_Clear();
2038 Py_RETURN_NONE;
2070 Py_RETURN_NONE;
2039 }
2071 }
2040
2072
2041 if (index_init_nt(self) == -1)
2073 if (index_init_nt(self) == -1)
2042 return NULL;
2074 return NULL;
2043 if (index_populate_nt(self) == -1)
2075 if (index_populate_nt(self) == -1)
2044 return NULL;
2076 return NULL;
2045 rev = nt_partialmatch(&self->nt, node, nodelen);
2077 rev = nt_partialmatch(&self->nt, node, nodelen);
2046
2078
2047 switch (rev) {
2079 switch (rev) {
2048 case -4:
2080 case -4:
2049 raise_revlog_error();
2081 raise_revlog_error();
2050 return NULL;
2082 return NULL;
2051 case -2:
2083 case -2:
2052 Py_RETURN_NONE;
2084 Py_RETURN_NONE;
2053 case -1:
2085 case -1:
2054 return PyBytes_FromStringAndSize(nullid, self->nodelen);
2086 return PyBytes_FromStringAndSize(nullid, self->nodelen);
2055 }
2087 }
2056
2088
2057 fullnode = index_node_existing(self, rev);
2089 fullnode = index_node_existing(self, rev);
2058 if (fullnode == NULL) {
2090 if (fullnode == NULL) {
2059 return NULL;
2091 return NULL;
2060 }
2092 }
2061 return PyBytes_FromStringAndSize(fullnode, self->nodelen);
2093 return PyBytes_FromStringAndSize(fullnode, self->nodelen);
2062 }
2094 }
2063
2095
2064 static PyObject *index_shortest(indexObject *self, PyObject *args)
2096 static PyObject *index_shortest(indexObject *self, PyObject *args)
2065 {
2097 {
2066 PyObject *val;
2098 PyObject *val;
2067 char *node;
2099 char *node;
2068 int length;
2100 int length;
2069
2101
2070 if (!PyArg_ParseTuple(args, "O", &val))
2102 if (!PyArg_ParseTuple(args, "O", &val))
2071 return NULL;
2103 return NULL;
2072 if (node_check(self->nodelen, val, &node) == -1)
2104 if (node_check(self->nodelen, val, &node) == -1)
2073 return NULL;
2105 return NULL;
2074
2106
2075 self->ntlookups++;
2107 self->ntlookups++;
2076 if (index_init_nt(self) == -1)
2108 if (index_init_nt(self) == -1)
2077 return NULL;
2109 return NULL;
2078 if (index_populate_nt(self) == -1)
2110 if (index_populate_nt(self) == -1)
2079 return NULL;
2111 return NULL;
2080 length = nt_shortest(&self->nt, node);
2112 length = nt_shortest(&self->nt, node);
2081 if (length == -3)
2113 if (length == -3)
2082 return NULL;
2114 return NULL;
2083 if (length == -2) {
2115 if (length == -2) {
2084 raise_revlog_error();
2116 raise_revlog_error();
2085 return NULL;
2117 return NULL;
2086 }
2118 }
2087 return PyInt_FromLong(length);
2119 return PyInt_FromLong(length);
2088 }
2120 }
2089
2121
2090 static PyObject *index_m_get(indexObject *self, PyObject *args)
2122 static PyObject *index_m_get(indexObject *self, PyObject *args)
2091 {
2123 {
2092 PyObject *val;
2124 PyObject *val;
2093 char *node;
2125 char *node;
2094 int rev;
2126 int rev;
2095
2127
2096 if (!PyArg_ParseTuple(args, "O", &val))
2128 if (!PyArg_ParseTuple(args, "O", &val))
2097 return NULL;
2129 return NULL;
2098 if (node_check(self->nodelen, val, &node) == -1)
2130 if (node_check(self->nodelen, val, &node) == -1)
2099 return NULL;
2131 return NULL;
2100 rev = index_find_node(self, node);
2132 rev = index_find_node(self, node);
2101 if (rev == -3)
2133 if (rev == -3)
2102 return NULL;
2134 return NULL;
2103 if (rev == -2)
2135 if (rev == -2)
2104 Py_RETURN_NONE;
2136 Py_RETURN_NONE;
2105 return PyInt_FromLong(rev);
2137 return PyInt_FromLong(rev);
2106 }
2138 }
2107
2139
2108 static int index_contains(indexObject *self, PyObject *value)
2140 static int index_contains(indexObject *self, PyObject *value)
2109 {
2141 {
2110 char *node;
2142 char *node;
2111
2143
2112 if (PyInt_Check(value)) {
2144 if (PyInt_Check(value)) {
2113 long rev;
2145 long rev;
2114 if (!pylong_to_long(value, &rev)) {
2146 if (!pylong_to_long(value, &rev)) {
2115 return -1;
2147 return -1;
2116 }
2148 }
2117 return rev >= -1 && rev < index_length(self);
2149 return rev >= -1 && rev < index_length(self);
2118 }
2150 }
2119
2151
2120 if (node_check(self->nodelen, value, &node) == -1)
2152 if (node_check(self->nodelen, value, &node) == -1)
2121 return -1;
2153 return -1;
2122
2154
2123 switch (index_find_node(self, node)) {
2155 switch (index_find_node(self, node)) {
2124 case -3:
2156 case -3:
2125 return -1;
2157 return -1;
2126 case -2:
2158 case -2:
2127 return 0;
2159 return 0;
2128 default:
2160 default:
2129 return 1;
2161 return 1;
2130 }
2162 }
2131 }
2163 }
2132
2164
2133 static PyObject *index_m_has_node(indexObject *self, PyObject *args)
2165 static PyObject *index_m_has_node(indexObject *self, PyObject *args)
2134 {
2166 {
2135 int ret = index_contains(self, args);
2167 int ret = index_contains(self, args);
2136 if (ret < 0)
2168 if (ret < 0)
2137 return NULL;
2169 return NULL;
2138 return PyBool_FromLong((long)ret);
2170 return PyBool_FromLong((long)ret);
2139 }
2171 }
2140
2172
2141 static PyObject *index_m_rev(indexObject *self, PyObject *val)
2173 static PyObject *index_m_rev(indexObject *self, PyObject *val)
2142 {
2174 {
2143 char *node;
2175 char *node;
2144 int rev;
2176 int rev;
2145
2177
2146 if (node_check(self->nodelen, val, &node) == -1)
2178 if (node_check(self->nodelen, val, &node) == -1)
2147 return NULL;
2179 return NULL;
2148 rev = index_find_node(self, node);
2180 rev = index_find_node(self, node);
2149 if (rev >= -1)
2181 if (rev >= -1)
2150 return PyInt_FromLong(rev);
2182 return PyInt_FromLong(rev);
2151 if (rev == -2)
2183 if (rev == -2)
2152 raise_revlog_error();
2184 raise_revlog_error();
2153 return NULL;
2185 return NULL;
2154 }
2186 }
2155
2187
2156 typedef uint64_t bitmask;
2188 typedef uint64_t bitmask;
2157
2189
2158 /*
2190 /*
2159 * Given a disjoint set of revs, return all candidates for the
2191 * Given a disjoint set of revs, return all candidates for the
2160 * greatest common ancestor. In revset notation, this is the set
2192 * greatest common ancestor. In revset notation, this is the set
2161 * "heads(::a and ::b and ...)"
2193 * "heads(::a and ::b and ...)"
2162 */
2194 */
2163 static PyObject *find_gca_candidates(indexObject *self, const int *revs,
2195 static PyObject *find_gca_candidates(indexObject *self, const int *revs,
2164 int revcount)
2196 int revcount)
2165 {
2197 {
2166 const bitmask allseen = (1ull << revcount) - 1;
2198 const bitmask allseen = (1ull << revcount) - 1;
2167 const bitmask poison = 1ull << revcount;
2199 const bitmask poison = 1ull << revcount;
2168 PyObject *gca = PyList_New(0);
2200 PyObject *gca = PyList_New(0);
2169 int i, v, interesting;
2201 int i, v, interesting;
2170 int maxrev = -1;
2202 int maxrev = -1;
2171 bitmask sp;
2203 bitmask sp;
2172 bitmask *seen;
2204 bitmask *seen;
2173
2205
2174 if (gca == NULL)
2206 if (gca == NULL)
2175 return PyErr_NoMemory();
2207 return PyErr_NoMemory();
2176
2208
2177 for (i = 0; i < revcount; i++) {
2209 for (i = 0; i < revcount; i++) {
2178 if (revs[i] > maxrev)
2210 if (revs[i] > maxrev)
2179 maxrev = revs[i];
2211 maxrev = revs[i];
2180 }
2212 }
2181
2213
2182 seen = calloc(sizeof(*seen), maxrev + 1);
2214 seen = calloc(sizeof(*seen), maxrev + 1);
2183 if (seen == NULL) {
2215 if (seen == NULL) {
2184 Py_DECREF(gca);
2216 Py_DECREF(gca);
2185 return PyErr_NoMemory();
2217 return PyErr_NoMemory();
2186 }
2218 }
2187
2219
2188 for (i = 0; i < revcount; i++)
2220 for (i = 0; i < revcount; i++)
2189 seen[revs[i]] = 1ull << i;
2221 seen[revs[i]] = 1ull << i;
2190
2222
2191 interesting = revcount;
2223 interesting = revcount;
2192
2224
2193 for (v = maxrev; v >= 0 && interesting; v--) {
2225 for (v = maxrev; v >= 0 && interesting; v--) {
2194 bitmask sv = seen[v];
2226 bitmask sv = seen[v];
2195 int parents[2];
2227 int parents[2];
2196
2228
2197 if (!sv)
2229 if (!sv)
2198 continue;
2230 continue;
2199
2231
2200 if (sv < poison) {
2232 if (sv < poison) {
2201 interesting -= 1;
2233 interesting -= 1;
2202 if (sv == allseen) {
2234 if (sv == allseen) {
2203 PyObject *obj = PyInt_FromLong(v);
2235 PyObject *obj = PyInt_FromLong(v);
2204 if (obj == NULL)
2236 if (obj == NULL)
2205 goto bail;
2237 goto bail;
2206 if (PyList_Append(gca, obj) == -1) {
2238 if (PyList_Append(gca, obj) == -1) {
2207 Py_DECREF(obj);
2239 Py_DECREF(obj);
2208 goto bail;
2240 goto bail;
2209 }
2241 }
2210 sv |= poison;
2242 sv |= poison;
2211 for (i = 0; i < revcount; i++) {
2243 for (i = 0; i < revcount; i++) {
2212 if (revs[i] == v)
2244 if (revs[i] == v)
2213 goto done;
2245 goto done;
2214 }
2246 }
2215 }
2247 }
2216 }
2248 }
2217 if (index_get_parents(self, v, parents, maxrev) < 0)
2249 if (index_get_parents(self, v, parents, maxrev) < 0)
2218 goto bail;
2250 goto bail;
2219
2251
2220 for (i = 0; i < 2; i++) {
2252 for (i = 0; i < 2; i++) {
2221 int p = parents[i];
2253 int p = parents[i];
2222 if (p == -1)
2254 if (p == -1)
2223 continue;
2255 continue;
2224 sp = seen[p];
2256 sp = seen[p];
2225 if (sv < poison) {
2257 if (sv < poison) {
2226 if (sp == 0) {
2258 if (sp == 0) {
2227 seen[p] = sv;
2259 seen[p] = sv;
2228 interesting++;
2260 interesting++;
2229 } else if (sp != sv)
2261 } else if (sp != sv)
2230 seen[p] |= sv;
2262 seen[p] |= sv;
2231 } else {
2263 } else {
2232 if (sp && sp < poison)
2264 if (sp && sp < poison)
2233 interesting--;
2265 interesting--;
2234 seen[p] = sv;
2266 seen[p] = sv;
2235 }
2267 }
2236 }
2268 }
2237 }
2269 }
2238
2270
2239 done:
2271 done:
2240 free(seen);
2272 free(seen);
2241 return gca;
2273 return gca;
2242 bail:
2274 bail:
2243 free(seen);
2275 free(seen);
2244 Py_XDECREF(gca);
2276 Py_XDECREF(gca);
2245 return NULL;
2277 return NULL;
2246 }
2278 }
2247
2279
2248 /*
2280 /*
2249 * Given a disjoint set of revs, return the subset with the longest
2281 * Given a disjoint set of revs, return the subset with the longest
2250 * path to the root.
2282 * path to the root.
2251 */
2283 */
2252 static PyObject *find_deepest(indexObject *self, PyObject *revs)
2284 static PyObject *find_deepest(indexObject *self, PyObject *revs)
2253 {
2285 {
2254 const Py_ssize_t revcount = PyList_GET_SIZE(revs);
2286 const Py_ssize_t revcount = PyList_GET_SIZE(revs);
2255 static const Py_ssize_t capacity = 24;
2287 static const Py_ssize_t capacity = 24;
2256 int *depth, *interesting = NULL;
2288 int *depth, *interesting = NULL;
2257 int i, j, v, ninteresting;
2289 int i, j, v, ninteresting;
2258 PyObject *dict = NULL, *keys = NULL;
2290 PyObject *dict = NULL, *keys = NULL;
2259 long *seen = NULL;
2291 long *seen = NULL;
2260 int maxrev = -1;
2292 int maxrev = -1;
2261 long final;
2293 long final;
2262
2294
2263 if (revcount > capacity) {
2295 if (revcount > capacity) {
2264 PyErr_Format(PyExc_OverflowError,
2296 PyErr_Format(PyExc_OverflowError,
2265 "bitset size (%ld) > capacity (%ld)",
2297 "bitset size (%ld) > capacity (%ld)",
2266 (long)revcount, (long)capacity);
2298 (long)revcount, (long)capacity);
2267 return NULL;
2299 return NULL;
2268 }
2300 }
2269
2301
2270 for (i = 0; i < revcount; i++) {
2302 for (i = 0; i < revcount; i++) {
2271 int n = (int)PyInt_AsLong(PyList_GET_ITEM(revs, i));
2303 int n = (int)PyInt_AsLong(PyList_GET_ITEM(revs, i));
2272 if (n > maxrev)
2304 if (n > maxrev)
2273 maxrev = n;
2305 maxrev = n;
2274 }
2306 }
2275
2307
2276 depth = calloc(sizeof(*depth), maxrev + 1);
2308 depth = calloc(sizeof(*depth), maxrev + 1);
2277 if (depth == NULL)
2309 if (depth == NULL)
2278 return PyErr_NoMemory();
2310 return PyErr_NoMemory();
2279
2311
2280 seen = calloc(sizeof(*seen), maxrev + 1);
2312 seen = calloc(sizeof(*seen), maxrev + 1);
2281 if (seen == NULL) {
2313 if (seen == NULL) {
2282 PyErr_NoMemory();
2314 PyErr_NoMemory();
2283 goto bail;
2315 goto bail;
2284 }
2316 }
2285
2317
2286 interesting = calloc(sizeof(*interesting), ((size_t)1) << revcount);
2318 interesting = calloc(sizeof(*interesting), ((size_t)1) << revcount);
2287 if (interesting == NULL) {
2319 if (interesting == NULL) {
2288 PyErr_NoMemory();
2320 PyErr_NoMemory();
2289 goto bail;
2321 goto bail;
2290 }
2322 }
2291
2323
2292 if (PyList_Sort(revs) == -1)
2324 if (PyList_Sort(revs) == -1)
2293 goto bail;
2325 goto bail;
2294
2326
2295 for (i = 0; i < revcount; i++) {
2327 for (i = 0; i < revcount; i++) {
2296 int n = (int)PyInt_AsLong(PyList_GET_ITEM(revs, i));
2328 int n = (int)PyInt_AsLong(PyList_GET_ITEM(revs, i));
2297 long b = 1l << i;
2329 long b = 1l << i;
2298 depth[n] = 1;
2330 depth[n] = 1;
2299 seen[n] = b;
2331 seen[n] = b;
2300 interesting[b] = 1;
2332 interesting[b] = 1;
2301 }
2333 }
2302
2334
2303 /* invariant: ninteresting is the number of non-zero entries in
2335 /* invariant: ninteresting is the number of non-zero entries in
2304 * interesting. */
2336 * interesting. */
2305 ninteresting = (int)revcount;
2337 ninteresting = (int)revcount;
2306
2338
2307 for (v = maxrev; v >= 0 && ninteresting > 1; v--) {
2339 for (v = maxrev; v >= 0 && ninteresting > 1; v--) {
2308 int dv = depth[v];
2340 int dv = depth[v];
2309 int parents[2];
2341 int parents[2];
2310 long sv;
2342 long sv;
2311
2343
2312 if (dv == 0)
2344 if (dv == 0)
2313 continue;
2345 continue;
2314
2346
2315 sv = seen[v];
2347 sv = seen[v];
2316 if (index_get_parents(self, v, parents, maxrev) < 0)
2348 if (index_get_parents(self, v, parents, maxrev) < 0)
2317 goto bail;
2349 goto bail;
2318
2350
2319 for (i = 0; i < 2; i++) {
2351 for (i = 0; i < 2; i++) {
2320 int p = parents[i];
2352 int p = parents[i];
2321 long sp;
2353 long sp;
2322 int dp;
2354 int dp;
2323
2355
2324 if (p == -1)
2356 if (p == -1)
2325 continue;
2357 continue;
2326
2358
2327 dp = depth[p];
2359 dp = depth[p];
2328 sp = seen[p];
2360 sp = seen[p];
2329 if (dp <= dv) {
2361 if (dp <= dv) {
2330 depth[p] = dv + 1;
2362 depth[p] = dv + 1;
2331 if (sp != sv) {
2363 if (sp != sv) {
2332 interesting[sv] += 1;
2364 interesting[sv] += 1;
2333 seen[p] = sv;
2365 seen[p] = sv;
2334 if (sp) {
2366 if (sp) {
2335 interesting[sp] -= 1;
2367 interesting[sp] -= 1;
2336 if (interesting[sp] == 0)
2368 if (interesting[sp] == 0)
2337 ninteresting -= 1;
2369 ninteresting -= 1;
2338 }
2370 }
2339 }
2371 }
2340 } else if (dv == dp - 1) {
2372 } else if (dv == dp - 1) {
2341 long nsp = sp | sv;
2373 long nsp = sp | sv;
2342 if (nsp == sp)
2374 if (nsp == sp)
2343 continue;
2375 continue;
2344 seen[p] = nsp;
2376 seen[p] = nsp;
2345 interesting[sp] -= 1;
2377 interesting[sp] -= 1;
2346 if (interesting[sp] == 0)
2378 if (interesting[sp] == 0)
2347 ninteresting -= 1;
2379 ninteresting -= 1;
2348 if (interesting[nsp] == 0)
2380 if (interesting[nsp] == 0)
2349 ninteresting += 1;
2381 ninteresting += 1;
2350 interesting[nsp] += 1;
2382 interesting[nsp] += 1;
2351 }
2383 }
2352 }
2384 }
2353 interesting[sv] -= 1;
2385 interesting[sv] -= 1;
2354 if (interesting[sv] == 0)
2386 if (interesting[sv] == 0)
2355 ninteresting -= 1;
2387 ninteresting -= 1;
2356 }
2388 }
2357
2389
2358 final = 0;
2390 final = 0;
2359 j = ninteresting;
2391 j = ninteresting;
2360 for (i = 0; i < (int)(2 << revcount) && j > 0; i++) {
2392 for (i = 0; i < (int)(2 << revcount) && j > 0; i++) {
2361 if (interesting[i] == 0)
2393 if (interesting[i] == 0)
2362 continue;
2394 continue;
2363 final |= i;
2395 final |= i;
2364 j -= 1;
2396 j -= 1;
2365 }
2397 }
2366 if (final == 0) {
2398 if (final == 0) {
2367 keys = PyList_New(0);
2399 keys = PyList_New(0);
2368 goto bail;
2400 goto bail;
2369 }
2401 }
2370
2402
2371 dict = PyDict_New();
2403 dict = PyDict_New();
2372 if (dict == NULL)
2404 if (dict == NULL)
2373 goto bail;
2405 goto bail;
2374
2406
2375 for (i = 0; i < revcount; i++) {
2407 for (i = 0; i < revcount; i++) {
2376 PyObject *key;
2408 PyObject *key;
2377
2409
2378 if ((final & (1 << i)) == 0)
2410 if ((final & (1 << i)) == 0)
2379 continue;
2411 continue;
2380
2412
2381 key = PyList_GET_ITEM(revs, i);
2413 key = PyList_GET_ITEM(revs, i);
2382 Py_INCREF(key);
2414 Py_INCREF(key);
2383 Py_INCREF(Py_None);
2415 Py_INCREF(Py_None);
2384 if (PyDict_SetItem(dict, key, Py_None) == -1) {
2416 if (PyDict_SetItem(dict, key, Py_None) == -1) {
2385 Py_DECREF(key);
2417 Py_DECREF(key);
2386 Py_DECREF(Py_None);
2418 Py_DECREF(Py_None);
2387 goto bail;
2419 goto bail;
2388 }
2420 }
2389 }
2421 }
2390
2422
2391 keys = PyDict_Keys(dict);
2423 keys = PyDict_Keys(dict);
2392
2424
2393 bail:
2425 bail:
2394 free(depth);
2426 free(depth);
2395 free(seen);
2427 free(seen);
2396 free(interesting);
2428 free(interesting);
2397 Py_XDECREF(dict);
2429 Py_XDECREF(dict);
2398
2430
2399 return keys;
2431 return keys;
2400 }
2432 }
2401
2433
2402 /*
2434 /*
2403 * Given a (possibly overlapping) set of revs, return all the
2435 * Given a (possibly overlapping) set of revs, return all the
2404 * common ancestors heads: heads(::args[0] and ::a[1] and ...)
2436 * common ancestors heads: heads(::args[0] and ::a[1] and ...)
2405 */
2437 */
2406 static PyObject *index_commonancestorsheads(indexObject *self, PyObject *args)
2438 static PyObject *index_commonancestorsheads(indexObject *self, PyObject *args)
2407 {
2439 {
2408 PyObject *ret = NULL;
2440 PyObject *ret = NULL;
2409 Py_ssize_t argcount, i, len;
2441 Py_ssize_t argcount, i, len;
2410 bitmask repeat = 0;
2442 bitmask repeat = 0;
2411 int revcount = 0;
2443 int revcount = 0;
2412 int *revs;
2444 int *revs;
2413
2445
2414 argcount = PySequence_Length(args);
2446 argcount = PySequence_Length(args);
2415 revs = PyMem_Malloc(argcount * sizeof(*revs));
2447 revs = PyMem_Malloc(argcount * sizeof(*revs));
2416 if (argcount > 0 && revs == NULL)
2448 if (argcount > 0 && revs == NULL)
2417 return PyErr_NoMemory();
2449 return PyErr_NoMemory();
2418 len = index_length(self);
2450 len = index_length(self);
2419
2451
2420 for (i = 0; i < argcount; i++) {
2452 for (i = 0; i < argcount; i++) {
2421 static const int capacity = 24;
2453 static const int capacity = 24;
2422 PyObject *obj = PySequence_GetItem(args, i);
2454 PyObject *obj = PySequence_GetItem(args, i);
2423 bitmask x;
2455 bitmask x;
2424 long val;
2456 long val;
2425
2457
2426 if (!PyInt_Check(obj)) {
2458 if (!PyInt_Check(obj)) {
2427 PyErr_SetString(PyExc_TypeError,
2459 PyErr_SetString(PyExc_TypeError,
2428 "arguments must all be ints");
2460 "arguments must all be ints");
2429 Py_DECREF(obj);
2461 Py_DECREF(obj);
2430 goto bail;
2462 goto bail;
2431 }
2463 }
2432 val = PyInt_AsLong(obj);
2464 val = PyInt_AsLong(obj);
2433 Py_DECREF(obj);
2465 Py_DECREF(obj);
2434 if (val == -1) {
2466 if (val == -1) {
2435 ret = PyList_New(0);
2467 ret = PyList_New(0);
2436 goto done;
2468 goto done;
2437 }
2469 }
2438 if (val < 0 || val >= len) {
2470 if (val < 0 || val >= len) {
2439 PyErr_SetString(PyExc_IndexError, "index out of range");
2471 PyErr_SetString(PyExc_IndexError, "index out of range");
2440 goto bail;
2472 goto bail;
2441 }
2473 }
2442 /* this cheesy bloom filter lets us avoid some more
2474 /* this cheesy bloom filter lets us avoid some more
2443 * expensive duplicate checks in the common set-is-disjoint
2475 * expensive duplicate checks in the common set-is-disjoint
2444 * case */
2476 * case */
2445 x = 1ull << (val & 0x3f);
2477 x = 1ull << (val & 0x3f);
2446 if (repeat & x) {
2478 if (repeat & x) {
2447 int k;
2479 int k;
2448 for (k = 0; k < revcount; k++) {
2480 for (k = 0; k < revcount; k++) {
2449 if (val == revs[k])
2481 if (val == revs[k])
2450 goto duplicate;
2482 goto duplicate;
2451 }
2483 }
2452 } else
2484 } else
2453 repeat |= x;
2485 repeat |= x;
2454 if (revcount >= capacity) {
2486 if (revcount >= capacity) {
2455 PyErr_Format(PyExc_OverflowError,
2487 PyErr_Format(PyExc_OverflowError,
2456 "bitset size (%d) > capacity (%d)",
2488 "bitset size (%d) > capacity (%d)",
2457 revcount, capacity);
2489 revcount, capacity);
2458 goto bail;
2490 goto bail;
2459 }
2491 }
2460 revs[revcount++] = (int)val;
2492 revs[revcount++] = (int)val;
2461 duplicate:;
2493 duplicate:;
2462 }
2494 }
2463
2495
2464 if (revcount == 0) {
2496 if (revcount == 0) {
2465 ret = PyList_New(0);
2497 ret = PyList_New(0);
2466 goto done;
2498 goto done;
2467 }
2499 }
2468 if (revcount == 1) {
2500 if (revcount == 1) {
2469 PyObject *obj;
2501 PyObject *obj;
2470 ret = PyList_New(1);
2502 ret = PyList_New(1);
2471 if (ret == NULL)
2503 if (ret == NULL)
2472 goto bail;
2504 goto bail;
2473 obj = PyInt_FromLong(revs[0]);
2505 obj = PyInt_FromLong(revs[0]);
2474 if (obj == NULL)
2506 if (obj == NULL)
2475 goto bail;
2507 goto bail;
2476 PyList_SET_ITEM(ret, 0, obj);
2508 PyList_SET_ITEM(ret, 0, obj);
2477 goto done;
2509 goto done;
2478 }
2510 }
2479
2511
2480 ret = find_gca_candidates(self, revs, revcount);
2512 ret = find_gca_candidates(self, revs, revcount);
2481 if (ret == NULL)
2513 if (ret == NULL)
2482 goto bail;
2514 goto bail;
2483
2515
2484 done:
2516 done:
2485 PyMem_Free(revs);
2517 PyMem_Free(revs);
2486 return ret;
2518 return ret;
2487
2519
2488 bail:
2520 bail:
2489 PyMem_Free(revs);
2521 PyMem_Free(revs);
2490 Py_XDECREF(ret);
2522 Py_XDECREF(ret);
2491 return NULL;
2523 return NULL;
2492 }
2524 }
2493
2525
2494 /*
2526 /*
2495 * Given a (possibly overlapping) set of revs, return the greatest
2527 * Given a (possibly overlapping) set of revs, return the greatest
2496 * common ancestors: those with the longest path to the root.
2528 * common ancestors: those with the longest path to the root.
2497 */
2529 */
2498 static PyObject *index_ancestors(indexObject *self, PyObject *args)
2530 static PyObject *index_ancestors(indexObject *self, PyObject *args)
2499 {
2531 {
2500 PyObject *ret;
2532 PyObject *ret;
2501 PyObject *gca = index_commonancestorsheads(self, args);
2533 PyObject *gca = index_commonancestorsheads(self, args);
2502 if (gca == NULL)
2534 if (gca == NULL)
2503 return NULL;
2535 return NULL;
2504
2536
2505 if (PyList_GET_SIZE(gca) <= 1) {
2537 if (PyList_GET_SIZE(gca) <= 1) {
2506 return gca;
2538 return gca;
2507 }
2539 }
2508
2540
2509 ret = find_deepest(self, gca);
2541 ret = find_deepest(self, gca);
2510 Py_DECREF(gca);
2542 Py_DECREF(gca);
2511 return ret;
2543 return ret;
2512 }
2544 }
2513
2545
2514 /*
2546 /*
2515 * Invalidate any trie entries introduced by added revs.
2547 * Invalidate any trie entries introduced by added revs.
2516 */
2548 */
2517 static void index_invalidate_added(indexObject *self, Py_ssize_t start)
2549 static void index_invalidate_added(indexObject *self, Py_ssize_t start)
2518 {
2550 {
2519 Py_ssize_t i, len;
2551 Py_ssize_t i, len;
2520
2552
2521 len = self->length + self->new_length;
2553 len = self->length + self->new_length;
2522 i = start - self->length;
2554 i = start - self->length;
2523 if (i < 0)
2555 if (i < 0)
2524 return;
2556 return;
2525
2557
2526 for (i = start; i < len; i++)
2558 for (i = start; i < len; i++)
2527 nt_delete_node(&self->nt, index_deref(self, i) + 32);
2559 nt_delete_node(&self->nt, index_deref(self, i) + 32);
2528
2560
2529 self->new_length = start - self->length;
2561 self->new_length = start - self->length;
2530 }
2562 }
2531
2563
2532 /*
2564 /*
2533 * Delete a numeric range of revs, which must be at the end of the
2565 * Delete a numeric range of revs, which must be at the end of the
2534 * range.
2566 * range.
2535 */
2567 */
2536 static int index_slice_del(indexObject *self, PyObject *item)
2568 static int index_slice_del(indexObject *self, PyObject *item)
2537 {
2569 {
2538 Py_ssize_t start, stop, step, slicelength;
2570 Py_ssize_t start, stop, step, slicelength;
2539 Py_ssize_t length = index_length(self) + 1;
2571 Py_ssize_t length = index_length(self) + 1;
2540 int ret = 0;
2572 int ret = 0;
2541
2573
2542 /* Argument changed from PySliceObject* to PyObject* in Python 3. */
2574 /* Argument changed from PySliceObject* to PyObject* in Python 3. */
2543 #ifdef IS_PY3K
2575 #ifdef IS_PY3K
2544 if (PySlice_GetIndicesEx(item, length, &start, &stop, &step,
2576 if (PySlice_GetIndicesEx(item, length, &start, &stop, &step,
2545 &slicelength) < 0)
2577 &slicelength) < 0)
2546 #else
2578 #else
2547 if (PySlice_GetIndicesEx((PySliceObject *)item, length, &start, &stop,
2579 if (PySlice_GetIndicesEx((PySliceObject *)item, length, &start, &stop,
2548 &step, &slicelength) < 0)
2580 &step, &slicelength) < 0)
2549 #endif
2581 #endif
2550 return -1;
2582 return -1;
2551
2583
2552 if (slicelength <= 0)
2584 if (slicelength <= 0)
2553 return 0;
2585 return 0;
2554
2586
2555 if ((step < 0 && start < stop) || (step > 0 && start > stop))
2587 if ((step < 0 && start < stop) || (step > 0 && start > stop))
2556 stop = start;
2588 stop = start;
2557
2589
2558 if (step < 0) {
2590 if (step < 0) {
2559 stop = start + 1;
2591 stop = start + 1;
2560 start = stop + step * (slicelength - 1) - 1;
2592 start = stop + step * (slicelength - 1) - 1;
2561 step = -step;
2593 step = -step;
2562 }
2594 }
2563
2595
2564 if (step != 1) {
2596 if (step != 1) {
2565 PyErr_SetString(PyExc_ValueError,
2597 PyErr_SetString(PyExc_ValueError,
2566 "revlog index delete requires step size of 1");
2598 "revlog index delete requires step size of 1");
2567 return -1;
2599 return -1;
2568 }
2600 }
2569
2601
2570 if (stop != length - 1) {
2602 if (stop != length - 1) {
2571 PyErr_SetString(PyExc_IndexError,
2603 PyErr_SetString(PyExc_IndexError,
2572 "revlog index deletion indices are invalid");
2604 "revlog index deletion indices are invalid");
2573 return -1;
2605 return -1;
2574 }
2606 }
2575
2607
2576 if (start < self->length) {
2608 if (start < self->length) {
2577 if (self->ntinitialized) {
2609 if (self->ntinitialized) {
2578 Py_ssize_t i;
2610 Py_ssize_t i;
2579
2611
2580 for (i = start; i < self->length; i++) {
2612 for (i = start; i < self->length; i++) {
2581 const char *node = index_node_existing(self, i);
2613 const char *node = index_node_existing(self, i);
2582 if (node == NULL)
2614 if (node == NULL)
2583 return -1;
2615 return -1;
2584
2616
2585 nt_delete_node(&self->nt, node);
2617 nt_delete_node(&self->nt, node);
2586 }
2618 }
2587 if (self->new_length)
2619 if (self->new_length)
2588 index_invalidate_added(self, self->length);
2620 index_invalidate_added(self, self->length);
2589 if (self->ntrev > start)
2621 if (self->ntrev > start)
2590 self->ntrev = (int)start;
2622 self->ntrev = (int)start;
2591 } else if (self->new_length) {
2623 } else if (self->new_length) {
2592 self->new_length = 0;
2624 self->new_length = 0;
2593 }
2625 }
2594
2626
2595 self->length = start;
2627 self->length = start;
2596 goto done;
2628 goto done;
2597 }
2629 }
2598
2630
2599 if (self->ntinitialized) {
2631 if (self->ntinitialized) {
2600 index_invalidate_added(self, start);
2632 index_invalidate_added(self, start);
2601 if (self->ntrev > start)
2633 if (self->ntrev > start)
2602 self->ntrev = (int)start;
2634 self->ntrev = (int)start;
2603 } else {
2635 } else {
2604 self->new_length = start - self->length;
2636 self->new_length = start - self->length;
2605 }
2637 }
2606 done:
2638 done:
2607 Py_CLEAR(self->headrevs);
2639 Py_CLEAR(self->headrevs);
2608 return ret;
2640 return ret;
2609 }
2641 }
2610
2642
2611 /*
2643 /*
2612 * Supported ops:
2644 * Supported ops:
2613 *
2645 *
2614 * slice deletion
2646 * slice deletion
2615 * string assignment (extend node->rev mapping)
2647 * string assignment (extend node->rev mapping)
2616 * string deletion (shrink node->rev mapping)
2648 * string deletion (shrink node->rev mapping)
2617 */
2649 */
2618 static int index_assign_subscript(indexObject *self, PyObject *item,
2650 static int index_assign_subscript(indexObject *self, PyObject *item,
2619 PyObject *value)
2651 PyObject *value)
2620 {
2652 {
2621 char *node;
2653 char *node;
2622 long rev;
2654 long rev;
2623
2655
2624 if (PySlice_Check(item) && value == NULL)
2656 if (PySlice_Check(item) && value == NULL)
2625 return index_slice_del(self, item);
2657 return index_slice_del(self, item);
2626
2658
2627 if (node_check(self->nodelen, item, &node) == -1)
2659 if (node_check(self->nodelen, item, &node) == -1)
2628 return -1;
2660 return -1;
2629
2661
2630 if (value == NULL)
2662 if (value == NULL)
2631 return self->ntinitialized ? nt_delete_node(&self->nt, node)
2663 return self->ntinitialized ? nt_delete_node(&self->nt, node)
2632 : 0;
2664 : 0;
2633 rev = PyInt_AsLong(value);
2665 rev = PyInt_AsLong(value);
2634 if (rev > INT_MAX || rev < 0) {
2666 if (rev > INT_MAX || rev < 0) {
2635 if (!PyErr_Occurred())
2667 if (!PyErr_Occurred())
2636 PyErr_SetString(PyExc_ValueError, "rev out of range");
2668 PyErr_SetString(PyExc_ValueError, "rev out of range");
2637 return -1;
2669 return -1;
2638 }
2670 }
2639
2671
2640 if (index_init_nt(self) == -1)
2672 if (index_init_nt(self) == -1)
2641 return -1;
2673 return -1;
2642 return nt_insert(&self->nt, node, (int)rev);
2674 return nt_insert(&self->nt, node, (int)rev);
2643 }
2675 }
2644
2676
2645 /*
2677 /*
2646 * Find all RevlogNG entries in an index that has inline data. Update
2678 * Find all RevlogNG entries in an index that has inline data. Update
2647 * the optional "offsets" table with those entries.
2679 * the optional "offsets" table with those entries.
2648 */
2680 */
2649 static Py_ssize_t inline_scan(indexObject *self, const char **offsets)
2681 static Py_ssize_t inline_scan(indexObject *self, const char **offsets)
2650 {
2682 {
2651 const char *data = (const char *)self->buf.buf;
2683 const char *data = (const char *)self->buf.buf;
2652 Py_ssize_t pos = 0;
2684 Py_ssize_t pos = 0;
2653 Py_ssize_t end = self->buf.len;
2685 Py_ssize_t end = self->buf.len;
2654 long incr = self->hdrsize;
2686 long incr = self->hdrsize;
2655 Py_ssize_t len = 0;
2687 Py_ssize_t len = 0;
2656
2688
2657 while (pos + self->hdrsize <= end && pos >= 0) {
2689 while (pos + self->hdrsize <= end && pos >= 0) {
2658 uint32_t comp_len, sidedata_comp_len = 0;
2690 uint32_t comp_len, sidedata_comp_len = 0;
2659 /* 3rd element of header is length of compressed inline data */
2691 /* 3rd element of header is length of compressed inline data */
2660 comp_len = getbe32(data + pos + 8);
2692 comp_len = getbe32(data + pos + 8);
2661 if (self->hdrsize == v2_hdrsize) {
2693 if (self->hdrsize == v2_hdrsize) {
2662 sidedata_comp_len = getbe32(data + pos + 72);
2694 sidedata_comp_len = getbe32(data + pos + 72);
2663 }
2695 }
2664 incr = self->hdrsize + comp_len + sidedata_comp_len;
2696 incr = self->hdrsize + comp_len + sidedata_comp_len;
2665 if (offsets)
2697 if (offsets)
2666 offsets[len] = data + pos;
2698 offsets[len] = data + pos;
2667 len++;
2699 len++;
2668 pos += incr;
2700 pos += incr;
2669 }
2701 }
2670
2702
2671 if (pos != end) {
2703 if (pos != end) {
2672 if (!PyErr_Occurred())
2704 if (!PyErr_Occurred())
2673 PyErr_SetString(PyExc_ValueError, "corrupt index file");
2705 PyErr_SetString(PyExc_ValueError, "corrupt index file");
2674 return -1;
2706 return -1;
2675 }
2707 }
2676
2708
2677 return len;
2709 return len;
2678 }
2710 }
2679
2711
2680 static int index_init(indexObject *self, PyObject *args, PyObject *kwargs)
2712 static int index_init(indexObject *self, PyObject *args, PyObject *kwargs)
2681 {
2713 {
2682 PyObject *data_obj, *inlined_obj, *revlogv2;
2714 PyObject *data_obj, *inlined_obj, *revlogv2;
2683 Py_ssize_t size;
2715 Py_ssize_t size;
2684
2716
2685 static char *kwlist[] = {"data", "inlined", "revlogv2", NULL};
2717 static char *kwlist[] = {"data", "inlined", "revlogv2", NULL};
2686
2718
2687 /* Initialize before argument-checking to avoid index_dealloc() crash.
2719 /* Initialize before argument-checking to avoid index_dealloc() crash.
2688 */
2720 */
2689 self->added = NULL;
2721 self->added = NULL;
2690 self->new_length = 0;
2722 self->new_length = 0;
2691 self->added_length = 0;
2723 self->added_length = 0;
2692 self->data = NULL;
2724 self->data = NULL;
2693 memset(&self->buf, 0, sizeof(self->buf));
2725 memset(&self->buf, 0, sizeof(self->buf));
2694 self->headrevs = NULL;
2726 self->headrevs = NULL;
2695 self->filteredrevs = Py_None;
2727 self->filteredrevs = Py_None;
2696 Py_INCREF(Py_None);
2728 Py_INCREF(Py_None);
2697 self->ntinitialized = 0;
2729 self->ntinitialized = 0;
2698 self->offsets = NULL;
2730 self->offsets = NULL;
2699 self->nodelen = 20;
2731 self->nodelen = 20;
2700 self->nullentry = NULL;
2732 self->nullentry = NULL;
2701
2733
2702 revlogv2 = NULL;
2734 revlogv2 = NULL;
2703 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "OO|O", kwlist,
2735 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "OO|O", kwlist,
2704 &data_obj, &inlined_obj, &revlogv2))
2736 &data_obj, &inlined_obj, &revlogv2))
2705 return -1;
2737 return -1;
2706 if (!PyObject_CheckBuffer(data_obj)) {
2738 if (!PyObject_CheckBuffer(data_obj)) {
2707 PyErr_SetString(PyExc_TypeError,
2739 PyErr_SetString(PyExc_TypeError,
2708 "data does not support buffer interface");
2740 "data does not support buffer interface");
2709 return -1;
2741 return -1;
2710 }
2742 }
2711 if (self->nodelen < 20 || self->nodelen > (Py_ssize_t)sizeof(nullid)) {
2743 if (self->nodelen < 20 || self->nodelen > (Py_ssize_t)sizeof(nullid)) {
2712 PyErr_SetString(PyExc_RuntimeError, "unsupported node size");
2744 PyErr_SetString(PyExc_RuntimeError, "unsupported node size");
2713 return -1;
2745 return -1;
2714 }
2746 }
2715
2747
2716 if (revlogv2 && PyObject_IsTrue(revlogv2)) {
2748 if (revlogv2 && PyObject_IsTrue(revlogv2)) {
2717 self->hdrsize = v2_hdrsize;
2749 self->hdrsize = v2_hdrsize;
2718 } else {
2750 } else {
2719 self->hdrsize = v1_hdrsize;
2751 self->hdrsize = v1_hdrsize;
2720 }
2752 }
2721
2753
2722 if (self->hdrsize == v1_hdrsize) {
2754 if (self->hdrsize == v1_hdrsize) {
2723 self->nullentry =
2755 self->nullentry =
2724 Py_BuildValue(PY23("iiiiiiis#", "iiiiiiiy#"), 0, 0, 0, -1,
2756 Py_BuildValue(PY23("iiiiiiis#", "iiiiiiiy#"), 0, 0, 0, -1,
2725 -1, -1, -1, nullid, self->nodelen);
2757 -1, -1, -1, nullid, self->nodelen);
2726 } else {
2758 } else {
2727 self->nullentry =
2759 self->nullentry =
2728 Py_BuildValue(PY23("iiiiiiis#ii", "iiiiiiiy#ii"), 0, 0, 0,
2760 Py_BuildValue(PY23("iiiiiiis#ii", "iiiiiiiy#ii"), 0, 0, 0,
2729 -1, -1, -1, -1, nullid, self->nodelen, 0, 0);
2761 -1, -1, -1, -1, nullid, self->nodelen, 0, 0);
2730 }
2762 }
2731
2763
2732 if (!self->nullentry)
2764 if (!self->nullentry)
2733 return -1;
2765 return -1;
2734 PyObject_GC_UnTrack(self->nullentry);
2766 PyObject_GC_UnTrack(self->nullentry);
2735
2767
2736 if (PyObject_GetBuffer(data_obj, &self->buf, PyBUF_SIMPLE) == -1)
2768 if (PyObject_GetBuffer(data_obj, &self->buf, PyBUF_SIMPLE) == -1)
2737 return -1;
2769 return -1;
2738 size = self->buf.len;
2770 size = self->buf.len;
2739
2771
2740 self->inlined = inlined_obj && PyObject_IsTrue(inlined_obj);
2772 self->inlined = inlined_obj && PyObject_IsTrue(inlined_obj);
2741 self->data = data_obj;
2773 self->data = data_obj;
2742
2774
2743 self->ntlookups = self->ntmisses = 0;
2775 self->ntlookups = self->ntmisses = 0;
2744 self->ntrev = -1;
2776 self->ntrev = -1;
2745 Py_INCREF(self->data);
2777 Py_INCREF(self->data);
2746
2778
2747 if (self->inlined) {
2779 if (self->inlined) {
2748 Py_ssize_t len = inline_scan(self, NULL);
2780 Py_ssize_t len = inline_scan(self, NULL);
2749 if (len == -1)
2781 if (len == -1)
2750 goto bail;
2782 goto bail;
2751 self->length = len;
2783 self->length = len;
2752 } else {
2784 } else {
2753 if (size % self->hdrsize) {
2785 if (size % self->hdrsize) {
2754 PyErr_SetString(PyExc_ValueError, "corrupt index file");
2786 PyErr_SetString(PyExc_ValueError, "corrupt index file");
2755 goto bail;
2787 goto bail;
2756 }
2788 }
2757 self->length = size / self->hdrsize;
2789 self->length = size / self->hdrsize;
2758 }
2790 }
2759
2791
2760 return 0;
2792 return 0;
2761 bail:
2793 bail:
2762 return -1;
2794 return -1;
2763 }
2795 }
2764
2796
2765 static PyObject *index_nodemap(indexObject *self)
2797 static PyObject *index_nodemap(indexObject *self)
2766 {
2798 {
2767 Py_INCREF(self);
2799 Py_INCREF(self);
2768 return (PyObject *)self;
2800 return (PyObject *)self;
2769 }
2801 }
2770
2802
2771 static void _index_clearcaches(indexObject *self)
2803 static void _index_clearcaches(indexObject *self)
2772 {
2804 {
2773 if (self->offsets) {
2805 if (self->offsets) {
2774 PyMem_Free((void *)self->offsets);
2806 PyMem_Free((void *)self->offsets);
2775 self->offsets = NULL;
2807 self->offsets = NULL;
2776 }
2808 }
2777 if (self->ntinitialized) {
2809 if (self->ntinitialized) {
2778 nt_dealloc(&self->nt);
2810 nt_dealloc(&self->nt);
2779 }
2811 }
2780 self->ntinitialized = 0;
2812 self->ntinitialized = 0;
2781 Py_CLEAR(self->headrevs);
2813 Py_CLEAR(self->headrevs);
2782 }
2814 }
2783
2815
2784 static PyObject *index_clearcaches(indexObject *self)
2816 static PyObject *index_clearcaches(indexObject *self)
2785 {
2817 {
2786 _index_clearcaches(self);
2818 _index_clearcaches(self);
2787 self->ntrev = -1;
2819 self->ntrev = -1;
2788 self->ntlookups = self->ntmisses = 0;
2820 self->ntlookups = self->ntmisses = 0;
2789 Py_RETURN_NONE;
2821 Py_RETURN_NONE;
2790 }
2822 }
2791
2823
2792 static void index_dealloc(indexObject *self)
2824 static void index_dealloc(indexObject *self)
2793 {
2825 {
2794 _index_clearcaches(self);
2826 _index_clearcaches(self);
2795 Py_XDECREF(self->filteredrevs);
2827 Py_XDECREF(self->filteredrevs);
2796 if (self->buf.buf) {
2828 if (self->buf.buf) {
2797 PyBuffer_Release(&self->buf);
2829 PyBuffer_Release(&self->buf);
2798 memset(&self->buf, 0, sizeof(self->buf));
2830 memset(&self->buf, 0, sizeof(self->buf));
2799 }
2831 }
2800 Py_XDECREF(self->data);
2832 Py_XDECREF(self->data);
2801 PyMem_Free(self->added);
2833 PyMem_Free(self->added);
2802 Py_XDECREF(self->nullentry);
2834 Py_XDECREF(self->nullentry);
2803 PyObject_Del(self);
2835 PyObject_Del(self);
2804 }
2836 }
2805
2837
2806 static PySequenceMethods index_sequence_methods = {
2838 static PySequenceMethods index_sequence_methods = {
2807 (lenfunc)index_length, /* sq_length */
2839 (lenfunc)index_length, /* sq_length */
2808 0, /* sq_concat */
2840 0, /* sq_concat */
2809 0, /* sq_repeat */
2841 0, /* sq_repeat */
2810 (ssizeargfunc)index_get, /* sq_item */
2842 (ssizeargfunc)index_get, /* sq_item */
2811 0, /* sq_slice */
2843 0, /* sq_slice */
2812 0, /* sq_ass_item */
2844 0, /* sq_ass_item */
2813 0, /* sq_ass_slice */
2845 0, /* sq_ass_slice */
2814 (objobjproc)index_contains, /* sq_contains */
2846 (objobjproc)index_contains, /* sq_contains */
2815 };
2847 };
2816
2848
2817 static PyMappingMethods index_mapping_methods = {
2849 static PyMappingMethods index_mapping_methods = {
2818 (lenfunc)index_length, /* mp_length */
2850 (lenfunc)index_length, /* mp_length */
2819 (binaryfunc)index_getitem, /* mp_subscript */
2851 (binaryfunc)index_getitem, /* mp_subscript */
2820 (objobjargproc)index_assign_subscript, /* mp_ass_subscript */
2852 (objobjargproc)index_assign_subscript, /* mp_ass_subscript */
2821 };
2853 };
2822
2854
2823 static PyMethodDef index_methods[] = {
2855 static PyMethodDef index_methods[] = {
2824 {"ancestors", (PyCFunction)index_ancestors, METH_VARARGS,
2856 {"ancestors", (PyCFunction)index_ancestors, METH_VARARGS,
2825 "return the gca set of the given revs"},
2857 "return the gca set of the given revs"},
2826 {"commonancestorsheads", (PyCFunction)index_commonancestorsheads,
2858 {"commonancestorsheads", (PyCFunction)index_commonancestorsheads,
2827 METH_VARARGS,
2859 METH_VARARGS,
2828 "return the heads of the common ancestors of the given revs"},
2860 "return the heads of the common ancestors of the given revs"},
2829 {"clearcaches", (PyCFunction)index_clearcaches, METH_NOARGS,
2861 {"clearcaches", (PyCFunction)index_clearcaches, METH_NOARGS,
2830 "clear the index caches"},
2862 "clear the index caches"},
2831 {"get", (PyCFunction)index_m_get, METH_VARARGS, "get an index entry"},
2863 {"get", (PyCFunction)index_m_get, METH_VARARGS, "get an index entry"},
2832 {"get_rev", (PyCFunction)index_m_get, METH_VARARGS,
2864 {"get_rev", (PyCFunction)index_m_get, METH_VARARGS,
2833 "return `rev` associated with a node or None"},
2865 "return `rev` associated with a node or None"},
2834 {"has_node", (PyCFunction)index_m_has_node, METH_O,
2866 {"has_node", (PyCFunction)index_m_has_node, METH_O,
2835 "return True if the node exist in the index"},
2867 "return True if the node exist in the index"},
2836 {"rev", (PyCFunction)index_m_rev, METH_O,
2868 {"rev", (PyCFunction)index_m_rev, METH_O,
2837 "return `rev` associated with a node or raise RevlogError"},
2869 "return `rev` associated with a node or raise RevlogError"},
2838 {"computephasesmapsets", (PyCFunction)compute_phases_map_sets, METH_VARARGS,
2870 {"computephasesmapsets", (PyCFunction)compute_phases_map_sets, METH_VARARGS,
2839 "compute phases"},
2871 "compute phases"},
2840 {"reachableroots2", (PyCFunction)reachableroots2, METH_VARARGS,
2872 {"reachableroots2", (PyCFunction)reachableroots2, METH_VARARGS,
2841 "reachableroots"},
2873 "reachableroots"},
2842 {"replace_sidedata_info", (PyCFunction)index_replace_sidedata_info,
2874 {"replace_sidedata_info", (PyCFunction)index_replace_sidedata_info,
2843 METH_VARARGS, "replace an existing index entry with a new value"},
2875 METH_VARARGS, "replace an existing index entry with a new value"},
2844 {"headrevs", (PyCFunction)index_headrevs, METH_VARARGS,
2876 {"headrevs", (PyCFunction)index_headrevs, METH_VARARGS,
2845 "get head revisions"}, /* Can do filtering since 3.2 */
2877 "get head revisions"}, /* Can do filtering since 3.2 */
2846 {"headrevsfiltered", (PyCFunction)index_headrevs, METH_VARARGS,
2878 {"headrevsfiltered", (PyCFunction)index_headrevs, METH_VARARGS,
2847 "get filtered head revisions"}, /* Can always do filtering */
2879 "get filtered head revisions"}, /* Can always do filtering */
2848 {"issnapshot", (PyCFunction)index_issnapshot, METH_O,
2880 {"issnapshot", (PyCFunction)index_issnapshot, METH_O,
2849 "True if the object is a snapshot"},
2881 "True if the object is a snapshot"},
2850 {"findsnapshots", (PyCFunction)index_findsnapshots, METH_VARARGS,
2882 {"findsnapshots", (PyCFunction)index_findsnapshots, METH_VARARGS,
2851 "Gather snapshot data in a cache dict"},
2883 "Gather snapshot data in a cache dict"},
2852 {"deltachain", (PyCFunction)index_deltachain, METH_VARARGS,
2884 {"deltachain", (PyCFunction)index_deltachain, METH_VARARGS,
2853 "determine revisions with deltas to reconstruct fulltext"},
2885 "determine revisions with deltas to reconstruct fulltext"},
2854 {"slicechunktodensity", (PyCFunction)index_slicechunktodensity,
2886 {"slicechunktodensity", (PyCFunction)index_slicechunktodensity,
2855 METH_VARARGS, "determine revisions with deltas to reconstruct fulltext"},
2887 METH_VARARGS, "determine revisions with deltas to reconstruct fulltext"},
2856 {"append", (PyCFunction)index_append, METH_O, "append an index entry"},
2888 {"append", (PyCFunction)index_append, METH_O, "append an index entry"},
2857 {"partialmatch", (PyCFunction)index_partialmatch, METH_VARARGS,
2889 {"partialmatch", (PyCFunction)index_partialmatch, METH_VARARGS,
2858 "match a potentially ambiguous node ID"},
2890 "match a potentially ambiguous node ID"},
2859 {"shortest", (PyCFunction)index_shortest, METH_VARARGS,
2891 {"shortest", (PyCFunction)index_shortest, METH_VARARGS,
2860 "find length of shortest hex nodeid of a binary ID"},
2892 "find length of shortest hex nodeid of a binary ID"},
2861 {"stats", (PyCFunction)index_stats, METH_NOARGS, "stats for the index"},
2893 {"stats", (PyCFunction)index_stats, METH_NOARGS, "stats for the index"},
2894 {"entry_binary", (PyCFunction)index_entry_binary, METH_VARARGS,
2895 "return an entry in binary form"},
2862 {NULL} /* Sentinel */
2896 {NULL} /* Sentinel */
2863 };
2897 };
2864
2898
2865 static PyGetSetDef index_getset[] = {
2899 static PyGetSetDef index_getset[] = {
2866 {"nodemap", (getter)index_nodemap, NULL, "nodemap", NULL},
2900 {"nodemap", (getter)index_nodemap, NULL, "nodemap", NULL},
2867 {NULL} /* Sentinel */
2901 {NULL} /* Sentinel */
2868 };
2902 };
2869
2903
2870 static PyMemberDef index_members[] = {
2904 static PyMemberDef index_members[] = {
2871 {"entry_size", T_LONG, offsetof(indexObject, hdrsize), 0,
2905 {"entry_size", T_LONG, offsetof(indexObject, hdrsize), 0,
2872 "size of an index entry"},
2906 "size of an index entry"},
2873 {NULL} /* Sentinel */
2907 {NULL} /* Sentinel */
2874 };
2908 };
2875
2909
2876 PyTypeObject HgRevlogIndex_Type = {
2910 PyTypeObject HgRevlogIndex_Type = {
2877 PyVarObject_HEAD_INIT(NULL, 0) /* header */
2911 PyVarObject_HEAD_INIT(NULL, 0) /* header */
2878 "parsers.index", /* tp_name */
2912 "parsers.index", /* tp_name */
2879 sizeof(indexObject), /* tp_basicsize */
2913 sizeof(indexObject), /* tp_basicsize */
2880 0, /* tp_itemsize */
2914 0, /* tp_itemsize */
2881 (destructor)index_dealloc, /* tp_dealloc */
2915 (destructor)index_dealloc, /* tp_dealloc */
2882 0, /* tp_print */
2916 0, /* tp_print */
2883 0, /* tp_getattr */
2917 0, /* tp_getattr */
2884 0, /* tp_setattr */
2918 0, /* tp_setattr */
2885 0, /* tp_compare */
2919 0, /* tp_compare */
2886 0, /* tp_repr */
2920 0, /* tp_repr */
2887 0, /* tp_as_number */
2921 0, /* tp_as_number */
2888 &index_sequence_methods, /* tp_as_sequence */
2922 &index_sequence_methods, /* tp_as_sequence */
2889 &index_mapping_methods, /* tp_as_mapping */
2923 &index_mapping_methods, /* tp_as_mapping */
2890 0, /* tp_hash */
2924 0, /* tp_hash */
2891 0, /* tp_call */
2925 0, /* tp_call */
2892 0, /* tp_str */
2926 0, /* tp_str */
2893 0, /* tp_getattro */
2927 0, /* tp_getattro */
2894 0, /* tp_setattro */
2928 0, /* tp_setattro */
2895 0, /* tp_as_buffer */
2929 0, /* tp_as_buffer */
2896 Py_TPFLAGS_DEFAULT, /* tp_flags */
2930 Py_TPFLAGS_DEFAULT, /* tp_flags */
2897 "revlog index", /* tp_doc */
2931 "revlog index", /* tp_doc */
2898 0, /* tp_traverse */
2932 0, /* tp_traverse */
2899 0, /* tp_clear */
2933 0, /* tp_clear */
2900 0, /* tp_richcompare */
2934 0, /* tp_richcompare */
2901 0, /* tp_weaklistoffset */
2935 0, /* tp_weaklistoffset */
2902 0, /* tp_iter */
2936 0, /* tp_iter */
2903 0, /* tp_iternext */
2937 0, /* tp_iternext */
2904 index_methods, /* tp_methods */
2938 index_methods, /* tp_methods */
2905 index_members, /* tp_members */
2939 index_members, /* tp_members */
2906 index_getset, /* tp_getset */
2940 index_getset, /* tp_getset */
2907 0, /* tp_base */
2941 0, /* tp_base */
2908 0, /* tp_dict */
2942 0, /* tp_dict */
2909 0, /* tp_descr_get */
2943 0, /* tp_descr_get */
2910 0, /* tp_descr_set */
2944 0, /* tp_descr_set */
2911 0, /* tp_dictoffset */
2945 0, /* tp_dictoffset */
2912 (initproc)index_init, /* tp_init */
2946 (initproc)index_init, /* tp_init */
2913 0, /* tp_alloc */
2947 0, /* tp_alloc */
2914 };
2948 };
2915
2949
2916 /*
2950 /*
2917 * returns a tuple of the form (index, cache) with elements as
2951 * returns a tuple of the form (index, cache) with elements as
2918 * follows:
2952 * follows:
2919 *
2953 *
2920 * index: an index object that lazily parses Revlog (v1 or v2) records
2954 * index: an index object that lazily parses Revlog (v1 or v2) records
2921 * cache: if data is inlined, a tuple (0, index_file_content), else None
2955 * cache: if data is inlined, a tuple (0, index_file_content), else None
2922 * index_file_content could be a string, or a buffer
2956 * index_file_content could be a string, or a buffer
2923 *
2957 *
2924 * added complications are for backwards compatibility
2958 * added complications are for backwards compatibility
2925 */
2959 */
2926 PyObject *parse_index2(PyObject *self, PyObject *args, PyObject *kwargs)
2960 PyObject *parse_index2(PyObject *self, PyObject *args, PyObject *kwargs)
2927 {
2961 {
2928 PyObject *cache = NULL;
2962 PyObject *cache = NULL;
2929 indexObject *idx;
2963 indexObject *idx;
2930 int ret;
2964 int ret;
2931
2965
2932 idx = PyObject_New(indexObject, &HgRevlogIndex_Type);
2966 idx = PyObject_New(indexObject, &HgRevlogIndex_Type);
2933 if (idx == NULL)
2967 if (idx == NULL)
2934 goto bail;
2968 goto bail;
2935
2969
2936 ret = index_init(idx, args, kwargs);
2970 ret = index_init(idx, args, kwargs);
2937 if (ret == -1)
2971 if (ret == -1)
2938 goto bail;
2972 goto bail;
2939
2973
2940 if (idx->inlined) {
2974 if (idx->inlined) {
2941 cache = Py_BuildValue("iO", 0, idx->data);
2975 cache = Py_BuildValue("iO", 0, idx->data);
2942 if (cache == NULL)
2976 if (cache == NULL)
2943 goto bail;
2977 goto bail;
2944 } else {
2978 } else {
2945 cache = Py_None;
2979 cache = Py_None;
2946 Py_INCREF(cache);
2980 Py_INCREF(cache);
2947 }
2981 }
2948
2982
2949 return Py_BuildValue("NN", idx, cache);
2983 return Py_BuildValue("NN", idx, cache);
2950
2984
2951 bail:
2985 bail:
2952 Py_XDECREF(idx);
2986 Py_XDECREF(idx);
2953 Py_XDECREF(cache);
2987 Py_XDECREF(cache);
2954 return NULL;
2988 return NULL;
2955 }
2989 }
2956
2990
2957 static Revlog_CAPI CAPI = {
2991 static Revlog_CAPI CAPI = {
2958 /* increment the abi_version field upon each change in the Revlog_CAPI
2992 /* increment the abi_version field upon each change in the Revlog_CAPI
2959 struct or in the ABI of the listed functions */
2993 struct or in the ABI of the listed functions */
2960 2,
2994 2,
2961 index_length,
2995 index_length,
2962 index_node,
2996 index_node,
2963 HgRevlogIndex_GetParents,
2997 HgRevlogIndex_GetParents,
2964 };
2998 };
2965
2999
2966 void revlog_module_init(PyObject *mod)
3000 void revlog_module_init(PyObject *mod)
2967 {
3001 {
2968 PyObject *caps = NULL;
3002 PyObject *caps = NULL;
2969 HgRevlogIndex_Type.tp_new = PyType_GenericNew;
3003 HgRevlogIndex_Type.tp_new = PyType_GenericNew;
2970 if (PyType_Ready(&HgRevlogIndex_Type) < 0)
3004 if (PyType_Ready(&HgRevlogIndex_Type) < 0)
2971 return;
3005 return;
2972 Py_INCREF(&HgRevlogIndex_Type);
3006 Py_INCREF(&HgRevlogIndex_Type);
2973 PyModule_AddObject(mod, "index", (PyObject *)&HgRevlogIndex_Type);
3007 PyModule_AddObject(mod, "index", (PyObject *)&HgRevlogIndex_Type);
2974
3008
2975 nodetreeType.tp_new = PyType_GenericNew;
3009 nodetreeType.tp_new = PyType_GenericNew;
2976 if (PyType_Ready(&nodetreeType) < 0)
3010 if (PyType_Ready(&nodetreeType) < 0)
2977 return;
3011 return;
2978 Py_INCREF(&nodetreeType);
3012 Py_INCREF(&nodetreeType);
2979 PyModule_AddObject(mod, "nodetree", (PyObject *)&nodetreeType);
3013 PyModule_AddObject(mod, "nodetree", (PyObject *)&nodetreeType);
2980
3014
2981 caps = PyCapsule_New(&CAPI, "mercurial.cext.parsers.revlog_CAPI", NULL);
3015 caps = PyCapsule_New(&CAPI, "mercurial.cext.parsers.revlog_CAPI", NULL);
2982 if (caps != NULL)
3016 if (caps != NULL)
2983 PyModule_AddObject(mod, "revlog_CAPI", caps);
3017 PyModule_AddObject(mod, "revlog_CAPI", caps);
2984 }
3018 }
@@ -1,356 +1,380 b''
1 # parsers.py - Python implementation of parsers.c
1 # parsers.py - Python implementation of parsers.c
2 #
2 #
3 # Copyright 2009 Olivia Mackall <olivia@selenic.com> and others
3 # Copyright 2009 Olivia Mackall <olivia@selenic.com> and others
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import absolute_import
8 from __future__ import absolute_import
9
9
10 import struct
10 import struct
11 import zlib
11 import zlib
12
12
13 from ..node import (
13 from ..node import (
14 nullrev,
14 nullrev,
15 sha1nodeconstants,
15 sha1nodeconstants,
16 )
16 )
17 from .. import (
17 from .. import (
18 pycompat,
18 pycompat,
19 util,
19 util,
20 )
20 )
21
21
22 from ..revlogutils import nodemap as nodemaputil
22 from ..revlogutils import nodemap as nodemaputil
23 from ..revlogutils import constants as revlog_constants
23 from ..revlogutils import constants as revlog_constants
24
24
25 stringio = pycompat.bytesio
25 stringio = pycompat.bytesio
26
26
27
27
28 _pack = struct.pack
28 _pack = struct.pack
29 _unpack = struct.unpack
29 _unpack = struct.unpack
30 _compress = zlib.compress
30 _compress = zlib.compress
31 _decompress = zlib.decompress
31 _decompress = zlib.decompress
32
32
33 # Some code below makes tuples directly because it's more convenient. However,
33 # Some code below makes tuples directly because it's more convenient. However,
34 # code outside this module should always use dirstatetuple.
34 # code outside this module should always use dirstatetuple.
35 def dirstatetuple(*x):
35 def dirstatetuple(*x):
36 # x is a tuple
36 # x is a tuple
37 return x
37 return x
38
38
39
39
40 def gettype(q):
40 def gettype(q):
41 return int(q & 0xFFFF)
41 return int(q & 0xFFFF)
42
42
43
43
44 def offset_type(offset, type):
44 def offset_type(offset, type):
45 return int(int(offset) << 16 | type)
45 return int(int(offset) << 16 | type)
46
46
47
47
48 class BaseIndexObject(object):
48 class BaseIndexObject(object):
49 # Format of an index entry according to Python's `struct` language
49 # Format of an index entry according to Python's `struct` language
50 index_format = revlog_constants.INDEX_ENTRY_V1
50 index_format = revlog_constants.INDEX_ENTRY_V1
51 # Size of a C unsigned long long int, platform independent
51 # Size of a C unsigned long long int, platform independent
52 big_int_size = struct.calcsize(b'>Q')
52 big_int_size = struct.calcsize(b'>Q')
53 # Size of a C long int, platform independent
53 # Size of a C long int, platform independent
54 int_size = struct.calcsize(b'>i')
54 int_size = struct.calcsize(b'>i')
55 # An empty index entry, used as a default value to be overridden, or nullrev
55 # An empty index entry, used as a default value to be overridden, or nullrev
56 null_item = (0, 0, 0, -1, -1, -1, -1, sha1nodeconstants.nullid)
56 null_item = (0, 0, 0, -1, -1, -1, -1, sha1nodeconstants.nullid)
57
57
58 @util.propertycache
58 @util.propertycache
59 def entry_size(self):
59 def entry_size(self):
60 return self.index_format.size
60 return self.index_format.size
61
61
62 @property
62 @property
63 def nodemap(self):
63 def nodemap(self):
64 msg = b"index.nodemap is deprecated, use index.[has_node|rev|get_rev]"
64 msg = b"index.nodemap is deprecated, use index.[has_node|rev|get_rev]"
65 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
65 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
66 return self._nodemap
66 return self._nodemap
67
67
68 @util.propertycache
68 @util.propertycache
69 def _nodemap(self):
69 def _nodemap(self):
70 nodemap = nodemaputil.NodeMap({sha1nodeconstants.nullid: nullrev})
70 nodemap = nodemaputil.NodeMap({sha1nodeconstants.nullid: nullrev})
71 for r in range(0, len(self)):
71 for r in range(0, len(self)):
72 n = self[r][7]
72 n = self[r][7]
73 nodemap[n] = r
73 nodemap[n] = r
74 return nodemap
74 return nodemap
75
75
76 def has_node(self, node):
76 def has_node(self, node):
77 """return True if the node exist in the index"""
77 """return True if the node exist in the index"""
78 return node in self._nodemap
78 return node in self._nodemap
79
79
80 def rev(self, node):
80 def rev(self, node):
81 """return a revision for a node
81 """return a revision for a node
82
82
83 If the node is unknown, raise a RevlogError"""
83 If the node is unknown, raise a RevlogError"""
84 return self._nodemap[node]
84 return self._nodemap[node]
85
85
86 def get_rev(self, node):
86 def get_rev(self, node):
87 """return a revision for a node
87 """return a revision for a node
88
88
89 If the node is unknown, return None"""
89 If the node is unknown, return None"""
90 return self._nodemap.get(node)
90 return self._nodemap.get(node)
91
91
92 def _stripnodes(self, start):
92 def _stripnodes(self, start):
93 if '_nodemap' in vars(self):
93 if '_nodemap' in vars(self):
94 for r in range(start, len(self)):
94 for r in range(start, len(self)):
95 n = self[r][7]
95 n = self[r][7]
96 del self._nodemap[n]
96 del self._nodemap[n]
97
97
98 def clearcaches(self):
98 def clearcaches(self):
99 self.__dict__.pop('_nodemap', None)
99 self.__dict__.pop('_nodemap', None)
100
100
101 def __len__(self):
101 def __len__(self):
102 return self._lgt + len(self._extra)
102 return self._lgt + len(self._extra)
103
103
104 def append(self, tup):
104 def append(self, tup):
105 if '_nodemap' in vars(self):
105 if '_nodemap' in vars(self):
106 self._nodemap[tup[7]] = len(self)
106 self._nodemap[tup[7]] = len(self)
107 data = self.index_format.pack(*tup)
107 data = self.index_format.pack(*tup)
108 self._extra.append(data)
108 self._extra.append(data)
109
109
110 def _check_index(self, i):
110 def _check_index(self, i):
111 if not isinstance(i, int):
111 if not isinstance(i, int):
112 raise TypeError(b"expecting int indexes")
112 raise TypeError(b"expecting int indexes")
113 if i < 0 or i >= len(self):
113 if i < 0 or i >= len(self):
114 raise IndexError
114 raise IndexError
115
115
116 def __getitem__(self, i):
116 def __getitem__(self, i):
117 if i == -1:
117 if i == -1:
118 return self.null_item
118 return self.null_item
119 self._check_index(i)
119 self._check_index(i)
120 if i >= self._lgt:
120 if i >= self._lgt:
121 data = self._extra[i - self._lgt]
121 data = self._extra[i - self._lgt]
122 else:
122 else:
123 index = self._calculate_index(i)
123 index = self._calculate_index(i)
124 data = self._data[index : index + self.entry_size]
124 data = self._data[index : index + self.entry_size]
125 r = self.index_format.unpack(data)
125 r = self.index_format.unpack(data)
126 if self._lgt and i == 0:
126 if self._lgt and i == 0:
127 r = (offset_type(0, gettype(r[0])),) + r[1:]
127 r = (offset_type(0, gettype(r[0])),) + r[1:]
128 return r
128 return r
129
129
130 def entry_binary(self, rev, header):
131 """return the raw binary string representing a revision"""
132 entry = self[rev]
133 p = revlog_constants.INDEX_ENTRY_V1.pack(*entry)
134 if rev == 0:
135 v_fmt = revlog_constants.INDEX_HEADER
136 v_bin = v_fmt.pack(header)
137 p = v_bin + p[v_fmt.size :]
138 return p
139
130
140
131 class IndexObject(BaseIndexObject):
141 class IndexObject(BaseIndexObject):
132 def __init__(self, data):
142 def __init__(self, data):
133 assert len(data) % self.entry_size == 0
143 assert len(data) % self.entry_size == 0, (
144 len(data),
145 self.entry_size,
146 len(data) % self.entry_size,
147 )
134 self._data = data
148 self._data = data
135 self._lgt = len(data) // self.entry_size
149 self._lgt = len(data) // self.entry_size
136 self._extra = []
150 self._extra = []
137
151
138 def _calculate_index(self, i):
152 def _calculate_index(self, i):
139 return i * self.entry_size
153 return i * self.entry_size
140
154
141 def __delitem__(self, i):
155 def __delitem__(self, i):
142 if not isinstance(i, slice) or not i.stop == -1 or i.step is not None:
156 if not isinstance(i, slice) or not i.stop == -1 or i.step is not None:
143 raise ValueError(b"deleting slices only supports a:-1 with step 1")
157 raise ValueError(b"deleting slices only supports a:-1 with step 1")
144 i = i.start
158 i = i.start
145 self._check_index(i)
159 self._check_index(i)
146 self._stripnodes(i)
160 self._stripnodes(i)
147 if i < self._lgt:
161 if i < self._lgt:
148 self._data = self._data[: i * self.entry_size]
162 self._data = self._data[: i * self.entry_size]
149 self._lgt = i
163 self._lgt = i
150 self._extra = []
164 self._extra = []
151 else:
165 else:
152 self._extra = self._extra[: i - self._lgt]
166 self._extra = self._extra[: i - self._lgt]
153
167
154
168
155 class PersistentNodeMapIndexObject(IndexObject):
169 class PersistentNodeMapIndexObject(IndexObject):
156 """a Debug oriented class to test persistent nodemap
170 """a Debug oriented class to test persistent nodemap
157
171
158 We need a simple python object to test API and higher level behavior. See
172 We need a simple python object to test API and higher level behavior. See
159 the Rust implementation for more serious usage. This should be used only
173 the Rust implementation for more serious usage. This should be used only
160 through the dedicated `devel.persistent-nodemap` config.
174 through the dedicated `devel.persistent-nodemap` config.
161 """
175 """
162
176
163 def nodemap_data_all(self):
177 def nodemap_data_all(self):
164 """Return bytes containing a full serialization of a nodemap
178 """Return bytes containing a full serialization of a nodemap
165
179
166 The nodemap should be valid for the full set of revisions in the
180 The nodemap should be valid for the full set of revisions in the
167 index."""
181 index."""
168 return nodemaputil.persistent_data(self)
182 return nodemaputil.persistent_data(self)
169
183
170 def nodemap_data_incremental(self):
184 def nodemap_data_incremental(self):
171 """Return bytes containing a incremental update to persistent nodemap
185 """Return bytes containing a incremental update to persistent nodemap
172
186
173 This containst the data for an append-only update of the data provided
187 This containst the data for an append-only update of the data provided
174 in the last call to `update_nodemap_data`.
188 in the last call to `update_nodemap_data`.
175 """
189 """
176 if self._nm_root is None:
190 if self._nm_root is None:
177 return None
191 return None
178 docket = self._nm_docket
192 docket = self._nm_docket
179 changed, data = nodemaputil.update_persistent_data(
193 changed, data = nodemaputil.update_persistent_data(
180 self, self._nm_root, self._nm_max_idx, self._nm_docket.tip_rev
194 self, self._nm_root, self._nm_max_idx, self._nm_docket.tip_rev
181 )
195 )
182
196
183 self._nm_root = self._nm_max_idx = self._nm_docket = None
197 self._nm_root = self._nm_max_idx = self._nm_docket = None
184 return docket, changed, data
198 return docket, changed, data
185
199
186 def update_nodemap_data(self, docket, nm_data):
200 def update_nodemap_data(self, docket, nm_data):
187 """provide full block of persisted binary data for a nodemap
201 """provide full block of persisted binary data for a nodemap
188
202
189 The data are expected to come from disk. See `nodemap_data_all` for a
203 The data are expected to come from disk. See `nodemap_data_all` for a
190 produceur of such data."""
204 produceur of such data."""
191 if nm_data is not None:
205 if nm_data is not None:
192 self._nm_root, self._nm_max_idx = nodemaputil.parse_data(nm_data)
206 self._nm_root, self._nm_max_idx = nodemaputil.parse_data(nm_data)
193 if self._nm_root:
207 if self._nm_root:
194 self._nm_docket = docket
208 self._nm_docket = docket
195 else:
209 else:
196 self._nm_root = self._nm_max_idx = self._nm_docket = None
210 self._nm_root = self._nm_max_idx = self._nm_docket = None
197
211
198
212
199 class InlinedIndexObject(BaseIndexObject):
213 class InlinedIndexObject(BaseIndexObject):
200 def __init__(self, data, inline=0):
214 def __init__(self, data, inline=0):
201 self._data = data
215 self._data = data
202 self._lgt = self._inline_scan(None)
216 self._lgt = self._inline_scan(None)
203 self._inline_scan(self._lgt)
217 self._inline_scan(self._lgt)
204 self._extra = []
218 self._extra = []
205
219
206 def _inline_scan(self, lgt):
220 def _inline_scan(self, lgt):
207 off = 0
221 off = 0
208 if lgt is not None:
222 if lgt is not None:
209 self._offsets = [0] * lgt
223 self._offsets = [0] * lgt
210 count = 0
224 count = 0
211 while off <= len(self._data) - self.entry_size:
225 while off <= len(self._data) - self.entry_size:
212 start = off + self.big_int_size
226 start = off + self.big_int_size
213 (s,) = struct.unpack(
227 (s,) = struct.unpack(
214 b'>i',
228 b'>i',
215 self._data[start : start + self.int_size],
229 self._data[start : start + self.int_size],
216 )
230 )
217 if lgt is not None:
231 if lgt is not None:
218 self._offsets[count] = off
232 self._offsets[count] = off
219 count += 1
233 count += 1
220 off += self.entry_size + s
234 off += self.entry_size + s
221 if off != len(self._data):
235 if off != len(self._data):
222 raise ValueError(b"corrupted data")
236 raise ValueError(b"corrupted data")
223 return count
237 return count
224
238
225 def __delitem__(self, i):
239 def __delitem__(self, i):
226 if not isinstance(i, slice) or not i.stop == -1 or i.step is not None:
240 if not isinstance(i, slice) or not i.stop == -1 or i.step is not None:
227 raise ValueError(b"deleting slices only supports a:-1 with step 1")
241 raise ValueError(b"deleting slices only supports a:-1 with step 1")
228 i = i.start
242 i = i.start
229 self._check_index(i)
243 self._check_index(i)
230 self._stripnodes(i)
244 self._stripnodes(i)
231 if i < self._lgt:
245 if i < self._lgt:
232 self._offsets = self._offsets[:i]
246 self._offsets = self._offsets[:i]
233 self._lgt = i
247 self._lgt = i
234 self._extra = []
248 self._extra = []
235 else:
249 else:
236 self._extra = self._extra[: i - self._lgt]
250 self._extra = self._extra[: i - self._lgt]
237
251
238 def _calculate_index(self, i):
252 def _calculate_index(self, i):
239 return self._offsets[i]
253 return self._offsets[i]
240
254
241
255
242 def parse_index2(data, inline, revlogv2=False):
256 def parse_index2(data, inline, revlogv2=False):
243 if not inline:
257 if not inline:
244 cls = IndexObject2 if revlogv2 else IndexObject
258 cls = IndexObject2 if revlogv2 else IndexObject
245 return cls(data), None
259 return cls(data), None
246 cls = InlinedIndexObject2 if revlogv2 else InlinedIndexObject
260 cls = InlinedIndexObject2 if revlogv2 else InlinedIndexObject
247 return cls(data, inline), (0, data)
261 return cls(data, inline), (0, data)
248
262
249
263
250 class Index2Mixin(object):
264 class Index2Mixin(object):
251 index_format = revlog_constants.INDEX_ENTRY_V2
265 index_format = revlog_constants.INDEX_ENTRY_V2
252 null_item = (0, 0, 0, -1, -1, -1, -1, sha1nodeconstants.nullid, 0, 0)
266 null_item = (0, 0, 0, -1, -1, -1, -1, sha1nodeconstants.nullid, 0, 0)
253
267
254 def replace_sidedata_info(self, i, sidedata_offset, sidedata_length):
268 def replace_sidedata_info(self, i, sidedata_offset, sidedata_length):
255 """
269 """
256 Replace an existing index entry's sidedata offset and length with new
270 Replace an existing index entry's sidedata offset and length with new
257 ones.
271 ones.
258 This cannot be used outside of the context of sidedata rewriting,
272 This cannot be used outside of the context of sidedata rewriting,
259 inside the transaction that creates the revision `i`.
273 inside the transaction that creates the revision `i`.
260 """
274 """
261 if i < 0:
275 if i < 0:
262 raise KeyError
276 raise KeyError
263 self._check_index(i)
277 self._check_index(i)
264 sidedata_format = b">Qi"
278 sidedata_format = b">Qi"
265 packed_size = struct.calcsize(sidedata_format)
279 packed_size = struct.calcsize(sidedata_format)
266 if i >= self._lgt:
280 if i >= self._lgt:
267 packed = _pack(sidedata_format, sidedata_offset, sidedata_length)
281 packed = _pack(sidedata_format, sidedata_offset, sidedata_length)
268 old = self._extra[i - self._lgt]
282 old = self._extra[i - self._lgt]
269 new = old[:64] + packed + old[64 + packed_size :]
283 new = old[:64] + packed + old[64 + packed_size :]
270 self._extra[i - self._lgt] = new
284 self._extra[i - self._lgt] = new
271 else:
285 else:
272 msg = b"cannot rewrite entries outside of this transaction"
286 msg = b"cannot rewrite entries outside of this transaction"
273 raise KeyError(msg)
287 raise KeyError(msg)
274
288
289 def entry_binary(self, rev, header):
290 """return the raw binary string representing a revision"""
291 entry = self[rev]
292 p = revlog_constants.INDEX_ENTRY_V2.pack(*entry)
293 if rev == 0:
294 v_fmt = revlog_constants.INDEX_HEADER
295 v_bin = v_fmt.pack(header)
296 p = v_bin + p[v_fmt.size :]
297 return p
298
275
299
276 class IndexObject2(Index2Mixin, IndexObject):
300 class IndexObject2(Index2Mixin, IndexObject):
277 pass
301 pass
278
302
279
303
280 class InlinedIndexObject2(Index2Mixin, InlinedIndexObject):
304 class InlinedIndexObject2(Index2Mixin, InlinedIndexObject):
281 def _inline_scan(self, lgt):
305 def _inline_scan(self, lgt):
282 sidedata_length_pos = 72
306 sidedata_length_pos = 72
283 off = 0
307 off = 0
284 if lgt is not None:
308 if lgt is not None:
285 self._offsets = [0] * lgt
309 self._offsets = [0] * lgt
286 count = 0
310 count = 0
287 while off <= len(self._data) - self.entry_size:
311 while off <= len(self._data) - self.entry_size:
288 start = off + self.big_int_size
312 start = off + self.big_int_size
289 (data_size,) = struct.unpack(
313 (data_size,) = struct.unpack(
290 b'>i',
314 b'>i',
291 self._data[start : start + self.int_size],
315 self._data[start : start + self.int_size],
292 )
316 )
293 start = off + sidedata_length_pos
317 start = off + sidedata_length_pos
294 (side_data_size,) = struct.unpack(
318 (side_data_size,) = struct.unpack(
295 b'>i', self._data[start : start + self.int_size]
319 b'>i', self._data[start : start + self.int_size]
296 )
320 )
297 if lgt is not None:
321 if lgt is not None:
298 self._offsets[count] = off
322 self._offsets[count] = off
299 count += 1
323 count += 1
300 off += self.entry_size + data_size + side_data_size
324 off += self.entry_size + data_size + side_data_size
301 if off != len(self._data):
325 if off != len(self._data):
302 raise ValueError(b"corrupted data")
326 raise ValueError(b"corrupted data")
303 return count
327 return count
304
328
305
329
306 def parse_index_devel_nodemap(data, inline):
330 def parse_index_devel_nodemap(data, inline):
307 """like parse_index2, but alway return a PersistentNodeMapIndexObject"""
331 """like parse_index2, but alway return a PersistentNodeMapIndexObject"""
308 return PersistentNodeMapIndexObject(data), None
332 return PersistentNodeMapIndexObject(data), None
309
333
310
334
311 def parse_dirstate(dmap, copymap, st):
335 def parse_dirstate(dmap, copymap, st):
312 parents = [st[:20], st[20:40]]
336 parents = [st[:20], st[20:40]]
313 # dereference fields so they will be local in loop
337 # dereference fields so they will be local in loop
314 format = b">cllll"
338 format = b">cllll"
315 e_size = struct.calcsize(format)
339 e_size = struct.calcsize(format)
316 pos1 = 40
340 pos1 = 40
317 l = len(st)
341 l = len(st)
318
342
319 # the inner loop
343 # the inner loop
320 while pos1 < l:
344 while pos1 < l:
321 pos2 = pos1 + e_size
345 pos2 = pos1 + e_size
322 e = _unpack(b">cllll", st[pos1:pos2]) # a literal here is faster
346 e = _unpack(b">cllll", st[pos1:pos2]) # a literal here is faster
323 pos1 = pos2 + e[4]
347 pos1 = pos2 + e[4]
324 f = st[pos2:pos1]
348 f = st[pos2:pos1]
325 if b'\0' in f:
349 if b'\0' in f:
326 f, c = f.split(b'\0')
350 f, c = f.split(b'\0')
327 copymap[f] = c
351 copymap[f] = c
328 dmap[f] = e[:4]
352 dmap[f] = e[:4]
329 return parents
353 return parents
330
354
331
355
332 def pack_dirstate(dmap, copymap, pl, now):
356 def pack_dirstate(dmap, copymap, pl, now):
333 now = int(now)
357 now = int(now)
334 cs = stringio()
358 cs = stringio()
335 write = cs.write
359 write = cs.write
336 write(b"".join(pl))
360 write(b"".join(pl))
337 for f, e in pycompat.iteritems(dmap):
361 for f, e in pycompat.iteritems(dmap):
338 if e[0] == b'n' and e[3] == now:
362 if e[0] == b'n' and e[3] == now:
339 # The file was last modified "simultaneously" with the current
363 # The file was last modified "simultaneously" with the current
340 # write to dirstate (i.e. within the same second for file-
364 # write to dirstate (i.e. within the same second for file-
341 # systems with a granularity of 1 sec). This commonly happens
365 # systems with a granularity of 1 sec). This commonly happens
342 # for at least a couple of files on 'update'.
366 # for at least a couple of files on 'update'.
343 # The user could change the file without changing its size
367 # The user could change the file without changing its size
344 # within the same second. Invalidate the file's mtime in
368 # within the same second. Invalidate the file's mtime in
345 # dirstate, forcing future 'status' calls to compare the
369 # dirstate, forcing future 'status' calls to compare the
346 # contents of the file if the size is the same. This prevents
370 # contents of the file if the size is the same. This prevents
347 # mistakenly treating such files as clean.
371 # mistakenly treating such files as clean.
348 e = dirstatetuple(e[0], e[1], e[2], -1)
372 e = dirstatetuple(e[0], e[1], e[2], -1)
349 dmap[f] = e
373 dmap[f] = e
350
374
351 if f in copymap:
375 if f in copymap:
352 f = b"%s\0%s" % (f, copymap[f])
376 f = b"%s\0%s" % (f, copymap[f])
353 e = _pack(b">cllll", e[0], e[1], e[2], e[3], len(f))
377 e = _pack(b">cllll", e[0], e[1], e[2], e[3], len(f))
354 write(e)
378 write(e)
355 write(f)
379 write(f)
356 return cs.getvalue()
380 return cs.getvalue()
@@ -1,3247 +1,3230 b''
1 # revlog.py - storage back-end for mercurial
1 # revlog.py - storage back-end for mercurial
2 #
2 #
3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 """Storage back-end for Mercurial.
8 """Storage back-end for Mercurial.
9
9
10 This provides efficient delta storage with O(1) retrieve and append
10 This provides efficient delta storage with O(1) retrieve and append
11 and O(changes) merge between branches.
11 and O(changes) merge between branches.
12 """
12 """
13
13
14 from __future__ import absolute_import
14 from __future__ import absolute_import
15
15
16 import binascii
16 import binascii
17 import collections
17 import collections
18 import contextlib
18 import contextlib
19 import errno
19 import errno
20 import io
20 import io
21 import os
21 import os
22 import struct
22 import struct
23 import zlib
23 import zlib
24
24
25 # import stuff from node for others to import from revlog
25 # import stuff from node for others to import from revlog
26 from .node import (
26 from .node import (
27 bin,
27 bin,
28 hex,
28 hex,
29 nullrev,
29 nullrev,
30 sha1nodeconstants,
30 sha1nodeconstants,
31 short,
31 short,
32 wdirrev,
32 wdirrev,
33 )
33 )
34 from .i18n import _
34 from .i18n import _
35 from .pycompat import getattr
35 from .pycompat import getattr
36 from .revlogutils.constants import (
36 from .revlogutils.constants import (
37 FLAG_GENERALDELTA,
37 FLAG_GENERALDELTA,
38 FLAG_INLINE_DATA,
38 FLAG_INLINE_DATA,
39 INDEX_ENTRY_V0,
39 INDEX_ENTRY_V0,
40 INDEX_ENTRY_V1,
40 INDEX_ENTRY_V1,
41 INDEX_ENTRY_V2,
41 INDEX_ENTRY_V2,
42 INDEX_HEADER,
42 INDEX_HEADER,
43 REVLOGV0,
43 REVLOGV0,
44 REVLOGV1,
44 REVLOGV1,
45 REVLOGV1_FLAGS,
45 REVLOGV1_FLAGS,
46 REVLOGV2,
46 REVLOGV2,
47 REVLOGV2_FLAGS,
47 REVLOGV2_FLAGS,
48 REVLOG_DEFAULT_FLAGS,
48 REVLOG_DEFAULT_FLAGS,
49 REVLOG_DEFAULT_FORMAT,
49 REVLOG_DEFAULT_FORMAT,
50 REVLOG_DEFAULT_VERSION,
50 REVLOG_DEFAULT_VERSION,
51 )
51 )
52 from .revlogutils.flagutil import (
52 from .revlogutils.flagutil import (
53 REVIDX_DEFAULT_FLAGS,
53 REVIDX_DEFAULT_FLAGS,
54 REVIDX_ELLIPSIS,
54 REVIDX_ELLIPSIS,
55 REVIDX_EXTSTORED,
55 REVIDX_EXTSTORED,
56 REVIDX_FLAGS_ORDER,
56 REVIDX_FLAGS_ORDER,
57 REVIDX_HASCOPIESINFO,
57 REVIDX_HASCOPIESINFO,
58 REVIDX_ISCENSORED,
58 REVIDX_ISCENSORED,
59 REVIDX_RAWTEXT_CHANGING_FLAGS,
59 REVIDX_RAWTEXT_CHANGING_FLAGS,
60 REVIDX_SIDEDATA,
60 REVIDX_SIDEDATA,
61 )
61 )
62 from .thirdparty import attr
62 from .thirdparty import attr
63 from . import (
63 from . import (
64 ancestor,
64 ancestor,
65 dagop,
65 dagop,
66 error,
66 error,
67 mdiff,
67 mdiff,
68 policy,
68 policy,
69 pycompat,
69 pycompat,
70 templatefilters,
70 templatefilters,
71 util,
71 util,
72 )
72 )
73 from .interfaces import (
73 from .interfaces import (
74 repository,
74 repository,
75 util as interfaceutil,
75 util as interfaceutil,
76 )
76 )
77 from .revlogutils import (
77 from .revlogutils import (
78 deltas as deltautil,
78 deltas as deltautil,
79 flagutil,
79 flagutil,
80 nodemap as nodemaputil,
80 nodemap as nodemaputil,
81 sidedata as sidedatautil,
81 sidedata as sidedatautil,
82 )
82 )
83 from .utils import (
83 from .utils import (
84 storageutil,
84 storageutil,
85 stringutil,
85 stringutil,
86 )
86 )
87
87
88 # blanked usage of all the name to prevent pyflakes constraints
88 # blanked usage of all the name to prevent pyflakes constraints
89 # We need these name available in the module for extensions.
89 # We need these name available in the module for extensions.
90 REVLOGV0
90 REVLOGV0
91 REVLOGV1
91 REVLOGV1
92 REVLOGV2
92 REVLOGV2
93 FLAG_INLINE_DATA
93 FLAG_INLINE_DATA
94 FLAG_GENERALDELTA
94 FLAG_GENERALDELTA
95 REVLOG_DEFAULT_FLAGS
95 REVLOG_DEFAULT_FLAGS
96 REVLOG_DEFAULT_FORMAT
96 REVLOG_DEFAULT_FORMAT
97 REVLOG_DEFAULT_VERSION
97 REVLOG_DEFAULT_VERSION
98 REVLOGV1_FLAGS
98 REVLOGV1_FLAGS
99 REVLOGV2_FLAGS
99 REVLOGV2_FLAGS
100 REVIDX_ISCENSORED
100 REVIDX_ISCENSORED
101 REVIDX_ELLIPSIS
101 REVIDX_ELLIPSIS
102 REVIDX_SIDEDATA
102 REVIDX_SIDEDATA
103 REVIDX_HASCOPIESINFO
103 REVIDX_HASCOPIESINFO
104 REVIDX_EXTSTORED
104 REVIDX_EXTSTORED
105 REVIDX_DEFAULT_FLAGS
105 REVIDX_DEFAULT_FLAGS
106 REVIDX_FLAGS_ORDER
106 REVIDX_FLAGS_ORDER
107 REVIDX_RAWTEXT_CHANGING_FLAGS
107 REVIDX_RAWTEXT_CHANGING_FLAGS
108
108
109 parsers = policy.importmod('parsers')
109 parsers = policy.importmod('parsers')
110 rustancestor = policy.importrust('ancestor')
110 rustancestor = policy.importrust('ancestor')
111 rustdagop = policy.importrust('dagop')
111 rustdagop = policy.importrust('dagop')
112 rustrevlog = policy.importrust('revlog')
112 rustrevlog = policy.importrust('revlog')
113
113
114 # Aliased for performance.
114 # Aliased for performance.
115 _zlibdecompress = zlib.decompress
115 _zlibdecompress = zlib.decompress
116
116
117 # max size of revlog with inline data
117 # max size of revlog with inline data
118 _maxinline = 131072
118 _maxinline = 131072
119 _chunksize = 1048576
119 _chunksize = 1048576
120
120
121 # Flag processors for REVIDX_ELLIPSIS.
121 # Flag processors for REVIDX_ELLIPSIS.
122 def ellipsisreadprocessor(rl, text):
122 def ellipsisreadprocessor(rl, text):
123 return text, False
123 return text, False
124
124
125
125
126 def ellipsiswriteprocessor(rl, text):
126 def ellipsiswriteprocessor(rl, text):
127 return text, False
127 return text, False
128
128
129
129
130 def ellipsisrawprocessor(rl, text):
130 def ellipsisrawprocessor(rl, text):
131 return False
131 return False
132
132
133
133
134 ellipsisprocessor = (
134 ellipsisprocessor = (
135 ellipsisreadprocessor,
135 ellipsisreadprocessor,
136 ellipsiswriteprocessor,
136 ellipsiswriteprocessor,
137 ellipsisrawprocessor,
137 ellipsisrawprocessor,
138 )
138 )
139
139
140
140
141 def getoffset(q):
141 def getoffset(q):
142 return int(q >> 16)
142 return int(q >> 16)
143
143
144
144
145 def gettype(q):
145 def gettype(q):
146 return int(q & 0xFFFF)
146 return int(q & 0xFFFF)
147
147
148
148
149 def offset_type(offset, type):
149 def offset_type(offset, type):
150 if (type & ~flagutil.REVIDX_KNOWN_FLAGS) != 0:
150 if (type & ~flagutil.REVIDX_KNOWN_FLAGS) != 0:
151 raise ValueError(b'unknown revlog index flags')
151 raise ValueError(b'unknown revlog index flags')
152 return int(int(offset) << 16 | type)
152 return int(int(offset) << 16 | type)
153
153
154
154
155 def _verify_revision(rl, skipflags, state, node):
155 def _verify_revision(rl, skipflags, state, node):
156 """Verify the integrity of the given revlog ``node`` while providing a hook
156 """Verify the integrity of the given revlog ``node`` while providing a hook
157 point for extensions to influence the operation."""
157 point for extensions to influence the operation."""
158 if skipflags:
158 if skipflags:
159 state[b'skipread'].add(node)
159 state[b'skipread'].add(node)
160 else:
160 else:
161 # Side-effect: read content and verify hash.
161 # Side-effect: read content and verify hash.
162 rl.revision(node)
162 rl.revision(node)
163
163
164
164
165 # True if a fast implementation for persistent-nodemap is available
165 # True if a fast implementation for persistent-nodemap is available
166 #
166 #
167 # We also consider we have a "fast" implementation in "pure" python because
167 # We also consider we have a "fast" implementation in "pure" python because
168 # people using pure don't really have performance consideration (and a
168 # people using pure don't really have performance consideration (and a
169 # wheelbarrow of other slowness source)
169 # wheelbarrow of other slowness source)
170 HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or util.safehasattr(
170 HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or util.safehasattr(
171 parsers, 'BaseIndexObject'
171 parsers, 'BaseIndexObject'
172 )
172 )
173
173
174
174
175 @attr.s(slots=True, frozen=True)
175 @attr.s(slots=True, frozen=True)
176 class _revisioninfo(object):
176 class _revisioninfo(object):
177 """Information about a revision that allows building its fulltext
177 """Information about a revision that allows building its fulltext
178 node: expected hash of the revision
178 node: expected hash of the revision
179 p1, p2: parent revs of the revision
179 p1, p2: parent revs of the revision
180 btext: built text cache consisting of a one-element list
180 btext: built text cache consisting of a one-element list
181 cachedelta: (baserev, uncompressed_delta) or None
181 cachedelta: (baserev, uncompressed_delta) or None
182 flags: flags associated to the revision storage
182 flags: flags associated to the revision storage
183
183
184 One of btext[0] or cachedelta must be set.
184 One of btext[0] or cachedelta must be set.
185 """
185 """
186
186
187 node = attr.ib()
187 node = attr.ib()
188 p1 = attr.ib()
188 p1 = attr.ib()
189 p2 = attr.ib()
189 p2 = attr.ib()
190 btext = attr.ib()
190 btext = attr.ib()
191 textlen = attr.ib()
191 textlen = attr.ib()
192 cachedelta = attr.ib()
192 cachedelta = attr.ib()
193 flags = attr.ib()
193 flags = attr.ib()
194
194
195
195
196 @interfaceutil.implementer(repository.irevisiondelta)
196 @interfaceutil.implementer(repository.irevisiondelta)
197 @attr.s(slots=True)
197 @attr.s(slots=True)
198 class revlogrevisiondelta(object):
198 class revlogrevisiondelta(object):
199 node = attr.ib()
199 node = attr.ib()
200 p1node = attr.ib()
200 p1node = attr.ib()
201 p2node = attr.ib()
201 p2node = attr.ib()
202 basenode = attr.ib()
202 basenode = attr.ib()
203 flags = attr.ib()
203 flags = attr.ib()
204 baserevisionsize = attr.ib()
204 baserevisionsize = attr.ib()
205 revision = attr.ib()
205 revision = attr.ib()
206 delta = attr.ib()
206 delta = attr.ib()
207 sidedata = attr.ib()
207 sidedata = attr.ib()
208 linknode = attr.ib(default=None)
208 linknode = attr.ib(default=None)
209
209
210
210
211 @interfaceutil.implementer(repository.iverifyproblem)
211 @interfaceutil.implementer(repository.iverifyproblem)
212 @attr.s(frozen=True)
212 @attr.s(frozen=True)
213 class revlogproblem(object):
213 class revlogproblem(object):
214 warning = attr.ib(default=None)
214 warning = attr.ib(default=None)
215 error = attr.ib(default=None)
215 error = attr.ib(default=None)
216 node = attr.ib(default=None)
216 node = attr.ib(default=None)
217
217
218
218
219 class revlogoldindex(list):
219 class revlogoldindex(list):
220 entry_size = INDEX_ENTRY_V0.size
220 entry_size = INDEX_ENTRY_V0.size
221
221
222 @property
222 @property
223 def nodemap(self):
223 def nodemap(self):
224 msg = b"index.nodemap is deprecated, use index.[has_node|rev|get_rev]"
224 msg = b"index.nodemap is deprecated, use index.[has_node|rev|get_rev]"
225 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
225 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
226 return self._nodemap
226 return self._nodemap
227
227
228 @util.propertycache
228 @util.propertycache
229 def _nodemap(self):
229 def _nodemap(self):
230 nodemap = nodemaputil.NodeMap({sha1nodeconstants.nullid: nullrev})
230 nodemap = nodemaputil.NodeMap({sha1nodeconstants.nullid: nullrev})
231 for r in range(0, len(self)):
231 for r in range(0, len(self)):
232 n = self[r][7]
232 n = self[r][7]
233 nodemap[n] = r
233 nodemap[n] = r
234 return nodemap
234 return nodemap
235
235
236 def has_node(self, node):
236 def has_node(self, node):
237 """return True if the node exist in the index"""
237 """return True if the node exist in the index"""
238 return node in self._nodemap
238 return node in self._nodemap
239
239
240 def rev(self, node):
240 def rev(self, node):
241 """return a revision for a node
241 """return a revision for a node
242
242
243 If the node is unknown, raise a RevlogError"""
243 If the node is unknown, raise a RevlogError"""
244 return self._nodemap[node]
244 return self._nodemap[node]
245
245
246 def get_rev(self, node):
246 def get_rev(self, node):
247 """return a revision for a node
247 """return a revision for a node
248
248
249 If the node is unknown, return None"""
249 If the node is unknown, return None"""
250 return self._nodemap.get(node)
250 return self._nodemap.get(node)
251
251
252 def append(self, tup):
252 def append(self, tup):
253 self._nodemap[tup[7]] = len(self)
253 self._nodemap[tup[7]] = len(self)
254 super(revlogoldindex, self).append(tup)
254 super(revlogoldindex, self).append(tup)
255
255
256 def __delitem__(self, i):
256 def __delitem__(self, i):
257 if not isinstance(i, slice) or not i.stop == -1 or i.step is not None:
257 if not isinstance(i, slice) or not i.stop == -1 or i.step is not None:
258 raise ValueError(b"deleting slices only supports a:-1 with step 1")
258 raise ValueError(b"deleting slices only supports a:-1 with step 1")
259 for r in pycompat.xrange(i.start, len(self)):
259 for r in pycompat.xrange(i.start, len(self)):
260 del self._nodemap[self[r][7]]
260 del self._nodemap[self[r][7]]
261 super(revlogoldindex, self).__delitem__(i)
261 super(revlogoldindex, self).__delitem__(i)
262
262
263 def clearcaches(self):
263 def clearcaches(self):
264 self.__dict__.pop('_nodemap', None)
264 self.__dict__.pop('_nodemap', None)
265
265
266 def __getitem__(self, i):
266 def __getitem__(self, i):
267 if i == -1:
267 if i == -1:
268 return (0, 0, 0, -1, -1, -1, -1, sha1nodeconstants.nullid)
268 return (0, 0, 0, -1, -1, -1, -1, sha1nodeconstants.nullid)
269 return list.__getitem__(self, i)
269 return list.__getitem__(self, i)
270
270
271 def entry_binary(self, rev, header):
272 """return the raw binary string representing a revision"""
273 entry = self[rev]
274 if gettype(entry[0]):
275 raise error.RevlogError(
276 _(b'index entry flags need revlog version 1')
277 )
278 e2 = (
279 getoffset(entry[0]),
280 entry[1],
281 entry[3],
282 entry[4],
283 self[entry[5]][7],
284 self[entry[6]][7],
285 entry[7],
286 )
287 return INDEX_ENTRY_V0.pack(*e2)
288
271
289
272 class revlogoldio(object):
290 class revlogoldio(object):
273 def parseindex(self, data, inline):
291 def parseindex(self, data, inline):
274 s = INDEX_ENTRY_V0.size
292 s = INDEX_ENTRY_V0.size
275 index = []
293 index = []
276 nodemap = nodemaputil.NodeMap({sha1nodeconstants.nullid: nullrev})
294 nodemap = nodemaputil.NodeMap({sha1nodeconstants.nullid: nullrev})
277 n = off = 0
295 n = off = 0
278 l = len(data)
296 l = len(data)
279 while off + s <= l:
297 while off + s <= l:
280 cur = data[off : off + s]
298 cur = data[off : off + s]
281 off += s
299 off += s
282 e = INDEX_ENTRY_V0.unpack(cur)
300 e = INDEX_ENTRY_V0.unpack(cur)
283 # transform to revlogv1 format
301 # transform to revlogv1 format
284 e2 = (
302 e2 = (
285 offset_type(e[0], 0),
303 offset_type(e[0], 0),
286 e[1],
304 e[1],
287 -1,
305 -1,
288 e[2],
306 e[2],
289 e[3],
307 e[3],
290 nodemap.get(e[4], nullrev),
308 nodemap.get(e[4], nullrev),
291 nodemap.get(e[5], nullrev),
309 nodemap.get(e[5], nullrev),
292 e[6],
310 e[6],
293 )
311 )
294 index.append(e2)
312 index.append(e2)
295 nodemap[e[6]] = n
313 nodemap[e[6]] = n
296 n += 1
314 n += 1
297
315
298 index = revlogoldindex(index)
316 index = revlogoldindex(index)
299 return index, None
317 return index, None
300
318
301 def packentry(self, entry, node, version, rev):
302 """return the binary representation of an entry
303
304 entry: a tuple containing all the values (see index.__getitem__)
305 node: a callback to convert a revision to nodeid
306 version: the changelog version
307 rev: the revision number
308 """
309 if gettype(entry[0]):
310 raise error.RevlogError(
311 _(b'index entry flags need revlog version 1')
312 )
313 e2 = (
314 getoffset(entry[0]),
315 entry[1],
316 entry[3],
317 entry[4],
318 node(entry[5]),
319 node(entry[6]),
320 entry[7],
321 )
322 return INDEX_ENTRY_V0.pack(*e2)
323
324
319
325 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
320 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
326 # signed integer)
321 # signed integer)
327 _maxentrysize = 0x7FFFFFFF
322 _maxentrysize = 0x7FFFFFFF
328
323
329
324
330 class revlogio(object):
325 class revlogio(object):
331 def parseindex(self, data, inline):
326 def parseindex(self, data, inline):
332 # call the C implementation to parse the index data
327 # call the C implementation to parse the index data
333 index, cache = parsers.parse_index2(data, inline)
328 index, cache = parsers.parse_index2(data, inline)
334 return index, cache
329 return index, cache
335
330
336 def packentry(self, entry, node, version, rev):
337 p = INDEX_ENTRY_V1.pack(*entry)
338 if rev == 0:
339 p = INDEX_HEADER.pack(version) + p[4:]
340 return p
341
342
331
343 class revlogv2io(object):
332 class revlogv2io(object):
344 def parseindex(self, data, inline):
333 def parseindex(self, data, inline):
345 index, cache = parsers.parse_index2(data, inline, revlogv2=True)
334 index, cache = parsers.parse_index2(data, inline, revlogv2=True)
346 return index, cache
335 return index, cache
347
336
348 def packentry(self, entry, node, version, rev):
349 p = INDEX_ENTRY_V2.pack(*entry)
350 if rev == 0:
351 p = INDEX_HEADER.pack(version) + p[4:]
352 return p
353
354
337
355 NodemapRevlogIO = None
338 NodemapRevlogIO = None
356
339
357 if util.safehasattr(parsers, 'parse_index_devel_nodemap'):
340 if util.safehasattr(parsers, 'parse_index_devel_nodemap'):
358
341
359 class NodemapRevlogIO(revlogio):
342 class NodemapRevlogIO(revlogio):
360 """A debug oriented IO class that return a PersistentNodeMapIndexObject
343 """A debug oriented IO class that return a PersistentNodeMapIndexObject
361
344
362 The PersistentNodeMapIndexObject object is meant to test the persistent nodemap feature.
345 The PersistentNodeMapIndexObject object is meant to test the persistent nodemap feature.
363 """
346 """
364
347
365 def parseindex(self, data, inline):
348 def parseindex(self, data, inline):
366 index, cache = parsers.parse_index_devel_nodemap(data, inline)
349 index, cache = parsers.parse_index_devel_nodemap(data, inline)
367 return index, cache
350 return index, cache
368
351
369
352
370 class rustrevlogio(revlogio):
353 class rustrevlogio(revlogio):
371 def parseindex(self, data, inline):
354 def parseindex(self, data, inline):
372 index, cache = super(rustrevlogio, self).parseindex(data, inline)
355 index, cache = super(rustrevlogio, self).parseindex(data, inline)
373 return rustrevlog.MixedIndex(index), cache
356 return rustrevlog.MixedIndex(index), cache
374
357
375
358
376 class revlog(object):
359 class revlog(object):
377 """
360 """
378 the underlying revision storage object
361 the underlying revision storage object
379
362
380 A revlog consists of two parts, an index and the revision data.
363 A revlog consists of two parts, an index and the revision data.
381
364
382 The index is a file with a fixed record size containing
365 The index is a file with a fixed record size containing
383 information on each revision, including its nodeid (hash), the
366 information on each revision, including its nodeid (hash), the
384 nodeids of its parents, the position and offset of its data within
367 nodeids of its parents, the position and offset of its data within
385 the data file, and the revision it's based on. Finally, each entry
368 the data file, and the revision it's based on. Finally, each entry
386 contains a linkrev entry that can serve as a pointer to external
369 contains a linkrev entry that can serve as a pointer to external
387 data.
370 data.
388
371
389 The revision data itself is a linear collection of data chunks.
372 The revision data itself is a linear collection of data chunks.
390 Each chunk represents a revision and is usually represented as a
373 Each chunk represents a revision and is usually represented as a
391 delta against the previous chunk. To bound lookup time, runs of
374 delta against the previous chunk. To bound lookup time, runs of
392 deltas are limited to about 2 times the length of the original
375 deltas are limited to about 2 times the length of the original
393 version data. This makes retrieval of a version proportional to
376 version data. This makes retrieval of a version proportional to
394 its size, or O(1) relative to the number of revisions.
377 its size, or O(1) relative to the number of revisions.
395
378
396 Both pieces of the revlog are written to in an append-only
379 Both pieces of the revlog are written to in an append-only
397 fashion, which means we never need to rewrite a file to insert or
380 fashion, which means we never need to rewrite a file to insert or
398 remove data, and can use some simple techniques to avoid the need
381 remove data, and can use some simple techniques to avoid the need
399 for locking while reading.
382 for locking while reading.
400
383
401 If checkambig, indexfile is opened with checkambig=True at
384 If checkambig, indexfile is opened with checkambig=True at
402 writing, to avoid file stat ambiguity.
385 writing, to avoid file stat ambiguity.
403
386
404 If mmaplargeindex is True, and an mmapindexthreshold is set, the
387 If mmaplargeindex is True, and an mmapindexthreshold is set, the
405 index will be mmapped rather than read if it is larger than the
388 index will be mmapped rather than read if it is larger than the
406 configured threshold.
389 configured threshold.
407
390
408 If censorable is True, the revlog can have censored revisions.
391 If censorable is True, the revlog can have censored revisions.
409
392
410 If `upperboundcomp` is not None, this is the expected maximal gain from
393 If `upperboundcomp` is not None, this is the expected maximal gain from
411 compression for the data content.
394 compression for the data content.
412
395
413 `concurrencychecker` is an optional function that receives 3 arguments: a
396 `concurrencychecker` is an optional function that receives 3 arguments: a
414 file handle, a filename, and an expected position. It should check whether
397 file handle, a filename, and an expected position. It should check whether
415 the current position in the file handle is valid, and log/warn/fail (by
398 the current position in the file handle is valid, and log/warn/fail (by
416 raising).
399 raising).
417 """
400 """
418
401
419 _flagserrorclass = error.RevlogError
402 _flagserrorclass = error.RevlogError
420
403
421 def __init__(
404 def __init__(
422 self,
405 self,
423 opener,
406 opener,
424 indexfile,
407 indexfile,
425 datafile=None,
408 datafile=None,
426 checkambig=False,
409 checkambig=False,
427 mmaplargeindex=False,
410 mmaplargeindex=False,
428 censorable=False,
411 censorable=False,
429 upperboundcomp=None,
412 upperboundcomp=None,
430 persistentnodemap=False,
413 persistentnodemap=False,
431 concurrencychecker=None,
414 concurrencychecker=None,
432 ):
415 ):
433 """
416 """
434 create a revlog object
417 create a revlog object
435
418
436 opener is a function that abstracts the file opening operation
419 opener is a function that abstracts the file opening operation
437 and can be used to implement COW semantics or the like.
420 and can be used to implement COW semantics or the like.
438
421
439 """
422 """
440 self.upperboundcomp = upperboundcomp
423 self.upperboundcomp = upperboundcomp
441 self.indexfile = indexfile
424 self.indexfile = indexfile
442 self.datafile = datafile or (indexfile[:-2] + b".d")
425 self.datafile = datafile or (indexfile[:-2] + b".d")
443 self.nodemap_file = None
426 self.nodemap_file = None
444 if persistentnodemap:
427 if persistentnodemap:
445 self.nodemap_file = nodemaputil.get_nodemap_file(
428 self.nodemap_file = nodemaputil.get_nodemap_file(
446 opener, self.indexfile
429 opener, self.indexfile
447 )
430 )
448
431
449 self.opener = opener
432 self.opener = opener
450 # When True, indexfile is opened with checkambig=True at writing, to
433 # When True, indexfile is opened with checkambig=True at writing, to
451 # avoid file stat ambiguity.
434 # avoid file stat ambiguity.
452 self._checkambig = checkambig
435 self._checkambig = checkambig
453 self._mmaplargeindex = mmaplargeindex
436 self._mmaplargeindex = mmaplargeindex
454 self._censorable = censorable
437 self._censorable = censorable
455 # 3-tuple of (node, rev, text) for a raw revision.
438 # 3-tuple of (node, rev, text) for a raw revision.
456 self._revisioncache = None
439 self._revisioncache = None
457 # Maps rev to chain base rev.
440 # Maps rev to chain base rev.
458 self._chainbasecache = util.lrucachedict(100)
441 self._chainbasecache = util.lrucachedict(100)
459 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
442 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
460 self._chunkcache = (0, b'')
443 self._chunkcache = (0, b'')
461 # How much data to read and cache into the raw revlog data cache.
444 # How much data to read and cache into the raw revlog data cache.
462 self._chunkcachesize = 65536
445 self._chunkcachesize = 65536
463 self._maxchainlen = None
446 self._maxchainlen = None
464 self._deltabothparents = True
447 self._deltabothparents = True
465 self.index = None
448 self.index = None
466 self._nodemap_docket = None
449 self._nodemap_docket = None
467 # Mapping of partial identifiers to full nodes.
450 # Mapping of partial identifiers to full nodes.
468 self._pcache = {}
451 self._pcache = {}
469 # Mapping of revision integer to full node.
452 # Mapping of revision integer to full node.
470 self._compengine = b'zlib'
453 self._compengine = b'zlib'
471 self._compengineopts = {}
454 self._compengineopts = {}
472 self._maxdeltachainspan = -1
455 self._maxdeltachainspan = -1
473 self._withsparseread = False
456 self._withsparseread = False
474 self._sparserevlog = False
457 self._sparserevlog = False
475 self._srdensitythreshold = 0.50
458 self._srdensitythreshold = 0.50
476 self._srmingapsize = 262144
459 self._srmingapsize = 262144
477
460
478 # Make copy of flag processors so each revlog instance can support
461 # Make copy of flag processors so each revlog instance can support
479 # custom flags.
462 # custom flags.
480 self._flagprocessors = dict(flagutil.flagprocessors)
463 self._flagprocessors = dict(flagutil.flagprocessors)
481
464
482 # 2-tuple of file handles being used for active writing.
465 # 2-tuple of file handles being used for active writing.
483 self._writinghandles = None
466 self._writinghandles = None
484
467
485 self._loadindex()
468 self._loadindex()
486
469
487 self._concurrencychecker = concurrencychecker
470 self._concurrencychecker = concurrencychecker
488
471
489 def _loadindex(self):
472 def _loadindex(self):
490 mmapindexthreshold = None
473 mmapindexthreshold = None
491 opts = self.opener.options
474 opts = self.opener.options
492
475
493 if b'revlogv2' in opts:
476 if b'revlogv2' in opts:
494 newversionflags = REVLOGV2 | FLAG_INLINE_DATA
477 newversionflags = REVLOGV2 | FLAG_INLINE_DATA
495 elif b'revlogv1' in opts:
478 elif b'revlogv1' in opts:
496 newversionflags = REVLOGV1 | FLAG_INLINE_DATA
479 newversionflags = REVLOGV1 | FLAG_INLINE_DATA
497 if b'generaldelta' in opts:
480 if b'generaldelta' in opts:
498 newversionflags |= FLAG_GENERALDELTA
481 newversionflags |= FLAG_GENERALDELTA
499 elif b'revlogv0' in self.opener.options:
482 elif b'revlogv0' in self.opener.options:
500 newversionflags = REVLOGV0
483 newversionflags = REVLOGV0
501 else:
484 else:
502 newversionflags = REVLOG_DEFAULT_VERSION
485 newversionflags = REVLOG_DEFAULT_VERSION
503
486
504 if b'chunkcachesize' in opts:
487 if b'chunkcachesize' in opts:
505 self._chunkcachesize = opts[b'chunkcachesize']
488 self._chunkcachesize = opts[b'chunkcachesize']
506 if b'maxchainlen' in opts:
489 if b'maxchainlen' in opts:
507 self._maxchainlen = opts[b'maxchainlen']
490 self._maxchainlen = opts[b'maxchainlen']
508 if b'deltabothparents' in opts:
491 if b'deltabothparents' in opts:
509 self._deltabothparents = opts[b'deltabothparents']
492 self._deltabothparents = opts[b'deltabothparents']
510 self._lazydelta = bool(opts.get(b'lazydelta', True))
493 self._lazydelta = bool(opts.get(b'lazydelta', True))
511 self._lazydeltabase = False
494 self._lazydeltabase = False
512 if self._lazydelta:
495 if self._lazydelta:
513 self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
496 self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
514 if b'compengine' in opts:
497 if b'compengine' in opts:
515 self._compengine = opts[b'compengine']
498 self._compengine = opts[b'compengine']
516 if b'zlib.level' in opts:
499 if b'zlib.level' in opts:
517 self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
500 self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
518 if b'zstd.level' in opts:
501 if b'zstd.level' in opts:
519 self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
502 self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
520 if b'maxdeltachainspan' in opts:
503 if b'maxdeltachainspan' in opts:
521 self._maxdeltachainspan = opts[b'maxdeltachainspan']
504 self._maxdeltachainspan = opts[b'maxdeltachainspan']
522 if self._mmaplargeindex and b'mmapindexthreshold' in opts:
505 if self._mmaplargeindex and b'mmapindexthreshold' in opts:
523 mmapindexthreshold = opts[b'mmapindexthreshold']
506 mmapindexthreshold = opts[b'mmapindexthreshold']
524 self.hassidedata = bool(opts.get(b'side-data', False))
507 self.hassidedata = bool(opts.get(b'side-data', False))
525 self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
508 self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
526 withsparseread = bool(opts.get(b'with-sparse-read', False))
509 withsparseread = bool(opts.get(b'with-sparse-read', False))
527 # sparse-revlog forces sparse-read
510 # sparse-revlog forces sparse-read
528 self._withsparseread = self._sparserevlog or withsparseread
511 self._withsparseread = self._sparserevlog or withsparseread
529 if b'sparse-read-density-threshold' in opts:
512 if b'sparse-read-density-threshold' in opts:
530 self._srdensitythreshold = opts[b'sparse-read-density-threshold']
513 self._srdensitythreshold = opts[b'sparse-read-density-threshold']
531 if b'sparse-read-min-gap-size' in opts:
514 if b'sparse-read-min-gap-size' in opts:
532 self._srmingapsize = opts[b'sparse-read-min-gap-size']
515 self._srmingapsize = opts[b'sparse-read-min-gap-size']
533 if opts.get(b'enableellipsis'):
516 if opts.get(b'enableellipsis'):
534 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
517 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
535
518
536 # revlog v0 doesn't have flag processors
519 # revlog v0 doesn't have flag processors
537 for flag, processor in pycompat.iteritems(
520 for flag, processor in pycompat.iteritems(
538 opts.get(b'flagprocessors', {})
521 opts.get(b'flagprocessors', {})
539 ):
522 ):
540 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
523 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
541
524
542 if self._chunkcachesize <= 0:
525 if self._chunkcachesize <= 0:
543 raise error.RevlogError(
526 raise error.RevlogError(
544 _(b'revlog chunk cache size %r is not greater than 0')
527 _(b'revlog chunk cache size %r is not greater than 0')
545 % self._chunkcachesize
528 % self._chunkcachesize
546 )
529 )
547 elif self._chunkcachesize & (self._chunkcachesize - 1):
530 elif self._chunkcachesize & (self._chunkcachesize - 1):
548 raise error.RevlogError(
531 raise error.RevlogError(
549 _(b'revlog chunk cache size %r is not a power of 2')
532 _(b'revlog chunk cache size %r is not a power of 2')
550 % self._chunkcachesize
533 % self._chunkcachesize
551 )
534 )
552
535
553 indexdata = b''
536 indexdata = b''
554 self._initempty = True
537 self._initempty = True
555 try:
538 try:
556 with self._indexfp() as f:
539 with self._indexfp() as f:
557 if (
540 if (
558 mmapindexthreshold is not None
541 mmapindexthreshold is not None
559 and self.opener.fstat(f).st_size >= mmapindexthreshold
542 and self.opener.fstat(f).st_size >= mmapindexthreshold
560 ):
543 ):
561 # TODO: should .close() to release resources without
544 # TODO: should .close() to release resources without
562 # relying on Python GC
545 # relying on Python GC
563 indexdata = util.buffer(util.mmapread(f))
546 indexdata = util.buffer(util.mmapread(f))
564 else:
547 else:
565 indexdata = f.read()
548 indexdata = f.read()
566 if len(indexdata) > 0:
549 if len(indexdata) > 0:
567 versionflags = INDEX_HEADER.unpack(indexdata[:4])[0]
550 versionflags = INDEX_HEADER.unpack(indexdata[:4])[0]
568 self._initempty = False
551 self._initempty = False
569 else:
552 else:
570 versionflags = newversionflags
553 versionflags = newversionflags
571 except IOError as inst:
554 except IOError as inst:
572 if inst.errno != errno.ENOENT:
555 if inst.errno != errno.ENOENT:
573 raise
556 raise
574
557
575 versionflags = newversionflags
558 versionflags = newversionflags
576
559
577 self.version = versionflags
560 self.version = versionflags
578
561
579 flags = versionflags & ~0xFFFF
562 flags = versionflags & ~0xFFFF
580 fmt = versionflags & 0xFFFF
563 fmt = versionflags & 0xFFFF
581
564
582 if fmt == REVLOGV0:
565 if fmt == REVLOGV0:
583 if flags:
566 if flags:
584 raise error.RevlogError(
567 raise error.RevlogError(
585 _(b'unknown flags (%#04x) in version %d revlog %s')
568 _(b'unknown flags (%#04x) in version %d revlog %s')
586 % (flags >> 16, fmt, self.indexfile)
569 % (flags >> 16, fmt, self.indexfile)
587 )
570 )
588
571
589 self._inline = False
572 self._inline = False
590 self._generaldelta = False
573 self._generaldelta = False
591
574
592 elif fmt == REVLOGV1:
575 elif fmt == REVLOGV1:
593 if flags & ~REVLOGV1_FLAGS:
576 if flags & ~REVLOGV1_FLAGS:
594 raise error.RevlogError(
577 raise error.RevlogError(
595 _(b'unknown flags (%#04x) in version %d revlog %s')
578 _(b'unknown flags (%#04x) in version %d revlog %s')
596 % (flags >> 16, fmt, self.indexfile)
579 % (flags >> 16, fmt, self.indexfile)
597 )
580 )
598
581
599 self._inline = versionflags & FLAG_INLINE_DATA
582 self._inline = versionflags & FLAG_INLINE_DATA
600 self._generaldelta = versionflags & FLAG_GENERALDELTA
583 self._generaldelta = versionflags & FLAG_GENERALDELTA
601
584
602 elif fmt == REVLOGV2:
585 elif fmt == REVLOGV2:
603 if flags & ~REVLOGV2_FLAGS:
586 if flags & ~REVLOGV2_FLAGS:
604 raise error.RevlogError(
587 raise error.RevlogError(
605 _(b'unknown flags (%#04x) in version %d revlog %s')
588 _(b'unknown flags (%#04x) in version %d revlog %s')
606 % (flags >> 16, fmt, self.indexfile)
589 % (flags >> 16, fmt, self.indexfile)
607 )
590 )
608
591
609 # There is a bug in the transaction handling when going from an
592 # There is a bug in the transaction handling when going from an
610 # inline revlog to a separate index and data file. Turn it off until
593 # inline revlog to a separate index and data file. Turn it off until
611 # it's fixed, since v2 revlogs sometimes get rewritten on exchange.
594 # it's fixed, since v2 revlogs sometimes get rewritten on exchange.
612 # See issue6485
595 # See issue6485
613 self._inline = False
596 self._inline = False
614 # generaldelta implied by version 2 revlogs.
597 # generaldelta implied by version 2 revlogs.
615 self._generaldelta = True
598 self._generaldelta = True
616
599
617 else:
600 else:
618 raise error.RevlogError(
601 raise error.RevlogError(
619 _(b'unknown version (%d) in revlog %s') % (fmt, self.indexfile)
602 _(b'unknown version (%d) in revlog %s') % (fmt, self.indexfile)
620 )
603 )
621
604
622 self.nodeconstants = sha1nodeconstants
605 self.nodeconstants = sha1nodeconstants
623 self.nullid = self.nodeconstants.nullid
606 self.nullid = self.nodeconstants.nullid
624
607
625 # sparse-revlog can't be on without general-delta (issue6056)
608 # sparse-revlog can't be on without general-delta (issue6056)
626 if not self._generaldelta:
609 if not self._generaldelta:
627 self._sparserevlog = False
610 self._sparserevlog = False
628
611
629 self._storedeltachains = True
612 self._storedeltachains = True
630
613
631 devel_nodemap = (
614 devel_nodemap = (
632 self.nodemap_file
615 self.nodemap_file
633 and opts.get(b'devel-force-nodemap', False)
616 and opts.get(b'devel-force-nodemap', False)
634 and NodemapRevlogIO is not None
617 and NodemapRevlogIO is not None
635 )
618 )
636
619
637 use_rust_index = False
620 use_rust_index = False
638 if rustrevlog is not None:
621 if rustrevlog is not None:
639 if self.nodemap_file is not None:
622 if self.nodemap_file is not None:
640 use_rust_index = True
623 use_rust_index = True
641 else:
624 else:
642 use_rust_index = self.opener.options.get(b'rust.index')
625 use_rust_index = self.opener.options.get(b'rust.index')
643
626
644 self._io = revlogio()
627 self._io = revlogio()
645 if self.version == REVLOGV0:
628 if self.version == REVLOGV0:
646 self._io = revlogoldio()
629 self._io = revlogoldio()
647 elif fmt == REVLOGV2:
630 elif fmt == REVLOGV2:
648 self._io = revlogv2io()
631 self._io = revlogv2io()
649 elif devel_nodemap:
632 elif devel_nodemap:
650 self._io = NodemapRevlogIO()
633 self._io = NodemapRevlogIO()
651 elif use_rust_index:
634 elif use_rust_index:
652 self._io = rustrevlogio()
635 self._io = rustrevlogio()
653 try:
636 try:
654 d = self._io.parseindex(indexdata, self._inline)
637 d = self._io.parseindex(indexdata, self._inline)
655 index, _chunkcache = d
638 index, _chunkcache = d
656 use_nodemap = (
639 use_nodemap = (
657 not self._inline
640 not self._inline
658 and self.nodemap_file is not None
641 and self.nodemap_file is not None
659 and util.safehasattr(index, 'update_nodemap_data')
642 and util.safehasattr(index, 'update_nodemap_data')
660 )
643 )
661 if use_nodemap:
644 if use_nodemap:
662 nodemap_data = nodemaputil.persisted_data(self)
645 nodemap_data = nodemaputil.persisted_data(self)
663 if nodemap_data is not None:
646 if nodemap_data is not None:
664 docket = nodemap_data[0]
647 docket = nodemap_data[0]
665 if (
648 if (
666 len(d[0]) > docket.tip_rev
649 len(d[0]) > docket.tip_rev
667 and d[0][docket.tip_rev][7] == docket.tip_node
650 and d[0][docket.tip_rev][7] == docket.tip_node
668 ):
651 ):
669 # no changelog tampering
652 # no changelog tampering
670 self._nodemap_docket = docket
653 self._nodemap_docket = docket
671 index.update_nodemap_data(*nodemap_data)
654 index.update_nodemap_data(*nodemap_data)
672 except (ValueError, IndexError):
655 except (ValueError, IndexError):
673 raise error.RevlogError(
656 raise error.RevlogError(
674 _(b"index %s is corrupted") % self.indexfile
657 _(b"index %s is corrupted") % self.indexfile
675 )
658 )
676 self.index, self._chunkcache = d
659 self.index, self._chunkcache = d
677 if not self._chunkcache:
660 if not self._chunkcache:
678 self._chunkclear()
661 self._chunkclear()
679 # revnum -> (chain-length, sum-delta-length)
662 # revnum -> (chain-length, sum-delta-length)
680 self._chaininfocache = util.lrucachedict(500)
663 self._chaininfocache = util.lrucachedict(500)
681 # revlog header -> revlog compressor
664 # revlog header -> revlog compressor
682 self._decompressors = {}
665 self._decompressors = {}
683
666
684 @util.propertycache
667 @util.propertycache
685 def _compressor(self):
668 def _compressor(self):
686 engine = util.compengines[self._compengine]
669 engine = util.compengines[self._compengine]
687 return engine.revlogcompressor(self._compengineopts)
670 return engine.revlogcompressor(self._compengineopts)
688
671
689 def _indexfp(self, mode=b'r'):
672 def _indexfp(self, mode=b'r'):
690 """file object for the revlog's index file"""
673 """file object for the revlog's index file"""
691 args = {'mode': mode}
674 args = {'mode': mode}
692 if mode != b'r':
675 if mode != b'r':
693 args['checkambig'] = self._checkambig
676 args['checkambig'] = self._checkambig
694 if mode == b'w':
677 if mode == b'w':
695 args['atomictemp'] = True
678 args['atomictemp'] = True
696 return self.opener(self.indexfile, **args)
679 return self.opener(self.indexfile, **args)
697
680
698 def _datafp(self, mode=b'r'):
681 def _datafp(self, mode=b'r'):
699 """file object for the revlog's data file"""
682 """file object for the revlog's data file"""
700 return self.opener(self.datafile, mode=mode)
683 return self.opener(self.datafile, mode=mode)
701
684
702 @contextlib.contextmanager
685 @contextlib.contextmanager
703 def _datareadfp(self, existingfp=None):
686 def _datareadfp(self, existingfp=None):
704 """file object suitable to read data"""
687 """file object suitable to read data"""
705 # Use explicit file handle, if given.
688 # Use explicit file handle, if given.
706 if existingfp is not None:
689 if existingfp is not None:
707 yield existingfp
690 yield existingfp
708
691
709 # Use a file handle being actively used for writes, if available.
692 # Use a file handle being actively used for writes, if available.
710 # There is some danger to doing this because reads will seek the
693 # There is some danger to doing this because reads will seek the
711 # file. However, _writeentry() performs a SEEK_END before all writes,
694 # file. However, _writeentry() performs a SEEK_END before all writes,
712 # so we should be safe.
695 # so we should be safe.
713 elif self._writinghandles:
696 elif self._writinghandles:
714 if self._inline:
697 if self._inline:
715 yield self._writinghandles[0]
698 yield self._writinghandles[0]
716 else:
699 else:
717 yield self._writinghandles[1]
700 yield self._writinghandles[1]
718
701
719 # Otherwise open a new file handle.
702 # Otherwise open a new file handle.
720 else:
703 else:
721 if self._inline:
704 if self._inline:
722 func = self._indexfp
705 func = self._indexfp
723 else:
706 else:
724 func = self._datafp
707 func = self._datafp
725 with func() as fp:
708 with func() as fp:
726 yield fp
709 yield fp
727
710
728 def tiprev(self):
711 def tiprev(self):
729 return len(self.index) - 1
712 return len(self.index) - 1
730
713
731 def tip(self):
714 def tip(self):
732 return self.node(self.tiprev())
715 return self.node(self.tiprev())
733
716
734 def __contains__(self, rev):
717 def __contains__(self, rev):
735 return 0 <= rev < len(self)
718 return 0 <= rev < len(self)
736
719
737 def __len__(self):
720 def __len__(self):
738 return len(self.index)
721 return len(self.index)
739
722
740 def __iter__(self):
723 def __iter__(self):
741 return iter(pycompat.xrange(len(self)))
724 return iter(pycompat.xrange(len(self)))
742
725
743 def revs(self, start=0, stop=None):
726 def revs(self, start=0, stop=None):
744 """iterate over all rev in this revlog (from start to stop)"""
727 """iterate over all rev in this revlog (from start to stop)"""
745 return storageutil.iterrevs(len(self), start=start, stop=stop)
728 return storageutil.iterrevs(len(self), start=start, stop=stop)
746
729
747 @property
730 @property
748 def nodemap(self):
731 def nodemap(self):
749 msg = (
732 msg = (
750 b"revlog.nodemap is deprecated, "
733 b"revlog.nodemap is deprecated, "
751 b"use revlog.index.[has_node|rev|get_rev]"
734 b"use revlog.index.[has_node|rev|get_rev]"
752 )
735 )
753 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
736 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
754 return self.index.nodemap
737 return self.index.nodemap
755
738
756 @property
739 @property
757 def _nodecache(self):
740 def _nodecache(self):
758 msg = b"revlog._nodecache is deprecated, use revlog.index.nodemap"
741 msg = b"revlog._nodecache is deprecated, use revlog.index.nodemap"
759 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
742 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
760 return self.index.nodemap
743 return self.index.nodemap
761
744
762 def hasnode(self, node):
745 def hasnode(self, node):
763 try:
746 try:
764 self.rev(node)
747 self.rev(node)
765 return True
748 return True
766 except KeyError:
749 except KeyError:
767 return False
750 return False
768
751
769 def candelta(self, baserev, rev):
752 def candelta(self, baserev, rev):
770 """whether two revisions (baserev, rev) can be delta-ed or not"""
753 """whether two revisions (baserev, rev) can be delta-ed or not"""
771 # Disable delta if either rev requires a content-changing flag
754 # Disable delta if either rev requires a content-changing flag
772 # processor (ex. LFS). This is because such flag processor can alter
755 # processor (ex. LFS). This is because such flag processor can alter
773 # the rawtext content that the delta will be based on, and two clients
756 # the rawtext content that the delta will be based on, and two clients
774 # could have a same revlog node with different flags (i.e. different
757 # could have a same revlog node with different flags (i.e. different
775 # rawtext contents) and the delta could be incompatible.
758 # rawtext contents) and the delta could be incompatible.
776 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
759 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
777 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
760 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
778 ):
761 ):
779 return False
762 return False
780 return True
763 return True
781
764
782 def update_caches(self, transaction):
765 def update_caches(self, transaction):
783 if self.nodemap_file is not None:
766 if self.nodemap_file is not None:
784 if transaction is None:
767 if transaction is None:
785 nodemaputil.update_persistent_nodemap(self)
768 nodemaputil.update_persistent_nodemap(self)
786 else:
769 else:
787 nodemaputil.setup_persistent_nodemap(transaction, self)
770 nodemaputil.setup_persistent_nodemap(transaction, self)
788
771
789 def clearcaches(self):
772 def clearcaches(self):
790 self._revisioncache = None
773 self._revisioncache = None
791 self._chainbasecache.clear()
774 self._chainbasecache.clear()
792 self._chunkcache = (0, b'')
775 self._chunkcache = (0, b'')
793 self._pcache = {}
776 self._pcache = {}
794 self._nodemap_docket = None
777 self._nodemap_docket = None
795 self.index.clearcaches()
778 self.index.clearcaches()
796 # The python code is the one responsible for validating the docket, we
779 # The python code is the one responsible for validating the docket, we
797 # end up having to refresh it here.
780 # end up having to refresh it here.
798 use_nodemap = (
781 use_nodemap = (
799 not self._inline
782 not self._inline
800 and self.nodemap_file is not None
783 and self.nodemap_file is not None
801 and util.safehasattr(self.index, 'update_nodemap_data')
784 and util.safehasattr(self.index, 'update_nodemap_data')
802 )
785 )
803 if use_nodemap:
786 if use_nodemap:
804 nodemap_data = nodemaputil.persisted_data(self)
787 nodemap_data = nodemaputil.persisted_data(self)
805 if nodemap_data is not None:
788 if nodemap_data is not None:
806 self._nodemap_docket = nodemap_data[0]
789 self._nodemap_docket = nodemap_data[0]
807 self.index.update_nodemap_data(*nodemap_data)
790 self.index.update_nodemap_data(*nodemap_data)
808
791
809 def rev(self, node):
792 def rev(self, node):
810 try:
793 try:
811 return self.index.rev(node)
794 return self.index.rev(node)
812 except TypeError:
795 except TypeError:
813 raise
796 raise
814 except error.RevlogError:
797 except error.RevlogError:
815 # parsers.c radix tree lookup failed
798 # parsers.c radix tree lookup failed
816 if (
799 if (
817 node == self.nodeconstants.wdirid
800 node == self.nodeconstants.wdirid
818 or node in self.nodeconstants.wdirfilenodeids
801 or node in self.nodeconstants.wdirfilenodeids
819 ):
802 ):
820 raise error.WdirUnsupported
803 raise error.WdirUnsupported
821 raise error.LookupError(node, self.indexfile, _(b'no node'))
804 raise error.LookupError(node, self.indexfile, _(b'no node'))
822
805
823 # Accessors for index entries.
806 # Accessors for index entries.
824
807
825 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
808 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
826 # are flags.
809 # are flags.
827 def start(self, rev):
810 def start(self, rev):
828 return int(self.index[rev][0] >> 16)
811 return int(self.index[rev][0] >> 16)
829
812
830 def flags(self, rev):
813 def flags(self, rev):
831 return self.index[rev][0] & 0xFFFF
814 return self.index[rev][0] & 0xFFFF
832
815
833 def length(self, rev):
816 def length(self, rev):
834 return self.index[rev][1]
817 return self.index[rev][1]
835
818
836 def sidedata_length(self, rev):
819 def sidedata_length(self, rev):
837 if self.version & 0xFFFF != REVLOGV2:
820 if self.version & 0xFFFF != REVLOGV2:
838 return 0
821 return 0
839 return self.index[rev][9]
822 return self.index[rev][9]
840
823
841 def rawsize(self, rev):
824 def rawsize(self, rev):
842 """return the length of the uncompressed text for a given revision"""
825 """return the length of the uncompressed text for a given revision"""
843 l = self.index[rev][2]
826 l = self.index[rev][2]
844 if l >= 0:
827 if l >= 0:
845 return l
828 return l
846
829
847 t = self.rawdata(rev)
830 t = self.rawdata(rev)
848 return len(t)
831 return len(t)
849
832
850 def size(self, rev):
833 def size(self, rev):
851 """length of non-raw text (processed by a "read" flag processor)"""
834 """length of non-raw text (processed by a "read" flag processor)"""
852 # fast path: if no "read" flag processor could change the content,
835 # fast path: if no "read" flag processor could change the content,
853 # size is rawsize. note: ELLIPSIS is known to not change the content.
836 # size is rawsize. note: ELLIPSIS is known to not change the content.
854 flags = self.flags(rev)
837 flags = self.flags(rev)
855 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
838 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
856 return self.rawsize(rev)
839 return self.rawsize(rev)
857
840
858 return len(self.revision(rev, raw=False))
841 return len(self.revision(rev, raw=False))
859
842
860 def chainbase(self, rev):
843 def chainbase(self, rev):
861 base = self._chainbasecache.get(rev)
844 base = self._chainbasecache.get(rev)
862 if base is not None:
845 if base is not None:
863 return base
846 return base
864
847
865 index = self.index
848 index = self.index
866 iterrev = rev
849 iterrev = rev
867 base = index[iterrev][3]
850 base = index[iterrev][3]
868 while base != iterrev:
851 while base != iterrev:
869 iterrev = base
852 iterrev = base
870 base = index[iterrev][3]
853 base = index[iterrev][3]
871
854
872 self._chainbasecache[rev] = base
855 self._chainbasecache[rev] = base
873 return base
856 return base
874
857
875 def linkrev(self, rev):
858 def linkrev(self, rev):
876 return self.index[rev][4]
859 return self.index[rev][4]
877
860
878 def parentrevs(self, rev):
861 def parentrevs(self, rev):
879 try:
862 try:
880 entry = self.index[rev]
863 entry = self.index[rev]
881 except IndexError:
864 except IndexError:
882 if rev == wdirrev:
865 if rev == wdirrev:
883 raise error.WdirUnsupported
866 raise error.WdirUnsupported
884 raise
867 raise
885 if entry[5] == nullrev:
868 if entry[5] == nullrev:
886 return entry[6], entry[5]
869 return entry[6], entry[5]
887 else:
870 else:
888 return entry[5], entry[6]
871 return entry[5], entry[6]
889
872
890 # fast parentrevs(rev) where rev isn't filtered
873 # fast parentrevs(rev) where rev isn't filtered
891 _uncheckedparentrevs = parentrevs
874 _uncheckedparentrevs = parentrevs
892
875
893 def node(self, rev):
876 def node(self, rev):
894 try:
877 try:
895 return self.index[rev][7]
878 return self.index[rev][7]
896 except IndexError:
879 except IndexError:
897 if rev == wdirrev:
880 if rev == wdirrev:
898 raise error.WdirUnsupported
881 raise error.WdirUnsupported
899 raise
882 raise
900
883
901 # Derived from index values.
884 # Derived from index values.
902
885
903 def end(self, rev):
886 def end(self, rev):
904 return self.start(rev) + self.length(rev)
887 return self.start(rev) + self.length(rev)
905
888
906 def parents(self, node):
889 def parents(self, node):
907 i = self.index
890 i = self.index
908 d = i[self.rev(node)]
891 d = i[self.rev(node)]
909 # inline node() to avoid function call overhead
892 # inline node() to avoid function call overhead
910 if d[5] == self.nullid:
893 if d[5] == self.nullid:
911 return i[d[6]][7], i[d[5]][7]
894 return i[d[6]][7], i[d[5]][7]
912 else:
895 else:
913 return i[d[5]][7], i[d[6]][7]
896 return i[d[5]][7], i[d[6]][7]
914
897
915 def chainlen(self, rev):
898 def chainlen(self, rev):
916 return self._chaininfo(rev)[0]
899 return self._chaininfo(rev)[0]
917
900
918 def _chaininfo(self, rev):
901 def _chaininfo(self, rev):
919 chaininfocache = self._chaininfocache
902 chaininfocache = self._chaininfocache
920 if rev in chaininfocache:
903 if rev in chaininfocache:
921 return chaininfocache[rev]
904 return chaininfocache[rev]
922 index = self.index
905 index = self.index
923 generaldelta = self._generaldelta
906 generaldelta = self._generaldelta
924 iterrev = rev
907 iterrev = rev
925 e = index[iterrev]
908 e = index[iterrev]
926 clen = 0
909 clen = 0
927 compresseddeltalen = 0
910 compresseddeltalen = 0
928 while iterrev != e[3]:
911 while iterrev != e[3]:
929 clen += 1
912 clen += 1
930 compresseddeltalen += e[1]
913 compresseddeltalen += e[1]
931 if generaldelta:
914 if generaldelta:
932 iterrev = e[3]
915 iterrev = e[3]
933 else:
916 else:
934 iterrev -= 1
917 iterrev -= 1
935 if iterrev in chaininfocache:
918 if iterrev in chaininfocache:
936 t = chaininfocache[iterrev]
919 t = chaininfocache[iterrev]
937 clen += t[0]
920 clen += t[0]
938 compresseddeltalen += t[1]
921 compresseddeltalen += t[1]
939 break
922 break
940 e = index[iterrev]
923 e = index[iterrev]
941 else:
924 else:
942 # Add text length of base since decompressing that also takes
925 # Add text length of base since decompressing that also takes
943 # work. For cache hits the length is already included.
926 # work. For cache hits the length is already included.
944 compresseddeltalen += e[1]
927 compresseddeltalen += e[1]
945 r = (clen, compresseddeltalen)
928 r = (clen, compresseddeltalen)
946 chaininfocache[rev] = r
929 chaininfocache[rev] = r
947 return r
930 return r
948
931
949 def _deltachain(self, rev, stoprev=None):
932 def _deltachain(self, rev, stoprev=None):
950 """Obtain the delta chain for a revision.
933 """Obtain the delta chain for a revision.
951
934
952 ``stoprev`` specifies a revision to stop at. If not specified, we
935 ``stoprev`` specifies a revision to stop at. If not specified, we
953 stop at the base of the chain.
936 stop at the base of the chain.
954
937
955 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
938 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
956 revs in ascending order and ``stopped`` is a bool indicating whether
939 revs in ascending order and ``stopped`` is a bool indicating whether
957 ``stoprev`` was hit.
940 ``stoprev`` was hit.
958 """
941 """
959 # Try C implementation.
942 # Try C implementation.
960 try:
943 try:
961 return self.index.deltachain(rev, stoprev, self._generaldelta)
944 return self.index.deltachain(rev, stoprev, self._generaldelta)
962 except AttributeError:
945 except AttributeError:
963 pass
946 pass
964
947
965 chain = []
948 chain = []
966
949
967 # Alias to prevent attribute lookup in tight loop.
950 # Alias to prevent attribute lookup in tight loop.
968 index = self.index
951 index = self.index
969 generaldelta = self._generaldelta
952 generaldelta = self._generaldelta
970
953
971 iterrev = rev
954 iterrev = rev
972 e = index[iterrev]
955 e = index[iterrev]
973 while iterrev != e[3] and iterrev != stoprev:
956 while iterrev != e[3] and iterrev != stoprev:
974 chain.append(iterrev)
957 chain.append(iterrev)
975 if generaldelta:
958 if generaldelta:
976 iterrev = e[3]
959 iterrev = e[3]
977 else:
960 else:
978 iterrev -= 1
961 iterrev -= 1
979 e = index[iterrev]
962 e = index[iterrev]
980
963
981 if iterrev == stoprev:
964 if iterrev == stoprev:
982 stopped = True
965 stopped = True
983 else:
966 else:
984 chain.append(iterrev)
967 chain.append(iterrev)
985 stopped = False
968 stopped = False
986
969
987 chain.reverse()
970 chain.reverse()
988 return chain, stopped
971 return chain, stopped
989
972
990 def ancestors(self, revs, stoprev=0, inclusive=False):
973 def ancestors(self, revs, stoprev=0, inclusive=False):
991 """Generate the ancestors of 'revs' in reverse revision order.
974 """Generate the ancestors of 'revs' in reverse revision order.
992 Does not generate revs lower than stoprev.
975 Does not generate revs lower than stoprev.
993
976
994 See the documentation for ancestor.lazyancestors for more details."""
977 See the documentation for ancestor.lazyancestors for more details."""
995
978
996 # first, make sure start revisions aren't filtered
979 # first, make sure start revisions aren't filtered
997 revs = list(revs)
980 revs = list(revs)
998 checkrev = self.node
981 checkrev = self.node
999 for r in revs:
982 for r in revs:
1000 checkrev(r)
983 checkrev(r)
1001 # and we're sure ancestors aren't filtered as well
984 # and we're sure ancestors aren't filtered as well
1002
985
1003 if rustancestor is not None:
986 if rustancestor is not None:
1004 lazyancestors = rustancestor.LazyAncestors
987 lazyancestors = rustancestor.LazyAncestors
1005 arg = self.index
988 arg = self.index
1006 else:
989 else:
1007 lazyancestors = ancestor.lazyancestors
990 lazyancestors = ancestor.lazyancestors
1008 arg = self._uncheckedparentrevs
991 arg = self._uncheckedparentrevs
1009 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
992 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
1010
993
1011 def descendants(self, revs):
994 def descendants(self, revs):
1012 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
995 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
1013
996
1014 def findcommonmissing(self, common=None, heads=None):
997 def findcommonmissing(self, common=None, heads=None):
1015 """Return a tuple of the ancestors of common and the ancestors of heads
998 """Return a tuple of the ancestors of common and the ancestors of heads
1016 that are not ancestors of common. In revset terminology, we return the
999 that are not ancestors of common. In revset terminology, we return the
1017 tuple:
1000 tuple:
1018
1001
1019 ::common, (::heads) - (::common)
1002 ::common, (::heads) - (::common)
1020
1003
1021 The list is sorted by revision number, meaning it is
1004 The list is sorted by revision number, meaning it is
1022 topologically sorted.
1005 topologically sorted.
1023
1006
1024 'heads' and 'common' are both lists of node IDs. If heads is
1007 'heads' and 'common' are both lists of node IDs. If heads is
1025 not supplied, uses all of the revlog's heads. If common is not
1008 not supplied, uses all of the revlog's heads. If common is not
1026 supplied, uses nullid."""
1009 supplied, uses nullid."""
1027 if common is None:
1010 if common is None:
1028 common = [self.nullid]
1011 common = [self.nullid]
1029 if heads is None:
1012 if heads is None:
1030 heads = self.heads()
1013 heads = self.heads()
1031
1014
1032 common = [self.rev(n) for n in common]
1015 common = [self.rev(n) for n in common]
1033 heads = [self.rev(n) for n in heads]
1016 heads = [self.rev(n) for n in heads]
1034
1017
1035 # we want the ancestors, but inclusive
1018 # we want the ancestors, but inclusive
1036 class lazyset(object):
1019 class lazyset(object):
1037 def __init__(self, lazyvalues):
1020 def __init__(self, lazyvalues):
1038 self.addedvalues = set()
1021 self.addedvalues = set()
1039 self.lazyvalues = lazyvalues
1022 self.lazyvalues = lazyvalues
1040
1023
1041 def __contains__(self, value):
1024 def __contains__(self, value):
1042 return value in self.addedvalues or value in self.lazyvalues
1025 return value in self.addedvalues or value in self.lazyvalues
1043
1026
1044 def __iter__(self):
1027 def __iter__(self):
1045 added = self.addedvalues
1028 added = self.addedvalues
1046 for r in added:
1029 for r in added:
1047 yield r
1030 yield r
1048 for r in self.lazyvalues:
1031 for r in self.lazyvalues:
1049 if not r in added:
1032 if not r in added:
1050 yield r
1033 yield r
1051
1034
1052 def add(self, value):
1035 def add(self, value):
1053 self.addedvalues.add(value)
1036 self.addedvalues.add(value)
1054
1037
1055 def update(self, values):
1038 def update(self, values):
1056 self.addedvalues.update(values)
1039 self.addedvalues.update(values)
1057
1040
1058 has = lazyset(self.ancestors(common))
1041 has = lazyset(self.ancestors(common))
1059 has.add(nullrev)
1042 has.add(nullrev)
1060 has.update(common)
1043 has.update(common)
1061
1044
1062 # take all ancestors from heads that aren't in has
1045 # take all ancestors from heads that aren't in has
1063 missing = set()
1046 missing = set()
1064 visit = collections.deque(r for r in heads if r not in has)
1047 visit = collections.deque(r for r in heads if r not in has)
1065 while visit:
1048 while visit:
1066 r = visit.popleft()
1049 r = visit.popleft()
1067 if r in missing:
1050 if r in missing:
1068 continue
1051 continue
1069 else:
1052 else:
1070 missing.add(r)
1053 missing.add(r)
1071 for p in self.parentrevs(r):
1054 for p in self.parentrevs(r):
1072 if p not in has:
1055 if p not in has:
1073 visit.append(p)
1056 visit.append(p)
1074 missing = list(missing)
1057 missing = list(missing)
1075 missing.sort()
1058 missing.sort()
1076 return has, [self.node(miss) for miss in missing]
1059 return has, [self.node(miss) for miss in missing]
1077
1060
1078 def incrementalmissingrevs(self, common=None):
1061 def incrementalmissingrevs(self, common=None):
1079 """Return an object that can be used to incrementally compute the
1062 """Return an object that can be used to incrementally compute the
1080 revision numbers of the ancestors of arbitrary sets that are not
1063 revision numbers of the ancestors of arbitrary sets that are not
1081 ancestors of common. This is an ancestor.incrementalmissingancestors
1064 ancestors of common. This is an ancestor.incrementalmissingancestors
1082 object.
1065 object.
1083
1066
1084 'common' is a list of revision numbers. If common is not supplied, uses
1067 'common' is a list of revision numbers. If common is not supplied, uses
1085 nullrev.
1068 nullrev.
1086 """
1069 """
1087 if common is None:
1070 if common is None:
1088 common = [nullrev]
1071 common = [nullrev]
1089
1072
1090 if rustancestor is not None:
1073 if rustancestor is not None:
1091 return rustancestor.MissingAncestors(self.index, common)
1074 return rustancestor.MissingAncestors(self.index, common)
1092 return ancestor.incrementalmissingancestors(self.parentrevs, common)
1075 return ancestor.incrementalmissingancestors(self.parentrevs, common)
1093
1076
1094 def findmissingrevs(self, common=None, heads=None):
1077 def findmissingrevs(self, common=None, heads=None):
1095 """Return the revision numbers of the ancestors of heads that
1078 """Return the revision numbers of the ancestors of heads that
1096 are not ancestors of common.
1079 are not ancestors of common.
1097
1080
1098 More specifically, return a list of revision numbers corresponding to
1081 More specifically, return a list of revision numbers corresponding to
1099 nodes N such that every N satisfies the following constraints:
1082 nodes N such that every N satisfies the following constraints:
1100
1083
1101 1. N is an ancestor of some node in 'heads'
1084 1. N is an ancestor of some node in 'heads'
1102 2. N is not an ancestor of any node in 'common'
1085 2. N is not an ancestor of any node in 'common'
1103
1086
1104 The list is sorted by revision number, meaning it is
1087 The list is sorted by revision number, meaning it is
1105 topologically sorted.
1088 topologically sorted.
1106
1089
1107 'heads' and 'common' are both lists of revision numbers. If heads is
1090 'heads' and 'common' are both lists of revision numbers. If heads is
1108 not supplied, uses all of the revlog's heads. If common is not
1091 not supplied, uses all of the revlog's heads. If common is not
1109 supplied, uses nullid."""
1092 supplied, uses nullid."""
1110 if common is None:
1093 if common is None:
1111 common = [nullrev]
1094 common = [nullrev]
1112 if heads is None:
1095 if heads is None:
1113 heads = self.headrevs()
1096 heads = self.headrevs()
1114
1097
1115 inc = self.incrementalmissingrevs(common=common)
1098 inc = self.incrementalmissingrevs(common=common)
1116 return inc.missingancestors(heads)
1099 return inc.missingancestors(heads)
1117
1100
1118 def findmissing(self, common=None, heads=None):
1101 def findmissing(self, common=None, heads=None):
1119 """Return the ancestors of heads that are not ancestors of common.
1102 """Return the ancestors of heads that are not ancestors of common.
1120
1103
1121 More specifically, return a list of nodes N such that every N
1104 More specifically, return a list of nodes N such that every N
1122 satisfies the following constraints:
1105 satisfies the following constraints:
1123
1106
1124 1. N is an ancestor of some node in 'heads'
1107 1. N is an ancestor of some node in 'heads'
1125 2. N is not an ancestor of any node in 'common'
1108 2. N is not an ancestor of any node in 'common'
1126
1109
1127 The list is sorted by revision number, meaning it is
1110 The list is sorted by revision number, meaning it is
1128 topologically sorted.
1111 topologically sorted.
1129
1112
1130 'heads' and 'common' are both lists of node IDs. If heads is
1113 'heads' and 'common' are both lists of node IDs. If heads is
1131 not supplied, uses all of the revlog's heads. If common is not
1114 not supplied, uses all of the revlog's heads. If common is not
1132 supplied, uses nullid."""
1115 supplied, uses nullid."""
1133 if common is None:
1116 if common is None:
1134 common = [self.nullid]
1117 common = [self.nullid]
1135 if heads is None:
1118 if heads is None:
1136 heads = self.heads()
1119 heads = self.heads()
1137
1120
1138 common = [self.rev(n) for n in common]
1121 common = [self.rev(n) for n in common]
1139 heads = [self.rev(n) for n in heads]
1122 heads = [self.rev(n) for n in heads]
1140
1123
1141 inc = self.incrementalmissingrevs(common=common)
1124 inc = self.incrementalmissingrevs(common=common)
1142 return [self.node(r) for r in inc.missingancestors(heads)]
1125 return [self.node(r) for r in inc.missingancestors(heads)]
1143
1126
1144 def nodesbetween(self, roots=None, heads=None):
1127 def nodesbetween(self, roots=None, heads=None):
1145 """Return a topological path from 'roots' to 'heads'.
1128 """Return a topological path from 'roots' to 'heads'.
1146
1129
1147 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1130 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1148 topologically sorted list of all nodes N that satisfy both of
1131 topologically sorted list of all nodes N that satisfy both of
1149 these constraints:
1132 these constraints:
1150
1133
1151 1. N is a descendant of some node in 'roots'
1134 1. N is a descendant of some node in 'roots'
1152 2. N is an ancestor of some node in 'heads'
1135 2. N is an ancestor of some node in 'heads'
1153
1136
1154 Every node is considered to be both a descendant and an ancestor
1137 Every node is considered to be both a descendant and an ancestor
1155 of itself, so every reachable node in 'roots' and 'heads' will be
1138 of itself, so every reachable node in 'roots' and 'heads' will be
1156 included in 'nodes'.
1139 included in 'nodes'.
1157
1140
1158 'outroots' is the list of reachable nodes in 'roots', i.e., the
1141 'outroots' is the list of reachable nodes in 'roots', i.e., the
1159 subset of 'roots' that is returned in 'nodes'. Likewise,
1142 subset of 'roots' that is returned in 'nodes'. Likewise,
1160 'outheads' is the subset of 'heads' that is also in 'nodes'.
1143 'outheads' is the subset of 'heads' that is also in 'nodes'.
1161
1144
1162 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1145 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1163 unspecified, uses nullid as the only root. If 'heads' is
1146 unspecified, uses nullid as the only root. If 'heads' is
1164 unspecified, uses list of all of the revlog's heads."""
1147 unspecified, uses list of all of the revlog's heads."""
1165 nonodes = ([], [], [])
1148 nonodes = ([], [], [])
1166 if roots is not None:
1149 if roots is not None:
1167 roots = list(roots)
1150 roots = list(roots)
1168 if not roots:
1151 if not roots:
1169 return nonodes
1152 return nonodes
1170 lowestrev = min([self.rev(n) for n in roots])
1153 lowestrev = min([self.rev(n) for n in roots])
1171 else:
1154 else:
1172 roots = [self.nullid] # Everybody's a descendant of nullid
1155 roots = [self.nullid] # Everybody's a descendant of nullid
1173 lowestrev = nullrev
1156 lowestrev = nullrev
1174 if (lowestrev == nullrev) and (heads is None):
1157 if (lowestrev == nullrev) and (heads is None):
1175 # We want _all_ the nodes!
1158 # We want _all_ the nodes!
1176 return (
1159 return (
1177 [self.node(r) for r in self],
1160 [self.node(r) for r in self],
1178 [self.nullid],
1161 [self.nullid],
1179 list(self.heads()),
1162 list(self.heads()),
1180 )
1163 )
1181 if heads is None:
1164 if heads is None:
1182 # All nodes are ancestors, so the latest ancestor is the last
1165 # All nodes are ancestors, so the latest ancestor is the last
1183 # node.
1166 # node.
1184 highestrev = len(self) - 1
1167 highestrev = len(self) - 1
1185 # Set ancestors to None to signal that every node is an ancestor.
1168 # Set ancestors to None to signal that every node is an ancestor.
1186 ancestors = None
1169 ancestors = None
1187 # Set heads to an empty dictionary for later discovery of heads
1170 # Set heads to an empty dictionary for later discovery of heads
1188 heads = {}
1171 heads = {}
1189 else:
1172 else:
1190 heads = list(heads)
1173 heads = list(heads)
1191 if not heads:
1174 if not heads:
1192 return nonodes
1175 return nonodes
1193 ancestors = set()
1176 ancestors = set()
1194 # Turn heads into a dictionary so we can remove 'fake' heads.
1177 # Turn heads into a dictionary so we can remove 'fake' heads.
1195 # Also, later we will be using it to filter out the heads we can't
1178 # Also, later we will be using it to filter out the heads we can't
1196 # find from roots.
1179 # find from roots.
1197 heads = dict.fromkeys(heads, False)
1180 heads = dict.fromkeys(heads, False)
1198 # Start at the top and keep marking parents until we're done.
1181 # Start at the top and keep marking parents until we're done.
1199 nodestotag = set(heads)
1182 nodestotag = set(heads)
1200 # Remember where the top was so we can use it as a limit later.
1183 # Remember where the top was so we can use it as a limit later.
1201 highestrev = max([self.rev(n) for n in nodestotag])
1184 highestrev = max([self.rev(n) for n in nodestotag])
1202 while nodestotag:
1185 while nodestotag:
1203 # grab a node to tag
1186 # grab a node to tag
1204 n = nodestotag.pop()
1187 n = nodestotag.pop()
1205 # Never tag nullid
1188 # Never tag nullid
1206 if n == self.nullid:
1189 if n == self.nullid:
1207 continue
1190 continue
1208 # A node's revision number represents its place in a
1191 # A node's revision number represents its place in a
1209 # topologically sorted list of nodes.
1192 # topologically sorted list of nodes.
1210 r = self.rev(n)
1193 r = self.rev(n)
1211 if r >= lowestrev:
1194 if r >= lowestrev:
1212 if n not in ancestors:
1195 if n not in ancestors:
1213 # If we are possibly a descendant of one of the roots
1196 # If we are possibly a descendant of one of the roots
1214 # and we haven't already been marked as an ancestor
1197 # and we haven't already been marked as an ancestor
1215 ancestors.add(n) # Mark as ancestor
1198 ancestors.add(n) # Mark as ancestor
1216 # Add non-nullid parents to list of nodes to tag.
1199 # Add non-nullid parents to list of nodes to tag.
1217 nodestotag.update(
1200 nodestotag.update(
1218 [p for p in self.parents(n) if p != self.nullid]
1201 [p for p in self.parents(n) if p != self.nullid]
1219 )
1202 )
1220 elif n in heads: # We've seen it before, is it a fake head?
1203 elif n in heads: # We've seen it before, is it a fake head?
1221 # So it is, real heads should not be the ancestors of
1204 # So it is, real heads should not be the ancestors of
1222 # any other heads.
1205 # any other heads.
1223 heads.pop(n)
1206 heads.pop(n)
1224 if not ancestors:
1207 if not ancestors:
1225 return nonodes
1208 return nonodes
1226 # Now that we have our set of ancestors, we want to remove any
1209 # Now that we have our set of ancestors, we want to remove any
1227 # roots that are not ancestors.
1210 # roots that are not ancestors.
1228
1211
1229 # If one of the roots was nullid, everything is included anyway.
1212 # If one of the roots was nullid, everything is included anyway.
1230 if lowestrev > nullrev:
1213 if lowestrev > nullrev:
1231 # But, since we weren't, let's recompute the lowest rev to not
1214 # But, since we weren't, let's recompute the lowest rev to not
1232 # include roots that aren't ancestors.
1215 # include roots that aren't ancestors.
1233
1216
1234 # Filter out roots that aren't ancestors of heads
1217 # Filter out roots that aren't ancestors of heads
1235 roots = [root for root in roots if root in ancestors]
1218 roots = [root for root in roots if root in ancestors]
1236 # Recompute the lowest revision
1219 # Recompute the lowest revision
1237 if roots:
1220 if roots:
1238 lowestrev = min([self.rev(root) for root in roots])
1221 lowestrev = min([self.rev(root) for root in roots])
1239 else:
1222 else:
1240 # No more roots? Return empty list
1223 # No more roots? Return empty list
1241 return nonodes
1224 return nonodes
1242 else:
1225 else:
1243 # We are descending from nullid, and don't need to care about
1226 # We are descending from nullid, and don't need to care about
1244 # any other roots.
1227 # any other roots.
1245 lowestrev = nullrev
1228 lowestrev = nullrev
1246 roots = [self.nullid]
1229 roots = [self.nullid]
1247 # Transform our roots list into a set.
1230 # Transform our roots list into a set.
1248 descendants = set(roots)
1231 descendants = set(roots)
1249 # Also, keep the original roots so we can filter out roots that aren't
1232 # Also, keep the original roots so we can filter out roots that aren't
1250 # 'real' roots (i.e. are descended from other roots).
1233 # 'real' roots (i.e. are descended from other roots).
1251 roots = descendants.copy()
1234 roots = descendants.copy()
1252 # Our topologically sorted list of output nodes.
1235 # Our topologically sorted list of output nodes.
1253 orderedout = []
1236 orderedout = []
1254 # Don't start at nullid since we don't want nullid in our output list,
1237 # Don't start at nullid since we don't want nullid in our output list,
1255 # and if nullid shows up in descendants, empty parents will look like
1238 # and if nullid shows up in descendants, empty parents will look like
1256 # they're descendants.
1239 # they're descendants.
1257 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1240 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1258 n = self.node(r)
1241 n = self.node(r)
1259 isdescendant = False
1242 isdescendant = False
1260 if lowestrev == nullrev: # Everybody is a descendant of nullid
1243 if lowestrev == nullrev: # Everybody is a descendant of nullid
1261 isdescendant = True
1244 isdescendant = True
1262 elif n in descendants:
1245 elif n in descendants:
1263 # n is already a descendant
1246 # n is already a descendant
1264 isdescendant = True
1247 isdescendant = True
1265 # This check only needs to be done here because all the roots
1248 # This check only needs to be done here because all the roots
1266 # will start being marked is descendants before the loop.
1249 # will start being marked is descendants before the loop.
1267 if n in roots:
1250 if n in roots:
1268 # If n was a root, check if it's a 'real' root.
1251 # If n was a root, check if it's a 'real' root.
1269 p = tuple(self.parents(n))
1252 p = tuple(self.parents(n))
1270 # If any of its parents are descendants, it's not a root.
1253 # If any of its parents are descendants, it's not a root.
1271 if (p[0] in descendants) or (p[1] in descendants):
1254 if (p[0] in descendants) or (p[1] in descendants):
1272 roots.remove(n)
1255 roots.remove(n)
1273 else:
1256 else:
1274 p = tuple(self.parents(n))
1257 p = tuple(self.parents(n))
1275 # A node is a descendant if either of its parents are
1258 # A node is a descendant if either of its parents are
1276 # descendants. (We seeded the dependents list with the roots
1259 # descendants. (We seeded the dependents list with the roots
1277 # up there, remember?)
1260 # up there, remember?)
1278 if (p[0] in descendants) or (p[1] in descendants):
1261 if (p[0] in descendants) or (p[1] in descendants):
1279 descendants.add(n)
1262 descendants.add(n)
1280 isdescendant = True
1263 isdescendant = True
1281 if isdescendant and ((ancestors is None) or (n in ancestors)):
1264 if isdescendant and ((ancestors is None) or (n in ancestors)):
1282 # Only include nodes that are both descendants and ancestors.
1265 # Only include nodes that are both descendants and ancestors.
1283 orderedout.append(n)
1266 orderedout.append(n)
1284 if (ancestors is not None) and (n in heads):
1267 if (ancestors is not None) and (n in heads):
1285 # We're trying to figure out which heads are reachable
1268 # We're trying to figure out which heads are reachable
1286 # from roots.
1269 # from roots.
1287 # Mark this head as having been reached
1270 # Mark this head as having been reached
1288 heads[n] = True
1271 heads[n] = True
1289 elif ancestors is None:
1272 elif ancestors is None:
1290 # Otherwise, we're trying to discover the heads.
1273 # Otherwise, we're trying to discover the heads.
1291 # Assume this is a head because if it isn't, the next step
1274 # Assume this is a head because if it isn't, the next step
1292 # will eventually remove it.
1275 # will eventually remove it.
1293 heads[n] = True
1276 heads[n] = True
1294 # But, obviously its parents aren't.
1277 # But, obviously its parents aren't.
1295 for p in self.parents(n):
1278 for p in self.parents(n):
1296 heads.pop(p, None)
1279 heads.pop(p, None)
1297 heads = [head for head, flag in pycompat.iteritems(heads) if flag]
1280 heads = [head for head, flag in pycompat.iteritems(heads) if flag]
1298 roots = list(roots)
1281 roots = list(roots)
1299 assert orderedout
1282 assert orderedout
1300 assert roots
1283 assert roots
1301 assert heads
1284 assert heads
1302 return (orderedout, roots, heads)
1285 return (orderedout, roots, heads)
1303
1286
1304 def headrevs(self, revs=None):
1287 def headrevs(self, revs=None):
1305 if revs is None:
1288 if revs is None:
1306 try:
1289 try:
1307 return self.index.headrevs()
1290 return self.index.headrevs()
1308 except AttributeError:
1291 except AttributeError:
1309 return self._headrevs()
1292 return self._headrevs()
1310 if rustdagop is not None:
1293 if rustdagop is not None:
1311 return rustdagop.headrevs(self.index, revs)
1294 return rustdagop.headrevs(self.index, revs)
1312 return dagop.headrevs(revs, self._uncheckedparentrevs)
1295 return dagop.headrevs(revs, self._uncheckedparentrevs)
1313
1296
1314 def computephases(self, roots):
1297 def computephases(self, roots):
1315 return self.index.computephasesmapsets(roots)
1298 return self.index.computephasesmapsets(roots)
1316
1299
1317 def _headrevs(self):
1300 def _headrevs(self):
1318 count = len(self)
1301 count = len(self)
1319 if not count:
1302 if not count:
1320 return [nullrev]
1303 return [nullrev]
1321 # we won't iter over filtered rev so nobody is a head at start
1304 # we won't iter over filtered rev so nobody is a head at start
1322 ishead = [0] * (count + 1)
1305 ishead = [0] * (count + 1)
1323 index = self.index
1306 index = self.index
1324 for r in self:
1307 for r in self:
1325 ishead[r] = 1 # I may be an head
1308 ishead[r] = 1 # I may be an head
1326 e = index[r]
1309 e = index[r]
1327 ishead[e[5]] = ishead[e[6]] = 0 # my parent are not
1310 ishead[e[5]] = ishead[e[6]] = 0 # my parent are not
1328 return [r for r, val in enumerate(ishead) if val]
1311 return [r for r, val in enumerate(ishead) if val]
1329
1312
1330 def heads(self, start=None, stop=None):
1313 def heads(self, start=None, stop=None):
1331 """return the list of all nodes that have no children
1314 """return the list of all nodes that have no children
1332
1315
1333 if start is specified, only heads that are descendants of
1316 if start is specified, only heads that are descendants of
1334 start will be returned
1317 start will be returned
1335 if stop is specified, it will consider all the revs from stop
1318 if stop is specified, it will consider all the revs from stop
1336 as if they had no children
1319 as if they had no children
1337 """
1320 """
1338 if start is None and stop is None:
1321 if start is None and stop is None:
1339 if not len(self):
1322 if not len(self):
1340 return [self.nullid]
1323 return [self.nullid]
1341 return [self.node(r) for r in self.headrevs()]
1324 return [self.node(r) for r in self.headrevs()]
1342
1325
1343 if start is None:
1326 if start is None:
1344 start = nullrev
1327 start = nullrev
1345 else:
1328 else:
1346 start = self.rev(start)
1329 start = self.rev(start)
1347
1330
1348 stoprevs = {self.rev(n) for n in stop or []}
1331 stoprevs = {self.rev(n) for n in stop or []}
1349
1332
1350 revs = dagop.headrevssubset(
1333 revs = dagop.headrevssubset(
1351 self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
1334 self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
1352 )
1335 )
1353
1336
1354 return [self.node(rev) for rev in revs]
1337 return [self.node(rev) for rev in revs]
1355
1338
1356 def children(self, node):
1339 def children(self, node):
1357 """find the children of a given node"""
1340 """find the children of a given node"""
1358 c = []
1341 c = []
1359 p = self.rev(node)
1342 p = self.rev(node)
1360 for r in self.revs(start=p + 1):
1343 for r in self.revs(start=p + 1):
1361 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1344 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1362 if prevs:
1345 if prevs:
1363 for pr in prevs:
1346 for pr in prevs:
1364 if pr == p:
1347 if pr == p:
1365 c.append(self.node(r))
1348 c.append(self.node(r))
1366 elif p == nullrev:
1349 elif p == nullrev:
1367 c.append(self.node(r))
1350 c.append(self.node(r))
1368 return c
1351 return c
1369
1352
1370 def commonancestorsheads(self, a, b):
1353 def commonancestorsheads(self, a, b):
1371 """calculate all the heads of the common ancestors of nodes a and b"""
1354 """calculate all the heads of the common ancestors of nodes a and b"""
1372 a, b = self.rev(a), self.rev(b)
1355 a, b = self.rev(a), self.rev(b)
1373 ancs = self._commonancestorsheads(a, b)
1356 ancs = self._commonancestorsheads(a, b)
1374 return pycompat.maplist(self.node, ancs)
1357 return pycompat.maplist(self.node, ancs)
1375
1358
1376 def _commonancestorsheads(self, *revs):
1359 def _commonancestorsheads(self, *revs):
1377 """calculate all the heads of the common ancestors of revs"""
1360 """calculate all the heads of the common ancestors of revs"""
1378 try:
1361 try:
1379 ancs = self.index.commonancestorsheads(*revs)
1362 ancs = self.index.commonancestorsheads(*revs)
1380 except (AttributeError, OverflowError): # C implementation failed
1363 except (AttributeError, OverflowError): # C implementation failed
1381 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1364 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1382 return ancs
1365 return ancs
1383
1366
1384 def isancestor(self, a, b):
1367 def isancestor(self, a, b):
1385 """return True if node a is an ancestor of node b
1368 """return True if node a is an ancestor of node b
1386
1369
1387 A revision is considered an ancestor of itself."""
1370 A revision is considered an ancestor of itself."""
1388 a, b = self.rev(a), self.rev(b)
1371 a, b = self.rev(a), self.rev(b)
1389 return self.isancestorrev(a, b)
1372 return self.isancestorrev(a, b)
1390
1373
1391 def isancestorrev(self, a, b):
1374 def isancestorrev(self, a, b):
1392 """return True if revision a is an ancestor of revision b
1375 """return True if revision a is an ancestor of revision b
1393
1376
1394 A revision is considered an ancestor of itself.
1377 A revision is considered an ancestor of itself.
1395
1378
1396 The implementation of this is trivial but the use of
1379 The implementation of this is trivial but the use of
1397 reachableroots is not."""
1380 reachableroots is not."""
1398 if a == nullrev:
1381 if a == nullrev:
1399 return True
1382 return True
1400 elif a == b:
1383 elif a == b:
1401 return True
1384 return True
1402 elif a > b:
1385 elif a > b:
1403 return False
1386 return False
1404 return bool(self.reachableroots(a, [b], [a], includepath=False))
1387 return bool(self.reachableroots(a, [b], [a], includepath=False))
1405
1388
1406 def reachableroots(self, minroot, heads, roots, includepath=False):
1389 def reachableroots(self, minroot, heads, roots, includepath=False):
1407 """return (heads(::(<roots> and <roots>::<heads>)))
1390 """return (heads(::(<roots> and <roots>::<heads>)))
1408
1391
1409 If includepath is True, return (<roots>::<heads>)."""
1392 If includepath is True, return (<roots>::<heads>)."""
1410 try:
1393 try:
1411 return self.index.reachableroots2(
1394 return self.index.reachableroots2(
1412 minroot, heads, roots, includepath
1395 minroot, heads, roots, includepath
1413 )
1396 )
1414 except AttributeError:
1397 except AttributeError:
1415 return dagop._reachablerootspure(
1398 return dagop._reachablerootspure(
1416 self.parentrevs, minroot, roots, heads, includepath
1399 self.parentrevs, minroot, roots, heads, includepath
1417 )
1400 )
1418
1401
1419 def ancestor(self, a, b):
1402 def ancestor(self, a, b):
1420 """calculate the "best" common ancestor of nodes a and b"""
1403 """calculate the "best" common ancestor of nodes a and b"""
1421
1404
1422 a, b = self.rev(a), self.rev(b)
1405 a, b = self.rev(a), self.rev(b)
1423 try:
1406 try:
1424 ancs = self.index.ancestors(a, b)
1407 ancs = self.index.ancestors(a, b)
1425 except (AttributeError, OverflowError):
1408 except (AttributeError, OverflowError):
1426 ancs = ancestor.ancestors(self.parentrevs, a, b)
1409 ancs = ancestor.ancestors(self.parentrevs, a, b)
1427 if ancs:
1410 if ancs:
1428 # choose a consistent winner when there's a tie
1411 # choose a consistent winner when there's a tie
1429 return min(map(self.node, ancs))
1412 return min(map(self.node, ancs))
1430 return self.nullid
1413 return self.nullid
1431
1414
1432 def _match(self, id):
1415 def _match(self, id):
1433 if isinstance(id, int):
1416 if isinstance(id, int):
1434 # rev
1417 # rev
1435 return self.node(id)
1418 return self.node(id)
1436 if len(id) == 20:
1419 if len(id) == 20:
1437 # possibly a binary node
1420 # possibly a binary node
1438 # odds of a binary node being all hex in ASCII are 1 in 10**25
1421 # odds of a binary node being all hex in ASCII are 1 in 10**25
1439 try:
1422 try:
1440 node = id
1423 node = id
1441 self.rev(node) # quick search the index
1424 self.rev(node) # quick search the index
1442 return node
1425 return node
1443 except error.LookupError:
1426 except error.LookupError:
1444 pass # may be partial hex id
1427 pass # may be partial hex id
1445 try:
1428 try:
1446 # str(rev)
1429 # str(rev)
1447 rev = int(id)
1430 rev = int(id)
1448 if b"%d" % rev != id:
1431 if b"%d" % rev != id:
1449 raise ValueError
1432 raise ValueError
1450 if rev < 0:
1433 if rev < 0:
1451 rev = len(self) + rev
1434 rev = len(self) + rev
1452 if rev < 0 or rev >= len(self):
1435 if rev < 0 or rev >= len(self):
1453 raise ValueError
1436 raise ValueError
1454 return self.node(rev)
1437 return self.node(rev)
1455 except (ValueError, OverflowError):
1438 except (ValueError, OverflowError):
1456 pass
1439 pass
1457 if len(id) == 40:
1440 if len(id) == 40:
1458 try:
1441 try:
1459 # a full hex nodeid?
1442 # a full hex nodeid?
1460 node = bin(id)
1443 node = bin(id)
1461 self.rev(node)
1444 self.rev(node)
1462 return node
1445 return node
1463 except (TypeError, error.LookupError):
1446 except (TypeError, error.LookupError):
1464 pass
1447 pass
1465
1448
1466 def _partialmatch(self, id):
1449 def _partialmatch(self, id):
1467 # we don't care wdirfilenodeids as they should be always full hash
1450 # we don't care wdirfilenodeids as they should be always full hash
1468 maybewdir = self.nodeconstants.wdirhex.startswith(id)
1451 maybewdir = self.nodeconstants.wdirhex.startswith(id)
1469 try:
1452 try:
1470 partial = self.index.partialmatch(id)
1453 partial = self.index.partialmatch(id)
1471 if partial and self.hasnode(partial):
1454 if partial and self.hasnode(partial):
1472 if maybewdir:
1455 if maybewdir:
1473 # single 'ff...' match in radix tree, ambiguous with wdir
1456 # single 'ff...' match in radix tree, ambiguous with wdir
1474 raise error.RevlogError
1457 raise error.RevlogError
1475 return partial
1458 return partial
1476 if maybewdir:
1459 if maybewdir:
1477 # no 'ff...' match in radix tree, wdir identified
1460 # no 'ff...' match in radix tree, wdir identified
1478 raise error.WdirUnsupported
1461 raise error.WdirUnsupported
1479 return None
1462 return None
1480 except error.RevlogError:
1463 except error.RevlogError:
1481 # parsers.c radix tree lookup gave multiple matches
1464 # parsers.c radix tree lookup gave multiple matches
1482 # fast path: for unfiltered changelog, radix tree is accurate
1465 # fast path: for unfiltered changelog, radix tree is accurate
1483 if not getattr(self, 'filteredrevs', None):
1466 if not getattr(self, 'filteredrevs', None):
1484 raise error.AmbiguousPrefixLookupError(
1467 raise error.AmbiguousPrefixLookupError(
1485 id, self.indexfile, _(b'ambiguous identifier')
1468 id, self.indexfile, _(b'ambiguous identifier')
1486 )
1469 )
1487 # fall through to slow path that filters hidden revisions
1470 # fall through to slow path that filters hidden revisions
1488 except (AttributeError, ValueError):
1471 except (AttributeError, ValueError):
1489 # we are pure python, or key was too short to search radix tree
1472 # we are pure python, or key was too short to search radix tree
1490 pass
1473 pass
1491
1474
1492 if id in self._pcache:
1475 if id in self._pcache:
1493 return self._pcache[id]
1476 return self._pcache[id]
1494
1477
1495 if len(id) <= 40:
1478 if len(id) <= 40:
1496 try:
1479 try:
1497 # hex(node)[:...]
1480 # hex(node)[:...]
1498 l = len(id) // 2 # grab an even number of digits
1481 l = len(id) // 2 # grab an even number of digits
1499 prefix = bin(id[: l * 2])
1482 prefix = bin(id[: l * 2])
1500 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1483 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1501 nl = [
1484 nl = [
1502 n for n in nl if hex(n).startswith(id) and self.hasnode(n)
1485 n for n in nl if hex(n).startswith(id) and self.hasnode(n)
1503 ]
1486 ]
1504 if self.nodeconstants.nullhex.startswith(id):
1487 if self.nodeconstants.nullhex.startswith(id):
1505 nl.append(self.nullid)
1488 nl.append(self.nullid)
1506 if len(nl) > 0:
1489 if len(nl) > 0:
1507 if len(nl) == 1 and not maybewdir:
1490 if len(nl) == 1 and not maybewdir:
1508 self._pcache[id] = nl[0]
1491 self._pcache[id] = nl[0]
1509 return nl[0]
1492 return nl[0]
1510 raise error.AmbiguousPrefixLookupError(
1493 raise error.AmbiguousPrefixLookupError(
1511 id, self.indexfile, _(b'ambiguous identifier')
1494 id, self.indexfile, _(b'ambiguous identifier')
1512 )
1495 )
1513 if maybewdir:
1496 if maybewdir:
1514 raise error.WdirUnsupported
1497 raise error.WdirUnsupported
1515 return None
1498 return None
1516 except TypeError:
1499 except TypeError:
1517 pass
1500 pass
1518
1501
1519 def lookup(self, id):
1502 def lookup(self, id):
1520 """locate a node based on:
1503 """locate a node based on:
1521 - revision number or str(revision number)
1504 - revision number or str(revision number)
1522 - nodeid or subset of hex nodeid
1505 - nodeid or subset of hex nodeid
1523 """
1506 """
1524 n = self._match(id)
1507 n = self._match(id)
1525 if n is not None:
1508 if n is not None:
1526 return n
1509 return n
1527 n = self._partialmatch(id)
1510 n = self._partialmatch(id)
1528 if n:
1511 if n:
1529 return n
1512 return n
1530
1513
1531 raise error.LookupError(id, self.indexfile, _(b'no match found'))
1514 raise error.LookupError(id, self.indexfile, _(b'no match found'))
1532
1515
1533 def shortest(self, node, minlength=1):
1516 def shortest(self, node, minlength=1):
1534 """Find the shortest unambiguous prefix that matches node."""
1517 """Find the shortest unambiguous prefix that matches node."""
1535
1518
1536 def isvalid(prefix):
1519 def isvalid(prefix):
1537 try:
1520 try:
1538 matchednode = self._partialmatch(prefix)
1521 matchednode = self._partialmatch(prefix)
1539 except error.AmbiguousPrefixLookupError:
1522 except error.AmbiguousPrefixLookupError:
1540 return False
1523 return False
1541 except error.WdirUnsupported:
1524 except error.WdirUnsupported:
1542 # single 'ff...' match
1525 # single 'ff...' match
1543 return True
1526 return True
1544 if matchednode is None:
1527 if matchednode is None:
1545 raise error.LookupError(node, self.indexfile, _(b'no node'))
1528 raise error.LookupError(node, self.indexfile, _(b'no node'))
1546 return True
1529 return True
1547
1530
1548 def maybewdir(prefix):
1531 def maybewdir(prefix):
1549 return all(c == b'f' for c in pycompat.iterbytestr(prefix))
1532 return all(c == b'f' for c in pycompat.iterbytestr(prefix))
1550
1533
1551 hexnode = hex(node)
1534 hexnode = hex(node)
1552
1535
1553 def disambiguate(hexnode, minlength):
1536 def disambiguate(hexnode, minlength):
1554 """Disambiguate against wdirid."""
1537 """Disambiguate against wdirid."""
1555 for length in range(minlength, len(hexnode) + 1):
1538 for length in range(minlength, len(hexnode) + 1):
1556 prefix = hexnode[:length]
1539 prefix = hexnode[:length]
1557 if not maybewdir(prefix):
1540 if not maybewdir(prefix):
1558 return prefix
1541 return prefix
1559
1542
1560 if not getattr(self, 'filteredrevs', None):
1543 if not getattr(self, 'filteredrevs', None):
1561 try:
1544 try:
1562 length = max(self.index.shortest(node), minlength)
1545 length = max(self.index.shortest(node), minlength)
1563 return disambiguate(hexnode, length)
1546 return disambiguate(hexnode, length)
1564 except error.RevlogError:
1547 except error.RevlogError:
1565 if node != self.nodeconstants.wdirid:
1548 if node != self.nodeconstants.wdirid:
1566 raise error.LookupError(node, self.indexfile, _(b'no node'))
1549 raise error.LookupError(node, self.indexfile, _(b'no node'))
1567 except AttributeError:
1550 except AttributeError:
1568 # Fall through to pure code
1551 # Fall through to pure code
1569 pass
1552 pass
1570
1553
1571 if node == self.nodeconstants.wdirid:
1554 if node == self.nodeconstants.wdirid:
1572 for length in range(minlength, len(hexnode) + 1):
1555 for length in range(minlength, len(hexnode) + 1):
1573 prefix = hexnode[:length]
1556 prefix = hexnode[:length]
1574 if isvalid(prefix):
1557 if isvalid(prefix):
1575 return prefix
1558 return prefix
1576
1559
1577 for length in range(minlength, len(hexnode) + 1):
1560 for length in range(minlength, len(hexnode) + 1):
1578 prefix = hexnode[:length]
1561 prefix = hexnode[:length]
1579 if isvalid(prefix):
1562 if isvalid(prefix):
1580 return disambiguate(hexnode, length)
1563 return disambiguate(hexnode, length)
1581
1564
1582 def cmp(self, node, text):
1565 def cmp(self, node, text):
1583 """compare text with a given file revision
1566 """compare text with a given file revision
1584
1567
1585 returns True if text is different than what is stored.
1568 returns True if text is different than what is stored.
1586 """
1569 """
1587 p1, p2 = self.parents(node)
1570 p1, p2 = self.parents(node)
1588 return storageutil.hashrevisionsha1(text, p1, p2) != node
1571 return storageutil.hashrevisionsha1(text, p1, p2) != node
1589
1572
1590 def _cachesegment(self, offset, data):
1573 def _cachesegment(self, offset, data):
1591 """Add a segment to the revlog cache.
1574 """Add a segment to the revlog cache.
1592
1575
1593 Accepts an absolute offset and the data that is at that location.
1576 Accepts an absolute offset and the data that is at that location.
1594 """
1577 """
1595 o, d = self._chunkcache
1578 o, d = self._chunkcache
1596 # try to add to existing cache
1579 # try to add to existing cache
1597 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1580 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1598 self._chunkcache = o, d + data
1581 self._chunkcache = o, d + data
1599 else:
1582 else:
1600 self._chunkcache = offset, data
1583 self._chunkcache = offset, data
1601
1584
1602 def _readsegment(self, offset, length, df=None):
1585 def _readsegment(self, offset, length, df=None):
1603 """Load a segment of raw data from the revlog.
1586 """Load a segment of raw data from the revlog.
1604
1587
1605 Accepts an absolute offset, length to read, and an optional existing
1588 Accepts an absolute offset, length to read, and an optional existing
1606 file handle to read from.
1589 file handle to read from.
1607
1590
1608 If an existing file handle is passed, it will be seeked and the
1591 If an existing file handle is passed, it will be seeked and the
1609 original seek position will NOT be restored.
1592 original seek position will NOT be restored.
1610
1593
1611 Returns a str or buffer of raw byte data.
1594 Returns a str or buffer of raw byte data.
1612
1595
1613 Raises if the requested number of bytes could not be read.
1596 Raises if the requested number of bytes could not be read.
1614 """
1597 """
1615 # Cache data both forward and backward around the requested
1598 # Cache data both forward and backward around the requested
1616 # data, in a fixed size window. This helps speed up operations
1599 # data, in a fixed size window. This helps speed up operations
1617 # involving reading the revlog backwards.
1600 # involving reading the revlog backwards.
1618 cachesize = self._chunkcachesize
1601 cachesize = self._chunkcachesize
1619 realoffset = offset & ~(cachesize - 1)
1602 realoffset = offset & ~(cachesize - 1)
1620 reallength = (
1603 reallength = (
1621 (offset + length + cachesize) & ~(cachesize - 1)
1604 (offset + length + cachesize) & ~(cachesize - 1)
1622 ) - realoffset
1605 ) - realoffset
1623 with self._datareadfp(df) as df:
1606 with self._datareadfp(df) as df:
1624 df.seek(realoffset)
1607 df.seek(realoffset)
1625 d = df.read(reallength)
1608 d = df.read(reallength)
1626
1609
1627 self._cachesegment(realoffset, d)
1610 self._cachesegment(realoffset, d)
1628 if offset != realoffset or reallength != length:
1611 if offset != realoffset or reallength != length:
1629 startoffset = offset - realoffset
1612 startoffset = offset - realoffset
1630 if len(d) - startoffset < length:
1613 if len(d) - startoffset < length:
1631 raise error.RevlogError(
1614 raise error.RevlogError(
1632 _(
1615 _(
1633 b'partial read of revlog %s; expected %d bytes from '
1616 b'partial read of revlog %s; expected %d bytes from '
1634 b'offset %d, got %d'
1617 b'offset %d, got %d'
1635 )
1618 )
1636 % (
1619 % (
1637 self.indexfile if self._inline else self.datafile,
1620 self.indexfile if self._inline else self.datafile,
1638 length,
1621 length,
1639 realoffset,
1622 realoffset,
1640 len(d) - startoffset,
1623 len(d) - startoffset,
1641 )
1624 )
1642 )
1625 )
1643
1626
1644 return util.buffer(d, startoffset, length)
1627 return util.buffer(d, startoffset, length)
1645
1628
1646 if len(d) < length:
1629 if len(d) < length:
1647 raise error.RevlogError(
1630 raise error.RevlogError(
1648 _(
1631 _(
1649 b'partial read of revlog %s; expected %d bytes from offset '
1632 b'partial read of revlog %s; expected %d bytes from offset '
1650 b'%d, got %d'
1633 b'%d, got %d'
1651 )
1634 )
1652 % (
1635 % (
1653 self.indexfile if self._inline else self.datafile,
1636 self.indexfile if self._inline else self.datafile,
1654 length,
1637 length,
1655 offset,
1638 offset,
1656 len(d),
1639 len(d),
1657 )
1640 )
1658 )
1641 )
1659
1642
1660 return d
1643 return d
1661
1644
1662 def _getsegment(self, offset, length, df=None):
1645 def _getsegment(self, offset, length, df=None):
1663 """Obtain a segment of raw data from the revlog.
1646 """Obtain a segment of raw data from the revlog.
1664
1647
1665 Accepts an absolute offset, length of bytes to obtain, and an
1648 Accepts an absolute offset, length of bytes to obtain, and an
1666 optional file handle to the already-opened revlog. If the file
1649 optional file handle to the already-opened revlog. If the file
1667 handle is used, it's original seek position will not be preserved.
1650 handle is used, it's original seek position will not be preserved.
1668
1651
1669 Requests for data may be returned from a cache.
1652 Requests for data may be returned from a cache.
1670
1653
1671 Returns a str or a buffer instance of raw byte data.
1654 Returns a str or a buffer instance of raw byte data.
1672 """
1655 """
1673 o, d = self._chunkcache
1656 o, d = self._chunkcache
1674 l = len(d)
1657 l = len(d)
1675
1658
1676 # is it in the cache?
1659 # is it in the cache?
1677 cachestart = offset - o
1660 cachestart = offset - o
1678 cacheend = cachestart + length
1661 cacheend = cachestart + length
1679 if cachestart >= 0 and cacheend <= l:
1662 if cachestart >= 0 and cacheend <= l:
1680 if cachestart == 0 and cacheend == l:
1663 if cachestart == 0 and cacheend == l:
1681 return d # avoid a copy
1664 return d # avoid a copy
1682 return util.buffer(d, cachestart, cacheend - cachestart)
1665 return util.buffer(d, cachestart, cacheend - cachestart)
1683
1666
1684 return self._readsegment(offset, length, df=df)
1667 return self._readsegment(offset, length, df=df)
1685
1668
1686 def _getsegmentforrevs(self, startrev, endrev, df=None):
1669 def _getsegmentforrevs(self, startrev, endrev, df=None):
1687 """Obtain a segment of raw data corresponding to a range of revisions.
1670 """Obtain a segment of raw data corresponding to a range of revisions.
1688
1671
1689 Accepts the start and end revisions and an optional already-open
1672 Accepts the start and end revisions and an optional already-open
1690 file handle to be used for reading. If the file handle is read, its
1673 file handle to be used for reading. If the file handle is read, its
1691 seek position will not be preserved.
1674 seek position will not be preserved.
1692
1675
1693 Requests for data may be satisfied by a cache.
1676 Requests for data may be satisfied by a cache.
1694
1677
1695 Returns a 2-tuple of (offset, data) for the requested range of
1678 Returns a 2-tuple of (offset, data) for the requested range of
1696 revisions. Offset is the integer offset from the beginning of the
1679 revisions. Offset is the integer offset from the beginning of the
1697 revlog and data is a str or buffer of the raw byte data.
1680 revlog and data is a str or buffer of the raw byte data.
1698
1681
1699 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1682 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1700 to determine where each revision's data begins and ends.
1683 to determine where each revision's data begins and ends.
1701 """
1684 """
1702 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1685 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1703 # (functions are expensive).
1686 # (functions are expensive).
1704 index = self.index
1687 index = self.index
1705 istart = index[startrev]
1688 istart = index[startrev]
1706 start = int(istart[0] >> 16)
1689 start = int(istart[0] >> 16)
1707 if startrev == endrev:
1690 if startrev == endrev:
1708 end = start + istart[1]
1691 end = start + istart[1]
1709 else:
1692 else:
1710 iend = index[endrev]
1693 iend = index[endrev]
1711 end = int(iend[0] >> 16) + iend[1]
1694 end = int(iend[0] >> 16) + iend[1]
1712
1695
1713 if self._inline:
1696 if self._inline:
1714 start += (startrev + 1) * self.index.entry_size
1697 start += (startrev + 1) * self.index.entry_size
1715 end += (endrev + 1) * self.index.entry_size
1698 end += (endrev + 1) * self.index.entry_size
1716 length = end - start
1699 length = end - start
1717
1700
1718 return start, self._getsegment(start, length, df=df)
1701 return start, self._getsegment(start, length, df=df)
1719
1702
1720 def _chunk(self, rev, df=None):
1703 def _chunk(self, rev, df=None):
1721 """Obtain a single decompressed chunk for a revision.
1704 """Obtain a single decompressed chunk for a revision.
1722
1705
1723 Accepts an integer revision and an optional already-open file handle
1706 Accepts an integer revision and an optional already-open file handle
1724 to be used for reading. If used, the seek position of the file will not
1707 to be used for reading. If used, the seek position of the file will not
1725 be preserved.
1708 be preserved.
1726
1709
1727 Returns a str holding uncompressed data for the requested revision.
1710 Returns a str holding uncompressed data for the requested revision.
1728 """
1711 """
1729 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
1712 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
1730
1713
1731 def _chunks(self, revs, df=None, targetsize=None):
1714 def _chunks(self, revs, df=None, targetsize=None):
1732 """Obtain decompressed chunks for the specified revisions.
1715 """Obtain decompressed chunks for the specified revisions.
1733
1716
1734 Accepts an iterable of numeric revisions that are assumed to be in
1717 Accepts an iterable of numeric revisions that are assumed to be in
1735 ascending order. Also accepts an optional already-open file handle
1718 ascending order. Also accepts an optional already-open file handle
1736 to be used for reading. If used, the seek position of the file will
1719 to be used for reading. If used, the seek position of the file will
1737 not be preserved.
1720 not be preserved.
1738
1721
1739 This function is similar to calling ``self._chunk()`` multiple times,
1722 This function is similar to calling ``self._chunk()`` multiple times,
1740 but is faster.
1723 but is faster.
1741
1724
1742 Returns a list with decompressed data for each requested revision.
1725 Returns a list with decompressed data for each requested revision.
1743 """
1726 """
1744 if not revs:
1727 if not revs:
1745 return []
1728 return []
1746 start = self.start
1729 start = self.start
1747 length = self.length
1730 length = self.length
1748 inline = self._inline
1731 inline = self._inline
1749 iosize = self.index.entry_size
1732 iosize = self.index.entry_size
1750 buffer = util.buffer
1733 buffer = util.buffer
1751
1734
1752 l = []
1735 l = []
1753 ladd = l.append
1736 ladd = l.append
1754
1737
1755 if not self._withsparseread:
1738 if not self._withsparseread:
1756 slicedchunks = (revs,)
1739 slicedchunks = (revs,)
1757 else:
1740 else:
1758 slicedchunks = deltautil.slicechunk(
1741 slicedchunks = deltautil.slicechunk(
1759 self, revs, targetsize=targetsize
1742 self, revs, targetsize=targetsize
1760 )
1743 )
1761
1744
1762 for revschunk in slicedchunks:
1745 for revschunk in slicedchunks:
1763 firstrev = revschunk[0]
1746 firstrev = revschunk[0]
1764 # Skip trailing revisions with empty diff
1747 # Skip trailing revisions with empty diff
1765 for lastrev in revschunk[::-1]:
1748 for lastrev in revschunk[::-1]:
1766 if length(lastrev) != 0:
1749 if length(lastrev) != 0:
1767 break
1750 break
1768
1751
1769 try:
1752 try:
1770 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1753 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1771 except OverflowError:
1754 except OverflowError:
1772 # issue4215 - we can't cache a run of chunks greater than
1755 # issue4215 - we can't cache a run of chunks greater than
1773 # 2G on Windows
1756 # 2G on Windows
1774 return [self._chunk(rev, df=df) for rev in revschunk]
1757 return [self._chunk(rev, df=df) for rev in revschunk]
1775
1758
1776 decomp = self.decompress
1759 decomp = self.decompress
1777 for rev in revschunk:
1760 for rev in revschunk:
1778 chunkstart = start(rev)
1761 chunkstart = start(rev)
1779 if inline:
1762 if inline:
1780 chunkstart += (rev + 1) * iosize
1763 chunkstart += (rev + 1) * iosize
1781 chunklength = length(rev)
1764 chunklength = length(rev)
1782 ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
1765 ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
1783
1766
1784 return l
1767 return l
1785
1768
1786 def _chunkclear(self):
1769 def _chunkclear(self):
1787 """Clear the raw chunk cache."""
1770 """Clear the raw chunk cache."""
1788 self._chunkcache = (0, b'')
1771 self._chunkcache = (0, b'')
1789
1772
1790 def deltaparent(self, rev):
1773 def deltaparent(self, rev):
1791 """return deltaparent of the given revision"""
1774 """return deltaparent of the given revision"""
1792 base = self.index[rev][3]
1775 base = self.index[rev][3]
1793 if base == rev:
1776 if base == rev:
1794 return nullrev
1777 return nullrev
1795 elif self._generaldelta:
1778 elif self._generaldelta:
1796 return base
1779 return base
1797 else:
1780 else:
1798 return rev - 1
1781 return rev - 1
1799
1782
1800 def issnapshot(self, rev):
1783 def issnapshot(self, rev):
1801 """tells whether rev is a snapshot"""
1784 """tells whether rev is a snapshot"""
1802 if not self._sparserevlog:
1785 if not self._sparserevlog:
1803 return self.deltaparent(rev) == nullrev
1786 return self.deltaparent(rev) == nullrev
1804 elif util.safehasattr(self.index, b'issnapshot'):
1787 elif util.safehasattr(self.index, b'issnapshot'):
1805 # directly assign the method to cache the testing and access
1788 # directly assign the method to cache the testing and access
1806 self.issnapshot = self.index.issnapshot
1789 self.issnapshot = self.index.issnapshot
1807 return self.issnapshot(rev)
1790 return self.issnapshot(rev)
1808 if rev == nullrev:
1791 if rev == nullrev:
1809 return True
1792 return True
1810 entry = self.index[rev]
1793 entry = self.index[rev]
1811 base = entry[3]
1794 base = entry[3]
1812 if base == rev:
1795 if base == rev:
1813 return True
1796 return True
1814 if base == nullrev:
1797 if base == nullrev:
1815 return True
1798 return True
1816 p1 = entry[5]
1799 p1 = entry[5]
1817 p2 = entry[6]
1800 p2 = entry[6]
1818 if base == p1 or base == p2:
1801 if base == p1 or base == p2:
1819 return False
1802 return False
1820 return self.issnapshot(base)
1803 return self.issnapshot(base)
1821
1804
1822 def snapshotdepth(self, rev):
1805 def snapshotdepth(self, rev):
1823 """number of snapshot in the chain before this one"""
1806 """number of snapshot in the chain before this one"""
1824 if not self.issnapshot(rev):
1807 if not self.issnapshot(rev):
1825 raise error.ProgrammingError(b'revision %d not a snapshot')
1808 raise error.ProgrammingError(b'revision %d not a snapshot')
1826 return len(self._deltachain(rev)[0]) - 1
1809 return len(self._deltachain(rev)[0]) - 1
1827
1810
1828 def revdiff(self, rev1, rev2):
1811 def revdiff(self, rev1, rev2):
1829 """return or calculate a delta between two revisions
1812 """return or calculate a delta between two revisions
1830
1813
1831 The delta calculated is in binary form and is intended to be written to
1814 The delta calculated is in binary form and is intended to be written to
1832 revlog data directly. So this function needs raw revision data.
1815 revlog data directly. So this function needs raw revision data.
1833 """
1816 """
1834 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1817 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1835 return bytes(self._chunk(rev2))
1818 return bytes(self._chunk(rev2))
1836
1819
1837 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
1820 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
1838
1821
1839 def _processflags(self, text, flags, operation, raw=False):
1822 def _processflags(self, text, flags, operation, raw=False):
1840 """deprecated entry point to access flag processors"""
1823 """deprecated entry point to access flag processors"""
1841 msg = b'_processflag(...) use the specialized variant'
1824 msg = b'_processflag(...) use the specialized variant'
1842 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1825 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1843 if raw:
1826 if raw:
1844 return text, flagutil.processflagsraw(self, text, flags)
1827 return text, flagutil.processflagsraw(self, text, flags)
1845 elif operation == b'read':
1828 elif operation == b'read':
1846 return flagutil.processflagsread(self, text, flags)
1829 return flagutil.processflagsread(self, text, flags)
1847 else: # write operation
1830 else: # write operation
1848 return flagutil.processflagswrite(self, text, flags)
1831 return flagutil.processflagswrite(self, text, flags)
1849
1832
1850 def revision(self, nodeorrev, _df=None, raw=False):
1833 def revision(self, nodeorrev, _df=None, raw=False):
1851 """return an uncompressed revision of a given node or revision
1834 """return an uncompressed revision of a given node or revision
1852 number.
1835 number.
1853
1836
1854 _df - an existing file handle to read from. (internal-only)
1837 _df - an existing file handle to read from. (internal-only)
1855 raw - an optional argument specifying if the revision data is to be
1838 raw - an optional argument specifying if the revision data is to be
1856 treated as raw data when applying flag transforms. 'raw' should be set
1839 treated as raw data when applying flag transforms. 'raw' should be set
1857 to True when generating changegroups or in debug commands.
1840 to True when generating changegroups or in debug commands.
1858 """
1841 """
1859 if raw:
1842 if raw:
1860 msg = (
1843 msg = (
1861 b'revlog.revision(..., raw=True) is deprecated, '
1844 b'revlog.revision(..., raw=True) is deprecated, '
1862 b'use revlog.rawdata(...)'
1845 b'use revlog.rawdata(...)'
1863 )
1846 )
1864 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1847 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1865 return self._revisiondata(nodeorrev, _df, raw=raw)[0]
1848 return self._revisiondata(nodeorrev, _df, raw=raw)[0]
1866
1849
1867 def sidedata(self, nodeorrev, _df=None):
1850 def sidedata(self, nodeorrev, _df=None):
1868 """a map of extra data related to the changeset but not part of the hash
1851 """a map of extra data related to the changeset but not part of the hash
1869
1852
1870 This function currently return a dictionary. However, more advanced
1853 This function currently return a dictionary. However, more advanced
1871 mapping object will likely be used in the future for a more
1854 mapping object will likely be used in the future for a more
1872 efficient/lazy code.
1855 efficient/lazy code.
1873 """
1856 """
1874 return self._revisiondata(nodeorrev, _df)[1]
1857 return self._revisiondata(nodeorrev, _df)[1]
1875
1858
1876 def _revisiondata(self, nodeorrev, _df=None, raw=False):
1859 def _revisiondata(self, nodeorrev, _df=None, raw=False):
1877 # deal with <nodeorrev> argument type
1860 # deal with <nodeorrev> argument type
1878 if isinstance(nodeorrev, int):
1861 if isinstance(nodeorrev, int):
1879 rev = nodeorrev
1862 rev = nodeorrev
1880 node = self.node(rev)
1863 node = self.node(rev)
1881 else:
1864 else:
1882 node = nodeorrev
1865 node = nodeorrev
1883 rev = None
1866 rev = None
1884
1867
1885 # fast path the special `nullid` rev
1868 # fast path the special `nullid` rev
1886 if node == self.nullid:
1869 if node == self.nullid:
1887 return b"", {}
1870 return b"", {}
1888
1871
1889 # ``rawtext`` is the text as stored inside the revlog. Might be the
1872 # ``rawtext`` is the text as stored inside the revlog. Might be the
1890 # revision or might need to be processed to retrieve the revision.
1873 # revision or might need to be processed to retrieve the revision.
1891 rev, rawtext, validated = self._rawtext(node, rev, _df=_df)
1874 rev, rawtext, validated = self._rawtext(node, rev, _df=_df)
1892
1875
1893 if self.version & 0xFFFF == REVLOGV2:
1876 if self.version & 0xFFFF == REVLOGV2:
1894 if rev is None:
1877 if rev is None:
1895 rev = self.rev(node)
1878 rev = self.rev(node)
1896 sidedata = self._sidedata(rev)
1879 sidedata = self._sidedata(rev)
1897 else:
1880 else:
1898 sidedata = {}
1881 sidedata = {}
1899
1882
1900 if raw and validated:
1883 if raw and validated:
1901 # if we don't want to process the raw text and that raw
1884 # if we don't want to process the raw text and that raw
1902 # text is cached, we can exit early.
1885 # text is cached, we can exit early.
1903 return rawtext, sidedata
1886 return rawtext, sidedata
1904 if rev is None:
1887 if rev is None:
1905 rev = self.rev(node)
1888 rev = self.rev(node)
1906 # the revlog's flag for this revision
1889 # the revlog's flag for this revision
1907 # (usually alter its state or content)
1890 # (usually alter its state or content)
1908 flags = self.flags(rev)
1891 flags = self.flags(rev)
1909
1892
1910 if validated and flags == REVIDX_DEFAULT_FLAGS:
1893 if validated and flags == REVIDX_DEFAULT_FLAGS:
1911 # no extra flags set, no flag processor runs, text = rawtext
1894 # no extra flags set, no flag processor runs, text = rawtext
1912 return rawtext, sidedata
1895 return rawtext, sidedata
1913
1896
1914 if raw:
1897 if raw:
1915 validatehash = flagutil.processflagsraw(self, rawtext, flags)
1898 validatehash = flagutil.processflagsraw(self, rawtext, flags)
1916 text = rawtext
1899 text = rawtext
1917 else:
1900 else:
1918 r = flagutil.processflagsread(self, rawtext, flags)
1901 r = flagutil.processflagsread(self, rawtext, flags)
1919 text, validatehash = r
1902 text, validatehash = r
1920 if validatehash:
1903 if validatehash:
1921 self.checkhash(text, node, rev=rev)
1904 self.checkhash(text, node, rev=rev)
1922 if not validated:
1905 if not validated:
1923 self._revisioncache = (node, rev, rawtext)
1906 self._revisioncache = (node, rev, rawtext)
1924
1907
1925 return text, sidedata
1908 return text, sidedata
1926
1909
1927 def _rawtext(self, node, rev, _df=None):
1910 def _rawtext(self, node, rev, _df=None):
1928 """return the possibly unvalidated rawtext for a revision
1911 """return the possibly unvalidated rawtext for a revision
1929
1912
1930 returns (rev, rawtext, validated)
1913 returns (rev, rawtext, validated)
1931 """
1914 """
1932
1915
1933 # revision in the cache (could be useful to apply delta)
1916 # revision in the cache (could be useful to apply delta)
1934 cachedrev = None
1917 cachedrev = None
1935 # An intermediate text to apply deltas to
1918 # An intermediate text to apply deltas to
1936 basetext = None
1919 basetext = None
1937
1920
1938 # Check if we have the entry in cache
1921 # Check if we have the entry in cache
1939 # The cache entry looks like (node, rev, rawtext)
1922 # The cache entry looks like (node, rev, rawtext)
1940 if self._revisioncache:
1923 if self._revisioncache:
1941 if self._revisioncache[0] == node:
1924 if self._revisioncache[0] == node:
1942 return (rev, self._revisioncache[2], True)
1925 return (rev, self._revisioncache[2], True)
1943 cachedrev = self._revisioncache[1]
1926 cachedrev = self._revisioncache[1]
1944
1927
1945 if rev is None:
1928 if rev is None:
1946 rev = self.rev(node)
1929 rev = self.rev(node)
1947
1930
1948 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1931 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1949 if stopped:
1932 if stopped:
1950 basetext = self._revisioncache[2]
1933 basetext = self._revisioncache[2]
1951
1934
1952 # drop cache to save memory, the caller is expected to
1935 # drop cache to save memory, the caller is expected to
1953 # update self._revisioncache after validating the text
1936 # update self._revisioncache after validating the text
1954 self._revisioncache = None
1937 self._revisioncache = None
1955
1938
1956 targetsize = None
1939 targetsize = None
1957 rawsize = self.index[rev][2]
1940 rawsize = self.index[rev][2]
1958 if 0 <= rawsize:
1941 if 0 <= rawsize:
1959 targetsize = 4 * rawsize
1942 targetsize = 4 * rawsize
1960
1943
1961 bins = self._chunks(chain, df=_df, targetsize=targetsize)
1944 bins = self._chunks(chain, df=_df, targetsize=targetsize)
1962 if basetext is None:
1945 if basetext is None:
1963 basetext = bytes(bins[0])
1946 basetext = bytes(bins[0])
1964 bins = bins[1:]
1947 bins = bins[1:]
1965
1948
1966 rawtext = mdiff.patches(basetext, bins)
1949 rawtext = mdiff.patches(basetext, bins)
1967 del basetext # let us have a chance to free memory early
1950 del basetext # let us have a chance to free memory early
1968 return (rev, rawtext, False)
1951 return (rev, rawtext, False)
1969
1952
1970 def _sidedata(self, rev):
1953 def _sidedata(self, rev):
1971 """Return the sidedata for a given revision number."""
1954 """Return the sidedata for a given revision number."""
1972 index_entry = self.index[rev]
1955 index_entry = self.index[rev]
1973 sidedata_offset = index_entry[8]
1956 sidedata_offset = index_entry[8]
1974 sidedata_size = index_entry[9]
1957 sidedata_size = index_entry[9]
1975
1958
1976 if self._inline:
1959 if self._inline:
1977 sidedata_offset += self.index.entry_size * (1 + rev)
1960 sidedata_offset += self.index.entry_size * (1 + rev)
1978 if sidedata_size == 0:
1961 if sidedata_size == 0:
1979 return {}
1962 return {}
1980
1963
1981 segment = self._getsegment(sidedata_offset, sidedata_size)
1964 segment = self._getsegment(sidedata_offset, sidedata_size)
1982 sidedata = sidedatautil.deserialize_sidedata(segment)
1965 sidedata = sidedatautil.deserialize_sidedata(segment)
1983 return sidedata
1966 return sidedata
1984
1967
1985 def rawdata(self, nodeorrev, _df=None):
1968 def rawdata(self, nodeorrev, _df=None):
1986 """return an uncompressed raw data of a given node or revision number.
1969 """return an uncompressed raw data of a given node or revision number.
1987
1970
1988 _df - an existing file handle to read from. (internal-only)
1971 _df - an existing file handle to read from. (internal-only)
1989 """
1972 """
1990 return self._revisiondata(nodeorrev, _df, raw=True)[0]
1973 return self._revisiondata(nodeorrev, _df, raw=True)[0]
1991
1974
1992 def hash(self, text, p1, p2):
1975 def hash(self, text, p1, p2):
1993 """Compute a node hash.
1976 """Compute a node hash.
1994
1977
1995 Available as a function so that subclasses can replace the hash
1978 Available as a function so that subclasses can replace the hash
1996 as needed.
1979 as needed.
1997 """
1980 """
1998 return storageutil.hashrevisionsha1(text, p1, p2)
1981 return storageutil.hashrevisionsha1(text, p1, p2)
1999
1982
2000 def checkhash(self, text, node, p1=None, p2=None, rev=None):
1983 def checkhash(self, text, node, p1=None, p2=None, rev=None):
2001 """Check node hash integrity.
1984 """Check node hash integrity.
2002
1985
2003 Available as a function so that subclasses can extend hash mismatch
1986 Available as a function so that subclasses can extend hash mismatch
2004 behaviors as needed.
1987 behaviors as needed.
2005 """
1988 """
2006 try:
1989 try:
2007 if p1 is None and p2 is None:
1990 if p1 is None and p2 is None:
2008 p1, p2 = self.parents(node)
1991 p1, p2 = self.parents(node)
2009 if node != self.hash(text, p1, p2):
1992 if node != self.hash(text, p1, p2):
2010 # Clear the revision cache on hash failure. The revision cache
1993 # Clear the revision cache on hash failure. The revision cache
2011 # only stores the raw revision and clearing the cache does have
1994 # only stores the raw revision and clearing the cache does have
2012 # the side-effect that we won't have a cache hit when the raw
1995 # the side-effect that we won't have a cache hit when the raw
2013 # revision data is accessed. But this case should be rare and
1996 # revision data is accessed. But this case should be rare and
2014 # it is extra work to teach the cache about the hash
1997 # it is extra work to teach the cache about the hash
2015 # verification state.
1998 # verification state.
2016 if self._revisioncache and self._revisioncache[0] == node:
1999 if self._revisioncache and self._revisioncache[0] == node:
2017 self._revisioncache = None
2000 self._revisioncache = None
2018
2001
2019 revornode = rev
2002 revornode = rev
2020 if revornode is None:
2003 if revornode is None:
2021 revornode = templatefilters.short(hex(node))
2004 revornode = templatefilters.short(hex(node))
2022 raise error.RevlogError(
2005 raise error.RevlogError(
2023 _(b"integrity check failed on %s:%s")
2006 _(b"integrity check failed on %s:%s")
2024 % (self.indexfile, pycompat.bytestr(revornode))
2007 % (self.indexfile, pycompat.bytestr(revornode))
2025 )
2008 )
2026 except error.RevlogError:
2009 except error.RevlogError:
2027 if self._censorable and storageutil.iscensoredtext(text):
2010 if self._censorable and storageutil.iscensoredtext(text):
2028 raise error.CensoredNodeError(self.indexfile, node, text)
2011 raise error.CensoredNodeError(self.indexfile, node, text)
2029 raise
2012 raise
2030
2013
2031 def _enforceinlinesize(self, tr, fp=None):
2014 def _enforceinlinesize(self, tr, fp=None):
2032 """Check if the revlog is too big for inline and convert if so.
2015 """Check if the revlog is too big for inline and convert if so.
2033
2016
2034 This should be called after revisions are added to the revlog. If the
2017 This should be called after revisions are added to the revlog. If the
2035 revlog has grown too large to be an inline revlog, it will convert it
2018 revlog has grown too large to be an inline revlog, it will convert it
2036 to use multiple index and data files.
2019 to use multiple index and data files.
2037 """
2020 """
2038 tiprev = len(self) - 1
2021 tiprev = len(self) - 1
2039 if (
2022 if (
2040 not self._inline
2023 not self._inline
2041 or (self.start(tiprev) + self.length(tiprev)) < _maxinline
2024 or (self.start(tiprev) + self.length(tiprev)) < _maxinline
2042 ):
2025 ):
2043 return
2026 return
2044
2027
2045 troffset = tr.findoffset(self.indexfile)
2028 troffset = tr.findoffset(self.indexfile)
2046 if troffset is None:
2029 if troffset is None:
2047 raise error.RevlogError(
2030 raise error.RevlogError(
2048 _(b"%s not found in the transaction") % self.indexfile
2031 _(b"%s not found in the transaction") % self.indexfile
2049 )
2032 )
2050 trindex = 0
2033 trindex = 0
2051 tr.add(self.datafile, 0)
2034 tr.add(self.datafile, 0)
2052
2035
2053 if fp:
2036 if fp:
2054 fp.flush()
2037 fp.flush()
2055 fp.close()
2038 fp.close()
2056 # We can't use the cached file handle after close(). So prevent
2039 # We can't use the cached file handle after close(). So prevent
2057 # its usage.
2040 # its usage.
2058 self._writinghandles = None
2041 self._writinghandles = None
2059
2042
2060 with self._indexfp(b'r') as ifh, self._datafp(b'w') as dfh:
2043 with self._indexfp(b'r') as ifh, self._datafp(b'w') as dfh:
2061 for r in self:
2044 for r in self:
2062 dfh.write(self._getsegmentforrevs(r, r, df=ifh)[1])
2045 dfh.write(self._getsegmentforrevs(r, r, df=ifh)[1])
2063 if troffset <= self.start(r):
2046 if troffset <= self.start(r):
2064 trindex = r
2047 trindex = r
2065
2048
2066 with self._indexfp(b'w') as fp:
2049 with self._indexfp(b'w') as fp:
2067 self.version &= ~FLAG_INLINE_DATA
2050 self.version &= ~FLAG_INLINE_DATA
2068 self._inline = False
2051 self._inline = False
2069 io = self._io
2052 io = self._io
2070 for i in self:
2053 for i in self:
2071 e = io.packentry(self.index[i], self.node, self.version, i)
2054 e = self.index.entry_binary(i, self.version)
2072 fp.write(e)
2055 fp.write(e)
2073
2056
2074 # the temp file replace the real index when we exit the context
2057 # the temp file replace the real index when we exit the context
2075 # manager
2058 # manager
2076
2059
2077 tr.replace(self.indexfile, trindex * self.index.entry_size)
2060 tr.replace(self.indexfile, trindex * self.index.entry_size)
2078 nodemaputil.setup_persistent_nodemap(tr, self)
2061 nodemaputil.setup_persistent_nodemap(tr, self)
2079 self._chunkclear()
2062 self._chunkclear()
2080
2063
2081 def _nodeduplicatecallback(self, transaction, node):
2064 def _nodeduplicatecallback(self, transaction, node):
2082 """called when trying to add a node already stored."""
2065 """called when trying to add a node already stored."""
2083
2066
2084 def addrevision(
2067 def addrevision(
2085 self,
2068 self,
2086 text,
2069 text,
2087 transaction,
2070 transaction,
2088 link,
2071 link,
2089 p1,
2072 p1,
2090 p2,
2073 p2,
2091 cachedelta=None,
2074 cachedelta=None,
2092 node=None,
2075 node=None,
2093 flags=REVIDX_DEFAULT_FLAGS,
2076 flags=REVIDX_DEFAULT_FLAGS,
2094 deltacomputer=None,
2077 deltacomputer=None,
2095 sidedata=None,
2078 sidedata=None,
2096 ):
2079 ):
2097 """add a revision to the log
2080 """add a revision to the log
2098
2081
2099 text - the revision data to add
2082 text - the revision data to add
2100 transaction - the transaction object used for rollback
2083 transaction - the transaction object used for rollback
2101 link - the linkrev data to add
2084 link - the linkrev data to add
2102 p1, p2 - the parent nodeids of the revision
2085 p1, p2 - the parent nodeids of the revision
2103 cachedelta - an optional precomputed delta
2086 cachedelta - an optional precomputed delta
2104 node - nodeid of revision; typically node is not specified, and it is
2087 node - nodeid of revision; typically node is not specified, and it is
2105 computed by default as hash(text, p1, p2), however subclasses might
2088 computed by default as hash(text, p1, p2), however subclasses might
2106 use different hashing method (and override checkhash() in such case)
2089 use different hashing method (and override checkhash() in such case)
2107 flags - the known flags to set on the revision
2090 flags - the known flags to set on the revision
2108 deltacomputer - an optional deltacomputer instance shared between
2091 deltacomputer - an optional deltacomputer instance shared between
2109 multiple calls
2092 multiple calls
2110 """
2093 """
2111 if link == nullrev:
2094 if link == nullrev:
2112 raise error.RevlogError(
2095 raise error.RevlogError(
2113 _(b"attempted to add linkrev -1 to %s") % self.indexfile
2096 _(b"attempted to add linkrev -1 to %s") % self.indexfile
2114 )
2097 )
2115
2098
2116 if sidedata is None:
2099 if sidedata is None:
2117 sidedata = {}
2100 sidedata = {}
2118 elif not self.hassidedata:
2101 elif not self.hassidedata:
2119 raise error.ProgrammingError(
2102 raise error.ProgrammingError(
2120 _(b"trying to add sidedata to a revlog who don't support them")
2103 _(b"trying to add sidedata to a revlog who don't support them")
2121 )
2104 )
2122
2105
2123 if flags:
2106 if flags:
2124 node = node or self.hash(text, p1, p2)
2107 node = node or self.hash(text, p1, p2)
2125
2108
2126 rawtext, validatehash = flagutil.processflagswrite(self, text, flags)
2109 rawtext, validatehash = flagutil.processflagswrite(self, text, flags)
2127
2110
2128 # If the flag processor modifies the revision data, ignore any provided
2111 # If the flag processor modifies the revision data, ignore any provided
2129 # cachedelta.
2112 # cachedelta.
2130 if rawtext != text:
2113 if rawtext != text:
2131 cachedelta = None
2114 cachedelta = None
2132
2115
2133 if len(rawtext) > _maxentrysize:
2116 if len(rawtext) > _maxentrysize:
2134 raise error.RevlogError(
2117 raise error.RevlogError(
2135 _(
2118 _(
2136 b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
2119 b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
2137 )
2120 )
2138 % (self.indexfile, len(rawtext))
2121 % (self.indexfile, len(rawtext))
2139 )
2122 )
2140
2123
2141 node = node or self.hash(rawtext, p1, p2)
2124 node = node or self.hash(rawtext, p1, p2)
2142 rev = self.index.get_rev(node)
2125 rev = self.index.get_rev(node)
2143 if rev is not None:
2126 if rev is not None:
2144 return rev
2127 return rev
2145
2128
2146 if validatehash:
2129 if validatehash:
2147 self.checkhash(rawtext, node, p1=p1, p2=p2)
2130 self.checkhash(rawtext, node, p1=p1, p2=p2)
2148
2131
2149 return self.addrawrevision(
2132 return self.addrawrevision(
2150 rawtext,
2133 rawtext,
2151 transaction,
2134 transaction,
2152 link,
2135 link,
2153 p1,
2136 p1,
2154 p2,
2137 p2,
2155 node,
2138 node,
2156 flags,
2139 flags,
2157 cachedelta=cachedelta,
2140 cachedelta=cachedelta,
2158 deltacomputer=deltacomputer,
2141 deltacomputer=deltacomputer,
2159 sidedata=sidedata,
2142 sidedata=sidedata,
2160 )
2143 )
2161
2144
2162 def addrawrevision(
2145 def addrawrevision(
2163 self,
2146 self,
2164 rawtext,
2147 rawtext,
2165 transaction,
2148 transaction,
2166 link,
2149 link,
2167 p1,
2150 p1,
2168 p2,
2151 p2,
2169 node,
2152 node,
2170 flags,
2153 flags,
2171 cachedelta=None,
2154 cachedelta=None,
2172 deltacomputer=None,
2155 deltacomputer=None,
2173 sidedata=None,
2156 sidedata=None,
2174 ):
2157 ):
2175 """add a raw revision with known flags, node and parents
2158 """add a raw revision with known flags, node and parents
2176 useful when reusing a revision not stored in this revlog (ex: received
2159 useful when reusing a revision not stored in this revlog (ex: received
2177 over wire, or read from an external bundle).
2160 over wire, or read from an external bundle).
2178 """
2161 """
2179 dfh = None
2162 dfh = None
2180 if not self._inline:
2163 if not self._inline:
2181 dfh = self._datafp(b"a+")
2164 dfh = self._datafp(b"a+")
2182 ifh = self._indexfp(b"a+")
2165 ifh = self._indexfp(b"a+")
2183 try:
2166 try:
2184 return self._addrevision(
2167 return self._addrevision(
2185 node,
2168 node,
2186 rawtext,
2169 rawtext,
2187 transaction,
2170 transaction,
2188 link,
2171 link,
2189 p1,
2172 p1,
2190 p2,
2173 p2,
2191 flags,
2174 flags,
2192 cachedelta,
2175 cachedelta,
2193 ifh,
2176 ifh,
2194 dfh,
2177 dfh,
2195 deltacomputer=deltacomputer,
2178 deltacomputer=deltacomputer,
2196 sidedata=sidedata,
2179 sidedata=sidedata,
2197 )
2180 )
2198 finally:
2181 finally:
2199 if dfh:
2182 if dfh:
2200 dfh.close()
2183 dfh.close()
2201 ifh.close()
2184 ifh.close()
2202
2185
2203 def compress(self, data):
2186 def compress(self, data):
2204 """Generate a possibly-compressed representation of data."""
2187 """Generate a possibly-compressed representation of data."""
2205 if not data:
2188 if not data:
2206 return b'', data
2189 return b'', data
2207
2190
2208 compressed = self._compressor.compress(data)
2191 compressed = self._compressor.compress(data)
2209
2192
2210 if compressed:
2193 if compressed:
2211 # The revlog compressor added the header in the returned data.
2194 # The revlog compressor added the header in the returned data.
2212 return b'', compressed
2195 return b'', compressed
2213
2196
2214 if data[0:1] == b'\0':
2197 if data[0:1] == b'\0':
2215 return b'', data
2198 return b'', data
2216 return b'u', data
2199 return b'u', data
2217
2200
2218 def decompress(self, data):
2201 def decompress(self, data):
2219 """Decompress a revlog chunk.
2202 """Decompress a revlog chunk.
2220
2203
2221 The chunk is expected to begin with a header identifying the
2204 The chunk is expected to begin with a header identifying the
2222 format type so it can be routed to an appropriate decompressor.
2205 format type so it can be routed to an appropriate decompressor.
2223 """
2206 """
2224 if not data:
2207 if not data:
2225 return data
2208 return data
2226
2209
2227 # Revlogs are read much more frequently than they are written and many
2210 # Revlogs are read much more frequently than they are written and many
2228 # chunks only take microseconds to decompress, so performance is
2211 # chunks only take microseconds to decompress, so performance is
2229 # important here.
2212 # important here.
2230 #
2213 #
2231 # We can make a few assumptions about revlogs:
2214 # We can make a few assumptions about revlogs:
2232 #
2215 #
2233 # 1) the majority of chunks will be compressed (as opposed to inline
2216 # 1) the majority of chunks will be compressed (as opposed to inline
2234 # raw data).
2217 # raw data).
2235 # 2) decompressing *any* data will likely by at least 10x slower than
2218 # 2) decompressing *any* data will likely by at least 10x slower than
2236 # returning raw inline data.
2219 # returning raw inline data.
2237 # 3) we want to prioritize common and officially supported compression
2220 # 3) we want to prioritize common and officially supported compression
2238 # engines
2221 # engines
2239 #
2222 #
2240 # It follows that we want to optimize for "decompress compressed data
2223 # It follows that we want to optimize for "decompress compressed data
2241 # when encoded with common and officially supported compression engines"
2224 # when encoded with common and officially supported compression engines"
2242 # case over "raw data" and "data encoded by less common or non-official
2225 # case over "raw data" and "data encoded by less common or non-official
2243 # compression engines." That is why we have the inline lookup first
2226 # compression engines." That is why we have the inline lookup first
2244 # followed by the compengines lookup.
2227 # followed by the compengines lookup.
2245 #
2228 #
2246 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
2229 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
2247 # compressed chunks. And this matters for changelog and manifest reads.
2230 # compressed chunks. And this matters for changelog and manifest reads.
2248 t = data[0:1]
2231 t = data[0:1]
2249
2232
2250 if t == b'x':
2233 if t == b'x':
2251 try:
2234 try:
2252 return _zlibdecompress(data)
2235 return _zlibdecompress(data)
2253 except zlib.error as e:
2236 except zlib.error as e:
2254 raise error.RevlogError(
2237 raise error.RevlogError(
2255 _(b'revlog decompress error: %s')
2238 _(b'revlog decompress error: %s')
2256 % stringutil.forcebytestr(e)
2239 % stringutil.forcebytestr(e)
2257 )
2240 )
2258 # '\0' is more common than 'u' so it goes first.
2241 # '\0' is more common than 'u' so it goes first.
2259 elif t == b'\0':
2242 elif t == b'\0':
2260 return data
2243 return data
2261 elif t == b'u':
2244 elif t == b'u':
2262 return util.buffer(data, 1)
2245 return util.buffer(data, 1)
2263
2246
2264 try:
2247 try:
2265 compressor = self._decompressors[t]
2248 compressor = self._decompressors[t]
2266 except KeyError:
2249 except KeyError:
2267 try:
2250 try:
2268 engine = util.compengines.forrevlogheader(t)
2251 engine = util.compengines.forrevlogheader(t)
2269 compressor = engine.revlogcompressor(self._compengineopts)
2252 compressor = engine.revlogcompressor(self._compengineopts)
2270 self._decompressors[t] = compressor
2253 self._decompressors[t] = compressor
2271 except KeyError:
2254 except KeyError:
2272 raise error.RevlogError(
2255 raise error.RevlogError(
2273 _(b'unknown compression type %s') % binascii.hexlify(t)
2256 _(b'unknown compression type %s') % binascii.hexlify(t)
2274 )
2257 )
2275
2258
2276 return compressor.decompress(data)
2259 return compressor.decompress(data)
2277
2260
2278 def _addrevision(
2261 def _addrevision(
2279 self,
2262 self,
2280 node,
2263 node,
2281 rawtext,
2264 rawtext,
2282 transaction,
2265 transaction,
2283 link,
2266 link,
2284 p1,
2267 p1,
2285 p2,
2268 p2,
2286 flags,
2269 flags,
2287 cachedelta,
2270 cachedelta,
2288 ifh,
2271 ifh,
2289 dfh,
2272 dfh,
2290 alwayscache=False,
2273 alwayscache=False,
2291 deltacomputer=None,
2274 deltacomputer=None,
2292 sidedata=None,
2275 sidedata=None,
2293 ):
2276 ):
2294 """internal function to add revisions to the log
2277 """internal function to add revisions to the log
2295
2278
2296 see addrevision for argument descriptions.
2279 see addrevision for argument descriptions.
2297
2280
2298 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2281 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2299
2282
2300 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2283 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2301 be used.
2284 be used.
2302
2285
2303 invariants:
2286 invariants:
2304 - rawtext is optional (can be None); if not set, cachedelta must be set.
2287 - rawtext is optional (can be None); if not set, cachedelta must be set.
2305 if both are set, they must correspond to each other.
2288 if both are set, they must correspond to each other.
2306 """
2289 """
2307 if node == self.nullid:
2290 if node == self.nullid:
2308 raise error.RevlogError(
2291 raise error.RevlogError(
2309 _(b"%s: attempt to add null revision") % self.indexfile
2292 _(b"%s: attempt to add null revision") % self.indexfile
2310 )
2293 )
2311 if (
2294 if (
2312 node == self.nodeconstants.wdirid
2295 node == self.nodeconstants.wdirid
2313 or node in self.nodeconstants.wdirfilenodeids
2296 or node in self.nodeconstants.wdirfilenodeids
2314 ):
2297 ):
2315 raise error.RevlogError(
2298 raise error.RevlogError(
2316 _(b"%s: attempt to add wdir revision") % self.indexfile
2299 _(b"%s: attempt to add wdir revision") % self.indexfile
2317 )
2300 )
2318
2301
2319 if self._inline:
2302 if self._inline:
2320 fh = ifh
2303 fh = ifh
2321 else:
2304 else:
2322 fh = dfh
2305 fh = dfh
2323
2306
2324 btext = [rawtext]
2307 btext = [rawtext]
2325
2308
2326 curr = len(self)
2309 curr = len(self)
2327 prev = curr - 1
2310 prev = curr - 1
2328
2311
2329 offset = self._get_data_offset(prev)
2312 offset = self._get_data_offset(prev)
2330
2313
2331 if self._concurrencychecker:
2314 if self._concurrencychecker:
2332 if self._inline:
2315 if self._inline:
2333 # offset is "as if" it were in the .d file, so we need to add on
2316 # offset is "as if" it were in the .d file, so we need to add on
2334 # the size of the entry metadata.
2317 # the size of the entry metadata.
2335 self._concurrencychecker(
2318 self._concurrencychecker(
2336 ifh, self.indexfile, offset + curr * self.index.entry_size
2319 ifh, self.indexfile, offset + curr * self.index.entry_size
2337 )
2320 )
2338 else:
2321 else:
2339 # Entries in the .i are a consistent size.
2322 # Entries in the .i are a consistent size.
2340 self._concurrencychecker(
2323 self._concurrencychecker(
2341 ifh, self.indexfile, curr * self.index.entry_size
2324 ifh, self.indexfile, curr * self.index.entry_size
2342 )
2325 )
2343 self._concurrencychecker(dfh, self.datafile, offset)
2326 self._concurrencychecker(dfh, self.datafile, offset)
2344
2327
2345 p1r, p2r = self.rev(p1), self.rev(p2)
2328 p1r, p2r = self.rev(p1), self.rev(p2)
2346
2329
2347 # full versions are inserted when the needed deltas
2330 # full versions are inserted when the needed deltas
2348 # become comparable to the uncompressed text
2331 # become comparable to the uncompressed text
2349 if rawtext is None:
2332 if rawtext is None:
2350 # need rawtext size, before changed by flag processors, which is
2333 # need rawtext size, before changed by flag processors, which is
2351 # the non-raw size. use revlog explicitly to avoid filelog's extra
2334 # the non-raw size. use revlog explicitly to avoid filelog's extra
2352 # logic that might remove metadata size.
2335 # logic that might remove metadata size.
2353 textlen = mdiff.patchedsize(
2336 textlen = mdiff.patchedsize(
2354 revlog.size(self, cachedelta[0]), cachedelta[1]
2337 revlog.size(self, cachedelta[0]), cachedelta[1]
2355 )
2338 )
2356 else:
2339 else:
2357 textlen = len(rawtext)
2340 textlen = len(rawtext)
2358
2341
2359 if deltacomputer is None:
2342 if deltacomputer is None:
2360 deltacomputer = deltautil.deltacomputer(self)
2343 deltacomputer = deltautil.deltacomputer(self)
2361
2344
2362 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
2345 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
2363
2346
2364 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2347 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2365
2348
2366 if sidedata:
2349 if sidedata:
2367 serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
2350 serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
2368 sidedata_offset = offset + deltainfo.deltalen
2351 sidedata_offset = offset + deltainfo.deltalen
2369 else:
2352 else:
2370 serialized_sidedata = b""
2353 serialized_sidedata = b""
2371 # Don't store the offset if the sidedata is empty, that way
2354 # Don't store the offset if the sidedata is empty, that way
2372 # we can easily detect empty sidedata and they will be no different
2355 # we can easily detect empty sidedata and they will be no different
2373 # than ones we manually add.
2356 # than ones we manually add.
2374 sidedata_offset = 0
2357 sidedata_offset = 0
2375
2358
2376 e = (
2359 e = (
2377 offset_type(offset, flags),
2360 offset_type(offset, flags),
2378 deltainfo.deltalen,
2361 deltainfo.deltalen,
2379 textlen,
2362 textlen,
2380 deltainfo.base,
2363 deltainfo.base,
2381 link,
2364 link,
2382 p1r,
2365 p1r,
2383 p2r,
2366 p2r,
2384 node,
2367 node,
2385 sidedata_offset,
2368 sidedata_offset,
2386 len(serialized_sidedata),
2369 len(serialized_sidedata),
2387 )
2370 )
2388
2371
2389 if self.version & 0xFFFF != REVLOGV2:
2372 if self.version & 0xFFFF != REVLOGV2:
2390 e = e[:8]
2373 e = e[:8]
2391
2374
2392 self.index.append(e)
2375 self.index.append(e)
2393 entry = self._io.packentry(e, self.node, self.version, curr)
2376 entry = self.index.entry_binary(curr, self.version)
2394 self._writeentry(
2377 self._writeentry(
2395 transaction,
2378 transaction,
2396 ifh,
2379 ifh,
2397 dfh,
2380 dfh,
2398 entry,
2381 entry,
2399 deltainfo.data,
2382 deltainfo.data,
2400 link,
2383 link,
2401 offset,
2384 offset,
2402 serialized_sidedata,
2385 serialized_sidedata,
2403 )
2386 )
2404
2387
2405 rawtext = btext[0]
2388 rawtext = btext[0]
2406
2389
2407 if alwayscache and rawtext is None:
2390 if alwayscache and rawtext is None:
2408 rawtext = deltacomputer.buildtext(revinfo, fh)
2391 rawtext = deltacomputer.buildtext(revinfo, fh)
2409
2392
2410 if type(rawtext) == bytes: # only accept immutable objects
2393 if type(rawtext) == bytes: # only accept immutable objects
2411 self._revisioncache = (node, curr, rawtext)
2394 self._revisioncache = (node, curr, rawtext)
2412 self._chainbasecache[curr] = deltainfo.chainbase
2395 self._chainbasecache[curr] = deltainfo.chainbase
2413 return curr
2396 return curr
2414
2397
2415 def _get_data_offset(self, prev):
2398 def _get_data_offset(self, prev):
2416 """Returns the current offset in the (in-transaction) data file.
2399 """Returns the current offset in the (in-transaction) data file.
2417 Versions < 2 of the revlog can get this 0(1), revlog v2 needs a docket
2400 Versions < 2 of the revlog can get this 0(1), revlog v2 needs a docket
2418 file to store that information: since sidedata can be rewritten to the
2401 file to store that information: since sidedata can be rewritten to the
2419 end of the data file within a transaction, you can have cases where, for
2402 end of the data file within a transaction, you can have cases where, for
2420 example, rev `n` does not have sidedata while rev `n - 1` does, leading
2403 example, rev `n` does not have sidedata while rev `n - 1` does, leading
2421 to `n - 1`'s sidedata being written after `n`'s data.
2404 to `n - 1`'s sidedata being written after `n`'s data.
2422
2405
2423 TODO cache this in a docket file before getting out of experimental."""
2406 TODO cache this in a docket file before getting out of experimental."""
2424 if self.version & 0xFFFF != REVLOGV2:
2407 if self.version & 0xFFFF != REVLOGV2:
2425 return self.end(prev)
2408 return self.end(prev)
2426
2409
2427 offset = 0
2410 offset = 0
2428 for rev, entry in enumerate(self.index):
2411 for rev, entry in enumerate(self.index):
2429 sidedata_end = entry[8] + entry[9]
2412 sidedata_end = entry[8] + entry[9]
2430 # Sidedata for a previous rev has potentially been written after
2413 # Sidedata for a previous rev has potentially been written after
2431 # this rev's end, so take the max.
2414 # this rev's end, so take the max.
2432 offset = max(self.end(rev), offset, sidedata_end)
2415 offset = max(self.end(rev), offset, sidedata_end)
2433 return offset
2416 return offset
2434
2417
2435 def _writeentry(
2418 def _writeentry(
2436 self, transaction, ifh, dfh, entry, data, link, offset, sidedata
2419 self, transaction, ifh, dfh, entry, data, link, offset, sidedata
2437 ):
2420 ):
2438 # Files opened in a+ mode have inconsistent behavior on various
2421 # Files opened in a+ mode have inconsistent behavior on various
2439 # platforms. Windows requires that a file positioning call be made
2422 # platforms. Windows requires that a file positioning call be made
2440 # when the file handle transitions between reads and writes. See
2423 # when the file handle transitions between reads and writes. See
2441 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2424 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2442 # platforms, Python or the platform itself can be buggy. Some versions
2425 # platforms, Python or the platform itself can be buggy. Some versions
2443 # of Solaris have been observed to not append at the end of the file
2426 # of Solaris have been observed to not append at the end of the file
2444 # if the file was seeked to before the end. See issue4943 for more.
2427 # if the file was seeked to before the end. See issue4943 for more.
2445 #
2428 #
2446 # We work around this issue by inserting a seek() before writing.
2429 # We work around this issue by inserting a seek() before writing.
2447 # Note: This is likely not necessary on Python 3. However, because
2430 # Note: This is likely not necessary on Python 3. However, because
2448 # the file handle is reused for reads and may be seeked there, we need
2431 # the file handle is reused for reads and may be seeked there, we need
2449 # to be careful before changing this.
2432 # to be careful before changing this.
2450 ifh.seek(0, os.SEEK_END)
2433 ifh.seek(0, os.SEEK_END)
2451 if dfh:
2434 if dfh:
2452 dfh.seek(0, os.SEEK_END)
2435 dfh.seek(0, os.SEEK_END)
2453
2436
2454 curr = len(self) - 1
2437 curr = len(self) - 1
2455 if not self._inline:
2438 if not self._inline:
2456 transaction.add(self.datafile, offset)
2439 transaction.add(self.datafile, offset)
2457 transaction.add(self.indexfile, curr * len(entry))
2440 transaction.add(self.indexfile, curr * len(entry))
2458 if data[0]:
2441 if data[0]:
2459 dfh.write(data[0])
2442 dfh.write(data[0])
2460 dfh.write(data[1])
2443 dfh.write(data[1])
2461 if sidedata:
2444 if sidedata:
2462 dfh.write(sidedata)
2445 dfh.write(sidedata)
2463 ifh.write(entry)
2446 ifh.write(entry)
2464 else:
2447 else:
2465 offset += curr * self.index.entry_size
2448 offset += curr * self.index.entry_size
2466 transaction.add(self.indexfile, offset)
2449 transaction.add(self.indexfile, offset)
2467 ifh.write(entry)
2450 ifh.write(entry)
2468 ifh.write(data[0])
2451 ifh.write(data[0])
2469 ifh.write(data[1])
2452 ifh.write(data[1])
2470 if sidedata:
2453 if sidedata:
2471 ifh.write(sidedata)
2454 ifh.write(sidedata)
2472 self._enforceinlinesize(transaction, ifh)
2455 self._enforceinlinesize(transaction, ifh)
2473 nodemaputil.setup_persistent_nodemap(transaction, self)
2456 nodemaputil.setup_persistent_nodemap(transaction, self)
2474
2457
2475 def addgroup(
2458 def addgroup(
2476 self,
2459 self,
2477 deltas,
2460 deltas,
2478 linkmapper,
2461 linkmapper,
2479 transaction,
2462 transaction,
2480 alwayscache=False,
2463 alwayscache=False,
2481 addrevisioncb=None,
2464 addrevisioncb=None,
2482 duplicaterevisioncb=None,
2465 duplicaterevisioncb=None,
2483 ):
2466 ):
2484 """
2467 """
2485 add a delta group
2468 add a delta group
2486
2469
2487 given a set of deltas, add them to the revision log. the
2470 given a set of deltas, add them to the revision log. the
2488 first delta is against its parent, which should be in our
2471 first delta is against its parent, which should be in our
2489 log, the rest are against the previous delta.
2472 log, the rest are against the previous delta.
2490
2473
2491 If ``addrevisioncb`` is defined, it will be called with arguments of
2474 If ``addrevisioncb`` is defined, it will be called with arguments of
2492 this revlog and the node that was added.
2475 this revlog and the node that was added.
2493 """
2476 """
2494
2477
2495 if self._writinghandles:
2478 if self._writinghandles:
2496 raise error.ProgrammingError(b'cannot nest addgroup() calls')
2479 raise error.ProgrammingError(b'cannot nest addgroup() calls')
2497
2480
2498 r = len(self)
2481 r = len(self)
2499 end = 0
2482 end = 0
2500 if r:
2483 if r:
2501 end = self.end(r - 1)
2484 end = self.end(r - 1)
2502 ifh = self._indexfp(b"a+")
2485 ifh = self._indexfp(b"a+")
2503 isize = r * self.index.entry_size
2486 isize = r * self.index.entry_size
2504 if self._inline:
2487 if self._inline:
2505 transaction.add(self.indexfile, end + isize)
2488 transaction.add(self.indexfile, end + isize)
2506 dfh = None
2489 dfh = None
2507 else:
2490 else:
2508 transaction.add(self.indexfile, isize)
2491 transaction.add(self.indexfile, isize)
2509 transaction.add(self.datafile, end)
2492 transaction.add(self.datafile, end)
2510 dfh = self._datafp(b"a+")
2493 dfh = self._datafp(b"a+")
2511
2494
2512 def flush():
2495 def flush():
2513 if dfh:
2496 if dfh:
2514 dfh.flush()
2497 dfh.flush()
2515 ifh.flush()
2498 ifh.flush()
2516
2499
2517 self._writinghandles = (ifh, dfh)
2500 self._writinghandles = (ifh, dfh)
2518 empty = True
2501 empty = True
2519
2502
2520 try:
2503 try:
2521 deltacomputer = deltautil.deltacomputer(self)
2504 deltacomputer = deltautil.deltacomputer(self)
2522 # loop through our set of deltas
2505 # loop through our set of deltas
2523 for data in deltas:
2506 for data in deltas:
2524 node, p1, p2, linknode, deltabase, delta, flags, sidedata = data
2507 node, p1, p2, linknode, deltabase, delta, flags, sidedata = data
2525 link = linkmapper(linknode)
2508 link = linkmapper(linknode)
2526 flags = flags or REVIDX_DEFAULT_FLAGS
2509 flags = flags or REVIDX_DEFAULT_FLAGS
2527
2510
2528 rev = self.index.get_rev(node)
2511 rev = self.index.get_rev(node)
2529 if rev is not None:
2512 if rev is not None:
2530 # this can happen if two branches make the same change
2513 # this can happen if two branches make the same change
2531 self._nodeduplicatecallback(transaction, rev)
2514 self._nodeduplicatecallback(transaction, rev)
2532 if duplicaterevisioncb:
2515 if duplicaterevisioncb:
2533 duplicaterevisioncb(self, rev)
2516 duplicaterevisioncb(self, rev)
2534 empty = False
2517 empty = False
2535 continue
2518 continue
2536
2519
2537 for p in (p1, p2):
2520 for p in (p1, p2):
2538 if not self.index.has_node(p):
2521 if not self.index.has_node(p):
2539 raise error.LookupError(
2522 raise error.LookupError(
2540 p, self.indexfile, _(b'unknown parent')
2523 p, self.indexfile, _(b'unknown parent')
2541 )
2524 )
2542
2525
2543 if not self.index.has_node(deltabase):
2526 if not self.index.has_node(deltabase):
2544 raise error.LookupError(
2527 raise error.LookupError(
2545 deltabase, self.indexfile, _(b'unknown delta base')
2528 deltabase, self.indexfile, _(b'unknown delta base')
2546 )
2529 )
2547
2530
2548 baserev = self.rev(deltabase)
2531 baserev = self.rev(deltabase)
2549
2532
2550 if baserev != nullrev and self.iscensored(baserev):
2533 if baserev != nullrev and self.iscensored(baserev):
2551 # if base is censored, delta must be full replacement in a
2534 # if base is censored, delta must be full replacement in a
2552 # single patch operation
2535 # single patch operation
2553 hlen = struct.calcsize(b">lll")
2536 hlen = struct.calcsize(b">lll")
2554 oldlen = self.rawsize(baserev)
2537 oldlen = self.rawsize(baserev)
2555 newlen = len(delta) - hlen
2538 newlen = len(delta) - hlen
2556 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2539 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2557 raise error.CensoredBaseError(
2540 raise error.CensoredBaseError(
2558 self.indexfile, self.node(baserev)
2541 self.indexfile, self.node(baserev)
2559 )
2542 )
2560
2543
2561 if not flags and self._peek_iscensored(baserev, delta, flush):
2544 if not flags and self._peek_iscensored(baserev, delta, flush):
2562 flags |= REVIDX_ISCENSORED
2545 flags |= REVIDX_ISCENSORED
2563
2546
2564 # We assume consumers of addrevisioncb will want to retrieve
2547 # We assume consumers of addrevisioncb will want to retrieve
2565 # the added revision, which will require a call to
2548 # the added revision, which will require a call to
2566 # revision(). revision() will fast path if there is a cache
2549 # revision(). revision() will fast path if there is a cache
2567 # hit. So, we tell _addrevision() to always cache in this case.
2550 # hit. So, we tell _addrevision() to always cache in this case.
2568 # We're only using addgroup() in the context of changegroup
2551 # We're only using addgroup() in the context of changegroup
2569 # generation so the revision data can always be handled as raw
2552 # generation so the revision data can always be handled as raw
2570 # by the flagprocessor.
2553 # by the flagprocessor.
2571 rev = self._addrevision(
2554 rev = self._addrevision(
2572 node,
2555 node,
2573 None,
2556 None,
2574 transaction,
2557 transaction,
2575 link,
2558 link,
2576 p1,
2559 p1,
2577 p2,
2560 p2,
2578 flags,
2561 flags,
2579 (baserev, delta),
2562 (baserev, delta),
2580 ifh,
2563 ifh,
2581 dfh,
2564 dfh,
2582 alwayscache=alwayscache,
2565 alwayscache=alwayscache,
2583 deltacomputer=deltacomputer,
2566 deltacomputer=deltacomputer,
2584 sidedata=sidedata,
2567 sidedata=sidedata,
2585 )
2568 )
2586
2569
2587 if addrevisioncb:
2570 if addrevisioncb:
2588 addrevisioncb(self, rev)
2571 addrevisioncb(self, rev)
2589 empty = False
2572 empty = False
2590
2573
2591 if not dfh and not self._inline:
2574 if not dfh and not self._inline:
2592 # addrevision switched from inline to conventional
2575 # addrevision switched from inline to conventional
2593 # reopen the index
2576 # reopen the index
2594 ifh.close()
2577 ifh.close()
2595 dfh = self._datafp(b"a+")
2578 dfh = self._datafp(b"a+")
2596 ifh = self._indexfp(b"a+")
2579 ifh = self._indexfp(b"a+")
2597 self._writinghandles = (ifh, dfh)
2580 self._writinghandles = (ifh, dfh)
2598 finally:
2581 finally:
2599 self._writinghandles = None
2582 self._writinghandles = None
2600
2583
2601 if dfh:
2584 if dfh:
2602 dfh.close()
2585 dfh.close()
2603 ifh.close()
2586 ifh.close()
2604 return not empty
2587 return not empty
2605
2588
2606 def iscensored(self, rev):
2589 def iscensored(self, rev):
2607 """Check if a file revision is censored."""
2590 """Check if a file revision is censored."""
2608 if not self._censorable:
2591 if not self._censorable:
2609 return False
2592 return False
2610
2593
2611 return self.flags(rev) & REVIDX_ISCENSORED
2594 return self.flags(rev) & REVIDX_ISCENSORED
2612
2595
2613 def _peek_iscensored(self, baserev, delta, flush):
2596 def _peek_iscensored(self, baserev, delta, flush):
2614 """Quickly check if a delta produces a censored revision."""
2597 """Quickly check if a delta produces a censored revision."""
2615 if not self._censorable:
2598 if not self._censorable:
2616 return False
2599 return False
2617
2600
2618 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
2601 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
2619
2602
2620 def getstrippoint(self, minlink):
2603 def getstrippoint(self, minlink):
2621 """find the minimum rev that must be stripped to strip the linkrev
2604 """find the minimum rev that must be stripped to strip the linkrev
2622
2605
2623 Returns a tuple containing the minimum rev and a set of all revs that
2606 Returns a tuple containing the minimum rev and a set of all revs that
2624 have linkrevs that will be broken by this strip.
2607 have linkrevs that will be broken by this strip.
2625 """
2608 """
2626 return storageutil.resolvestripinfo(
2609 return storageutil.resolvestripinfo(
2627 minlink,
2610 minlink,
2628 len(self) - 1,
2611 len(self) - 1,
2629 self.headrevs(),
2612 self.headrevs(),
2630 self.linkrev,
2613 self.linkrev,
2631 self.parentrevs,
2614 self.parentrevs,
2632 )
2615 )
2633
2616
2634 def strip(self, minlink, transaction):
2617 def strip(self, minlink, transaction):
2635 """truncate the revlog on the first revision with a linkrev >= minlink
2618 """truncate the revlog on the first revision with a linkrev >= minlink
2636
2619
2637 This function is called when we're stripping revision minlink and
2620 This function is called when we're stripping revision minlink and
2638 its descendants from the repository.
2621 its descendants from the repository.
2639
2622
2640 We have to remove all revisions with linkrev >= minlink, because
2623 We have to remove all revisions with linkrev >= minlink, because
2641 the equivalent changelog revisions will be renumbered after the
2624 the equivalent changelog revisions will be renumbered after the
2642 strip.
2625 strip.
2643
2626
2644 So we truncate the revlog on the first of these revisions, and
2627 So we truncate the revlog on the first of these revisions, and
2645 trust that the caller has saved the revisions that shouldn't be
2628 trust that the caller has saved the revisions that shouldn't be
2646 removed and that it'll re-add them after this truncation.
2629 removed and that it'll re-add them after this truncation.
2647 """
2630 """
2648 if len(self) == 0:
2631 if len(self) == 0:
2649 return
2632 return
2650
2633
2651 rev, _ = self.getstrippoint(minlink)
2634 rev, _ = self.getstrippoint(minlink)
2652 if rev == len(self):
2635 if rev == len(self):
2653 return
2636 return
2654
2637
2655 # first truncate the files on disk
2638 # first truncate the files on disk
2656 end = self.start(rev)
2639 end = self.start(rev)
2657 if not self._inline:
2640 if not self._inline:
2658 transaction.add(self.datafile, end)
2641 transaction.add(self.datafile, end)
2659 end = rev * self.index.entry_size
2642 end = rev * self.index.entry_size
2660 else:
2643 else:
2661 end += rev * self.index.entry_size
2644 end += rev * self.index.entry_size
2662
2645
2663 transaction.add(self.indexfile, end)
2646 transaction.add(self.indexfile, end)
2664
2647
2665 # then reset internal state in memory to forget those revisions
2648 # then reset internal state in memory to forget those revisions
2666 self._revisioncache = None
2649 self._revisioncache = None
2667 self._chaininfocache = util.lrucachedict(500)
2650 self._chaininfocache = util.lrucachedict(500)
2668 self._chunkclear()
2651 self._chunkclear()
2669
2652
2670 del self.index[rev:-1]
2653 del self.index[rev:-1]
2671
2654
2672 def checksize(self):
2655 def checksize(self):
2673 """Check size of index and data files
2656 """Check size of index and data files
2674
2657
2675 return a (dd, di) tuple.
2658 return a (dd, di) tuple.
2676 - dd: extra bytes for the "data" file
2659 - dd: extra bytes for the "data" file
2677 - di: extra bytes for the "index" file
2660 - di: extra bytes for the "index" file
2678
2661
2679 A healthy revlog will return (0, 0).
2662 A healthy revlog will return (0, 0).
2680 """
2663 """
2681 expected = 0
2664 expected = 0
2682 if len(self):
2665 if len(self):
2683 expected = max(0, self.end(len(self) - 1))
2666 expected = max(0, self.end(len(self) - 1))
2684
2667
2685 try:
2668 try:
2686 with self._datafp() as f:
2669 with self._datafp() as f:
2687 f.seek(0, io.SEEK_END)
2670 f.seek(0, io.SEEK_END)
2688 actual = f.tell()
2671 actual = f.tell()
2689 dd = actual - expected
2672 dd = actual - expected
2690 except IOError as inst:
2673 except IOError as inst:
2691 if inst.errno != errno.ENOENT:
2674 if inst.errno != errno.ENOENT:
2692 raise
2675 raise
2693 dd = 0
2676 dd = 0
2694
2677
2695 try:
2678 try:
2696 f = self.opener(self.indexfile)
2679 f = self.opener(self.indexfile)
2697 f.seek(0, io.SEEK_END)
2680 f.seek(0, io.SEEK_END)
2698 actual = f.tell()
2681 actual = f.tell()
2699 f.close()
2682 f.close()
2700 s = self.index.entry_size
2683 s = self.index.entry_size
2701 i = max(0, actual // s)
2684 i = max(0, actual // s)
2702 di = actual - (i * s)
2685 di = actual - (i * s)
2703 if self._inline:
2686 if self._inline:
2704 databytes = 0
2687 databytes = 0
2705 for r in self:
2688 for r in self:
2706 databytes += max(0, self.length(r))
2689 databytes += max(0, self.length(r))
2707 dd = 0
2690 dd = 0
2708 di = actual - len(self) * s - databytes
2691 di = actual - len(self) * s - databytes
2709 except IOError as inst:
2692 except IOError as inst:
2710 if inst.errno != errno.ENOENT:
2693 if inst.errno != errno.ENOENT:
2711 raise
2694 raise
2712 di = 0
2695 di = 0
2713
2696
2714 return (dd, di)
2697 return (dd, di)
2715
2698
2716 def files(self):
2699 def files(self):
2717 res = [self.indexfile]
2700 res = [self.indexfile]
2718 if not self._inline:
2701 if not self._inline:
2719 res.append(self.datafile)
2702 res.append(self.datafile)
2720 return res
2703 return res
2721
2704
2722 def emitrevisions(
2705 def emitrevisions(
2723 self,
2706 self,
2724 nodes,
2707 nodes,
2725 nodesorder=None,
2708 nodesorder=None,
2726 revisiondata=False,
2709 revisiondata=False,
2727 assumehaveparentrevisions=False,
2710 assumehaveparentrevisions=False,
2728 deltamode=repository.CG_DELTAMODE_STD,
2711 deltamode=repository.CG_DELTAMODE_STD,
2729 sidedata_helpers=None,
2712 sidedata_helpers=None,
2730 ):
2713 ):
2731 if nodesorder not in (b'nodes', b'storage', b'linear', None):
2714 if nodesorder not in (b'nodes', b'storage', b'linear', None):
2732 raise error.ProgrammingError(
2715 raise error.ProgrammingError(
2733 b'unhandled value for nodesorder: %s' % nodesorder
2716 b'unhandled value for nodesorder: %s' % nodesorder
2734 )
2717 )
2735
2718
2736 if nodesorder is None and not self._generaldelta:
2719 if nodesorder is None and not self._generaldelta:
2737 nodesorder = b'storage'
2720 nodesorder = b'storage'
2738
2721
2739 if (
2722 if (
2740 not self._storedeltachains
2723 not self._storedeltachains
2741 and deltamode != repository.CG_DELTAMODE_PREV
2724 and deltamode != repository.CG_DELTAMODE_PREV
2742 ):
2725 ):
2743 deltamode = repository.CG_DELTAMODE_FULL
2726 deltamode = repository.CG_DELTAMODE_FULL
2744
2727
2745 return storageutil.emitrevisions(
2728 return storageutil.emitrevisions(
2746 self,
2729 self,
2747 nodes,
2730 nodes,
2748 nodesorder,
2731 nodesorder,
2749 revlogrevisiondelta,
2732 revlogrevisiondelta,
2750 deltaparentfn=self.deltaparent,
2733 deltaparentfn=self.deltaparent,
2751 candeltafn=self.candelta,
2734 candeltafn=self.candelta,
2752 rawsizefn=self.rawsize,
2735 rawsizefn=self.rawsize,
2753 revdifffn=self.revdiff,
2736 revdifffn=self.revdiff,
2754 flagsfn=self.flags,
2737 flagsfn=self.flags,
2755 deltamode=deltamode,
2738 deltamode=deltamode,
2756 revisiondata=revisiondata,
2739 revisiondata=revisiondata,
2757 assumehaveparentrevisions=assumehaveparentrevisions,
2740 assumehaveparentrevisions=assumehaveparentrevisions,
2758 sidedata_helpers=sidedata_helpers,
2741 sidedata_helpers=sidedata_helpers,
2759 )
2742 )
2760
2743
2761 DELTAREUSEALWAYS = b'always'
2744 DELTAREUSEALWAYS = b'always'
2762 DELTAREUSESAMEREVS = b'samerevs'
2745 DELTAREUSESAMEREVS = b'samerevs'
2763 DELTAREUSENEVER = b'never'
2746 DELTAREUSENEVER = b'never'
2764
2747
2765 DELTAREUSEFULLADD = b'fulladd'
2748 DELTAREUSEFULLADD = b'fulladd'
2766
2749
2767 DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}
2750 DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}
2768
2751
2769 def clone(
2752 def clone(
2770 self,
2753 self,
2771 tr,
2754 tr,
2772 destrevlog,
2755 destrevlog,
2773 addrevisioncb=None,
2756 addrevisioncb=None,
2774 deltareuse=DELTAREUSESAMEREVS,
2757 deltareuse=DELTAREUSESAMEREVS,
2775 forcedeltabothparents=None,
2758 forcedeltabothparents=None,
2776 sidedatacompanion=None,
2759 sidedatacompanion=None,
2777 ):
2760 ):
2778 """Copy this revlog to another, possibly with format changes.
2761 """Copy this revlog to another, possibly with format changes.
2779
2762
2780 The destination revlog will contain the same revisions and nodes.
2763 The destination revlog will contain the same revisions and nodes.
2781 However, it may not be bit-for-bit identical due to e.g. delta encoding
2764 However, it may not be bit-for-bit identical due to e.g. delta encoding
2782 differences.
2765 differences.
2783
2766
2784 The ``deltareuse`` argument control how deltas from the existing revlog
2767 The ``deltareuse`` argument control how deltas from the existing revlog
2785 are preserved in the destination revlog. The argument can have the
2768 are preserved in the destination revlog. The argument can have the
2786 following values:
2769 following values:
2787
2770
2788 DELTAREUSEALWAYS
2771 DELTAREUSEALWAYS
2789 Deltas will always be reused (if possible), even if the destination
2772 Deltas will always be reused (if possible), even if the destination
2790 revlog would not select the same revisions for the delta. This is the
2773 revlog would not select the same revisions for the delta. This is the
2791 fastest mode of operation.
2774 fastest mode of operation.
2792 DELTAREUSESAMEREVS
2775 DELTAREUSESAMEREVS
2793 Deltas will be reused if the destination revlog would pick the same
2776 Deltas will be reused if the destination revlog would pick the same
2794 revisions for the delta. This mode strikes a balance between speed
2777 revisions for the delta. This mode strikes a balance between speed
2795 and optimization.
2778 and optimization.
2796 DELTAREUSENEVER
2779 DELTAREUSENEVER
2797 Deltas will never be reused. This is the slowest mode of execution.
2780 Deltas will never be reused. This is the slowest mode of execution.
2798 This mode can be used to recompute deltas (e.g. if the diff/delta
2781 This mode can be used to recompute deltas (e.g. if the diff/delta
2799 algorithm changes).
2782 algorithm changes).
2800 DELTAREUSEFULLADD
2783 DELTAREUSEFULLADD
2801 Revision will be re-added as if their were new content. This is
2784 Revision will be re-added as if their were new content. This is
2802 slower than DELTAREUSEALWAYS but allow more mechanism to kicks in.
2785 slower than DELTAREUSEALWAYS but allow more mechanism to kicks in.
2803 eg: large file detection and handling.
2786 eg: large file detection and handling.
2804
2787
2805 Delta computation can be slow, so the choice of delta reuse policy can
2788 Delta computation can be slow, so the choice of delta reuse policy can
2806 significantly affect run time.
2789 significantly affect run time.
2807
2790
2808 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2791 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2809 two extremes. Deltas will be reused if they are appropriate. But if the
2792 two extremes. Deltas will be reused if they are appropriate. But if the
2810 delta could choose a better revision, it will do so. This means if you
2793 delta could choose a better revision, it will do so. This means if you
2811 are converting a non-generaldelta revlog to a generaldelta revlog,
2794 are converting a non-generaldelta revlog to a generaldelta revlog,
2812 deltas will be recomputed if the delta's parent isn't a parent of the
2795 deltas will be recomputed if the delta's parent isn't a parent of the
2813 revision.
2796 revision.
2814
2797
2815 In addition to the delta policy, the ``forcedeltabothparents``
2798 In addition to the delta policy, the ``forcedeltabothparents``
2816 argument controls whether to force compute deltas against both parents
2799 argument controls whether to force compute deltas against both parents
2817 for merges. By default, the current default is used.
2800 for merges. By default, the current default is used.
2818
2801
2819 If not None, the `sidedatacompanion` is callable that accept two
2802 If not None, the `sidedatacompanion` is callable that accept two
2820 arguments:
2803 arguments:
2821
2804
2822 (srcrevlog, rev)
2805 (srcrevlog, rev)
2823
2806
2824 and return a quintet that control changes to sidedata content from the
2807 and return a quintet that control changes to sidedata content from the
2825 old revision to the new clone result:
2808 old revision to the new clone result:
2826
2809
2827 (dropall, filterout, update, new_flags, dropped_flags)
2810 (dropall, filterout, update, new_flags, dropped_flags)
2828
2811
2829 * if `dropall` is True, all sidedata should be dropped
2812 * if `dropall` is True, all sidedata should be dropped
2830 * `filterout` is a set of sidedata keys that should be dropped
2813 * `filterout` is a set of sidedata keys that should be dropped
2831 * `update` is a mapping of additionnal/new key -> value
2814 * `update` is a mapping of additionnal/new key -> value
2832 * new_flags is a bitfields of new flags that the revision should get
2815 * new_flags is a bitfields of new flags that the revision should get
2833 * dropped_flags is a bitfields of new flags that the revision shoudl not longer have
2816 * dropped_flags is a bitfields of new flags that the revision shoudl not longer have
2834 """
2817 """
2835 if deltareuse not in self.DELTAREUSEALL:
2818 if deltareuse not in self.DELTAREUSEALL:
2836 raise ValueError(
2819 raise ValueError(
2837 _(b'value for deltareuse invalid: %s') % deltareuse
2820 _(b'value for deltareuse invalid: %s') % deltareuse
2838 )
2821 )
2839
2822
2840 if len(destrevlog):
2823 if len(destrevlog):
2841 raise ValueError(_(b'destination revlog is not empty'))
2824 raise ValueError(_(b'destination revlog is not empty'))
2842
2825
2843 if getattr(self, 'filteredrevs', None):
2826 if getattr(self, 'filteredrevs', None):
2844 raise ValueError(_(b'source revlog has filtered revisions'))
2827 raise ValueError(_(b'source revlog has filtered revisions'))
2845 if getattr(destrevlog, 'filteredrevs', None):
2828 if getattr(destrevlog, 'filteredrevs', None):
2846 raise ValueError(_(b'destination revlog has filtered revisions'))
2829 raise ValueError(_(b'destination revlog has filtered revisions'))
2847
2830
2848 # lazydelta and lazydeltabase controls whether to reuse a cached delta,
2831 # lazydelta and lazydeltabase controls whether to reuse a cached delta,
2849 # if possible.
2832 # if possible.
2850 oldlazydelta = destrevlog._lazydelta
2833 oldlazydelta = destrevlog._lazydelta
2851 oldlazydeltabase = destrevlog._lazydeltabase
2834 oldlazydeltabase = destrevlog._lazydeltabase
2852 oldamd = destrevlog._deltabothparents
2835 oldamd = destrevlog._deltabothparents
2853
2836
2854 try:
2837 try:
2855 if deltareuse == self.DELTAREUSEALWAYS:
2838 if deltareuse == self.DELTAREUSEALWAYS:
2856 destrevlog._lazydeltabase = True
2839 destrevlog._lazydeltabase = True
2857 destrevlog._lazydelta = True
2840 destrevlog._lazydelta = True
2858 elif deltareuse == self.DELTAREUSESAMEREVS:
2841 elif deltareuse == self.DELTAREUSESAMEREVS:
2859 destrevlog._lazydeltabase = False
2842 destrevlog._lazydeltabase = False
2860 destrevlog._lazydelta = True
2843 destrevlog._lazydelta = True
2861 elif deltareuse == self.DELTAREUSENEVER:
2844 elif deltareuse == self.DELTAREUSENEVER:
2862 destrevlog._lazydeltabase = False
2845 destrevlog._lazydeltabase = False
2863 destrevlog._lazydelta = False
2846 destrevlog._lazydelta = False
2864
2847
2865 destrevlog._deltabothparents = forcedeltabothparents or oldamd
2848 destrevlog._deltabothparents = forcedeltabothparents or oldamd
2866
2849
2867 self._clone(
2850 self._clone(
2868 tr,
2851 tr,
2869 destrevlog,
2852 destrevlog,
2870 addrevisioncb,
2853 addrevisioncb,
2871 deltareuse,
2854 deltareuse,
2872 forcedeltabothparents,
2855 forcedeltabothparents,
2873 sidedatacompanion,
2856 sidedatacompanion,
2874 )
2857 )
2875
2858
2876 finally:
2859 finally:
2877 destrevlog._lazydelta = oldlazydelta
2860 destrevlog._lazydelta = oldlazydelta
2878 destrevlog._lazydeltabase = oldlazydeltabase
2861 destrevlog._lazydeltabase = oldlazydeltabase
2879 destrevlog._deltabothparents = oldamd
2862 destrevlog._deltabothparents = oldamd
2880
2863
2881 def _clone(
2864 def _clone(
2882 self,
2865 self,
2883 tr,
2866 tr,
2884 destrevlog,
2867 destrevlog,
2885 addrevisioncb,
2868 addrevisioncb,
2886 deltareuse,
2869 deltareuse,
2887 forcedeltabothparents,
2870 forcedeltabothparents,
2888 sidedatacompanion,
2871 sidedatacompanion,
2889 ):
2872 ):
2890 """perform the core duty of `revlog.clone` after parameter processing"""
2873 """perform the core duty of `revlog.clone` after parameter processing"""
2891 deltacomputer = deltautil.deltacomputer(destrevlog)
2874 deltacomputer = deltautil.deltacomputer(destrevlog)
2892 index = self.index
2875 index = self.index
2893 for rev in self:
2876 for rev in self:
2894 entry = index[rev]
2877 entry = index[rev]
2895
2878
2896 # Some classes override linkrev to take filtered revs into
2879 # Some classes override linkrev to take filtered revs into
2897 # account. Use raw entry from index.
2880 # account. Use raw entry from index.
2898 flags = entry[0] & 0xFFFF
2881 flags = entry[0] & 0xFFFF
2899 linkrev = entry[4]
2882 linkrev = entry[4]
2900 p1 = index[entry[5]][7]
2883 p1 = index[entry[5]][7]
2901 p2 = index[entry[6]][7]
2884 p2 = index[entry[6]][7]
2902 node = entry[7]
2885 node = entry[7]
2903
2886
2904 sidedataactions = (False, [], {}, 0, 0)
2887 sidedataactions = (False, [], {}, 0, 0)
2905 if sidedatacompanion is not None:
2888 if sidedatacompanion is not None:
2906 sidedataactions = sidedatacompanion(self, rev)
2889 sidedataactions = sidedatacompanion(self, rev)
2907
2890
2908 # (Possibly) reuse the delta from the revlog if allowed and
2891 # (Possibly) reuse the delta from the revlog if allowed and
2909 # the revlog chunk is a delta.
2892 # the revlog chunk is a delta.
2910 cachedelta = None
2893 cachedelta = None
2911 rawtext = None
2894 rawtext = None
2912 if any(sidedataactions) or deltareuse == self.DELTAREUSEFULLADD:
2895 if any(sidedataactions) or deltareuse == self.DELTAREUSEFULLADD:
2913 dropall = sidedataactions[0]
2896 dropall = sidedataactions[0]
2914 filterout = sidedataactions[1]
2897 filterout = sidedataactions[1]
2915 update = sidedataactions[2]
2898 update = sidedataactions[2]
2916 new_flags = sidedataactions[3]
2899 new_flags = sidedataactions[3]
2917 dropped_flags = sidedataactions[4]
2900 dropped_flags = sidedataactions[4]
2918 text, sidedata = self._revisiondata(rev)
2901 text, sidedata = self._revisiondata(rev)
2919 if dropall:
2902 if dropall:
2920 sidedata = {}
2903 sidedata = {}
2921 for key in filterout:
2904 for key in filterout:
2922 sidedata.pop(key, None)
2905 sidedata.pop(key, None)
2923 sidedata.update(update)
2906 sidedata.update(update)
2924 if not sidedata:
2907 if not sidedata:
2925 sidedata = None
2908 sidedata = None
2926
2909
2927 flags |= new_flags
2910 flags |= new_flags
2928 flags &= ~dropped_flags
2911 flags &= ~dropped_flags
2929
2912
2930 destrevlog.addrevision(
2913 destrevlog.addrevision(
2931 text,
2914 text,
2932 tr,
2915 tr,
2933 linkrev,
2916 linkrev,
2934 p1,
2917 p1,
2935 p2,
2918 p2,
2936 cachedelta=cachedelta,
2919 cachedelta=cachedelta,
2937 node=node,
2920 node=node,
2938 flags=flags,
2921 flags=flags,
2939 deltacomputer=deltacomputer,
2922 deltacomputer=deltacomputer,
2940 sidedata=sidedata,
2923 sidedata=sidedata,
2941 )
2924 )
2942 else:
2925 else:
2943 if destrevlog._lazydelta:
2926 if destrevlog._lazydelta:
2944 dp = self.deltaparent(rev)
2927 dp = self.deltaparent(rev)
2945 if dp != nullrev:
2928 if dp != nullrev:
2946 cachedelta = (dp, bytes(self._chunk(rev)))
2929 cachedelta = (dp, bytes(self._chunk(rev)))
2947
2930
2948 if not cachedelta:
2931 if not cachedelta:
2949 rawtext = self.rawdata(rev)
2932 rawtext = self.rawdata(rev)
2950
2933
2951 ifh = destrevlog.opener(
2934 ifh = destrevlog.opener(
2952 destrevlog.indexfile, b'a+', checkambig=False
2935 destrevlog.indexfile, b'a+', checkambig=False
2953 )
2936 )
2954 dfh = None
2937 dfh = None
2955 if not destrevlog._inline:
2938 if not destrevlog._inline:
2956 dfh = destrevlog.opener(destrevlog.datafile, b'a+')
2939 dfh = destrevlog.opener(destrevlog.datafile, b'a+')
2957 try:
2940 try:
2958 destrevlog._addrevision(
2941 destrevlog._addrevision(
2959 node,
2942 node,
2960 rawtext,
2943 rawtext,
2961 tr,
2944 tr,
2962 linkrev,
2945 linkrev,
2963 p1,
2946 p1,
2964 p2,
2947 p2,
2965 flags,
2948 flags,
2966 cachedelta,
2949 cachedelta,
2967 ifh,
2950 ifh,
2968 dfh,
2951 dfh,
2969 deltacomputer=deltacomputer,
2952 deltacomputer=deltacomputer,
2970 )
2953 )
2971 finally:
2954 finally:
2972 if dfh:
2955 if dfh:
2973 dfh.close()
2956 dfh.close()
2974 ifh.close()
2957 ifh.close()
2975
2958
2976 if addrevisioncb:
2959 if addrevisioncb:
2977 addrevisioncb(self, rev, node)
2960 addrevisioncb(self, rev, node)
2978
2961
2979 def censorrevision(self, tr, censornode, tombstone=b''):
2962 def censorrevision(self, tr, censornode, tombstone=b''):
2980 if (self.version & 0xFFFF) == REVLOGV0:
2963 if (self.version & 0xFFFF) == REVLOGV0:
2981 raise error.RevlogError(
2964 raise error.RevlogError(
2982 _(b'cannot censor with version %d revlogs') % self.version
2965 _(b'cannot censor with version %d revlogs') % self.version
2983 )
2966 )
2984
2967
2985 censorrev = self.rev(censornode)
2968 censorrev = self.rev(censornode)
2986 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
2969 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
2987
2970
2988 if len(tombstone) > self.rawsize(censorrev):
2971 if len(tombstone) > self.rawsize(censorrev):
2989 raise error.Abort(
2972 raise error.Abort(
2990 _(b'censor tombstone must be no longer than censored data')
2973 _(b'censor tombstone must be no longer than censored data')
2991 )
2974 )
2992
2975
2993 # Rewriting the revlog in place is hard. Our strategy for censoring is
2976 # Rewriting the revlog in place is hard. Our strategy for censoring is
2994 # to create a new revlog, copy all revisions to it, then replace the
2977 # to create a new revlog, copy all revisions to it, then replace the
2995 # revlogs on transaction close.
2978 # revlogs on transaction close.
2996
2979
2997 newindexfile = self.indexfile + b'.tmpcensored'
2980 newindexfile = self.indexfile + b'.tmpcensored'
2998 newdatafile = self.datafile + b'.tmpcensored'
2981 newdatafile = self.datafile + b'.tmpcensored'
2999
2982
3000 # This is a bit dangerous. We could easily have a mismatch of state.
2983 # This is a bit dangerous. We could easily have a mismatch of state.
3001 newrl = revlog(self.opener, newindexfile, newdatafile, censorable=True)
2984 newrl = revlog(self.opener, newindexfile, newdatafile, censorable=True)
3002 newrl.version = self.version
2985 newrl.version = self.version
3003 newrl._generaldelta = self._generaldelta
2986 newrl._generaldelta = self._generaldelta
3004 newrl._io = self._io
2987 newrl._io = self._io
3005
2988
3006 for rev in self.revs():
2989 for rev in self.revs():
3007 node = self.node(rev)
2990 node = self.node(rev)
3008 p1, p2 = self.parents(node)
2991 p1, p2 = self.parents(node)
3009
2992
3010 if rev == censorrev:
2993 if rev == censorrev:
3011 newrl.addrawrevision(
2994 newrl.addrawrevision(
3012 tombstone,
2995 tombstone,
3013 tr,
2996 tr,
3014 self.linkrev(censorrev),
2997 self.linkrev(censorrev),
3015 p1,
2998 p1,
3016 p2,
2999 p2,
3017 censornode,
3000 censornode,
3018 REVIDX_ISCENSORED,
3001 REVIDX_ISCENSORED,
3019 )
3002 )
3020
3003
3021 if newrl.deltaparent(rev) != nullrev:
3004 if newrl.deltaparent(rev) != nullrev:
3022 raise error.Abort(
3005 raise error.Abort(
3023 _(
3006 _(
3024 b'censored revision stored as delta; '
3007 b'censored revision stored as delta; '
3025 b'cannot censor'
3008 b'cannot censor'
3026 ),
3009 ),
3027 hint=_(
3010 hint=_(
3028 b'censoring of revlogs is not '
3011 b'censoring of revlogs is not '
3029 b'fully implemented; please report '
3012 b'fully implemented; please report '
3030 b'this bug'
3013 b'this bug'
3031 ),
3014 ),
3032 )
3015 )
3033 continue
3016 continue
3034
3017
3035 if self.iscensored(rev):
3018 if self.iscensored(rev):
3036 if self.deltaparent(rev) != nullrev:
3019 if self.deltaparent(rev) != nullrev:
3037 raise error.Abort(
3020 raise error.Abort(
3038 _(
3021 _(
3039 b'cannot censor due to censored '
3022 b'cannot censor due to censored '
3040 b'revision having delta stored'
3023 b'revision having delta stored'
3041 )
3024 )
3042 )
3025 )
3043 rawtext = self._chunk(rev)
3026 rawtext = self._chunk(rev)
3044 else:
3027 else:
3045 rawtext = self.rawdata(rev)
3028 rawtext = self.rawdata(rev)
3046
3029
3047 newrl.addrawrevision(
3030 newrl.addrawrevision(
3048 rawtext, tr, self.linkrev(rev), p1, p2, node, self.flags(rev)
3031 rawtext, tr, self.linkrev(rev), p1, p2, node, self.flags(rev)
3049 )
3032 )
3050
3033
3051 tr.addbackup(self.indexfile, location=b'store')
3034 tr.addbackup(self.indexfile, location=b'store')
3052 if not self._inline:
3035 if not self._inline:
3053 tr.addbackup(self.datafile, location=b'store')
3036 tr.addbackup(self.datafile, location=b'store')
3054
3037
3055 self.opener.rename(newrl.indexfile, self.indexfile)
3038 self.opener.rename(newrl.indexfile, self.indexfile)
3056 if not self._inline:
3039 if not self._inline:
3057 self.opener.rename(newrl.datafile, self.datafile)
3040 self.opener.rename(newrl.datafile, self.datafile)
3058
3041
3059 self.clearcaches()
3042 self.clearcaches()
3060 self._loadindex()
3043 self._loadindex()
3061
3044
3062 def verifyintegrity(self, state):
3045 def verifyintegrity(self, state):
3063 """Verifies the integrity of the revlog.
3046 """Verifies the integrity of the revlog.
3064
3047
3065 Yields ``revlogproblem`` instances describing problems that are
3048 Yields ``revlogproblem`` instances describing problems that are
3066 found.
3049 found.
3067 """
3050 """
3068 dd, di = self.checksize()
3051 dd, di = self.checksize()
3069 if dd:
3052 if dd:
3070 yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
3053 yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
3071 if di:
3054 if di:
3072 yield revlogproblem(error=_(b'index contains %d extra bytes') % di)
3055 yield revlogproblem(error=_(b'index contains %d extra bytes') % di)
3073
3056
3074 version = self.version & 0xFFFF
3057 version = self.version & 0xFFFF
3075
3058
3076 # The verifier tells us what version revlog we should be.
3059 # The verifier tells us what version revlog we should be.
3077 if version != state[b'expectedversion']:
3060 if version != state[b'expectedversion']:
3078 yield revlogproblem(
3061 yield revlogproblem(
3079 warning=_(b"warning: '%s' uses revlog format %d; expected %d")
3062 warning=_(b"warning: '%s' uses revlog format %d; expected %d")
3080 % (self.indexfile, version, state[b'expectedversion'])
3063 % (self.indexfile, version, state[b'expectedversion'])
3081 )
3064 )
3082
3065
3083 state[b'skipread'] = set()
3066 state[b'skipread'] = set()
3084 state[b'safe_renamed'] = set()
3067 state[b'safe_renamed'] = set()
3085
3068
3086 for rev in self:
3069 for rev in self:
3087 node = self.node(rev)
3070 node = self.node(rev)
3088
3071
3089 # Verify contents. 4 cases to care about:
3072 # Verify contents. 4 cases to care about:
3090 #
3073 #
3091 # common: the most common case
3074 # common: the most common case
3092 # rename: with a rename
3075 # rename: with a rename
3093 # meta: file content starts with b'\1\n', the metadata
3076 # meta: file content starts with b'\1\n', the metadata
3094 # header defined in filelog.py, but without a rename
3077 # header defined in filelog.py, but without a rename
3095 # ext: content stored externally
3078 # ext: content stored externally
3096 #
3079 #
3097 # More formally, their differences are shown below:
3080 # More formally, their differences are shown below:
3098 #
3081 #
3099 # | common | rename | meta | ext
3082 # | common | rename | meta | ext
3100 # -------------------------------------------------------
3083 # -------------------------------------------------------
3101 # flags() | 0 | 0 | 0 | not 0
3084 # flags() | 0 | 0 | 0 | not 0
3102 # renamed() | False | True | False | ?
3085 # renamed() | False | True | False | ?
3103 # rawtext[0:2]=='\1\n'| False | True | True | ?
3086 # rawtext[0:2]=='\1\n'| False | True | True | ?
3104 #
3087 #
3105 # "rawtext" means the raw text stored in revlog data, which
3088 # "rawtext" means the raw text stored in revlog data, which
3106 # could be retrieved by "rawdata(rev)". "text"
3089 # could be retrieved by "rawdata(rev)". "text"
3107 # mentioned below is "revision(rev)".
3090 # mentioned below is "revision(rev)".
3108 #
3091 #
3109 # There are 3 different lengths stored physically:
3092 # There are 3 different lengths stored physically:
3110 # 1. L1: rawsize, stored in revlog index
3093 # 1. L1: rawsize, stored in revlog index
3111 # 2. L2: len(rawtext), stored in revlog data
3094 # 2. L2: len(rawtext), stored in revlog data
3112 # 3. L3: len(text), stored in revlog data if flags==0, or
3095 # 3. L3: len(text), stored in revlog data if flags==0, or
3113 # possibly somewhere else if flags!=0
3096 # possibly somewhere else if flags!=0
3114 #
3097 #
3115 # L1 should be equal to L2. L3 could be different from them.
3098 # L1 should be equal to L2. L3 could be different from them.
3116 # "text" may or may not affect commit hash depending on flag
3099 # "text" may or may not affect commit hash depending on flag
3117 # processors (see flagutil.addflagprocessor).
3100 # processors (see flagutil.addflagprocessor).
3118 #
3101 #
3119 # | common | rename | meta | ext
3102 # | common | rename | meta | ext
3120 # -------------------------------------------------
3103 # -------------------------------------------------
3121 # rawsize() | L1 | L1 | L1 | L1
3104 # rawsize() | L1 | L1 | L1 | L1
3122 # size() | L1 | L2-LM | L1(*) | L1 (?)
3105 # size() | L1 | L2-LM | L1(*) | L1 (?)
3123 # len(rawtext) | L2 | L2 | L2 | L2
3106 # len(rawtext) | L2 | L2 | L2 | L2
3124 # len(text) | L2 | L2 | L2 | L3
3107 # len(text) | L2 | L2 | L2 | L3
3125 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
3108 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
3126 #
3109 #
3127 # LM: length of metadata, depending on rawtext
3110 # LM: length of metadata, depending on rawtext
3128 # (*): not ideal, see comment in filelog.size
3111 # (*): not ideal, see comment in filelog.size
3129 # (?): could be "- len(meta)" if the resolved content has
3112 # (?): could be "- len(meta)" if the resolved content has
3130 # rename metadata
3113 # rename metadata
3131 #
3114 #
3132 # Checks needed to be done:
3115 # Checks needed to be done:
3133 # 1. length check: L1 == L2, in all cases.
3116 # 1. length check: L1 == L2, in all cases.
3134 # 2. hash check: depending on flag processor, we may need to
3117 # 2. hash check: depending on flag processor, we may need to
3135 # use either "text" (external), or "rawtext" (in revlog).
3118 # use either "text" (external), or "rawtext" (in revlog).
3136
3119
3137 try:
3120 try:
3138 skipflags = state.get(b'skipflags', 0)
3121 skipflags = state.get(b'skipflags', 0)
3139 if skipflags:
3122 if skipflags:
3140 skipflags &= self.flags(rev)
3123 skipflags &= self.flags(rev)
3141
3124
3142 _verify_revision(self, skipflags, state, node)
3125 _verify_revision(self, skipflags, state, node)
3143
3126
3144 l1 = self.rawsize(rev)
3127 l1 = self.rawsize(rev)
3145 l2 = len(self.rawdata(node))
3128 l2 = len(self.rawdata(node))
3146
3129
3147 if l1 != l2:
3130 if l1 != l2:
3148 yield revlogproblem(
3131 yield revlogproblem(
3149 error=_(b'unpacked size is %d, %d expected') % (l2, l1),
3132 error=_(b'unpacked size is %d, %d expected') % (l2, l1),
3150 node=node,
3133 node=node,
3151 )
3134 )
3152
3135
3153 except error.CensoredNodeError:
3136 except error.CensoredNodeError:
3154 if state[b'erroroncensored']:
3137 if state[b'erroroncensored']:
3155 yield revlogproblem(
3138 yield revlogproblem(
3156 error=_(b'censored file data'), node=node
3139 error=_(b'censored file data'), node=node
3157 )
3140 )
3158 state[b'skipread'].add(node)
3141 state[b'skipread'].add(node)
3159 except Exception as e:
3142 except Exception as e:
3160 yield revlogproblem(
3143 yield revlogproblem(
3161 error=_(b'unpacking %s: %s')
3144 error=_(b'unpacking %s: %s')
3162 % (short(node), stringutil.forcebytestr(e)),
3145 % (short(node), stringutil.forcebytestr(e)),
3163 node=node,
3146 node=node,
3164 )
3147 )
3165 state[b'skipread'].add(node)
3148 state[b'skipread'].add(node)
3166
3149
3167 def storageinfo(
3150 def storageinfo(
3168 self,
3151 self,
3169 exclusivefiles=False,
3152 exclusivefiles=False,
3170 sharedfiles=False,
3153 sharedfiles=False,
3171 revisionscount=False,
3154 revisionscount=False,
3172 trackedsize=False,
3155 trackedsize=False,
3173 storedsize=False,
3156 storedsize=False,
3174 ):
3157 ):
3175 d = {}
3158 d = {}
3176
3159
3177 if exclusivefiles:
3160 if exclusivefiles:
3178 d[b'exclusivefiles'] = [(self.opener, self.indexfile)]
3161 d[b'exclusivefiles'] = [(self.opener, self.indexfile)]
3179 if not self._inline:
3162 if not self._inline:
3180 d[b'exclusivefiles'].append((self.opener, self.datafile))
3163 d[b'exclusivefiles'].append((self.opener, self.datafile))
3181
3164
3182 if sharedfiles:
3165 if sharedfiles:
3183 d[b'sharedfiles'] = []
3166 d[b'sharedfiles'] = []
3184
3167
3185 if revisionscount:
3168 if revisionscount:
3186 d[b'revisionscount'] = len(self)
3169 d[b'revisionscount'] = len(self)
3187
3170
3188 if trackedsize:
3171 if trackedsize:
3189 d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))
3172 d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))
3190
3173
3191 if storedsize:
3174 if storedsize:
3192 d[b'storedsize'] = sum(
3175 d[b'storedsize'] = sum(
3193 self.opener.stat(path).st_size for path in self.files()
3176 self.opener.stat(path).st_size for path in self.files()
3194 )
3177 )
3195
3178
3196 return d
3179 return d
3197
3180
3198 def rewrite_sidedata(self, helpers, startrev, endrev):
3181 def rewrite_sidedata(self, helpers, startrev, endrev):
3199 if self.version & 0xFFFF != REVLOGV2:
3182 if self.version & 0xFFFF != REVLOGV2:
3200 return
3183 return
3201 # inline are not yet supported because they suffer from an issue when
3184 # inline are not yet supported because they suffer from an issue when
3202 # rewriting them (since it's not an append-only operation).
3185 # rewriting them (since it's not an append-only operation).
3203 # See issue6485.
3186 # See issue6485.
3204 assert not self._inline
3187 assert not self._inline
3205 if not helpers[1] and not helpers[2]:
3188 if not helpers[1] and not helpers[2]:
3206 # Nothing to generate or remove
3189 # Nothing to generate or remove
3207 return
3190 return
3208
3191
3209 new_entries = []
3192 new_entries = []
3210 # append the new sidedata
3193 # append the new sidedata
3211 with self._datafp(b'a+') as fp:
3194 with self._datafp(b'a+') as fp:
3212 # Maybe this bug still exists, see revlog._writeentry
3195 # Maybe this bug still exists, see revlog._writeentry
3213 fp.seek(0, os.SEEK_END)
3196 fp.seek(0, os.SEEK_END)
3214 current_offset = fp.tell()
3197 current_offset = fp.tell()
3215 for rev in range(startrev, endrev + 1):
3198 for rev in range(startrev, endrev + 1):
3216 entry = self.index[rev]
3199 entry = self.index[rev]
3217 new_sidedata = storageutil.run_sidedata_helpers(
3200 new_sidedata = storageutil.run_sidedata_helpers(
3218 store=self,
3201 store=self,
3219 sidedata_helpers=helpers,
3202 sidedata_helpers=helpers,
3220 sidedata={},
3203 sidedata={},
3221 rev=rev,
3204 rev=rev,
3222 )
3205 )
3223
3206
3224 serialized_sidedata = sidedatautil.serialize_sidedata(
3207 serialized_sidedata = sidedatautil.serialize_sidedata(
3225 new_sidedata
3208 new_sidedata
3226 )
3209 )
3227 if entry[8] != 0 or entry[9] != 0:
3210 if entry[8] != 0 or entry[9] != 0:
3228 # rewriting entries that already have sidedata is not
3211 # rewriting entries that already have sidedata is not
3229 # supported yet, because it introduces garbage data in the
3212 # supported yet, because it introduces garbage data in the
3230 # revlog.
3213 # revlog.
3231 msg = b"Rewriting existing sidedata is not supported yet"
3214 msg = b"Rewriting existing sidedata is not supported yet"
3232 raise error.Abort(msg)
3215 raise error.Abort(msg)
3233 entry = entry[:8]
3216 entry = entry[:8]
3234 entry += (current_offset, len(serialized_sidedata))
3217 entry += (current_offset, len(serialized_sidedata))
3235
3218
3236 fp.write(serialized_sidedata)
3219 fp.write(serialized_sidedata)
3237 new_entries.append(entry)
3220 new_entries.append(entry)
3238 current_offset += len(serialized_sidedata)
3221 current_offset += len(serialized_sidedata)
3239
3222
3240 # rewrite the new index entries
3223 # rewrite the new index entries
3241 with self._indexfp(b'w+') as fp:
3224 with self._indexfp(b'w+') as fp:
3242 fp.seek(startrev * self.index.entry_size)
3225 fp.seek(startrev * self.index.entry_size)
3243 for i, entry in enumerate(new_entries):
3226 for i, entry in enumerate(new_entries):
3244 rev = startrev + i
3227 rev = startrev + i
3245 self.index.replace_sidedata_info(rev, entry[8], entry[9])
3228 self.index.replace_sidedata_info(rev, entry[8], entry[9])
3246 packed = self._io.packentry(entry, self.node, self.version, rev)
3229 packed = self.index.entry_binary(rev, self.version)
3247 fp.write(packed)
3230 fp.write(packed)
@@ -1,494 +1,499 b''
1 // revlog.rs
1 // revlog.rs
2 //
2 //
3 // Copyright 2019-2020 Georges Racinet <georges.racinet@octobus.net>
3 // Copyright 2019-2020 Georges Racinet <georges.racinet@octobus.net>
4 //
4 //
5 // This software may be used and distributed according to the terms of the
5 // This software may be used and distributed according to the terms of the
6 // GNU General Public License version 2 or any later version.
6 // GNU General Public License version 2 or any later version.
7
7
8 use crate::{
8 use crate::{
9 cindex,
9 cindex,
10 utils::{node_from_py_bytes, node_from_py_object},
10 utils::{node_from_py_bytes, node_from_py_object},
11 };
11 };
12 use cpython::{
12 use cpython::{
13 buffer::{Element, PyBuffer},
13 buffer::{Element, PyBuffer},
14 exc::{IndexError, ValueError},
14 exc::{IndexError, ValueError},
15 ObjectProtocol, PyBytes, PyClone, PyDict, PyErr, PyInt, PyModule,
15 ObjectProtocol, PyBytes, PyClone, PyDict, PyErr, PyInt, PyModule,
16 PyObject, PyResult, PyString, PyTuple, Python, PythonObject, ToPyObject,
16 PyObject, PyResult, PyString, PyTuple, Python, PythonObject, ToPyObject,
17 };
17 };
18 use hg::{
18 use hg::{
19 nodemap::{Block, NodeMapError, NodeTree},
19 nodemap::{Block, NodeMapError, NodeTree},
20 revlog::{nodemap::NodeMap, NodePrefix, RevlogIndex},
20 revlog::{nodemap::NodeMap, NodePrefix, RevlogIndex},
21 Revision,
21 Revision,
22 };
22 };
23 use std::cell::RefCell;
23 use std::cell::RefCell;
24
24
25 /// Return a Struct implementing the Graph trait
25 /// Return a Struct implementing the Graph trait
26 pub(crate) fn pyindex_to_graph(
26 pub(crate) fn pyindex_to_graph(
27 py: Python,
27 py: Python,
28 index: PyObject,
28 index: PyObject,
29 ) -> PyResult<cindex::Index> {
29 ) -> PyResult<cindex::Index> {
30 match index.extract::<MixedIndex>(py) {
30 match index.extract::<MixedIndex>(py) {
31 Ok(midx) => Ok(midx.clone_cindex(py)),
31 Ok(midx) => Ok(midx.clone_cindex(py)),
32 Err(_) => cindex::Index::new(py, index),
32 Err(_) => cindex::Index::new(py, index),
33 }
33 }
34 }
34 }
35
35
36 py_class!(pub class MixedIndex |py| {
36 py_class!(pub class MixedIndex |py| {
37 data cindex: RefCell<cindex::Index>;
37 data cindex: RefCell<cindex::Index>;
38 data nt: RefCell<Option<NodeTree>>;
38 data nt: RefCell<Option<NodeTree>>;
39 data docket: RefCell<Option<PyObject>>;
39 data docket: RefCell<Option<PyObject>>;
40 // Holds a reference to the mmap'ed persistent nodemap data
40 // Holds a reference to the mmap'ed persistent nodemap data
41 data mmap: RefCell<Option<PyBuffer>>;
41 data mmap: RefCell<Option<PyBuffer>>;
42
42
43 def __new__(_cls, cindex: PyObject) -> PyResult<MixedIndex> {
43 def __new__(_cls, cindex: PyObject) -> PyResult<MixedIndex> {
44 Self::new(py, cindex)
44 Self::new(py, cindex)
45 }
45 }
46
46
47 /// Compatibility layer used for Python consumers needing access to the C index
47 /// Compatibility layer used for Python consumers needing access to the C index
48 ///
48 ///
49 /// Only use case so far is `scmutil.shortesthexnodeidprefix`,
49 /// Only use case so far is `scmutil.shortesthexnodeidprefix`,
50 /// that may need to build a custom `nodetree`, based on a specified revset.
50 /// that may need to build a custom `nodetree`, based on a specified revset.
51 /// With a Rust implementation of the nodemap, we will be able to get rid of
51 /// With a Rust implementation of the nodemap, we will be able to get rid of
52 /// this, by exposing our own standalone nodemap class,
52 /// this, by exposing our own standalone nodemap class,
53 /// ready to accept `MixedIndex`.
53 /// ready to accept `MixedIndex`.
54 def get_cindex(&self) -> PyResult<PyObject> {
54 def get_cindex(&self) -> PyResult<PyObject> {
55 Ok(self.cindex(py).borrow().inner().clone_ref(py))
55 Ok(self.cindex(py).borrow().inner().clone_ref(py))
56 }
56 }
57
57
58 // Index API involving nodemap, as defined in mercurial/pure/parsers.py
58 // Index API involving nodemap, as defined in mercurial/pure/parsers.py
59
59
60 /// Return Revision if found, raises a bare `error.RevlogError`
60 /// Return Revision if found, raises a bare `error.RevlogError`
61 /// in case of ambiguity, same as C version does
61 /// in case of ambiguity, same as C version does
62 def get_rev(&self, node: PyBytes) -> PyResult<Option<Revision>> {
62 def get_rev(&self, node: PyBytes) -> PyResult<Option<Revision>> {
63 let opt = self.get_nodetree(py)?.borrow();
63 let opt = self.get_nodetree(py)?.borrow();
64 let nt = opt.as_ref().unwrap();
64 let nt = opt.as_ref().unwrap();
65 let idx = &*self.cindex(py).borrow();
65 let idx = &*self.cindex(py).borrow();
66 let node = node_from_py_bytes(py, &node)?;
66 let node = node_from_py_bytes(py, &node)?;
67 nt.find_bin(idx, node.into()).map_err(|e| nodemap_error(py, e))
67 nt.find_bin(idx, node.into()).map_err(|e| nodemap_error(py, e))
68 }
68 }
69
69
70 /// same as `get_rev()` but raises a bare `error.RevlogError` if node
70 /// same as `get_rev()` but raises a bare `error.RevlogError` if node
71 /// is not found.
71 /// is not found.
72 ///
72 ///
73 /// No need to repeat `node` in the exception, `mercurial/revlog.py`
73 /// No need to repeat `node` in the exception, `mercurial/revlog.py`
74 /// will catch and rewrap with it
74 /// will catch and rewrap with it
75 def rev(&self, node: PyBytes) -> PyResult<Revision> {
75 def rev(&self, node: PyBytes) -> PyResult<Revision> {
76 self.get_rev(py, node)?.ok_or_else(|| revlog_error(py))
76 self.get_rev(py, node)?.ok_or_else(|| revlog_error(py))
77 }
77 }
78
78
79 /// return True if the node exist in the index
79 /// return True if the node exist in the index
80 def has_node(&self, node: PyBytes) -> PyResult<bool> {
80 def has_node(&self, node: PyBytes) -> PyResult<bool> {
81 self.get_rev(py, node).map(|opt| opt.is_some())
81 self.get_rev(py, node).map(|opt| opt.is_some())
82 }
82 }
83
83
84 /// find length of shortest hex nodeid of a binary ID
84 /// find length of shortest hex nodeid of a binary ID
85 def shortest(&self, node: PyBytes) -> PyResult<usize> {
85 def shortest(&self, node: PyBytes) -> PyResult<usize> {
86 let opt = self.get_nodetree(py)?.borrow();
86 let opt = self.get_nodetree(py)?.borrow();
87 let nt = opt.as_ref().unwrap();
87 let nt = opt.as_ref().unwrap();
88 let idx = &*self.cindex(py).borrow();
88 let idx = &*self.cindex(py).borrow();
89 match nt.unique_prefix_len_node(idx, &node_from_py_bytes(py, &node)?)
89 match nt.unique_prefix_len_node(idx, &node_from_py_bytes(py, &node)?)
90 {
90 {
91 Ok(Some(l)) => Ok(l),
91 Ok(Some(l)) => Ok(l),
92 Ok(None) => Err(revlog_error(py)),
92 Ok(None) => Err(revlog_error(py)),
93 Err(e) => Err(nodemap_error(py, e)),
93 Err(e) => Err(nodemap_error(py, e)),
94 }
94 }
95 }
95 }
96
96
97 def partialmatch(&self, node: PyObject) -> PyResult<Option<PyBytes>> {
97 def partialmatch(&self, node: PyObject) -> PyResult<Option<PyBytes>> {
98 let opt = self.get_nodetree(py)?.borrow();
98 let opt = self.get_nodetree(py)?.borrow();
99 let nt = opt.as_ref().unwrap();
99 let nt = opt.as_ref().unwrap();
100 let idx = &*self.cindex(py).borrow();
100 let idx = &*self.cindex(py).borrow();
101
101
102 let node_as_string = if cfg!(feature = "python3-sys") {
102 let node_as_string = if cfg!(feature = "python3-sys") {
103 node.cast_as::<PyString>(py)?.to_string(py)?.to_string()
103 node.cast_as::<PyString>(py)?.to_string(py)?.to_string()
104 }
104 }
105 else {
105 else {
106 let node = node.extract::<PyBytes>(py)?;
106 let node = node.extract::<PyBytes>(py)?;
107 String::from_utf8_lossy(node.data(py)).to_string()
107 String::from_utf8_lossy(node.data(py)).to_string()
108 };
108 };
109
109
110 let prefix = NodePrefix::from_hex(&node_as_string).map_err(|_| PyErr::new::<ValueError, _>(py, "Invalid node or prefix"))?;
110 let prefix = NodePrefix::from_hex(&node_as_string).map_err(|_| PyErr::new::<ValueError, _>(py, "Invalid node or prefix"))?;
111
111
112 nt.find_bin(idx, prefix)
112 nt.find_bin(idx, prefix)
113 // TODO make an inner API returning the node directly
113 // TODO make an inner API returning the node directly
114 .map(|opt| opt.map(
114 .map(|opt| opt.map(
115 |rev| PyBytes::new(py, idx.node(rev).unwrap().as_bytes())))
115 |rev| PyBytes::new(py, idx.node(rev).unwrap().as_bytes())))
116 .map_err(|e| nodemap_error(py, e))
116 .map_err(|e| nodemap_error(py, e))
117
117
118 }
118 }
119
119
120 /// append an index entry
120 /// append an index entry
121 def append(&self, tup: PyTuple) -> PyResult<PyObject> {
121 def append(&self, tup: PyTuple) -> PyResult<PyObject> {
122 if tup.len(py) < 8 {
122 if tup.len(py) < 8 {
123 // this is better than the panic promised by tup.get_item()
123 // this is better than the panic promised by tup.get_item()
124 return Err(
124 return Err(
125 PyErr::new::<IndexError, _>(py, "tuple index out of range"))
125 PyErr::new::<IndexError, _>(py, "tuple index out of range"))
126 }
126 }
127 let node_bytes = tup.get_item(py, 7).extract(py)?;
127 let node_bytes = tup.get_item(py, 7).extract(py)?;
128 let node = node_from_py_object(py, &node_bytes)?;
128 let node = node_from_py_object(py, &node_bytes)?;
129
129
130 let mut idx = self.cindex(py).borrow_mut();
130 let mut idx = self.cindex(py).borrow_mut();
131 let rev = idx.len() as Revision;
131 let rev = idx.len() as Revision;
132
132
133 idx.append(py, tup)?;
133 idx.append(py, tup)?;
134 self.get_nodetree(py)?.borrow_mut().as_mut().unwrap()
134 self.get_nodetree(py)?.borrow_mut().as_mut().unwrap()
135 .insert(&*idx, &node, rev)
135 .insert(&*idx, &node, rev)
136 .map_err(|e| nodemap_error(py, e))?;
136 .map_err(|e| nodemap_error(py, e))?;
137 Ok(py.None())
137 Ok(py.None())
138 }
138 }
139
139
140 def __delitem__(&self, key: PyObject) -> PyResult<()> {
140 def __delitem__(&self, key: PyObject) -> PyResult<()> {
141 // __delitem__ is both for `del idx[r]` and `del idx[r1:r2]`
141 // __delitem__ is both for `del idx[r]` and `del idx[r1:r2]`
142 self.cindex(py).borrow().inner().del_item(py, key)?;
142 self.cindex(py).borrow().inner().del_item(py, key)?;
143 let mut opt = self.get_nodetree(py)?.borrow_mut();
143 let mut opt = self.get_nodetree(py)?.borrow_mut();
144 let mut nt = opt.as_mut().unwrap();
144 let mut nt = opt.as_mut().unwrap();
145 nt.invalidate_all();
145 nt.invalidate_all();
146 self.fill_nodemap(py, &mut nt)?;
146 self.fill_nodemap(py, &mut nt)?;
147 Ok(())
147 Ok(())
148 }
148 }
149
149
150 //
150 //
151 // Reforwarded C index API
151 // Reforwarded C index API
152 //
152 //
153
153
154 // index_methods (tp_methods). Same ordering as in revlog.c
154 // index_methods (tp_methods). Same ordering as in revlog.c
155
155
156 /// return the gca set of the given revs
156 /// return the gca set of the given revs
157 def ancestors(&self, *args, **kw) -> PyResult<PyObject> {
157 def ancestors(&self, *args, **kw) -> PyResult<PyObject> {
158 self.call_cindex(py, "ancestors", args, kw)
158 self.call_cindex(py, "ancestors", args, kw)
159 }
159 }
160
160
161 /// return the heads of the common ancestors of the given revs
161 /// return the heads of the common ancestors of the given revs
162 def commonancestorsheads(&self, *args, **kw) -> PyResult<PyObject> {
162 def commonancestorsheads(&self, *args, **kw) -> PyResult<PyObject> {
163 self.call_cindex(py, "commonancestorsheads", args, kw)
163 self.call_cindex(py, "commonancestorsheads", args, kw)
164 }
164 }
165
165
166 /// Clear the index caches and inner py_class data.
166 /// Clear the index caches and inner py_class data.
167 /// It is Python's responsibility to call `update_nodemap_data` again.
167 /// It is Python's responsibility to call `update_nodemap_data` again.
168 def clearcaches(&self, *args, **kw) -> PyResult<PyObject> {
168 def clearcaches(&self, *args, **kw) -> PyResult<PyObject> {
169 self.nt(py).borrow_mut().take();
169 self.nt(py).borrow_mut().take();
170 self.docket(py).borrow_mut().take();
170 self.docket(py).borrow_mut().take();
171 self.mmap(py).borrow_mut().take();
171 self.mmap(py).borrow_mut().take();
172 self.call_cindex(py, "clearcaches", args, kw)
172 self.call_cindex(py, "clearcaches", args, kw)
173 }
173 }
174
174
175 /// return the raw binary string representing a revision
176 def entry_binary(&self, *args, **kw) -> PyResult<PyObject> {
177 self.call_cindex(py, "entry_binary", args, kw)
178 }
179
175 /// get an index entry
180 /// get an index entry
176 def get(&self, *args, **kw) -> PyResult<PyObject> {
181 def get(&self, *args, **kw) -> PyResult<PyObject> {
177 self.call_cindex(py, "get", args, kw)
182 self.call_cindex(py, "get", args, kw)
178 }
183 }
179
184
180 /// compute phases
185 /// compute phases
181 def computephasesmapsets(&self, *args, **kw) -> PyResult<PyObject> {
186 def computephasesmapsets(&self, *args, **kw) -> PyResult<PyObject> {
182 self.call_cindex(py, "computephasesmapsets", args, kw)
187 self.call_cindex(py, "computephasesmapsets", args, kw)
183 }
188 }
184
189
185 /// reachableroots
190 /// reachableroots
186 def reachableroots2(&self, *args, **kw) -> PyResult<PyObject> {
191 def reachableroots2(&self, *args, **kw) -> PyResult<PyObject> {
187 self.call_cindex(py, "reachableroots2", args, kw)
192 self.call_cindex(py, "reachableroots2", args, kw)
188 }
193 }
189
194
190 /// get head revisions
195 /// get head revisions
191 def headrevs(&self, *args, **kw) -> PyResult<PyObject> {
196 def headrevs(&self, *args, **kw) -> PyResult<PyObject> {
192 self.call_cindex(py, "headrevs", args, kw)
197 self.call_cindex(py, "headrevs", args, kw)
193 }
198 }
194
199
195 /// get filtered head revisions
200 /// get filtered head revisions
196 def headrevsfiltered(&self, *args, **kw) -> PyResult<PyObject> {
201 def headrevsfiltered(&self, *args, **kw) -> PyResult<PyObject> {
197 self.call_cindex(py, "headrevsfiltered", args, kw)
202 self.call_cindex(py, "headrevsfiltered", args, kw)
198 }
203 }
199
204
200 /// True if the object is a snapshot
205 /// True if the object is a snapshot
201 def issnapshot(&self, *args, **kw) -> PyResult<PyObject> {
206 def issnapshot(&self, *args, **kw) -> PyResult<PyObject> {
202 self.call_cindex(py, "issnapshot", args, kw)
207 self.call_cindex(py, "issnapshot", args, kw)
203 }
208 }
204
209
205 /// Gather snapshot data in a cache dict
210 /// Gather snapshot data in a cache dict
206 def findsnapshots(&self, *args, **kw) -> PyResult<PyObject> {
211 def findsnapshots(&self, *args, **kw) -> PyResult<PyObject> {
207 self.call_cindex(py, "findsnapshots", args, kw)
212 self.call_cindex(py, "findsnapshots", args, kw)
208 }
213 }
209
214
210 /// determine revisions with deltas to reconstruct fulltext
215 /// determine revisions with deltas to reconstruct fulltext
211 def deltachain(&self, *args, **kw) -> PyResult<PyObject> {
216 def deltachain(&self, *args, **kw) -> PyResult<PyObject> {
212 self.call_cindex(py, "deltachain", args, kw)
217 self.call_cindex(py, "deltachain", args, kw)
213 }
218 }
214
219
215 /// slice planned chunk read to reach a density threshold
220 /// slice planned chunk read to reach a density threshold
216 def slicechunktodensity(&self, *args, **kw) -> PyResult<PyObject> {
221 def slicechunktodensity(&self, *args, **kw) -> PyResult<PyObject> {
217 self.call_cindex(py, "slicechunktodensity", args, kw)
222 self.call_cindex(py, "slicechunktodensity", args, kw)
218 }
223 }
219
224
220 /// stats for the index
225 /// stats for the index
221 def stats(&self, *args, **kw) -> PyResult<PyObject> {
226 def stats(&self, *args, **kw) -> PyResult<PyObject> {
222 self.call_cindex(py, "stats", args, kw)
227 self.call_cindex(py, "stats", args, kw)
223 }
228 }
224
229
225 // index_sequence_methods and index_mapping_methods.
230 // index_sequence_methods and index_mapping_methods.
226 //
231 //
227 // Since we call back through the high level Python API,
232 // Since we call back through the high level Python API,
228 // there's no point making a distinction between index_get
233 // there's no point making a distinction between index_get
229 // and index_getitem.
234 // and index_getitem.
230
235
231 def __len__(&self) -> PyResult<usize> {
236 def __len__(&self) -> PyResult<usize> {
232 self.cindex(py).borrow().inner().len(py)
237 self.cindex(py).borrow().inner().len(py)
233 }
238 }
234
239
235 def __getitem__(&self, key: PyObject) -> PyResult<PyObject> {
240 def __getitem__(&self, key: PyObject) -> PyResult<PyObject> {
236 // this conversion seems needless, but that's actually because
241 // this conversion seems needless, but that's actually because
237 // `index_getitem` does not handle conversion from PyLong,
242 // `index_getitem` does not handle conversion from PyLong,
238 // which expressions such as [e for e in index] internally use.
243 // which expressions such as [e for e in index] internally use.
239 // Note that we don't seem to have a direct way to call
244 // Note that we don't seem to have a direct way to call
240 // PySequence_GetItem (does the job), which would possibly be better
245 // PySequence_GetItem (does the job), which would possibly be better
241 // for performance
246 // for performance
242 let key = match key.extract::<Revision>(py) {
247 let key = match key.extract::<Revision>(py) {
243 Ok(rev) => rev.to_py_object(py).into_object(),
248 Ok(rev) => rev.to_py_object(py).into_object(),
244 Err(_) => key,
249 Err(_) => key,
245 };
250 };
246 self.cindex(py).borrow().inner().get_item(py, key)
251 self.cindex(py).borrow().inner().get_item(py, key)
247 }
252 }
248
253
249 def __setitem__(&self, key: PyObject, value: PyObject) -> PyResult<()> {
254 def __setitem__(&self, key: PyObject, value: PyObject) -> PyResult<()> {
250 self.cindex(py).borrow().inner().set_item(py, key, value)
255 self.cindex(py).borrow().inner().set_item(py, key, value)
251 }
256 }
252
257
253 def __contains__(&self, item: PyObject) -> PyResult<bool> {
258 def __contains__(&self, item: PyObject) -> PyResult<bool> {
254 // ObjectProtocol does not seem to provide contains(), so
259 // ObjectProtocol does not seem to provide contains(), so
255 // this is an equivalent implementation of the index_contains()
260 // this is an equivalent implementation of the index_contains()
256 // defined in revlog.c
261 // defined in revlog.c
257 let cindex = self.cindex(py).borrow();
262 let cindex = self.cindex(py).borrow();
258 match item.extract::<Revision>(py) {
263 match item.extract::<Revision>(py) {
259 Ok(rev) => {
264 Ok(rev) => {
260 Ok(rev >= -1 && rev < cindex.inner().len(py)? as Revision)
265 Ok(rev >= -1 && rev < cindex.inner().len(py)? as Revision)
261 }
266 }
262 Err(_) => {
267 Err(_) => {
263 cindex.inner().call_method(
268 cindex.inner().call_method(
264 py,
269 py,
265 "has_node",
270 "has_node",
266 PyTuple::new(py, &[item]),
271 PyTuple::new(py, &[item]),
267 None)?
272 None)?
268 .extract(py)
273 .extract(py)
269 }
274 }
270 }
275 }
271 }
276 }
272
277
273 def nodemap_data_all(&self) -> PyResult<PyBytes> {
278 def nodemap_data_all(&self) -> PyResult<PyBytes> {
274 self.inner_nodemap_data_all(py)
279 self.inner_nodemap_data_all(py)
275 }
280 }
276
281
277 def nodemap_data_incremental(&self) -> PyResult<PyObject> {
282 def nodemap_data_incremental(&self) -> PyResult<PyObject> {
278 self.inner_nodemap_data_incremental(py)
283 self.inner_nodemap_data_incremental(py)
279 }
284 }
280 def update_nodemap_data(
285 def update_nodemap_data(
281 &self,
286 &self,
282 docket: PyObject,
287 docket: PyObject,
283 nm_data: PyObject
288 nm_data: PyObject
284 ) -> PyResult<PyObject> {
289 ) -> PyResult<PyObject> {
285 self.inner_update_nodemap_data(py, docket, nm_data)
290 self.inner_update_nodemap_data(py, docket, nm_data)
286 }
291 }
287
292
288 @property
293 @property
289 def entry_size(&self) -> PyResult<PyInt> {
294 def entry_size(&self) -> PyResult<PyInt> {
290 self.cindex(py).borrow().inner().getattr(py, "entry_size")?.extract::<PyInt>(py)
295 self.cindex(py).borrow().inner().getattr(py, "entry_size")?.extract::<PyInt>(py)
291 }
296 }
292
297
293 });
298 });
294
299
295 impl MixedIndex {
300 impl MixedIndex {
296 fn new(py: Python, cindex: PyObject) -> PyResult<MixedIndex> {
301 fn new(py: Python, cindex: PyObject) -> PyResult<MixedIndex> {
297 Self::create_instance(
302 Self::create_instance(
298 py,
303 py,
299 RefCell::new(cindex::Index::new(py, cindex)?),
304 RefCell::new(cindex::Index::new(py, cindex)?),
300 RefCell::new(None),
305 RefCell::new(None),
301 RefCell::new(None),
306 RefCell::new(None),
302 RefCell::new(None),
307 RefCell::new(None),
303 )
308 )
304 }
309 }
305
310
306 /// This is scaffolding at this point, but it could also become
311 /// This is scaffolding at this point, but it could also become
307 /// a way to start a persistent nodemap or perform a
312 /// a way to start a persistent nodemap or perform a
308 /// vacuum / repack operation
313 /// vacuum / repack operation
309 fn fill_nodemap(
314 fn fill_nodemap(
310 &self,
315 &self,
311 py: Python,
316 py: Python,
312 nt: &mut NodeTree,
317 nt: &mut NodeTree,
313 ) -> PyResult<PyObject> {
318 ) -> PyResult<PyObject> {
314 let index = self.cindex(py).borrow();
319 let index = self.cindex(py).borrow();
315 for r in 0..index.len() {
320 for r in 0..index.len() {
316 let rev = r as Revision;
321 let rev = r as Revision;
317 // in this case node() won't ever return None
322 // in this case node() won't ever return None
318 nt.insert(&*index, index.node(rev).unwrap(), rev)
323 nt.insert(&*index, index.node(rev).unwrap(), rev)
319 .map_err(|e| nodemap_error(py, e))?
324 .map_err(|e| nodemap_error(py, e))?
320 }
325 }
321 Ok(py.None())
326 Ok(py.None())
322 }
327 }
323
328
324 fn get_nodetree<'a>(
329 fn get_nodetree<'a>(
325 &'a self,
330 &'a self,
326 py: Python<'a>,
331 py: Python<'a>,
327 ) -> PyResult<&'a RefCell<Option<NodeTree>>> {
332 ) -> PyResult<&'a RefCell<Option<NodeTree>>> {
328 if self.nt(py).borrow().is_none() {
333 if self.nt(py).borrow().is_none() {
329 let readonly = Box::new(Vec::new());
334 let readonly = Box::new(Vec::new());
330 let mut nt = NodeTree::load_bytes(readonly, 0);
335 let mut nt = NodeTree::load_bytes(readonly, 0);
331 self.fill_nodemap(py, &mut nt)?;
336 self.fill_nodemap(py, &mut nt)?;
332 self.nt(py).borrow_mut().replace(nt);
337 self.nt(py).borrow_mut().replace(nt);
333 }
338 }
334 Ok(self.nt(py))
339 Ok(self.nt(py))
335 }
340 }
336
341
337 /// forward a method call to the underlying C index
342 /// forward a method call to the underlying C index
338 fn call_cindex(
343 fn call_cindex(
339 &self,
344 &self,
340 py: Python,
345 py: Python,
341 name: &str,
346 name: &str,
342 args: &PyTuple,
347 args: &PyTuple,
343 kwargs: Option<&PyDict>,
348 kwargs: Option<&PyDict>,
344 ) -> PyResult<PyObject> {
349 ) -> PyResult<PyObject> {
345 self.cindex(py)
350 self.cindex(py)
346 .borrow()
351 .borrow()
347 .inner()
352 .inner()
348 .call_method(py, name, args, kwargs)
353 .call_method(py, name, args, kwargs)
349 }
354 }
350
355
351 pub fn clone_cindex(&self, py: Python) -> cindex::Index {
356 pub fn clone_cindex(&self, py: Python) -> cindex::Index {
352 self.cindex(py).borrow().clone_ref(py)
357 self.cindex(py).borrow().clone_ref(py)
353 }
358 }
354
359
355 /// Returns the full nodemap bytes to be written as-is to disk
360 /// Returns the full nodemap bytes to be written as-is to disk
356 fn inner_nodemap_data_all(&self, py: Python) -> PyResult<PyBytes> {
361 fn inner_nodemap_data_all(&self, py: Python) -> PyResult<PyBytes> {
357 let nodemap = self.get_nodetree(py)?.borrow_mut().take().unwrap();
362 let nodemap = self.get_nodetree(py)?.borrow_mut().take().unwrap();
358 let (readonly, bytes) = nodemap.into_readonly_and_added_bytes();
363 let (readonly, bytes) = nodemap.into_readonly_and_added_bytes();
359
364
360 // If there's anything readonly, we need to build the data again from
365 // If there's anything readonly, we need to build the data again from
361 // scratch
366 // scratch
362 let bytes = if readonly.len() > 0 {
367 let bytes = if readonly.len() > 0 {
363 let mut nt = NodeTree::load_bytes(Box::new(vec![]), 0);
368 let mut nt = NodeTree::load_bytes(Box::new(vec![]), 0);
364 self.fill_nodemap(py, &mut nt)?;
369 self.fill_nodemap(py, &mut nt)?;
365
370
366 let (readonly, bytes) = nt.into_readonly_and_added_bytes();
371 let (readonly, bytes) = nt.into_readonly_and_added_bytes();
367 assert_eq!(readonly.len(), 0);
372 assert_eq!(readonly.len(), 0);
368
373
369 bytes
374 bytes
370 } else {
375 } else {
371 bytes
376 bytes
372 };
377 };
373
378
374 let bytes = PyBytes::new(py, &bytes);
379 let bytes = PyBytes::new(py, &bytes);
375 Ok(bytes)
380 Ok(bytes)
376 }
381 }
377
382
378 /// Returns the last saved docket along with the size of any changed data
383 /// Returns the last saved docket along with the size of any changed data
379 /// (in number of blocks), and said data as bytes.
384 /// (in number of blocks), and said data as bytes.
380 fn inner_nodemap_data_incremental(
385 fn inner_nodemap_data_incremental(
381 &self,
386 &self,
382 py: Python,
387 py: Python,
383 ) -> PyResult<PyObject> {
388 ) -> PyResult<PyObject> {
384 let docket = self.docket(py).borrow();
389 let docket = self.docket(py).borrow();
385 let docket = match docket.as_ref() {
390 let docket = match docket.as_ref() {
386 Some(d) => d,
391 Some(d) => d,
387 None => return Ok(py.None()),
392 None => return Ok(py.None()),
388 };
393 };
389
394
390 let node_tree = self.get_nodetree(py)?.borrow_mut().take().unwrap();
395 let node_tree = self.get_nodetree(py)?.borrow_mut().take().unwrap();
391 let masked_blocks = node_tree.masked_readonly_blocks();
396 let masked_blocks = node_tree.masked_readonly_blocks();
392 let (_, data) = node_tree.into_readonly_and_added_bytes();
397 let (_, data) = node_tree.into_readonly_and_added_bytes();
393 let changed = masked_blocks * std::mem::size_of::<Block>();
398 let changed = masked_blocks * std::mem::size_of::<Block>();
394
399
395 Ok((docket, changed, PyBytes::new(py, &data))
400 Ok((docket, changed, PyBytes::new(py, &data))
396 .to_py_object(py)
401 .to_py_object(py)
397 .into_object())
402 .into_object())
398 }
403 }
399
404
400 /// Update the nodemap from the new (mmaped) data.
405 /// Update the nodemap from the new (mmaped) data.
401 /// The docket is kept as a reference for later incremental calls.
406 /// The docket is kept as a reference for later incremental calls.
402 fn inner_update_nodemap_data(
407 fn inner_update_nodemap_data(
403 &self,
408 &self,
404 py: Python,
409 py: Python,
405 docket: PyObject,
410 docket: PyObject,
406 nm_data: PyObject,
411 nm_data: PyObject,
407 ) -> PyResult<PyObject> {
412 ) -> PyResult<PyObject> {
408 let buf = PyBuffer::get(py, &nm_data)?;
413 let buf = PyBuffer::get(py, &nm_data)?;
409 let len = buf.item_count();
414 let len = buf.item_count();
410
415
411 // Build a slice from the mmap'ed buffer data
416 // Build a slice from the mmap'ed buffer data
412 let cbuf = buf.buf_ptr();
417 let cbuf = buf.buf_ptr();
413 let bytes = if std::mem::size_of::<u8>() == buf.item_size()
418 let bytes = if std::mem::size_of::<u8>() == buf.item_size()
414 && buf.is_c_contiguous()
419 && buf.is_c_contiguous()
415 && u8::is_compatible_format(buf.format())
420 && u8::is_compatible_format(buf.format())
416 {
421 {
417 unsafe { std::slice::from_raw_parts(cbuf as *const u8, len) }
422 unsafe { std::slice::from_raw_parts(cbuf as *const u8, len) }
418 } else {
423 } else {
419 return Err(PyErr::new::<ValueError, _>(
424 return Err(PyErr::new::<ValueError, _>(
420 py,
425 py,
421 "Nodemap data buffer has an invalid memory representation"
426 "Nodemap data buffer has an invalid memory representation"
422 .to_string(),
427 .to_string(),
423 ));
428 ));
424 };
429 };
425
430
426 // Keep a reference to the mmap'ed buffer, otherwise we get a dangling
431 // Keep a reference to the mmap'ed buffer, otherwise we get a dangling
427 // pointer.
432 // pointer.
428 self.mmap(py).borrow_mut().replace(buf);
433 self.mmap(py).borrow_mut().replace(buf);
429
434
430 let mut nt = NodeTree::load_bytes(Box::new(bytes), len);
435 let mut nt = NodeTree::load_bytes(Box::new(bytes), len);
431
436
432 let data_tip =
437 let data_tip =
433 docket.getattr(py, "tip_rev")?.extract::<Revision>(py)?;
438 docket.getattr(py, "tip_rev")?.extract::<Revision>(py)?;
434 self.docket(py).borrow_mut().replace(docket.clone_ref(py));
439 self.docket(py).borrow_mut().replace(docket.clone_ref(py));
435 let idx = self.cindex(py).borrow();
440 let idx = self.cindex(py).borrow();
436 let current_tip = idx.len();
441 let current_tip = idx.len();
437
442
438 for r in (data_tip + 1)..current_tip as Revision {
443 for r in (data_tip + 1)..current_tip as Revision {
439 let rev = r as Revision;
444 let rev = r as Revision;
440 // in this case node() won't ever return None
445 // in this case node() won't ever return None
441 nt.insert(&*idx, idx.node(rev).unwrap(), rev)
446 nt.insert(&*idx, idx.node(rev).unwrap(), rev)
442 .map_err(|e| nodemap_error(py, e))?
447 .map_err(|e| nodemap_error(py, e))?
443 }
448 }
444
449
445 *self.nt(py).borrow_mut() = Some(nt);
450 *self.nt(py).borrow_mut() = Some(nt);
446
451
447 Ok(py.None())
452 Ok(py.None())
448 }
453 }
449 }
454 }
450
455
451 fn revlog_error(py: Python) -> PyErr {
456 fn revlog_error(py: Python) -> PyErr {
452 match py
457 match py
453 .import("mercurial.error")
458 .import("mercurial.error")
454 .and_then(|m| m.get(py, "RevlogError"))
459 .and_then(|m| m.get(py, "RevlogError"))
455 {
460 {
456 Err(e) => e,
461 Err(e) => e,
457 Ok(cls) => PyErr::from_instance(py, cls),
462 Ok(cls) => PyErr::from_instance(py, cls),
458 }
463 }
459 }
464 }
460
465
461 fn rev_not_in_index(py: Python, rev: Revision) -> PyErr {
466 fn rev_not_in_index(py: Python, rev: Revision) -> PyErr {
462 PyErr::new::<ValueError, _>(
467 PyErr::new::<ValueError, _>(
463 py,
468 py,
464 format!(
469 format!(
465 "Inconsistency: Revision {} found in nodemap \
470 "Inconsistency: Revision {} found in nodemap \
466 is not in revlog index",
471 is not in revlog index",
467 rev
472 rev
468 ),
473 ),
469 )
474 )
470 }
475 }
471
476
472 /// Standard treatment of NodeMapError
477 /// Standard treatment of NodeMapError
473 fn nodemap_error(py: Python, err: NodeMapError) -> PyErr {
478 fn nodemap_error(py: Python, err: NodeMapError) -> PyErr {
474 match err {
479 match err {
475 NodeMapError::MultipleResults => revlog_error(py),
480 NodeMapError::MultipleResults => revlog_error(py),
476 NodeMapError::RevisionNotInIndex(r) => rev_not_in_index(py, r),
481 NodeMapError::RevisionNotInIndex(r) => rev_not_in_index(py, r),
477 }
482 }
478 }
483 }
479
484
480 /// Create the module, with __package__ given from parent
485 /// Create the module, with __package__ given from parent
481 pub fn init_module(py: Python, package: &str) -> PyResult<PyModule> {
486 pub fn init_module(py: Python, package: &str) -> PyResult<PyModule> {
482 let dotted_name = &format!("{}.revlog", package);
487 let dotted_name = &format!("{}.revlog", package);
483 let m = PyModule::new(py, dotted_name)?;
488 let m = PyModule::new(py, dotted_name)?;
484 m.add(py, "__package__", package)?;
489 m.add(py, "__package__", package)?;
485 m.add(py, "__doc__", "RevLog - Rust implementations")?;
490 m.add(py, "__doc__", "RevLog - Rust implementations")?;
486
491
487 m.add_class::<MixedIndex>(py)?;
492 m.add_class::<MixedIndex>(py)?;
488
493
489 let sys = PyModule::import(py, "sys")?;
494 let sys = PyModule::import(py, "sys")?;
490 let sys_modules: PyDict = sys.get(py, "modules")?.extract(py)?;
495 let sys_modules: PyDict = sys.get(py, "modules")?.extract(py)?;
491 sys_modules.set_item(py, dotted_name, &m)?;
496 sys_modules.set_item(py, dotted_name, &m)?;
492
497
493 Ok(m)
498 Ok(m)
494 }
499 }
General Comments 0
You need to be logged in to leave comments. Login now