revlog: compress sidedata when doing "post-pull" sidedata update...
marmoute - r48033:07641baf default
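What the change does, in brief: `index_replace_sidedata_info()` now also receives the compression mode of the rewritten sidedata (note the extra trailing `B` in the argument-parsing format below) and folds it into bits 2-3 of byte 76 of the v2 index entry, the same field `index_append()` writes. This lets a "post-pull" sidedata update record its sidedata as compressed instead of leaving the previously written mode untouched.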
@@ -1,3053 +1,3055 b''
 /*
  parsers.c - efficient content parsing

  Copyright 2008 Olivia Mackall <olivia@selenic.com> and others

  This software may be used and distributed according to the terms of
  the GNU General Public License, incorporated herein by reference.
 */

 #define PY_SSIZE_T_CLEAN
 #include <Python.h>
 #include <assert.h>
 #include <ctype.h>
 #include <limits.h>
 #include <stddef.h>
 #include <stdlib.h>
 #include <string.h>
 #include <structmember.h>

 #include "bitmanipulation.h"
 #include "charencode.h"
 #include "compat.h"
 #include "revlog.h"
 #include "util.h"

 #ifdef IS_PY3K
 /* The mapping of Python types is meant to be temporary to get Python
  * 3 to compile. We should remove this once Python 3 support is fully
  * supported and proper types are used in the extensions themselves. */
 #define PyInt_Check PyLong_Check
 #define PyInt_FromLong PyLong_FromLong
 #define PyInt_FromSsize_t PyLong_FromSsize_t
 #define PyInt_AsLong PyLong_AsLong
 #endif

 typedef struct indexObjectStruct indexObject;

 typedef struct {
         int children[16];
 } nodetreenode;

 typedef struct {
         int abi_version;
         Py_ssize_t (*index_length)(const indexObject *);
         const char *(*index_node)(indexObject *, Py_ssize_t);
         int (*index_parents)(PyObject *, int, int *);
 } Revlog_CAPI;

 /*
  * A base-16 trie for fast node->rev mapping.
  *
  * Positive value is index of the next node in the trie
  * Negative value is a leaf: -(rev + 2)
  * Zero is empty
  */
 typedef struct {
         indexObject *index;
         nodetreenode *nodes;
         Py_ssize_t nodelen;
         size_t length;   /* # nodes in use */
         size_t capacity; /* # nodes allocated */
         int depth;       /* maximum depth of tree */
         int splits;      /* # splits performed */
 } nodetree;

 typedef struct {
         PyObject_HEAD /* ; */
         nodetree nt;
 } nodetreeObject;

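The leaf encoding described above packs a revision into a strictly negative value, keeping 0 free to mean "empty child" and -1 unused. A minimal round-trip sketch (the helper names are illustrative, not part of this file):

/* rev 0 encodes to -2, rev 5 to -7; decoding inverts the shift. */
static inline int nt_leaf_from_rev(int rev) { return -(rev + 2); }
static inline int nt_rev_from_leaf(int leaf) { return -leaf - 2; }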
 /*
  * This class has two behaviors.
  *
  * When used in a list-like way (with integer keys), we decode an
  * entry in a RevlogNG index file on demand. We have limited support for
  * integer-keyed insert and delete, only at elements right before the
  * end.
  *
  * With string keys, we lazily perform a reverse mapping from node to
  * rev, using a base-16 trie.
  */
 struct indexObjectStruct {
         PyObject_HEAD
         /* Type-specific fields go here. */
         PyObject *data;         /* raw bytes of index */
         Py_ssize_t nodelen;     /* digest size of the hash, 20 for SHA-1 */
         PyObject *nullentry;    /* fast path for references to null */
         Py_buffer buf;          /* buffer of data */
         const char **offsets;   /* populated on demand */
         Py_ssize_t length;      /* current on-disk number of elements */
         unsigned new_length;    /* number of added elements */
         unsigned added_length;  /* space reserved for added elements */
         char *added;            /* populated on demand */
         PyObject *headrevs;     /* cache, invalidated on changes */
         PyObject *filteredrevs; /* filtered revs set */
         nodetree nt;            /* base-16 trie */
         int ntinitialized;      /* 0 or 1 */
         int ntrev;              /* last rev scanned */
         int ntlookups;          /* # lookups */
         int ntmisses;           /* # lookups that miss the cache */
         int inlined;
         long entry_size;        /* size of an index entry. Differs in v1 v.s.
                                    v2 format */
         char format_version;    /* revlog index format version, format_v1 or
                                    format_v2 */
 };

 static Py_ssize_t index_length(const indexObject *self)
 {
         return self->length + self->new_length;
 }

 static const char nullid[32] = {0};
 static const Py_ssize_t nullrev = -1;

 static Py_ssize_t inline_scan(indexObject *self, const char **offsets);

 static int index_find_node(indexObject *self, const char *node);

 #if LONG_MAX == 0x7fffffffL
 static const char *const tuple_format = PY23("Kiiiiiis#KiBB", "Kiiiiiiy#KiBB");
 #else
 static const char *const tuple_format = PY23("kiiiiiis#kiBB", "kiiiiiiy#kiBB");
 #endif

 /* A RevlogNG v1 index entry is 64 bytes long. */
 static const long v1_entry_size = 64;

 /* A Revlogv2 index entry is 96 bytes long. */
 static const long v2_entry_size = 96;

 static const long format_v1 = 1; /* Internal only, could be any number */
 static const long format_v2 = 2; /* Internal only, could be any number */

 static const char comp_mode_inline = 2;

 static void raise_revlog_error(void)
 {
         PyObject *mod = NULL, *dict = NULL, *errclass = NULL;

         mod = PyImport_ImportModule("mercurial.error");
         if (mod == NULL) {
                 goto cleanup;
         }

         dict = PyModule_GetDict(mod);
         if (dict == NULL) {
                 goto cleanup;
         }
         Py_INCREF(dict);

         errclass = PyDict_GetItemString(dict, "RevlogError");
         if (errclass == NULL) {
                 PyErr_SetString(PyExc_SystemError,
                                 "could not find RevlogError");
                 goto cleanup;
         }

         /* value of exception is ignored by callers */
         PyErr_SetString(errclass, "RevlogError");

 cleanup:
         Py_XDECREF(dict);
         Py_XDECREF(mod);
 }

 /*
  * Return a pointer to the beginning of a RevlogNG record.
  */
 static const char *index_deref(indexObject *self, Py_ssize_t pos)
 {
         if (pos >= self->length)
                 return self->added + (pos - self->length) * self->entry_size;

         if (self->inlined && pos > 0) {
                 if (self->offsets == NULL) {
                         Py_ssize_t ret;
                         self->offsets =
                             PyMem_Malloc(self->length * sizeof(*self->offsets));
                         if (self->offsets == NULL)
                                 return (const char *)PyErr_NoMemory();
                         ret = inline_scan(self, self->offsets);
                         if (ret == -1) {
                                 return NULL;
                         };
                 }
                 return self->offsets[pos];
         }

         return (const char *)(self->buf.buf) + pos * self->entry_size;
 }

 /*
  * Get parents of the given rev.
  *
  * The specified rev must be valid and must not be nullrev. A returned
  * parent revision may be nullrev, but is guaranteed to be in valid range.
  */
 static inline int index_get_parents(indexObject *self, Py_ssize_t rev, int *ps,
                                     int maxrev)
 {
         const char *data = index_deref(self, rev);

         ps[0] = getbe32(data + 24);
         ps[1] = getbe32(data + 28);

         /* If index file is corrupted, ps[] may point to invalid revisions. So
          * there is a risk of buffer overflow to trust them unconditionally. */
         if (ps[0] < -1 || ps[0] > maxrev || ps[1] < -1 || ps[1] > maxrev) {
                 PyErr_SetString(PyExc_ValueError, "parent out of range");
                 return -1;
         }
         return 0;
 }

 /*
  * Get parents of the given rev.
  *
  * If the specified rev is out of range, IndexError will be raised. If the
  * revlog entry is corrupted, ValueError may be raised.
  *
  * Returns 0 on success or -1 on failure.
  */
 static int HgRevlogIndex_GetParents(PyObject *op, int rev, int *ps)
 {
         int tiprev;
         if (!op || !HgRevlogIndex_Check(op) || !ps) {
                 PyErr_BadInternalCall();
                 return -1;
         }
         tiprev = (int)index_length((indexObject *)op) - 1;
         if (rev < -1 || rev > tiprev) {
                 PyErr_Format(PyExc_IndexError, "rev out of range: %d", rev);
                 return -1;
         } else if (rev == -1) {
                 ps[0] = ps[1] = -1;
                 return 0;
         } else {
                 return index_get_parents((indexObject *)op, rev, ps, tiprev);
         }
 }

 static inline int64_t index_get_start(indexObject *self, Py_ssize_t rev)
 {
         const char *data;
         uint64_t offset;

         if (rev == nullrev)
                 return 0;

         data = index_deref(self, rev);
         offset = getbe32(data + 4);
         if (rev == 0) {
                 /* mask out version number for the first entry */
                 offset &= 0xFFFF;
         } else {
                 uint32_t offset_high = getbe32(data);
                 offset |= ((uint64_t)offset_high) << 32;
         }
         return (int64_t)(offset >> 16);
 }

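index_get_start() works because each entry stores a 48-bit data offset and 16 bits of flags in a single big-endian 64-bit field. A hedged sketch of the packing, with illustrative values:

/* Pack a 48-bit offset with 16 flag bits, then recover the offset the
 * way index_get_start() does: offset 0x1234, flags 0x0001. */
uint64_t packed = ((uint64_t)0x1234 << 16) | 0x0001;
int64_t start = (int64_t)(packed >> 16); /* == 0x1234 */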
 static inline int index_get_length(indexObject *self, Py_ssize_t rev)
 {
         const char *data;
         int tmp;

         if (rev == nullrev)
                 return 0;

         data = index_deref(self, rev);

         tmp = (int)getbe32(data + 8);
         if (tmp < 0) {
                 PyErr_Format(PyExc_OverflowError,
                              "revlog entry size out of bound (%d)", tmp);
                 return -1;
         }
         return tmp;
 }

 /*
  * RevlogNG format (all in big endian, data may be inlined):
  * 6 bytes: offset
  * 2 bytes: flags
  * 4 bytes: compressed length
  * 4 bytes: uncompressed length
  * 4 bytes: base revision
  * 4 bytes: link revision
  * 4 bytes: parent 1 revision
  * 4 bytes: parent 2 revision
  * 32 bytes: nodeid (only 20 bytes used with SHA-1)
  */
 static PyObject *index_get(indexObject *self, Py_ssize_t pos)
 {
         uint64_t offset_flags, sidedata_offset;
         int comp_len, uncomp_len, base_rev, link_rev, parent_1, parent_2,
             sidedata_comp_len;
         char data_comp_mode, sidedata_comp_mode;
         const char *c_node_id;
         const char *data;
         Py_ssize_t length = index_length(self);

         if (pos == nullrev) {
                 Py_INCREF(self->nullentry);
                 return self->nullentry;
         }

         if (pos < 0 || pos >= length) {
                 PyErr_SetString(PyExc_IndexError, "revlog index out of range");
                 return NULL;
         }

         data = index_deref(self, pos);
         if (data == NULL)
                 return NULL;

         offset_flags = getbe32(data + 4);
         /*
          * The first entry on-disk needs the version number masked out,
          * but this doesn't apply if entries are added to an empty index.
          */
         if (self->length && pos == 0)
                 offset_flags &= 0xFFFF;
         else {
                 uint32_t offset_high = getbe32(data);
                 offset_flags |= ((uint64_t)offset_high) << 32;
         }

         comp_len = getbe32(data + 8);
         uncomp_len = getbe32(data + 12);
         base_rev = getbe32(data + 16);
         link_rev = getbe32(data + 20);
         parent_1 = getbe32(data + 24);
         parent_2 = getbe32(data + 28);
         c_node_id = data + 32;

         if (self->format_version == format_v1) {
                 sidedata_offset = 0;
                 sidedata_comp_len = 0;
                 data_comp_mode = comp_mode_inline;
                 sidedata_comp_mode = comp_mode_inline;
         } else {
                 sidedata_offset = getbe64(data + 64);
                 sidedata_comp_len = getbe32(data + 72);
                 data_comp_mode = data[76] & 3;
                 sidedata_comp_mode = ((data[76] >> 2) & 3);
         }

         return Py_BuildValue(tuple_format, offset_flags, comp_len, uncomp_len,
                              base_rev, link_rev, parent_1, parent_2, c_node_id,
                              self->nodelen, sidedata_offset, sidedata_comp_len,
                              data_comp_mode, sidedata_comp_mode);
 }
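For v2 entries, index_get() reads the tail past the 32-byte nodeid: a 64-bit sidedata offset at byte 64, a 32-bit sidedata length at byte 72, and one packed compression byte at 76. A small sketch of that byte, derived from the reads above:

/* Byte 76 holds two 2-bit compression modes:
 *   bits 0-1: data compression mode
 *   bits 2-3: sidedata compression mode
 * e.g. 0x0A (0b1010) decodes to data mode 2 and sidedata mode 2. */
char comp_field = 0x0A;
char data_comp_mode = comp_field & 3;            /* == 2 */
char sidedata_comp_mode = (comp_field >> 2) & 3; /* == 2 */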
 /*
  * Pack header information in binary
  */
 static PyObject *index_pack_header(indexObject *self, PyObject *args)
 {
         int header;
         char out[4];
         if (!PyArg_ParseTuple(args, "I", &header)) {
                 return NULL;
         }
         if (self->format_version != format_v1) {
                 PyErr_Format(PyExc_RuntimeError,
                              "version header should go in the docket, not the "
                              "index: %lu",
                              header);
                 return NULL;
         }
         putbe32(header, out);
         return PyBytes_FromStringAndSize(out, 4);
 }

 /*
  * Return the raw binary string representing a revision
  */
 static PyObject *index_entry_binary(indexObject *self, PyObject *value)
 {
         long rev;
         const char *data;
         Py_ssize_t length = index_length(self);

         if (!pylong_to_long(value, &rev)) {
                 return NULL;
         }
         if (rev < 0 || rev >= length) {
                 PyErr_Format(PyExc_ValueError, "revlog index out of range: %ld",
                              rev);
                 return NULL;
         };

         data = index_deref(self, rev);
         if (data == NULL)
                 return NULL;
         if (rev == 0 && self->format_version == format_v1) {
                 /* the header is eating the start of the first entry */
                 return PyBytes_FromStringAndSize(data + 4,
                                                  self->entry_size - 4);
         }
         return PyBytes_FromStringAndSize(data, self->entry_size);
 }

 /*
  * Return the hash of node corresponding to the given rev.
  */
 static const char *index_node(indexObject *self, Py_ssize_t pos)
 {
         Py_ssize_t length = index_length(self);
         const char *data;

         if (pos == nullrev)
                 return nullid;

         if (pos >= length)
                 return NULL;

         data = index_deref(self, pos);
         return data ? data + 32 : NULL;
 }

 /*
  * Return the hash of the node corresponding to the given rev. The
  * rev is assumed to be existing. If not, an exception is set.
  */
 static const char *index_node_existing(indexObject *self, Py_ssize_t pos)
 {
         const char *node = index_node(self, pos);
         if (node == NULL) {
                 PyErr_Format(PyExc_IndexError, "could not access rev %d",
                              (int)pos);
         }
         return node;
 }

 static int nt_insert(nodetree *self, const char *node, int rev);

 static int node_check(Py_ssize_t nodelen, PyObject *obj, char **node)
 {
         Py_ssize_t thisnodelen;
         if (PyBytes_AsStringAndSize(obj, node, &thisnodelen) == -1)
                 return -1;
         if (nodelen == thisnodelen)
                 return 0;
         PyErr_Format(PyExc_ValueError, "node len %zd != expected node len %zd",
                      thisnodelen, nodelen);
         return -1;
 }

 static PyObject *index_append(indexObject *self, PyObject *obj)
 {
         uint64_t offset_flags, sidedata_offset;
         int rev, comp_len, uncomp_len, base_rev, link_rev, parent_1, parent_2;
         char data_comp_mode, sidedata_comp_mode;
         Py_ssize_t c_node_id_len, sidedata_comp_len;
         const char *c_node_id;
         char comp_field;
         char *data;

         if (!PyArg_ParseTuple(obj, tuple_format, &offset_flags, &comp_len,
                               &uncomp_len, &base_rev, &link_rev, &parent_1,
                               &parent_2, &c_node_id, &c_node_id_len,
                               &sidedata_offset, &sidedata_comp_len,
                               &data_comp_mode, &sidedata_comp_mode)) {
                 PyErr_SetString(PyExc_TypeError, "11-tuple required");
                 return NULL;
         }

         if (c_node_id_len != self->nodelen) {
                 PyErr_SetString(PyExc_TypeError, "invalid node");
                 return NULL;
         }
         if (self->format_version == format_v1) {

                 if (data_comp_mode != comp_mode_inline) {
                         PyErr_Format(PyExc_ValueError,
                                      "invalid data compression mode: %i",
                                      data_comp_mode);
                         return NULL;
                 }
                 if (sidedata_comp_mode != comp_mode_inline) {
                         PyErr_Format(PyExc_ValueError,
                                      "invalid sidedata compression mode: %i",
                                      sidedata_comp_mode);
                         return NULL;
                 }
         }

         if (self->new_length == self->added_length) {
                 size_t new_added_length =
                     self->added_length ? self->added_length * 2 : 4096;
                 void *new_added = PyMem_Realloc(
                     self->added, new_added_length * self->entry_size);
                 if (!new_added)
                         return PyErr_NoMemory();
                 self->added = new_added;
                 self->added_length = new_added_length;
         }
         rev = self->length + self->new_length;
         data = self->added + self->entry_size * self->new_length++;
         putbe32(offset_flags >> 32, data);
         putbe32(offset_flags & 0xffffffffU, data + 4);
         putbe32(comp_len, data + 8);
         putbe32(uncomp_len, data + 12);
         putbe32(base_rev, data + 16);
         putbe32(link_rev, data + 20);
         putbe32(parent_1, data + 24);
         putbe32(parent_2, data + 28);
         memcpy(data + 32, c_node_id, c_node_id_len);
         /* Padding since SHA-1 is only 20 bytes for now */
         memset(data + 32 + c_node_id_len, 0, 32 - c_node_id_len);
         if (self->format_version == format_v2) {
                 putbe64(sidedata_offset, data + 64);
                 putbe32(sidedata_comp_len, data + 72);
                 comp_field = data_comp_mode & 3;
                 comp_field = comp_field | (sidedata_comp_mode & 3) << 2;
                 data[76] = comp_field;
                 /* Padding for 96 bytes alignment */
                 memset(data + 77, 0, self->entry_size - 77);
         }

         if (self->ntinitialized)
                 nt_insert(&self->nt, c_node_id, rev);

         Py_CLEAR(self->headrevs);
         Py_RETURN_NONE;
 }

 /* Replace an existing index entry's sidedata offset and length with new ones.
    This cannot be used outside of the context of sidedata rewriting,
    inside the transaction that creates the given revision. */
 static PyObject *index_replace_sidedata_info(indexObject *self, PyObject *args)
 {
         uint64_t offset_flags, sidedata_offset;
         int rev;
+        char comp_mode;
         Py_ssize_t sidedata_comp_len;
         char *data;
 #if LONG_MAX == 0x7fffffffL
-        const char *const sidedata_format = PY23("nKiK", "nKiK");
+        const char *const sidedata_format = PY23("nKiKB", "nKiKB");
 #else
-        const char *const sidedata_format = PY23("nkik", "nkik");
+        const char *const sidedata_format = PY23("nkikB", "nkikB");
 #endif

         if (self->entry_size == v1_entry_size || self->inlined) {
                 /*
                  There is a bug in the transaction handling when going from an
                  inline revlog to a separate index and data file. Turn it off until
                  it's fixed, since v2 revlogs sometimes get rewritten on exchange.
                  See issue6485.
                 */
                 raise_revlog_error();
                 return NULL;
         }

         if (!PyArg_ParseTuple(args, sidedata_format, &rev, &sidedata_offset,
-                              &sidedata_comp_len, &offset_flags))
+                              &sidedata_comp_len, &offset_flags, &comp_mode))
                 return NULL;

         if (rev < 0 || rev >= index_length(self)) {
                 PyErr_SetString(PyExc_IndexError, "revision outside index");
                 return NULL;
         }
         if (rev < self->length) {
                 PyErr_SetString(
                     PyExc_IndexError,
                     "cannot rewrite entries outside of this transaction");
                 return NULL;
         }

         /* Find the newly added node, offset from the "already on-disk" length
          */
         data = self->added + self->entry_size * (rev - self->length);
         putbe64(offset_flags, data);
         putbe64(sidedata_offset, data + 64);
         putbe32(sidedata_comp_len, data + 72);
+        data[76] = (data[76] & ~(3 << 2)) | ((comp_mode & 3) << 2);

         Py_RETURN_NONE;
 }
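This is the heart of the change: the function now receives the sidedata compression mode (the trailing `B` added to the parse format) and rewrites only bits 2-3 of byte 76, leaving the data-compression bits intact. A hedged sketch of that read-modify-write on an illustrative byte:

/* Byte 76 holds data mode 2 and sidedata mode 2 (0b1010); rewriting
 * the sidedata mode to 1 must preserve bits 0-1. */
char b = 0x0A;                                /* 0b1010 */
char comp_mode = 1;                           /* new sidedata mode */
b = (b & ~(3 << 2)) | ((comp_mode & 3) << 2); /* -> 0b0110, data mode still 2 */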

 static PyObject *index_stats(indexObject *self)
 {
         PyObject *obj = PyDict_New();
         PyObject *s = NULL;
         PyObject *t = NULL;

         if (obj == NULL)
                 return NULL;

 #define istat(__n, __d) \
         do { \
                 s = PyBytes_FromString(__d); \
                 t = PyInt_FromSsize_t(self->__n); \
                 if (!s || !t) \
                         goto bail; \
                 if (PyDict_SetItem(obj, s, t) == -1) \
                         goto bail; \
                 Py_CLEAR(s); \
                 Py_CLEAR(t); \
         } while (0)

         if (self->added_length)
                 istat(new_length, "index entries added");
         istat(length, "revs in memory");
         istat(ntlookups, "node trie lookups");
         istat(ntmisses, "node trie misses");
         istat(ntrev, "node trie last rev scanned");
         if (self->ntinitialized) {
                 istat(nt.capacity, "node trie capacity");
                 istat(nt.depth, "node trie depth");
                 istat(nt.length, "node trie count");
                 istat(nt.splits, "node trie splits");
         }

 #undef istat

         return obj;

 bail:
         Py_XDECREF(obj);
         Py_XDECREF(s);
         Py_XDECREF(t);
         return NULL;
 }

 /*
  * When we cache a list, we want to be sure the caller can't mutate
  * the cached copy.
  */
 static PyObject *list_copy(PyObject *list)
 {
         Py_ssize_t len = PyList_GET_SIZE(list);
         PyObject *newlist = PyList_New(len);
         Py_ssize_t i;

         if (newlist == NULL)
                 return NULL;

         for (i = 0; i < len; i++) {
                 PyObject *obj = PyList_GET_ITEM(list, i);
                 Py_INCREF(obj);
                 PyList_SET_ITEM(newlist, i, obj);
         }

         return newlist;
 }

 static int check_filter(PyObject *filter, Py_ssize_t arg)
 {
         if (filter) {
                 PyObject *arglist, *result;
                 int isfiltered;

                 arglist = Py_BuildValue("(n)", arg);
                 if (!arglist) {
                         return -1;
                 }

                 result = PyObject_Call(filter, arglist, NULL);
                 Py_DECREF(arglist);
                 if (!result) {
                         return -1;
                 }

                 /* PyObject_IsTrue returns 1 if true, 0 if false, -1 if error,
                  * same as this function, so we can just return it directly.*/
                 isfiltered = PyObject_IsTrue(result);
                 Py_DECREF(result);
                 return isfiltered;
         } else {
                 return 0;
         }
 }

 static inline void set_phase_from_parents(char *phases, int parent_1,
                                           int parent_2, Py_ssize_t i)
 {
         if (parent_1 >= 0 && phases[parent_1] > phases[i])
                 phases[i] = phases[parent_1];
         if (parent_2 >= 0 && phases[parent_2] > phases[i])
                 phases[i] = phases[parent_2];
 }
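set_phase_from_parents() implements "a child is at least as secret as its parents": the child's phase is raised to the maximum of its own and each parent's. For instance, with public=0, draft=1, secret=2 (a sketch, values illustrative):

/* phases[] is indexed by rev; rev 2 has parents 0 (draft) and 1 (secret). */
char phases[3] = {1, 2, 0};
set_phase_from_parents(phases, 0, 1, 2); /* phases[2] becomes 2 (secret) */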

 static PyObject *reachableroots2(indexObject *self, PyObject *args)
 {

         /* Input */
         long minroot;
         PyObject *includepatharg = NULL;
         int includepath = 0;
         /* heads and roots are lists */
         PyObject *heads = NULL;
         PyObject *roots = NULL;
         PyObject *reachable = NULL;

         PyObject *val;
         Py_ssize_t len = index_length(self);
         long revnum;
         Py_ssize_t k;
         Py_ssize_t i;
         Py_ssize_t l;
         int r;
         int parents[2];

         /* Internal data structure:
          * tovisit: array of length len+1 (all revs + nullrev), filled upto
          * lentovisit
          *
          * revstates: array of length len+1 (all revs + nullrev) */
         int *tovisit = NULL;
         long lentovisit = 0;
         enum { RS_SEEN = 1, RS_ROOT = 2, RS_REACHABLE = 4 };
         char *revstates = NULL;

         /* Get arguments */
         if (!PyArg_ParseTuple(args, "lO!O!O!", &minroot, &PyList_Type, &heads,
                               &PyList_Type, &roots, &PyBool_Type,
                               &includepatharg))
                 goto bail;

         if (includepatharg == Py_True)
                 includepath = 1;

         /* Initialize return set */
         reachable = PyList_New(0);
         if (reachable == NULL)
                 goto bail;

         /* Initialize internal datastructures */
         tovisit = (int *)malloc((len + 1) * sizeof(int));
         if (tovisit == NULL) {
                 PyErr_NoMemory();
                 goto bail;
         }

         revstates = (char *)calloc(len + 1, 1);
         if (revstates == NULL) {
                 PyErr_NoMemory();
                 goto bail;
         }

         l = PyList_GET_SIZE(roots);
         for (i = 0; i < l; i++) {
                 revnum = PyInt_AsLong(PyList_GET_ITEM(roots, i));
                 if (revnum == -1 && PyErr_Occurred())
                         goto bail;
                 /* If root is out of range, e.g. wdir(), it must be unreachable
                  * from heads. So we can just ignore it. */
                 if (revnum + 1 < 0 || revnum + 1 >= len + 1)
                         continue;
                 revstates[revnum + 1] |= RS_ROOT;
         }

         /* Populate tovisit with all the heads */
         l = PyList_GET_SIZE(heads);
         for (i = 0; i < l; i++) {
                 revnum = PyInt_AsLong(PyList_GET_ITEM(heads, i));
                 if (revnum == -1 && PyErr_Occurred())
                         goto bail;
                 if (revnum + 1 < 0 || revnum + 1 >= len + 1) {
                         PyErr_SetString(PyExc_IndexError, "head out of range");
                         goto bail;
                 }
                 if (!(revstates[revnum + 1] & RS_SEEN)) {
                         tovisit[lentovisit++] = (int)revnum;
                         revstates[revnum + 1] |= RS_SEEN;
                 }
         }

         /* Visit the tovisit list and find the reachable roots */
         k = 0;
         while (k < lentovisit) {
                 /* Add the node to reachable if it is a root*/
                 revnum = tovisit[k++];
                 if (revstates[revnum + 1] & RS_ROOT) {
                         revstates[revnum + 1] |= RS_REACHABLE;
                         val = PyInt_FromLong(revnum);
                         if (val == NULL)
                                 goto bail;
                         r = PyList_Append(reachable, val);
                         Py_DECREF(val);
                         if (r < 0)
                                 goto bail;
                         if (includepath == 0)
                                 continue;
                 }

                 /* Add its parents to the list of nodes to visit */
                 if (revnum == nullrev)
                         continue;
                 r = index_get_parents(self, revnum, parents, (int)len - 1);
                 if (r < 0)
                         goto bail;
                 for (i = 0; i < 2; i++) {
                         if (!(revstates[parents[i] + 1] & RS_SEEN) &&
                             parents[i] >= minroot) {
                                 tovisit[lentovisit++] = parents[i];
                                 revstates[parents[i] + 1] |= RS_SEEN;
                         }
                 }
         }

         /* Find all the nodes in between the roots we found and the heads
          * and add them to the reachable set */
         if (includepath == 1) {
                 long minidx = minroot;
                 if (minidx < 0)
                         minidx = 0;
                 for (i = minidx; i < len; i++) {
                         if (!(revstates[i + 1] & RS_SEEN))
                                 continue;
                         r = index_get_parents(self, i, parents, (int)len - 1);
                         /* Corrupted index file, error is set from
                          * index_get_parents */
                         if (r < 0)
                                 goto bail;
                         if (((revstates[parents[0] + 1] |
                               revstates[parents[1] + 1]) &
                              RS_REACHABLE) &&
                             !(revstates[i + 1] & RS_REACHABLE)) {
                                 revstates[i + 1] |= RS_REACHABLE;
                                 val = PyInt_FromSsize_t(i);
                                 if (val == NULL)
                                         goto bail;
                                 r = PyList_Append(reachable, val);
                                 Py_DECREF(val);
                                 if (r < 0)
                                         goto bail;
                         }
                 }
         }

         free(revstates);
         free(tovisit);
         return reachable;
 bail:
         Py_XDECREF(reachable);
         free(revstates);
         free(tovisit);
         return NULL;
 }
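reachableroots2() indexes revstates with rev + 1 so that nullrev (-1) gets a valid slot at index 0. A small sketch of the convention (array size is illustrative):

/* revstates[0] tracks nullrev (-1); revstates[r + 1] tracks rev r. */
enum { RS_SEEN = 1, RS_ROOT = 2, RS_REACHABLE = 4 };
char revstates[5 + 1] = {0};  /* 5 revs + nullrev */
revstates[-1 + 1] |= RS_SEEN; /* mark nullrev as seen */
revstates[3 + 1] |= RS_ROOT;  /* mark rev 3 as a root */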
841
843
842 static int add_roots_get_min(indexObject *self, PyObject *roots, char *phases,
844 static int add_roots_get_min(indexObject *self, PyObject *roots, char *phases,
843 char phase)
845 char phase)
844 {
846 {
845 Py_ssize_t len = index_length(self);
847 Py_ssize_t len = index_length(self);
846 PyObject *item;
848 PyObject *item;
847 PyObject *iterator;
849 PyObject *iterator;
848 int rev, minrev = -1;
850 int rev, minrev = -1;
849 char *node;
851 char *node;
850
852
851 if (!PySet_Check(roots)) {
853 if (!PySet_Check(roots)) {
852 PyErr_SetString(PyExc_TypeError,
854 PyErr_SetString(PyExc_TypeError,
853 "roots must be a set of nodes");
855 "roots must be a set of nodes");
854 return -2;
856 return -2;
855 }
857 }
856 iterator = PyObject_GetIter(roots);
858 iterator = PyObject_GetIter(roots);
857 if (iterator == NULL)
859 if (iterator == NULL)
858 return -2;
860 return -2;
859 while ((item = PyIter_Next(iterator))) {
861 while ((item = PyIter_Next(iterator))) {
860 if (node_check(self->nodelen, item, &node) == -1)
862 if (node_check(self->nodelen, item, &node) == -1)
861 goto failed;
863 goto failed;
862 rev = index_find_node(self, node);
864 rev = index_find_node(self, node);
863 /* null is implicitly public, so negative is invalid */
865 /* null is implicitly public, so negative is invalid */
864 if (rev < 0 || rev >= len)
866 if (rev < 0 || rev >= len)
865 goto failed;
867 goto failed;
866 phases[rev] = phase;
868 phases[rev] = phase;
867 if (minrev == -1 || minrev > rev)
869 if (minrev == -1 || minrev > rev)
868 minrev = rev;
870 minrev = rev;
869 Py_DECREF(item);
871 Py_DECREF(item);
870 }
872 }
871 Py_DECREF(iterator);
873 Py_DECREF(iterator);
872 return minrev;
874 return minrev;
873 failed:
875 failed:
874 Py_DECREF(iterator);
876 Py_DECREF(iterator);
875 Py_DECREF(item);
877 Py_DECREF(item);
876 return -2;
878 return -2;
877 }
879 }
878
880
879 static PyObject *compute_phases_map_sets(indexObject *self, PyObject *args)
881 static PyObject *compute_phases_map_sets(indexObject *self, PyObject *args)
880 {
882 {
881 /* 0: public (untracked), 1: draft, 2: secret, 32: archive,
883 /* 0: public (untracked), 1: draft, 2: secret, 32: archive,
882 96: internal */
884 96: internal */
883 static const char trackedphases[] = {1, 2, 32, 96};
885 static const char trackedphases[] = {1, 2, 32, 96};
884 PyObject *roots = Py_None;
886 PyObject *roots = Py_None;
885 PyObject *phasesetsdict = NULL;
887 PyObject *phasesetsdict = NULL;
886 PyObject *phasesets[4] = {NULL, NULL, NULL, NULL};
888 PyObject *phasesets[4] = {NULL, NULL, NULL, NULL};
887 Py_ssize_t len = index_length(self);
889 Py_ssize_t len = index_length(self);
888 char *phases = NULL;
890 char *phases = NULL;
889 int minphaserev = -1, rev, i;
891 int minphaserev = -1, rev, i;
890 const int numphases = (int)(sizeof(phasesets) / sizeof(phasesets[0]));
892 const int numphases = (int)(sizeof(phasesets) / sizeof(phasesets[0]));
891
893
892 if (!PyArg_ParseTuple(args, "O", &roots))
894 if (!PyArg_ParseTuple(args, "O", &roots))
893 return NULL;
895 return NULL;
894 if (roots == NULL || !PyDict_Check(roots)) {
896 if (roots == NULL || !PyDict_Check(roots)) {
895 PyErr_SetString(PyExc_TypeError, "roots must be a dictionary");
897 PyErr_SetString(PyExc_TypeError, "roots must be a dictionary");
896 return NULL;
898 return NULL;
897 }
899 }
898
900
899 phases = calloc(len, 1);
901 phases = calloc(len, 1);
900 if (phases == NULL) {
902 if (phases == NULL) {
901 PyErr_NoMemory();
903 PyErr_NoMemory();
902 return NULL;
904 return NULL;
903 }
905 }
904
906
905 for (i = 0; i < numphases; ++i) {
907 for (i = 0; i < numphases; ++i) {
906 PyObject *pyphase = PyInt_FromLong(trackedphases[i]);
908 PyObject *pyphase = PyInt_FromLong(trackedphases[i]);
907 PyObject *phaseroots = NULL;
909 PyObject *phaseroots = NULL;
908 if (pyphase == NULL)
910 if (pyphase == NULL)
909 goto release;
911 goto release;
910 phaseroots = PyDict_GetItem(roots, pyphase);
912 phaseroots = PyDict_GetItem(roots, pyphase);
911 Py_DECREF(pyphase);
913 Py_DECREF(pyphase);
912 if (phaseroots == NULL)
914 if (phaseroots == NULL)
913 continue;
915 continue;
914 rev = add_roots_get_min(self, phaseroots, phases,
916 rev = add_roots_get_min(self, phaseroots, phases,
915 trackedphases[i]);
917 trackedphases[i]);
916 if (rev == -2)
918 if (rev == -2)
917 goto release;
919 goto release;
918 if (rev != -1 && (minphaserev == -1 || rev < minphaserev))
920 if (rev != -1 && (minphaserev == -1 || rev < minphaserev))
919 minphaserev = rev;
921 minphaserev = rev;
920 }
922 }
921
923
922 for (i = 0; i < numphases; ++i) {
924 for (i = 0; i < numphases; ++i) {
923 phasesets[i] = PySet_New(NULL);
925 phasesets[i] = PySet_New(NULL);
924 if (phasesets[i] == NULL)
926 if (phasesets[i] == NULL)
925 goto release;
927 goto release;
926 }
928 }
927
929
928 if (minphaserev == -1)
930 if (minphaserev == -1)
929 minphaserev = len;
931 minphaserev = len;
930 for (rev = minphaserev; rev < len; ++rev) {
932 for (rev = minphaserev; rev < len; ++rev) {
931 PyObject *pyphase = NULL;
933 PyObject *pyphase = NULL;
932 PyObject *pyrev = NULL;
934 PyObject *pyrev = NULL;
933 int parents[2];
935 int parents[2];
934 /*
936 /*
935 * The parent lookup could be skipped for phaseroots, but
937 * The parent lookup could be skipped for phaseroots, but
936 * phase --force would historically not recompute them
938 * phase --force would historically not recompute them
937 * correctly, leaving descendents with a lower phase around.
939 * correctly, leaving descendents with a lower phase around.
938 * As such, unconditionally recompute the phase.
940 * As such, unconditionally recompute the phase.
939 */
941 */
940 if (index_get_parents(self, rev, parents, (int)len - 1) < 0)
942 if (index_get_parents(self, rev, parents, (int)len - 1) < 0)
941 goto release;
943 goto release;
942 set_phase_from_parents(phases, parents[0], parents[1], rev);
944 set_phase_from_parents(phases, parents[0], parents[1], rev);
943 switch (phases[rev]) {
945 switch (phases[rev]) {
944 case 0:
946 case 0:
945 continue;
947 continue;
946 case 1:
948 case 1:
947 pyphase = phasesets[0];
949 pyphase = phasesets[0];
948 break;
950 break;
949 case 2:
951 case 2:
950 pyphase = phasesets[1];
952 pyphase = phasesets[1];
951 break;
953 break;
952 case 32:
954 case 32:
953 pyphase = phasesets[2];
955 pyphase = phasesets[2];
954 break;
956 break;
955 case 96:
957 case 96:
956 pyphase = phasesets[3];
958 pyphase = phasesets[3];
957 break;
959 break;
958 default:
960 default:
959 /* this should never happen since the phase number is
961 /* this should never happen since the phase number is
960 * specified by this function. */
962 * specified by this function. */
961 PyErr_SetString(PyExc_SystemError,
963 PyErr_SetString(PyExc_SystemError,
962 "bad phase number in internal list");
964 "bad phase number in internal list");
963 goto release;
965 goto release;
964 }
966 }
965 pyrev = PyInt_FromLong(rev);
967 pyrev = PyInt_FromLong(rev);
966 if (pyrev == NULL)
968 if (pyrev == NULL)
967 goto release;
969 goto release;
968 if (PySet_Add(pyphase, pyrev) == -1) {
970 if (PySet_Add(pyphase, pyrev) == -1) {
969 Py_DECREF(pyrev);
971 Py_DECREF(pyrev);
970 goto release;
972 goto release;
971 }
973 }
972 Py_DECREF(pyrev);
974 Py_DECREF(pyrev);
973 }
975 }
974
976
975 phasesetsdict = _dict_new_presized(numphases);
977 phasesetsdict = _dict_new_presized(numphases);
976 if (phasesetsdict == NULL)
978 if (phasesetsdict == NULL)
977 goto release;
979 goto release;
978 for (i = 0; i < numphases; ++i) {
980 for (i = 0; i < numphases; ++i) {
979 PyObject *pyphase = PyInt_FromLong(trackedphases[i]);
981 PyObject *pyphase = PyInt_FromLong(trackedphases[i]);
980 if (pyphase == NULL)
982 if (pyphase == NULL)
981 goto release;
983 goto release;
982 if (PyDict_SetItem(phasesetsdict, pyphase, phasesets[i]) ==
984 if (PyDict_SetItem(phasesetsdict, pyphase, phasesets[i]) ==
983 -1) {
985 -1) {
984 Py_DECREF(pyphase);
986 Py_DECREF(pyphase);
985 goto release;
987 goto release;
986 }
988 }
987 Py_DECREF(phasesets[i]);
989 Py_DECREF(phasesets[i]);
988 phasesets[i] = NULL;
990 phasesets[i] = NULL;
989 }
991 }
990
992
991 return Py_BuildValue("nN", len, phasesetsdict);
993 return Py_BuildValue("nN", len, phasesetsdict);
992
994
993 release:
995 release:
994 for (i = 0; i < numphases; ++i)
996 for (i = 0; i < numphases; ++i)
995 Py_XDECREF(phasesets[i]);
997 Py_XDECREF(phasesets[i]);
996 Py_XDECREF(phasesetsdict);
998 Py_XDECREF(phasesetsdict);
997
999
998 free(phases);
1000 free(phases);
999 return NULL;
1001 return NULL;
1000 }
1002 }
1001
1003
static PyObject *index_headrevs(indexObject *self, PyObject *args)
{
    Py_ssize_t i, j, len;
    char *nothead = NULL;
    PyObject *heads = NULL;
    PyObject *filter = NULL;
    PyObject *filteredrevs = Py_None;

    if (!PyArg_ParseTuple(args, "|O", &filteredrevs)) {
        return NULL;
    }

    if (self->headrevs && filteredrevs == self->filteredrevs)
        return list_copy(self->headrevs);

    Py_DECREF(self->filteredrevs);
    self->filteredrevs = filteredrevs;
    Py_INCREF(filteredrevs);

    if (filteredrevs != Py_None) {
        filter = PyObject_GetAttrString(filteredrevs, "__contains__");
        if (!filter) {
            PyErr_SetString(
                PyExc_TypeError,
                "filteredrevs has no attribute __contains__");
            goto bail;
        }
    }

    len = index_length(self);
    heads = PyList_New(0);
    if (heads == NULL)
        goto bail;
    if (len == 0) {
        PyObject *nullid = PyInt_FromLong(-1);
        if (nullid == NULL || PyList_Append(heads, nullid) == -1) {
            Py_XDECREF(nullid);
            goto bail;
        }
        goto done;
    }

    nothead = calloc(len, 1);
    if (nothead == NULL) {
        PyErr_NoMemory();
        goto bail;
    }

    for (i = len - 1; i >= 0; i--) {
        int isfiltered;
        int parents[2];

        /* If nothead[i] == 1, it means we've seen an unfiltered child
         * of this node already, and therefore this node is not
         * filtered. So we can skip the expensive check_filter step.
         */
        if (nothead[i] != 1) {
            isfiltered = check_filter(filter, i);
            if (isfiltered == -1) {
                PyErr_SetString(PyExc_TypeError,
                                "unable to check filter");
                goto bail;
            }

            if (isfiltered) {
                nothead[i] = 1;
                continue;
            }
        }

        if (index_get_parents(self, i, parents, (int)len - 1) < 0)
            goto bail;
        for (j = 0; j < 2; j++) {
            if (parents[j] >= 0)
                nothead[parents[j]] = 1;
        }
    }

    for (i = 0; i < len; i++) {
        PyObject *head;

        if (nothead[i])
            continue;
        head = PyInt_FromSsize_t(i);
        if (head == NULL || PyList_Append(heads, head) == -1) {
            Py_XDECREF(head);
            goto bail;
        }
    }

done:
    self->headrevs = heads;
    Py_XDECREF(filter);
    free(nothead);
    return list_copy(self->headrevs);
bail:
    Py_XDECREF(filter);
    Py_XDECREF(heads);
    free(nothead);
    return NULL;
}

/**
 * Obtain the base revision index entry.
 *
 * Callers must ensure that rev >= 0 or illegal memory access may occur.
 */
static inline int index_baserev(indexObject *self, int rev)
{
    const char *data;
    int result;

    data = index_deref(self, rev);
    if (data == NULL)
        return -2;
    result = getbe32(data + 16);

    if (result > rev) {
        PyErr_Format(
            PyExc_ValueError,
            "corrupted revlog, revision base above revision: %d, %d",
            rev, result);
        return -2;
    }
    if (result < -1) {
        PyErr_Format(
            PyExc_ValueError,
            "corrupted revlog, revision base out of range: %d, %d",
            rev, result);
        return -2;
    }
    return result;
}

/**
 * Find if a revision is a snapshot or not
 *
 * Only relevant for sparse-revlog case.
 * Callers must ensure that rev is in a valid range.
 */
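/*
 * Concretely: a rev whose base is -1 (or itself) is a full snapshot; a
 * rev stored as a delta against one of its own parents is a plain
 * delta, not a snapshot; a rev stored against any other base is an
 * intermediate snapshot, provided that base is itself recursively a
 * snapshot, which is what the loop below checks.
 */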
static int index_issnapshotrev(indexObject *self, Py_ssize_t rev)
{
    int ps[2];
    Py_ssize_t base;
    while (rev >= 0) {
        base = (Py_ssize_t)index_baserev(self, rev);
        if (base == rev) {
            base = -1;
        }
        if (base == -2) {
            assert(PyErr_Occurred());
            return -1;
        }
        if (base == -1) {
            return 1;
        }
        if (index_get_parents(self, rev, ps, (int)rev) < 0) {
            assert(PyErr_Occurred());
            return -1;
        }
        if (base == ps[0] || base == ps[1]) {
            return 0;
        }
        rev = base;
    }
    return rev == -1;
}

static PyObject *index_issnapshot(indexObject *self, PyObject *value)
{
    long rev;
    int issnap;
    Py_ssize_t length = index_length(self);

    if (!pylong_to_long(value, &rev)) {
        return NULL;
    }
    if (rev < -1 || rev >= length) {
        PyErr_Format(PyExc_ValueError, "revlog index out of range: %ld",
                     rev);
        return NULL;
    }
    issnap = index_issnapshotrev(self, (Py_ssize_t)rev);
    if (issnap < 0) {
        return NULL;
    }
    return PyBool_FromLong((long)issnap);
}

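/*
 * Scan every revision from start_rev onward and record the snapshots
 * in the caller-supplied `cache` dict, keyed by base revision: the
 * result maps each base to the list of snapshot revisions built on it,
 * with full snapshots filed under the key -1.
 */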
static PyObject *index_findsnapshots(indexObject *self, PyObject *args)
{
    Py_ssize_t start_rev;
    PyObject *cache;
    Py_ssize_t base;
    Py_ssize_t rev;
    PyObject *key = NULL;
    PyObject *value = NULL;
    const Py_ssize_t length = index_length(self);
    if (!PyArg_ParseTuple(args, "O!n", &PyDict_Type, &cache, &start_rev)) {
        return NULL;
    }
    for (rev = start_rev; rev < length; rev++) {
        int issnap;
        PyObject *allvalues = NULL;
        issnap = index_issnapshotrev(self, rev);
        if (issnap < 0) {
            goto bail;
        }
        if (issnap == 0) {
            continue;
        }
        base = (Py_ssize_t)index_baserev(self, rev);
        if (base == rev) {
            base = -1;
        }
        if (base == -2) {
            assert(PyErr_Occurred());
            goto bail;
        }
        key = PyInt_FromSsize_t(base);
        if (key == NULL) {
            goto bail;
        }
        allvalues = PyDict_GetItem(cache, key);
        if (allvalues == NULL && PyErr_Occurred()) {
            goto bail;
        }
        if (allvalues == NULL) {
            int r;
            allvalues = PyList_New(0);
            if (!allvalues) {
                goto bail;
            }
            r = PyDict_SetItem(cache, key, allvalues);
            Py_DECREF(allvalues);
            if (r < 0) {
                goto bail;
            }
        }
        value = PyInt_FromSsize_t(rev);
        if (value == NULL || PyList_Append(allvalues, value)) {
            goto bail;
        }
        Py_CLEAR(key);
        Py_CLEAR(value);
    }
    Py_RETURN_NONE;
bail:
    Py_XDECREF(key);
    Py_XDECREF(value);
    return NULL;
}

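/*
 * Build the delta chain needed to reconstruct `rev`.  Returns a
 * (chain, stopped) tuple: `chain` lists the revisions to apply, ordered
 * from the base of the chain up to `rev` itself, and `stopped` is true
 * when the walk ended on `stoprev` rather than on the chain base.
 * Without general delta the chain is simply rev, rev - 1, ... down to
 * the base.
 */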
static PyObject *index_deltachain(indexObject *self, PyObject *args)
{
    int rev, generaldelta;
    PyObject *stoparg;
    int stoprev, iterrev, baserev = -1;
    int stopped;
    PyObject *chain = NULL, *result = NULL;
    const Py_ssize_t length = index_length(self);

    if (!PyArg_ParseTuple(args, "iOi", &rev, &stoparg, &generaldelta)) {
        return NULL;
    }

    if (PyInt_Check(stoparg)) {
        stoprev = (int)PyInt_AsLong(stoparg);
        if (stoprev == -1 && PyErr_Occurred()) {
            return NULL;
        }
    } else if (stoparg == Py_None) {
        stoprev = -2;
    } else {
        PyErr_SetString(PyExc_ValueError,
                        "stoprev must be integer or None");
        return NULL;
    }

    if (rev < 0 || rev >= length) {
        PyErr_SetString(PyExc_ValueError, "revlog index out of range");
        return NULL;
    }

    chain = PyList_New(0);
    if (chain == NULL) {
        return NULL;
    }

    baserev = index_baserev(self, rev);

    /* This should never happen. */
    if (baserev <= -2) {
        /* Error should be set by index_deref() */
        assert(PyErr_Occurred());
        goto bail;
    }

    iterrev = rev;

    while (iterrev != baserev && iterrev != stoprev) {
        PyObject *value = PyInt_FromLong(iterrev);
        if (value == NULL) {
            goto bail;
        }
        if (PyList_Append(chain, value)) {
            Py_DECREF(value);
            goto bail;
        }
        Py_DECREF(value);

        if (generaldelta) {
            iterrev = baserev;
        } else {
            iterrev--;
        }

        if (iterrev < 0) {
            break;
        }

        if (iterrev >= length) {
            PyErr_SetString(PyExc_IndexError,
                            "revision outside index");
            goto bail;
        }

        baserev = index_baserev(self, iterrev);

        /* This should never happen. */
        if (baserev <= -2) {
            /* Error should be set by index_deref() */
            assert(PyErr_Occurred());
            goto bail;
        }
    }

    if (iterrev == stoprev) {
        stopped = 1;
    } else {
        PyObject *value = PyInt_FromLong(iterrev);
        if (value == NULL) {
            goto bail;
        }
        if (PyList_Append(chain, value)) {
            Py_DECREF(value);
            goto bail;
        }
        Py_DECREF(value);

        stopped = 0;
    }

    if (PyList_Reverse(chain)) {
        goto bail;
    }

    result = Py_BuildValue("OO", chain, stopped ? Py_True : Py_False);
    Py_DECREF(chain);
    return result;

bail:
    Py_DECREF(chain);
    return NULL;
}

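/*
 * Number of bytes spanned on disk by revisions start_rev through
 * end_rev: the distance between their start offsets plus the stored
 * length of the last revision.  For example, revisions starting at
 * offsets 0 and 100, the latter 20 bytes long, span 120 bytes.
 */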
static inline int64_t
index_segment_span(indexObject *self, Py_ssize_t start_rev, Py_ssize_t end_rev)
{
    int64_t start_offset;
    int64_t end_offset;
    int end_size;
    start_offset = index_get_start(self, start_rev);
    if (start_offset < 0) {
        return -1;
    }
    end_offset = index_get_start(self, end_rev);
    if (end_offset < 0) {
        return -1;
    }
    end_size = index_get_length(self, end_rev);
    if (end_size < 0) {
        return -1;
    }
    if (end_offset < start_offset) {
        PyErr_Format(PyExc_ValueError,
                     "corrupted revlog index: inconsistent offset "
                     "between revisions (%zd) and (%zd)",
                     start_rev, end_rev);
        return -1;
    }
    return (end_offset - start_offset) + (int64_t)end_size;
}

/* returns endidx so that revs[startidx:endidx] has no empty trailing revs */
static Py_ssize_t trim_endidx(indexObject *self, const Py_ssize_t *revs,
                              Py_ssize_t startidx, Py_ssize_t endidx)
{
    int length;
    while (endidx > 1 && endidx > startidx) {
        length = index_get_length(self, revs[endidx - 1]);
        if (length < 0) {
            return -1;
        }
        if (length != 0) {
            break;
        }
        endidx -= 1;
    }
    return endidx;
}

struct Gap {
    int64_t size;
    Py_ssize_t idx;
};

static int gap_compare(const void *left, const void *right)
{
    const struct Gap *l_left = (const struct Gap *)left;
    const struct Gap *l_right = (const struct Gap *)right;
    if (l_left->size < l_right->size) {
        return -1;
    } else if (l_left->size > l_right->size) {
        return 1;
    }
    return 0;
}
static int Py_ssize_t_compare(const void *left, const void *right)
{
    const Py_ssize_t l_left = *(const Py_ssize_t *)left;
    const Py_ssize_t l_right = *(const Py_ssize_t *)right;
    if (l_left < l_right) {
        return -1;
    } else if (l_left > l_right) {
        return 1;
    }
    return 0;
}

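/*
 * Slice a delta chain into chunks that are dense enough to read
 * efficiently.  Density is the ratio of payload bytes over bytes
 * actually read; a chain already at or above `targetdensity` comes
 * back as a single chunk.  Otherwise, the gaps larger than
 * `mingapsize` are sorted and skipped, largest first, until the
 * remaining reads are dense enough.  As an illustration, a 100-byte
 * payload spread over a 400-byte span has density 0.25; skipping one
 * 300-byte gap brings it to 1.0.
 */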
static PyObject *index_slicechunktodensity(indexObject *self, PyObject *args)
{
    /* method arguments */
    PyObject *list_revs = NULL; /* revisions in the chain */
    double targetdensity = 0;   /* min density to achieve */
    Py_ssize_t mingapsize = 0;  /* threshold to ignore gaps */

    /* other core variables */
    Py_ssize_t idxlen = index_length(self);
    Py_ssize_t i;            /* used for various iteration */
    PyObject *result = NULL; /* the final return of the function */

    /* generic information about the delta chain being sliced */
    Py_ssize_t num_revs = 0;    /* size of the full delta chain */
    Py_ssize_t *revs = NULL;    /* native array of revisions in the chain */
    int64_t chainpayload = 0;   /* sum of all deltas in the chain */
    int64_t deltachainspan = 0; /* distance from first byte to last byte */

    /* variables used for slicing the delta chain */
    int64_t readdata = 0; /* amount of data currently planned to be read */
    double density = 0;   /* ratio of payload data to data read */
    int64_t previous_end;
    struct Gap *gaps = NULL; /* array of notable gaps in the chain */
    Py_ssize_t num_gaps = 0; /* number of notable gaps recorded so far */
    Py_ssize_t *selected_indices = NULL; /* indices of gaps skipped over */
    Py_ssize_t num_selected = 0;         /* number of gaps skipped */
    PyObject *chunk = NULL;              /* individual slice */
    PyObject *allchunks = NULL;          /* all slices */
    Py_ssize_t previdx;

    /* parsing argument */
    if (!PyArg_ParseTuple(args, "O!dn", &PyList_Type, &list_revs,
                          &targetdensity, &mingapsize)) {
        goto bail;
    }

    /* If the delta chain contains a single element, we do not need
     * slicing */
    num_revs = PyList_GET_SIZE(list_revs);
    if (num_revs <= 1) {
        result = PyTuple_Pack(1, list_revs);
        goto done;
    }

    /* Turn the python list into a native integer array (for efficiency) */
    revs = (Py_ssize_t *)calloc(num_revs, sizeof(Py_ssize_t));
    if (revs == NULL) {
        PyErr_NoMemory();
        goto bail;
    }
    for (i = 0; i < num_revs; i++) {
        Py_ssize_t revnum = PyInt_AsLong(PyList_GET_ITEM(list_revs, i));
        if (revnum == -1 && PyErr_Occurred()) {
            goto bail;
        }
        if (revnum < nullrev || revnum >= idxlen) {
            PyErr_Format(PyExc_IndexError,
                         "index out of range: %zd", revnum);
            goto bail;
        }
        revs[i] = revnum;
    }

    /* Compute and check various properties of the unsliced delta chain */
    deltachainspan = index_segment_span(self, revs[0], revs[num_revs - 1]);
    if (deltachainspan < 0) {
        goto bail;
    }

    if (deltachainspan <= mingapsize) {
        result = PyTuple_Pack(1, list_revs);
        goto done;
    }
    chainpayload = 0;
    for (i = 0; i < num_revs; i++) {
        int tmp = index_get_length(self, revs[i]);
        if (tmp < 0) {
            goto bail;
        }
        chainpayload += tmp;
    }

    readdata = deltachainspan;
    density = 1.0;

    if (0 < deltachainspan) {
        density = (double)chainpayload / (double)deltachainspan;
    }

    if (density >= targetdensity) {
        result = PyTuple_Pack(1, list_revs);
        goto done;
    }

    /* if chain is too sparse, look for relevant gaps */
    gaps = (struct Gap *)calloc(num_revs, sizeof(struct Gap));
    if (gaps == NULL) {
        PyErr_NoMemory();
        goto bail;
    }

    previous_end = -1;
    for (i = 0; i < num_revs; i++) {
        int64_t revstart;
        int revsize;
        revstart = index_get_start(self, revs[i]);
        if (revstart < 0) {
            goto bail;
        }
        revsize = index_get_length(self, revs[i]);
        if (revsize < 0) {
            goto bail;
        }
        if (revsize == 0) {
            continue;
        }
        if (previous_end >= 0) {
            int64_t gapsize = revstart - previous_end;
            if (gapsize > mingapsize) {
                gaps[num_gaps].size = gapsize;
                gaps[num_gaps].idx = i;
                num_gaps += 1;
            }
        }
        previous_end = revstart + revsize;
    }
    if (num_gaps == 0) {
        result = PyTuple_Pack(1, list_revs);
        goto done;
    }
    qsort(gaps, num_gaps, sizeof(struct Gap), &gap_compare);

    /* Slice the largest gaps first; they improve the density the most */
    selected_indices =
        (Py_ssize_t *)malloc((num_gaps + 1) * sizeof(Py_ssize_t));
    if (selected_indices == NULL) {
        PyErr_NoMemory();
        goto bail;
    }

    for (i = num_gaps - 1; i >= 0; i--) {
        selected_indices[num_selected] = gaps[i].idx;
        readdata -= gaps[i].size;
        num_selected += 1;
        if (readdata <= 0) {
            density = 1.0;
        } else {
            density = (double)chainpayload / (double)readdata;
        }
        if (density >= targetdensity) {
            break;
        }
    }
    qsort(selected_indices, num_selected, sizeof(Py_ssize_t),
          &Py_ssize_t_compare);

    /* create the resulting slice */
    allchunks = PyList_New(0);
    if (allchunks == NULL) {
        goto bail;
    }
    previdx = 0;
    selected_indices[num_selected] = num_revs;
    for (i = 0; i <= num_selected; i++) {
        Py_ssize_t idx = selected_indices[i];
        Py_ssize_t endidx = trim_endidx(self, revs, previdx, idx);
        if (endidx < 0) {
            goto bail;
        }
        if (previdx < endidx) {
            chunk = PyList_GetSlice(list_revs, previdx, endidx);
            if (chunk == NULL) {
                goto bail;
            }
            if (PyList_Append(allchunks, chunk) == -1) {
                goto bail;
            }
            Py_DECREF(chunk);
            chunk = NULL;
        }
        previdx = idx;
    }
    result = allchunks;
    goto done;

bail:
    Py_XDECREF(allchunks);
    Py_XDECREF(chunk);
done:
    free(revs);
    free(gaps);
    free(selected_indices);
    return result;
}

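/*
 * Extract the nibble of `node` consumed at the given trie level: even
 * levels read the high four bits of a byte, odd levels the low four.
 * For a node whose first byte is 0x1a, level 0 yields 0x1 and level 1
 * yields 0xa.
 */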
static inline int nt_level(const char *node, Py_ssize_t level)
{
    int v = node[level >> 1];
    if (!(level & 1))
        v >>= 4;
    return v & 0xf;
}

/*
 * Return values:
 *
 * -4: match is ambiguous (multiple candidates)
 * -2: not found
 * rest: valid rev
 */
static int nt_find(nodetree *self, const char *node, Py_ssize_t nodelen,
                   int hex)
{
    int (*getnybble)(const char *, Py_ssize_t) = hex ? hexdigit : nt_level;
    int level, maxlevel, off;

    /* If the input is binary, do a fast check for the nullid first. */
    if (!hex && nodelen == self->nodelen && node[0] == '\0' &&
        node[1] == '\0' && memcmp(node, nullid, self->nodelen) == 0)
        return -1;

    if (hex)
        maxlevel = nodelen;
    else
        maxlevel = 2 * nodelen;
    if (maxlevel > 2 * self->nodelen)
        maxlevel = 2 * self->nodelen;

    for (level = off = 0; level < maxlevel; level++) {
        int k = getnybble(node, level);
        nodetreenode *n = &self->nodes[off];
        int v = n->children[k];

        if (v < 0) {
            const char *n;
            Py_ssize_t i;

            v = -(v + 2);
            n = index_node(self->index, v);
            if (n == NULL)
                return -2;
            for (i = level; i < maxlevel; i++)
                if (getnybble(node, i) != nt_level(n, i))
                    return -2;
            return v;
        }
        if (v == 0)
            return -2;
        off = v;
    }
    /* multiple matches against an ambiguous prefix */
    return -4;
}

static int nt_new(nodetree *self)
{
    if (self->length == self->capacity) {
        size_t newcapacity;
        nodetreenode *newnodes;
        newcapacity = self->capacity * 2;
        if (newcapacity >= SIZE_MAX / sizeof(nodetreenode)) {
            PyErr_SetString(PyExc_MemoryError,
                            "overflow in nt_new");
            return -1;
        }
        newnodes =
            realloc(self->nodes, newcapacity * sizeof(nodetreenode));
        if (newnodes == NULL) {
            PyErr_SetString(PyExc_MemoryError, "out of memory");
            return -1;
        }
        self->capacity = newcapacity;
        self->nodes = newnodes;
        memset(&self->nodes[self->length], 0,
               sizeof(nodetreenode) * (self->capacity - self->length));
    }
    return self->length++;
}

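/*
 * Insert `node` into the trie, mapping it to `rev`.  Children use the
 * encoding described at the top of this file: 0 means empty, a
 * positive value is the offset of a child trie node, and a leaf for
 * revision r is stored as -(r + 2), so rev 0 becomes -2 and rev 5
 * becomes -7.  When two nodes collide on a prefix, interior nodes are
 * added one level at a time until their nibbles diverge.
 */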
static int nt_insert(nodetree *self, const char *node, int rev)
{
    int level = 0;
    int off = 0;

    while (level < 2 * self->nodelen) {
        int k = nt_level(node, level);
        nodetreenode *n;
        int v;

        n = &self->nodes[off];
        v = n->children[k];

        if (v == 0) {
            n->children[k] = -rev - 2;
            return 0;
        }
        if (v < 0) {
            const char *oldnode =
                index_node_existing(self->index, -(v + 2));
            int noff;

            if (oldnode == NULL)
                return -1;
            if (!memcmp(oldnode, node, self->nodelen)) {
                n->children[k] = -rev - 2;
                return 0;
            }
            noff = nt_new(self);
            if (noff == -1)
                return -1;
            /* self->nodes may have been changed by realloc */
            self->nodes[off].children[k] = noff;
            off = noff;
            n = &self->nodes[off];
            n->children[nt_level(oldnode, ++level)] = v;
            if (level > self->depth)
                self->depth = level;
            self->splits += 1;
        } else {
            level += 1;
            off = v;
        }
    }

    return -1;
}

static PyObject *ntobj_insert(nodetreeObject *self, PyObject *args)
{
    Py_ssize_t rev;
    const char *node;
    Py_ssize_t length;
    if (!PyArg_ParseTuple(args, "n", &rev))
        return NULL;
    length = index_length(self->nt.index);
    if (rev < 0 || rev >= length) {
        PyErr_SetString(PyExc_ValueError, "revlog index out of range");
        return NULL;
    }
    node = index_node_existing(self->nt.index, rev);
    if (node == NULL)
        return NULL;
    if (nt_insert(&self->nt, node, (int)rev) == -1)
        return NULL;
    Py_RETURN_NONE;
}

static int nt_delete_node(nodetree *self, const char *node)
{
    /* rev==-2 happens to get encoded as 0, which is interpreted as not
     * set */
    return nt_insert(self, node, -2);
}

static int nt_init(nodetree *self, indexObject *index, unsigned capacity)
{
    /* Initialize before overflow-checking to avoid nt_dealloc() crash. */
    self->nodes = NULL;

    self->index = index;
    /* The input capacity is in terms of revisions, while the field is in
     * terms of nodetree nodes. */
    self->capacity = (capacity < 4 ? 4 : capacity / 2);
    self->nodelen = index->nodelen;
    self->depth = 0;
    self->splits = 0;
    if (self->capacity > SIZE_MAX / sizeof(nodetreenode)) {
        PyErr_SetString(PyExc_ValueError, "overflow in init_nt");
        return -1;
    }
    self->nodes = calloc(self->capacity, sizeof(nodetreenode));
    if (self->nodes == NULL) {
        PyErr_NoMemory();
        return -1;
    }
    self->length = 1;
    return 0;
}

static int ntobj_init(nodetreeObject *self, PyObject *args)
{
    PyObject *index;
    unsigned capacity;
    if (!PyArg_ParseTuple(args, "O!I", &HgRevlogIndex_Type, &index,
                          &capacity))
        return -1;
    Py_INCREF(index);
    return nt_init(&self->nt, (indexObject *)index, capacity);
}

static int nt_partialmatch(nodetree *self, const char *node, Py_ssize_t nodelen)
{
    return nt_find(self, node, nodelen, 1);
}

/*
 * Find the length of the shortest unique prefix of node.
 *
 * Return values:
 *
 * -3: error (exception set)
 * -2: not found (no exception set)
 * rest: length of shortest prefix
 */
static int nt_shortest(nodetree *self, const char *node)
{
    int level, off;

    for (level = off = 0; level < 2 * self->nodelen; level++) {
        int k, v;
        nodetreenode *n = &self->nodes[off];
        k = nt_level(node, level);
        v = n->children[k];
        if (v < 0) {
            const char *n;
            v = -(v + 2);
            n = index_node_existing(self->index, v);
            if (n == NULL)
                return -3;
            if (memcmp(node, n, self->nodelen) != 0)
                /*
                 * Found a unique prefix, but it wasn't for the
                 * requested node (i.e. the requested node does
                 * not exist).
                 */
                return -2;
            return level + 1;
        }
        if (v == 0)
            return -2;
        off = v;
    }
    /*
     * The node was still not unique after 2 * nodelen hex digits, which
     * cannot happen. If we get here, there is a programming error in
     * this file that made us insert a node longer than 2 * nodelen hex
     * digits.
     */
    PyErr_SetString(PyExc_Exception, "broken node tree");
    return -3;
}

static PyObject *ntobj_shortest(nodetreeObject *self, PyObject *args)
{
    PyObject *val;
    char *node;
    int length;

    if (!PyArg_ParseTuple(args, "O", &val))
        return NULL;
    if (node_check(self->nt.nodelen, val, &node) == -1)
        return NULL;

    length = nt_shortest(&self->nt, node);
    if (length == -3)
        return NULL;
    if (length == -2) {
        raise_revlog_error();
        return NULL;
    }
    return PyInt_FromLong(length);
}

static void nt_dealloc(nodetree *self)
{
    free(self->nodes);
    self->nodes = NULL;
}

static void ntobj_dealloc(nodetreeObject *self)
{
    Py_XDECREF(self->nt.index);
    nt_dealloc(&self->nt);
    PyObject_Del(self);
}

static PyMethodDef ntobj_methods[] = {
    {"insert", (PyCFunction)ntobj_insert, METH_VARARGS,
     "insert an index entry"},
    {"shortest", (PyCFunction)ntobj_shortest, METH_VARARGS,
     "find length of shortest hex nodeid of a binary ID"},
    {NULL} /* Sentinel */
};

static PyTypeObject nodetreeType = {
    PyVarObject_HEAD_INIT(NULL, 0) /* header */
    "parsers.nodetree",            /* tp_name */
    sizeof(nodetreeObject),        /* tp_basicsize */
    0,                             /* tp_itemsize */
    (destructor)ntobj_dealloc,     /* tp_dealloc */
    0,                             /* tp_print */
    0,                             /* tp_getattr */
    0,                             /* tp_setattr */
    0,                             /* tp_compare */
    0,                             /* tp_repr */
    0,                             /* tp_as_number */
    0,                             /* tp_as_sequence */
    0,                             /* tp_as_mapping */
    0,                             /* tp_hash */
    0,                             /* tp_call */
    0,                             /* tp_str */
    0,                             /* tp_getattro */
    0,                             /* tp_setattro */
    0,                             /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT,            /* tp_flags */
    "nodetree",                    /* tp_doc */
    0,                             /* tp_traverse */
    0,                             /* tp_clear */
    0,                             /* tp_richcompare */
    0,                             /* tp_weaklistoffset */
    0,                             /* tp_iter */
    0,                             /* tp_iternext */
    ntobj_methods,                 /* tp_methods */
    0,                             /* tp_members */
    0,                             /* tp_getset */
    0,                             /* tp_base */
    0,                             /* tp_dict */
    0,                             /* tp_descr_get */
    0,                             /* tp_descr_set */
    0,                             /* tp_dictoffset */
    (initproc)ntobj_init,          /* tp_init */
    0,                             /* tp_alloc */
};

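/*
 * Create the radix tree lazily, on first use.  Only nullid is inserted
 * up front; `ntrev` tracks how far the reverse scans performed by
 * index_find_node have populated it (revisions at or above `ntrev` are
 * already cached).
 */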
static int index_init_nt(indexObject *self)
{
    if (!self->ntinitialized) {
        if (nt_init(&self->nt, self, (int)self->length) == -1) {
            nt_dealloc(&self->nt);
            return -1;
        }
        if (nt_insert(&self->nt, nullid, -1) == -1) {
            nt_dealloc(&self->nt);
            return -1;
        }
        self->ntinitialized = 1;
        self->ntrev = (int)index_length(self);
        self->ntlookups = 1;
        self->ntmisses = 0;
    }
    return 0;
}

/*
 * Return values:
 *
 * -3: error (exception set)
 * -2: not found (no exception set)
 * rest: valid rev
 */
static int index_find_node(indexObject *self, const char *node)
{
    int rev;

    if (index_init_nt(self) == -1)
        return -3;

    self->ntlookups++;
    rev = nt_find(&self->nt, node, self->nodelen, 0);
    if (rev >= -1)
        return rev;

    /*
     * For the first handful of lookups, we scan the entire index,
     * and cache only the matching nodes. This optimizes for cases
     * like "hg tip", where only a few nodes are accessed.
     *
     * After that, we cache every node we visit, using a single
     * scan amortized over multiple lookups. This gives the best
     * bulk performance, e.g. for "hg log".
     */
    if (self->ntmisses++ < 4) {
        for (rev = self->ntrev - 1; rev >= 0; rev--) {
            const char *n = index_node_existing(self, rev);
            if (n == NULL)
                return -3;
            if (memcmp(node, n, self->nodelen) == 0) {
                if (nt_insert(&self->nt, n, rev) == -1)
                    return -3;
                break;
            }
        }
    } else {
        for (rev = self->ntrev - 1; rev >= 0; rev--) {
            const char *n = index_node_existing(self, rev);
            if (n == NULL)
                return -3;
            if (nt_insert(&self->nt, n, rev) == -1) {
                self->ntrev = rev + 1;
                return -3;
            }
            if (memcmp(node, n, self->nodelen) == 0) {
                break;
            }
        }
        self->ntrev = rev;
    }

    if (rev >= 0)
        return rev;
    return -2;
}

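/*
 * __getitem__ support: an integer argument is treated as a revision
 * number and returns the corresponding index entry, while a binary
 * node is resolved to its revision number (nullid yielding -1).
 */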
static PyObject *index_getitem(indexObject *self, PyObject *value)
{
    char *node;
    int rev;

    if (PyInt_Check(value)) {
        long idx;
        if (!pylong_to_long(value, &idx)) {
            return NULL;
        }
        return index_get(self, idx);
    }

    if (node_check(self->nodelen, value, &node) == -1)
        return NULL;
    rev = index_find_node(self, node);
    if (rev >= -1)
        return PyInt_FromLong(rev);
    if (rev == -2)
        raise_revlog_error();
    return NULL;
}

/*
 * Fully populate the radix tree.
 */
static int index_populate_nt(indexObject *self)
{
    int rev;
    if (self->ntrev > 0) {
        for (rev = self->ntrev - 1; rev >= 0; rev--) {
            const char *n = index_node_existing(self, rev);
            if (n == NULL)
                return -1;
            if (nt_insert(&self->nt, n, rev) == -1)
                return -1;
        }
        self->ntrev = -1;
    }
    return 0;
}

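/*
 * Resolve an ascii hex prefix to a full binary node.  Returns None when
 * the prefix contains non-hex characters or matches no revision, the
 * nullid when it is a prefix of the null node, and raises a revlog
 * error when the prefix is ambiguous.  The radix tree is fully
 * populated first, so the answer is authoritative.
 */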
static PyObject *index_partialmatch(indexObject *self, PyObject *args)
{
	const char *fullnode;
	Py_ssize_t nodelen;
	char *node;
	int rev, i;

	if (!PyArg_ParseTuple(args, PY23("s#", "y#"), &node, &nodelen))
		return NULL;

	if (nodelen < 1) {
		PyErr_SetString(PyExc_ValueError, "key too short");
		return NULL;
	}

	if (nodelen > 2 * self->nodelen) {
		PyErr_SetString(PyExc_ValueError, "key too long");
		return NULL;
	}

	for (i = 0; i < nodelen; i++)
		hexdigit(node, i);
	if (PyErr_Occurred()) {
		/* input contains non-hex characters */
		PyErr_Clear();
		Py_RETURN_NONE;
	}

	if (index_init_nt(self) == -1)
		return NULL;
	if (index_populate_nt(self) == -1)
		return NULL;
	rev = nt_partialmatch(&self->nt, node, nodelen);

	switch (rev) {
	case -4:
		raise_revlog_error();
		return NULL;
	case -2:
		Py_RETURN_NONE;
	case -1:
		return PyBytes_FromStringAndSize(nullid, self->nodelen);
	}

	fullnode = index_node_existing(self, rev);
	if (fullnode == NULL) {
		return NULL;
	}
	return PyBytes_FromStringAndSize(fullnode, self->nodelen);
}

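/*
 * Sketch of the return contract above, as seen from Python
 * (illustrative, not normative): a uniquely matching prefix yields
 * the full binary node, rev == -1 yields nullid, rev == -2 (no
 * match) and non-hex input both yield None, and rev == -4 raises
 * RevlogError.
 */
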
static PyObject *index_shortest(indexObject *self, PyObject *args)
{
	PyObject *val;
	char *node;
	int length;

	if (!PyArg_ParseTuple(args, "O", &val))
		return NULL;
	if (node_check(self->nodelen, val, &node) == -1)
		return NULL;

	self->ntlookups++;
	if (index_init_nt(self) == -1)
		return NULL;
	if (index_populate_nt(self) == -1)
		return NULL;
	length = nt_shortest(&self->nt, node);
	if (length == -3)
		return NULL;
	if (length == -2) {
		raise_revlog_error();
		return NULL;
	}
	return PyInt_FromLong(length);
}

static PyObject *index_m_get(indexObject *self, PyObject *args)
{
	PyObject *val;
	char *node;
	int rev;

	if (!PyArg_ParseTuple(args, "O", &val))
		return NULL;
	if (node_check(self->nodelen, val, &node) == -1)
		return NULL;
	rev = index_find_node(self, node);
	if (rev == -3)
		return NULL;
	if (rev == -2)
		Py_RETURN_NONE;
	return PyInt_FromLong(rev);
}

static int index_contains(indexObject *self, PyObject *value)
{
	char *node;

	if (PyInt_Check(value)) {
		long rev;
		if (!pylong_to_long(value, &rev)) {
			return -1;
		}
		return rev >= -1 && rev < index_length(self);
	}

	if (node_check(self->nodelen, value, &node) == -1)
		return -1;

	switch (index_find_node(self, node)) {
	case -3:
		return -1;
	case -2:
		return 0;
	default:
		return 1;
	}
}

static PyObject *index_m_has_node(indexObject *self, PyObject *args)
{
	int ret = index_contains(self, args);
	if (ret < 0)
		return NULL;
	return PyBool_FromLong((long)ret);
}

static PyObject *index_m_rev(indexObject *self, PyObject *val)
{
	char *node;
	int rev;

	if (node_check(self->nodelen, val, &node) == -1)
		return NULL;
	rev = index_find_node(self, node);
	if (rev >= -1)
		return PyInt_FromLong(rev);
	if (rev == -2)
		raise_revlog_error();
	return NULL;
}

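/*
 * The lookup entry points above differ only in how they report a
 * missing node; a hypothetical Python session:
 *
 *   idx.get_rev(unknown)   # -> None
 *   idx.rev(unknown)       # -> raises error.RevlogError
 *   idx.has_node(unknown)  # -> False
 *   unknown in idx         # -> False (via index_contains)
 */
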
typedef uint64_t bitmask;

/*
 * Given a disjoint set of revs, return all candidates for the
 * greatest common ancestor. In revset notation, this is the set
 * "heads(::a and ::b and ...)"
 */
static PyObject *find_gca_candidates(indexObject *self, const int *revs,
                                     int revcount)
{
	const bitmask allseen = (1ull << revcount) - 1;
	const bitmask poison = 1ull << revcount;
	PyObject *gca = PyList_New(0);
	int i, v, interesting;
	int maxrev = -1;
	bitmask sp;
	bitmask *seen;

	if (gca == NULL)
		return PyErr_NoMemory();

	for (i = 0; i < revcount; i++) {
		if (revs[i] > maxrev)
			maxrev = revs[i];
	}

	seen = calloc(sizeof(*seen), maxrev + 1);
	if (seen == NULL) {
		Py_DECREF(gca);
		return PyErr_NoMemory();
	}

	for (i = 0; i < revcount; i++)
		seen[revs[i]] = 1ull << i;

	interesting = revcount;

	for (v = maxrev; v >= 0 && interesting; v--) {
		bitmask sv = seen[v];
		int parents[2];

		if (!sv)
			continue;

		if (sv < poison) {
			interesting -= 1;
			if (sv == allseen) {
				PyObject *obj = PyInt_FromLong(v);
				if (obj == NULL)
					goto bail;
				if (PyList_Append(gca, obj) == -1) {
					Py_DECREF(obj);
					goto bail;
				}
				sv |= poison;
				for (i = 0; i < revcount; i++) {
					if (revs[i] == v)
						goto done;
				}
			}
		}
		if (index_get_parents(self, v, parents, maxrev) < 0)
			goto bail;

		for (i = 0; i < 2; i++) {
			int p = parents[i];
			if (p == -1)
				continue;
			sp = seen[p];
			if (sv < poison) {
				if (sp == 0) {
					seen[p] = sv;
					interesting++;
				} else if (sp != sv)
					seen[p] |= sv;
			} else {
				if (sp && sp < poison)
					interesting--;
				seen[p] = sv;
			}
		}
	}

done:
	free(seen);
	return gca;
bail:
	free(seen);
	Py_XDECREF(gca);
	return NULL;
}

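/*
 * A simplified Python sketch of the bitmask walk above (assumption:
 * `parents(v)` returns the two parent revs of v, -1 for none; the
 * early-exit bookkeeping via `interesting` is omitted):
 *
 *   def gca_candidates(parents, revs):
 *       allseen = (1 << len(revs)) - 1
 *       poison = 1 << len(revs)
 *       seen = {r: 1 << i for i, r in enumerate(revs)}
 *       gca = []
 *       for v in range(max(revs), -1, -1):
 *           sv = seen.get(v, 0)
 *           if not sv:
 *               continue
 *           if sv < poison and sv == allseen:
 *               gca.append(v)  # reachable from every input rev
 *               sv |= poison   # its own ancestors can no longer win
 *           for p in parents(v):
 *               if p == -1:
 *                   continue
 *               seen[p] = sv if sv >= poison else seen.get(p, 0) | sv
 *       return gca
 */
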
/*
 * Given a disjoint set of revs, return the subset with the longest
 * path to the root.
 */
static PyObject *find_deepest(indexObject *self, PyObject *revs)
{
	const Py_ssize_t revcount = PyList_GET_SIZE(revs);
	static const Py_ssize_t capacity = 24;
	int *depth, *interesting = NULL;
	int i, j, v, ninteresting;
	PyObject *dict = NULL, *keys = NULL;
	long *seen = NULL;
	int maxrev = -1;
	long final;

	if (revcount > capacity) {
		PyErr_Format(PyExc_OverflowError,
		             "bitset size (%ld) > capacity (%ld)",
		             (long)revcount, (long)capacity);
		return NULL;
	}

	for (i = 0; i < revcount; i++) {
		int n = (int)PyInt_AsLong(PyList_GET_ITEM(revs, i));
		if (n > maxrev)
			maxrev = n;
	}

	depth = calloc(sizeof(*depth), maxrev + 1);
	if (depth == NULL)
		return PyErr_NoMemory();

	seen = calloc(sizeof(*seen), maxrev + 1);
	if (seen == NULL) {
		PyErr_NoMemory();
		goto bail;
	}

	interesting = calloc(sizeof(*interesting), ((size_t)1) << revcount);
	if (interesting == NULL) {
		PyErr_NoMemory();
		goto bail;
	}

	if (PyList_Sort(revs) == -1)
		goto bail;

	for (i = 0; i < revcount; i++) {
		int n = (int)PyInt_AsLong(PyList_GET_ITEM(revs, i));
		long b = 1l << i;
		depth[n] = 1;
		seen[n] = b;
		interesting[b] = 1;
	}

	/* invariant: ninteresting is the number of non-zero entries in
	 * interesting. */
	ninteresting = (int)revcount;

	for (v = maxrev; v >= 0 && ninteresting > 1; v--) {
		int dv = depth[v];
		int parents[2];
		long sv;

		if (dv == 0)
			continue;

		sv = seen[v];
		if (index_get_parents(self, v, parents, maxrev) < 0)
			goto bail;

		for (i = 0; i < 2; i++) {
			int p = parents[i];
			long sp;
			int dp;

			if (p == -1)
				continue;

			dp = depth[p];
			sp = seen[p];
			if (dp <= dv) {
				depth[p] = dv + 1;
				if (sp != sv) {
					interesting[sv] += 1;
					seen[p] = sv;
					if (sp) {
						interesting[sp] -= 1;
						if (interesting[sp] == 0)
							ninteresting -= 1;
					}
				}
			} else if (dv == dp - 1) {
				long nsp = sp | sv;
				if (nsp == sp)
					continue;
				seen[p] = nsp;
				interesting[sp] -= 1;
				if (interesting[sp] == 0)
					ninteresting -= 1;
				if (interesting[nsp] == 0)
					ninteresting += 1;
				interesting[nsp] += 1;
			}
		}
		interesting[sv] -= 1;
		if (interesting[sv] == 0)
			ninteresting -= 1;
	}

	final = 0;
	j = ninteresting;
	for (i = 0; i < (int)(2 << revcount) && j > 0; i++) {
		if (interesting[i] == 0)
			continue;
		final |= i;
		j -= 1;
	}
	if (final == 0) {
		keys = PyList_New(0);
		goto bail;
	}

	dict = PyDict_New();
	if (dict == NULL)
		goto bail;

	for (i = 0; i < revcount; i++) {
		PyObject *key;

		if ((final & (1 << i)) == 0)
			continue;

		key = PyList_GET_ITEM(revs, i);
		Py_INCREF(key);
		Py_INCREF(Py_None);
		if (PyDict_SetItem(dict, key, Py_None) == -1) {
			Py_DECREF(key);
			Py_DECREF(Py_None);
			goto bail;
		}
	}

	keys = PyDict_Keys(dict);

bail:
	free(depth);
	free(seen);
	free(interesting);
	Py_XDECREF(dict);

	return keys;
}

/*
 * Given a (possibly overlapping) set of revs, return all the
 * common ancestors heads: heads(::args[0] and ::args[1] and ...)
 */
static PyObject *index_commonancestorsheads(indexObject *self, PyObject *args)
{
	PyObject *ret = NULL;
	Py_ssize_t argcount, i, len;
	bitmask repeat = 0;
	int revcount = 0;
	int *revs;

	argcount = PySequence_Length(args);
	revs = PyMem_Malloc(argcount * sizeof(*revs));
	if (argcount > 0 && revs == NULL)
		return PyErr_NoMemory();
	len = index_length(self);

	for (i = 0; i < argcount; i++) {
		static const int capacity = 24;
		PyObject *obj = PySequence_GetItem(args, i);
		bitmask x;
		long val;

		if (!PyInt_Check(obj)) {
			PyErr_SetString(PyExc_TypeError,
			                "arguments must all be ints");
			Py_DECREF(obj);
			goto bail;
		}
		val = PyInt_AsLong(obj);
		Py_DECREF(obj);
		if (val == -1) {
			ret = PyList_New(0);
			goto done;
		}
		if (val < 0 || val >= len) {
			PyErr_SetString(PyExc_IndexError, "index out of range");
			goto bail;
		}
		/* this cheesy bloom filter lets us avoid some more
		 * expensive duplicate checks in the common set-is-disjoint
		 * case */
		x = 1ull << (val & 0x3f);
		if (repeat & x) {
			int k;
			for (k = 0; k < revcount; k++) {
				if (val == revs[k])
					goto duplicate;
			}
		} else
			repeat |= x;
		if (revcount >= capacity) {
			PyErr_Format(PyExc_OverflowError,
			             "bitset size (%d) > capacity (%d)",
			             revcount, capacity);
			goto bail;
		}
		revs[revcount++] = (int)val;
	duplicate:;
	}

	if (revcount == 0) {
		ret = PyList_New(0);
		goto done;
	}
	if (revcount == 1) {
		PyObject *obj;
		ret = PyList_New(1);
		if (ret == NULL)
			goto bail;
		obj = PyInt_FromLong(revs[0]);
		if (obj == NULL)
			goto bail;
		PyList_SET_ITEM(ret, 0, obj);
		goto done;
	}

	ret = find_gca_candidates(self, revs, revcount);
	if (ret == NULL)
		goto bail;

done:
	PyMem_Free(revs);
	return ret;

bail:
	PyMem_Free(revs);
	Py_XDECREF(ret);
	return NULL;
}

/*
 * Given a (possibly overlapping) set of revs, return the greatest
 * common ancestors: those with the longest path to the root.
 */
static PyObject *index_ancestors(indexObject *self, PyObject *args)
{
	PyObject *ret;
	PyObject *gca = index_commonancestorsheads(self, args);
	if (gca == NULL)
		return NULL;

	if (PyList_GET_SIZE(gca) <= 1) {
		return gca;
	}

	ret = find_deepest(self, gca);
	Py_DECREF(gca);
	return ret;
}

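/*
 * Worked example (hypothetical history): with revisions
 *
 *   0 -- 1 -- 2
 *         \-- 3
 *
 * commonancestorsheads(2, 3) is [1], since heads(::2 and ::3) ==
 * heads({0, 1}) == {1}. ancestors(2, 3) then returns that single
 * candidate unchanged; find_deepest() only runs when several
 * candidates survive.
 */
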
/*
 * Invalidate any trie entries introduced by added revs.
 */
static void index_invalidate_added(indexObject *self, Py_ssize_t start)
{
	Py_ssize_t i, len;

	len = self->length + self->new_length;
	i = start - self->length;
	if (i < 0)
		return;

	for (i = start; i < len; i++)
		nt_delete_node(&self->nt, index_deref(self, i) + 32);

	self->new_length = start - self->length;
}

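/*
 * The literal 32 above is the byte offset of the node hash inside an
 * on-disk index entry (the fixed header fields occupy the first 32
 * bytes of an entry), so index_deref(self, i) + 32 points at the node
 * to remove from the trie.
 */
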
/*
 * Delete a numeric range of revs, which must be at the end of the
 * index.
 */
static int index_slice_del(indexObject *self, PyObject *item)
{
	Py_ssize_t start, stop, step, slicelength;
	Py_ssize_t length = index_length(self) + 1;
	int ret = 0;

	/* Argument changed from PySliceObject* to PyObject* in Python 3. */
#ifdef IS_PY3K
	if (PySlice_GetIndicesEx(item, length, &start, &stop, &step,
	                         &slicelength) < 0)
#else
	if (PySlice_GetIndicesEx((PySliceObject *)item, length, &start, &stop,
	                         &step, &slicelength) < 0)
#endif
		return -1;

	if (slicelength <= 0)
		return 0;

	if ((step < 0 && start < stop) || (step > 0 && start > stop))
		stop = start;

	if (step < 0) {
		stop = start + 1;
		start = stop + step * (slicelength - 1) - 1;
		step = -step;
	}

	if (step != 1) {
		PyErr_SetString(PyExc_ValueError,
		                "revlog index delete requires step size of 1");
		return -1;
	}

	if (stop != length - 1) {
		PyErr_SetString(PyExc_IndexError,
		                "revlog index deletion indices are invalid");
		return -1;
	}

	if (start < self->length) {
		if (self->ntinitialized) {
			Py_ssize_t i;

			for (i = start; i < self->length; i++) {
				const char *node = index_node_existing(self, i);
				if (node == NULL)
					return -1;

				nt_delete_node(&self->nt, node);
			}
			if (self->new_length)
				index_invalidate_added(self, self->length);
			if (self->ntrev > start)
				self->ntrev = (int)start;
		} else if (self->new_length) {
			self->new_length = 0;
		}

		self->length = start;
		goto done;
	}

	if (self->ntinitialized) {
		index_invalidate_added(self, start);
		if (self->ntrev > start)
			self->ntrev = (int)start;
	} else {
		self->new_length = start - self->length;
	}
done:
	Py_CLEAR(self->headrevs);
	return ret;
}

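/*
 * From Python only suffix deletion is expressible (a sketch, matching
 * the checks above): `del idx[5:-1]` strips revision 5 and everything
 * after it, while any slice that does not run to the end of the
 * index, or that has a step other than 1, is rejected.
 */
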
/*
 * Supported ops:
 *
 * slice deletion
 * string assignment (extend node->rev mapping)
 * string deletion (shrink node->rev mapping)
 */
static int index_assign_subscript(indexObject *self, PyObject *item,
                                  PyObject *value)
{
	char *node;
	long rev;

	if (PySlice_Check(item) && value == NULL)
		return index_slice_del(self, item);

	if (node_check(self->nodelen, item, &node) == -1)
		return -1;

	if (value == NULL)
		return self->ntinitialized ? nt_delete_node(&self->nt, node)
		                           : 0;
	rev = PyInt_AsLong(value);
	if (rev > INT_MAX || rev < 0) {
		if (!PyErr_Occurred())
			PyErr_SetString(PyExc_ValueError, "rev out of range");
		return -1;
	}

	if (index_init_nt(self) == -1)
		return -1;
	return nt_insert(&self->nt, node, (int)rev);
}

/*
 * Find all RevlogNG entries in an index that has inline data. Update
 * the optional "offsets" table with those entries.
 */
static Py_ssize_t inline_scan(indexObject *self, const char **offsets)
{
	const char *data = (const char *)self->buf.buf;
	Py_ssize_t pos = 0;
	Py_ssize_t end = self->buf.len;
	long incr = self->entry_size;
	Py_ssize_t len = 0;

	while (pos + self->entry_size <= end && pos >= 0) {
		uint32_t comp_len, sidedata_comp_len = 0;
		/* 3rd element of header is length of compressed inline data */
		comp_len = getbe32(data + pos + 8);
		if (self->entry_size == v2_entry_size) {
			sidedata_comp_len = getbe32(data + pos + 72);
		}
		incr = self->entry_size + comp_len + sidedata_comp_len;
		if (offsets)
			offsets[len] = data + pos;
		len++;
		pos += incr;
	}

	if (pos != end) {
		if (!PyErr_Occurred())
			PyErr_SetString(PyExc_ValueError, "corrupt index file");
		return -1;
	}

	return len;
}

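/*
 * A pure-Python sketch of the same scan for the v1 layout (assumes
 * 64-byte entries with the compressed length at offset 8, as read
 * above; illustrative only, mirroring the pos != end corruption
 * check):
 *
 *   import struct
 *
 *   def iter_inline_offsets(data, entry_size=64):
 *       pos = 0
 *       while pos + entry_size <= len(data):
 *           comp_len = struct.unpack_from('>i', data, pos + 8)[0]
 *           yield pos  # start of this index record
 *           pos += entry_size + comp_len  # skip record + inline data
 *       assert pos == len(data), 'corrupt index file'
 */
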
static int index_init(indexObject *self, PyObject *args, PyObject *kwargs)
{
	PyObject *data_obj, *inlined_obj, *revlogv2;
	Py_ssize_t size;

	static char *kwlist[] = {"data", "inlined", "revlogv2", NULL};

	/* Initialize before argument-checking to avoid index_dealloc() crash.
	 */
	self->added = NULL;
	self->new_length = 0;
	self->added_length = 0;
	self->data = NULL;
	memset(&self->buf, 0, sizeof(self->buf));
	self->headrevs = NULL;
	self->filteredrevs = Py_None;
	Py_INCREF(Py_None);
	self->ntinitialized = 0;
	self->offsets = NULL;
	self->nodelen = 20;
	self->nullentry = NULL;

	revlogv2 = NULL;
	if (!PyArg_ParseTupleAndKeywords(args, kwargs, "OO|O", kwlist,
	                                 &data_obj, &inlined_obj, &revlogv2))
		return -1;
	if (!PyObject_CheckBuffer(data_obj)) {
		PyErr_SetString(PyExc_TypeError,
		                "data does not support buffer interface");
		return -1;
	}
	if (self->nodelen < 20 || self->nodelen > (Py_ssize_t)sizeof(nullid)) {
		PyErr_SetString(PyExc_RuntimeError, "unsupported node size");
		return -1;
	}

	if (revlogv2 && PyObject_IsTrue(revlogv2)) {
		self->format_version = format_v2;
		self->entry_size = v2_entry_size;
	} else {
		self->format_version = format_v1;
		self->entry_size = v1_entry_size;
	}

	self->nullentry = Py_BuildValue(
	    PY23("iiiiiiis#iiBB", "iiiiiiiy#iiBB"), 0, 0, 0, -1, -1, -1, -1,
	    nullid, self->nodelen, 0, 0, comp_mode_inline, comp_mode_inline);

	if (!self->nullentry)
		return -1;
	PyObject_GC_UnTrack(self->nullentry);

	if (PyObject_GetBuffer(data_obj, &self->buf, PyBUF_SIMPLE) == -1)
		return -1;
	size = self->buf.len;

	self->inlined = inlined_obj && PyObject_IsTrue(inlined_obj);
	self->data = data_obj;

	self->ntlookups = self->ntmisses = 0;
	self->ntrev = -1;
	Py_INCREF(self->data);

	if (self->inlined) {
		Py_ssize_t len = inline_scan(self, NULL);
		if (len == -1)
			goto bail;
		self->length = len;
	} else {
		if (size % self->entry_size) {
			PyErr_SetString(PyExc_ValueError, "corrupt index file");
			goto bail;
		}
		self->length = size / self->entry_size;
	}

	return 0;
bail:
	return -1;
}

static PyObject *index_nodemap(indexObject *self)
{
	Py_INCREF(self);
	return (PyObject *)self;
}

static void _index_clearcaches(indexObject *self)
{
	if (self->offsets) {
		PyMem_Free((void *)self->offsets);
		self->offsets = NULL;
	}
	if (self->ntinitialized) {
		nt_dealloc(&self->nt);
	}
	self->ntinitialized = 0;
	Py_CLEAR(self->headrevs);
}

static PyObject *index_clearcaches(indexObject *self)
{
	_index_clearcaches(self);
	self->ntrev = -1;
	self->ntlookups = self->ntmisses = 0;
	Py_RETURN_NONE;
}

static void index_dealloc(indexObject *self)
{
	_index_clearcaches(self);
	Py_XDECREF(self->filteredrevs);
	if (self->buf.buf) {
		PyBuffer_Release(&self->buf);
		memset(&self->buf, 0, sizeof(self->buf));
	}
	Py_XDECREF(self->data);
	PyMem_Free(self->added);
	Py_XDECREF(self->nullentry);
	PyObject_Del(self);
}

static PySequenceMethods index_sequence_methods = {
    (lenfunc)index_length,      /* sq_length */
    0,                          /* sq_concat */
    0,                          /* sq_repeat */
    (ssizeargfunc)index_get,    /* sq_item */
    0,                          /* sq_slice */
    0,                          /* sq_ass_item */
    0,                          /* sq_ass_slice */
    (objobjproc)index_contains, /* sq_contains */
};

static PyMappingMethods index_mapping_methods = {
    (lenfunc)index_length,                 /* mp_length */
    (binaryfunc)index_getitem,             /* mp_subscript */
    (objobjargproc)index_assign_subscript, /* mp_ass_subscript */
};

static PyMethodDef index_methods[] = {
    {"ancestors", (PyCFunction)index_ancestors, METH_VARARGS,
     "return the gca set of the given revs"},
    {"commonancestorsheads", (PyCFunction)index_commonancestorsheads,
     METH_VARARGS,
     "return the heads of the common ancestors of the given revs"},
    {"clearcaches", (PyCFunction)index_clearcaches, METH_NOARGS,
     "clear the index caches"},
    {"get", (PyCFunction)index_m_get, METH_VARARGS, "get an index entry"},
    {"get_rev", (PyCFunction)index_m_get, METH_VARARGS,
     "return `rev` associated with a node or None"},
    {"has_node", (PyCFunction)index_m_has_node, METH_O,
     "return True if the node exists in the index"},
    {"rev", (PyCFunction)index_m_rev, METH_O,
     "return `rev` associated with a node or raise RevlogError"},
    {"computephasesmapsets", (PyCFunction)compute_phases_map_sets, METH_VARARGS,
     "compute phases"},
    {"reachableroots2", (PyCFunction)reachableroots2, METH_VARARGS,
     "reachableroots"},
    {"replace_sidedata_info", (PyCFunction)index_replace_sidedata_info,
     METH_VARARGS, "replace an existing index entry with a new value"},
    {"headrevs", (PyCFunction)index_headrevs, METH_VARARGS,
     "get head revisions"}, /* Can do filtering since 3.2 */
    {"headrevsfiltered", (PyCFunction)index_headrevs, METH_VARARGS,
     "get filtered head revisions"}, /* Can always do filtering */
    {"issnapshot", (PyCFunction)index_issnapshot, METH_O,
     "True if the object is a snapshot"},
    {"findsnapshots", (PyCFunction)index_findsnapshots, METH_VARARGS,
     "Gather snapshot data in a cache dict"},
    {"deltachain", (PyCFunction)index_deltachain, METH_VARARGS,
     "determine revisions with deltas to reconstruct fulltext"},
    {"slicechunktodensity", (PyCFunction)index_slicechunktodensity,
     METH_VARARGS, "determine revisions with deltas to reconstruct fulltext"},
    {"append", (PyCFunction)index_append, METH_O, "append an index entry"},
    {"partialmatch", (PyCFunction)index_partialmatch, METH_VARARGS,
     "match a potentially ambiguous node ID"},
    {"shortest", (PyCFunction)index_shortest, METH_VARARGS,
     "find length of shortest hex nodeid of a binary ID"},
    {"stats", (PyCFunction)index_stats, METH_NOARGS, "stats for the index"},
    {"entry_binary", (PyCFunction)index_entry_binary, METH_O,
     "return an entry in binary form"},
    {"pack_header", (PyCFunction)index_pack_header, METH_VARARGS,
     "pack the revlog header information into binary"},
    {NULL} /* Sentinel */
};

static PyGetSetDef index_getset[] = {
    {"nodemap", (getter)index_nodemap, NULL, "nodemap", NULL},
    {NULL} /* Sentinel */
};

static PyMemberDef index_members[] = {
    {"entry_size", T_LONG, offsetof(indexObject, entry_size), 0,
     "size of an index entry"},
    {NULL} /* Sentinel */
};

PyTypeObject HgRevlogIndex_Type = {
    PyVarObject_HEAD_INIT(NULL, 0) /* header */
    "parsers.index",               /* tp_name */
    sizeof(indexObject),           /* tp_basicsize */
    0,                             /* tp_itemsize */
    (destructor)index_dealloc,     /* tp_dealloc */
    0,                             /* tp_print */
    0,                             /* tp_getattr */
    0,                             /* tp_setattr */
    0,                             /* tp_compare */
    0,                             /* tp_repr */
    0,                             /* tp_as_number */
    &index_sequence_methods,       /* tp_as_sequence */
    &index_mapping_methods,        /* tp_as_mapping */
    0,                             /* tp_hash */
    0,                             /* tp_call */
    0,                             /* tp_str */
    0,                             /* tp_getattro */
    0,                             /* tp_setattro */
    0,                             /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT,            /* tp_flags */
    "revlog index",                /* tp_doc */
    0,                             /* tp_traverse */
    0,                             /* tp_clear */
    0,                             /* tp_richcompare */
    0,                             /* tp_weaklistoffset */
    0,                             /* tp_iter */
    0,                             /* tp_iternext */
    index_methods,                 /* tp_methods */
    index_members,                 /* tp_members */
    index_getset,                  /* tp_getset */
    0,                             /* tp_base */
    0,                             /* tp_dict */
    0,                             /* tp_descr_get */
    0,                             /* tp_descr_set */
    0,                             /* tp_dictoffset */
    (initproc)index_init,          /* tp_init */
    0,                             /* tp_alloc */
};

/*
 * returns a tuple of the form (index, cache) with elements as
 * follows:
 *
 * index: an index object that lazily parses Revlog (v1 or v2) records
 * cache: if data is inlined, a tuple (0, index_file_content), else None
 *        index_file_content could be a string, or a buffer
 *
 * added complications are for backwards compatibility
 */
PyObject *parse_index2(PyObject *self, PyObject *args, PyObject *kwargs)
{
	PyObject *cache = NULL;
	indexObject *idx;
	int ret;

	idx = PyObject_New(indexObject, &HgRevlogIndex_Type);
	if (idx == NULL)
		goto bail;

	ret = index_init(idx, args, kwargs);
	if (ret == -1)
		goto bail;

	if (idx->inlined) {
		cache = Py_BuildValue("iO", 0, idx->data);
		if (cache == NULL)
			goto bail;
	} else {
		cache = Py_None;
		Py_INCREF(cache);
	}

	return Py_BuildValue("NN", idx, cache);

bail:
	Py_XDECREF(idx);
	Py_XDECREF(cache);
	return NULL;
}

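/*
 * Hypothetical caller's view of the contract above:
 *
 *   index, cache = parsers.parse_index2(data, inlined)
 *   # cache is (0, data) when the revision data is inlined in the
 *   # index file, and None otherwise
 */
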
static Revlog_CAPI CAPI = {
    /* increment the abi_version field upon each change in the Revlog_CAPI
       struct or in the ABI of the listed functions */
    2,
    index_length,
    index_node,
    HgRevlogIndex_GetParents,
};

void revlog_module_init(PyObject *mod)
{
	PyObject *caps = NULL;
	HgRevlogIndex_Type.tp_new = PyType_GenericNew;
	if (PyType_Ready(&HgRevlogIndex_Type) < 0)
		return;
	Py_INCREF(&HgRevlogIndex_Type);
	PyModule_AddObject(mod, "index", (PyObject *)&HgRevlogIndex_Type);

	nodetreeType.tp_new = PyType_GenericNew;
	if (PyType_Ready(&nodetreeType) < 0)
		return;
	Py_INCREF(&nodetreeType);
	PyModule_AddObject(mod, "nodetree", (PyObject *)&nodetreeType);

	caps = PyCapsule_New(&CAPI, "mercurial.cext.parsers.revlog_CAPI", NULL);
	if (caps != NULL)
		PyModule_AddObject(mod, "revlog_CAPI", caps);
}
@@ -1,430 +1,436 @@
# parsers.py - Python implementation of parsers.c
#
# Copyright 2009 Olivia Mackall <olivia@selenic.com> and others
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

from __future__ import absolute_import

import struct
import zlib

from ..node import (
    nullrev,
    sha1nodeconstants,
)
from .. import (
    error,
    pycompat,
    util,
)

from ..revlogutils import nodemap as nodemaputil
from ..revlogutils import constants as revlog_constants

stringio = pycompat.bytesio


_pack = struct.pack
_unpack = struct.unpack
_compress = zlib.compress
_decompress = zlib.decompress

# Some code below makes tuples directly because it's more convenient. However,
# code outside this module should always use dirstatetuple.
def dirstatetuple(*x):
    # x is a tuple
    return x


def gettype(q):
    return int(q & 0xFFFF)


def offset_type(offset, type):
    return int(int(offset) << 16 | type)


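# A worked example of the two helpers above (illustrative only, not part
# of the module): offset_type(0x20, 1) == 0x200001, i.e. the byte offset
# lives in the high bits and the 16-bit flags value in the low bits, and
# gettype(0x200001) == 1 recovers the flags.

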
class BaseIndexObject(object):
    # Format of an index entry according to Python's `struct` language
    index_format = revlog_constants.INDEX_ENTRY_V1
    # Size of a C unsigned long long int, platform independent
    big_int_size = struct.calcsize(b'>Q')
    # Size of a C long int, platform independent
    int_size = struct.calcsize(b'>i')
    # An empty index entry, used as a default value to be overridden, or nullrev
    null_item = (
        0,
        0,
        0,
        -1,
        -1,
        -1,
        -1,
        sha1nodeconstants.nullid,
        0,
        0,
        revlog_constants.COMP_MODE_INLINE,
        revlog_constants.COMP_MODE_INLINE,
    )

    @util.propertycache
    def entry_size(self):
        return self.index_format.size

    @property
    def nodemap(self):
        msg = b"index.nodemap is deprecated, use index.[has_node|rev|get_rev]"
        util.nouideprecwarn(msg, b'5.3', stacklevel=2)
        return self._nodemap

    @util.propertycache
    def _nodemap(self):
        nodemap = nodemaputil.NodeMap({sha1nodeconstants.nullid: nullrev})
        for r in range(0, len(self)):
            n = self[r][7]
            nodemap[n] = r
        return nodemap

    def has_node(self, node):
        """return True if the node exists in the index"""
        return node in self._nodemap

    def rev(self, node):
        """return a revision for a node

        If the node is unknown, raise a RevlogError"""
        return self._nodemap[node]

    def get_rev(self, node):
        """return a revision for a node

        If the node is unknown, return None"""
        return self._nodemap.get(node)

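    # The mapping above is deliberately lazy: _nodemap is materialized on
    # first use by the propertycache, every lookup method funnels through
    # it, and clearcaches() below drops it so it can be rebuilt after the
    # index changes.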
106 def _stripnodes(self, start):
106 def _stripnodes(self, start):
107 if '_nodemap' in vars(self):
107 if '_nodemap' in vars(self):
108 for r in range(start, len(self)):
108 for r in range(start, len(self)):
109 n = self[r][7]
109 n = self[r][7]
110 del self._nodemap[n]
110 del self._nodemap[n]
111
111
112 def clearcaches(self):
112 def clearcaches(self):
113 self.__dict__.pop('_nodemap', None)
113 self.__dict__.pop('_nodemap', None)
114
114
115 def __len__(self):
115 def __len__(self):
116 return self._lgt + len(self._extra)
116 return self._lgt + len(self._extra)
117
117
118 def append(self, tup):
118 def append(self, tup):
119 if '_nodemap' in vars(self):
119 if '_nodemap' in vars(self):
120 self._nodemap[tup[7]] = len(self)
120 self._nodemap[tup[7]] = len(self)
121 data = self._pack_entry(tup)
121 data = self._pack_entry(tup)
122 self._extra.append(data)
122 self._extra.append(data)
123
123
124 def _pack_entry(self, entry):
124 def _pack_entry(self, entry):
125 assert entry[8] == 0
125 assert entry[8] == 0
126 assert entry[9] == 0
126 assert entry[9] == 0
127 return self.index_format.pack(*entry[:8])
127 return self.index_format.pack(*entry[:8])
128
128
129 def _check_index(self, i):
129 def _check_index(self, i):
130 if not isinstance(i, int):
130 if not isinstance(i, int):
131 raise TypeError(b"expecting int indexes")
131 raise TypeError(b"expecting int indexes")
132 if i < 0 or i >= len(self):
132 if i < 0 or i >= len(self):
133 raise IndexError
133 raise IndexError
134
134
135 def __getitem__(self, i):
135 def __getitem__(self, i):
136 if i == -1:
136 if i == -1:
137 return self.null_item
137 return self.null_item
138 self._check_index(i)
138 self._check_index(i)
139 if i >= self._lgt:
139 if i >= self._lgt:
140 data = self._extra[i - self._lgt]
140 data = self._extra[i - self._lgt]
141 else:
141 else:
142 index = self._calculate_index(i)
142 index = self._calculate_index(i)
143 data = self._data[index : index + self.entry_size]
143 data = self._data[index : index + self.entry_size]
144 r = self._unpack_entry(data)
144 r = self._unpack_entry(data)
145 if self._lgt and i == 0:
145 if self._lgt and i == 0:
146 r = (offset_type(0, gettype(r[0])),) + r[1:]
146 r = (offset_type(0, gettype(r[0])),) + r[1:]
147 return r
147 return r
148
148
149 def _unpack_entry(self, data):
149 def _unpack_entry(self, data):
150 r = self.index_format.unpack(data)
150 r = self.index_format.unpack(data)
151 r = r + (
151 r = r + (
152 0,
152 0,
153 0,
153 0,
154 revlog_constants.COMP_MODE_INLINE,
154 revlog_constants.COMP_MODE_INLINE,
155 revlog_constants.COMP_MODE_INLINE,
155 revlog_constants.COMP_MODE_INLINE,
156 )
156 )
157 return r
157 return r
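A v1 index entry stores only the first eight items; `_unpack_entry` above pads the tuple with a zero sidedata offset/length and inline compression modes so that v1 and v2 entries share the same 12-item shape. A minimal standalone sketch, assuming the 64-byte v1 layout ">Qiiiiii20s12x" and COMP_MODE_INLINE == 2 (assumptions mirroring the constants used here, not taken from this diff):

import struct

INDEX_ENTRY_V1 = struct.Struct(b">Qiiiiii20s12x")  # assumed 64-byte layout
COMP_MODE_INLINE = 2  # assumed value of revlog_constants.COMP_MODE_INLINE

def unpack_v1(data):
    # 8 stored fields: offset_flags, comp_len, uncomp_len, base,
    # link, p1, p2, node
    fields = INDEX_ENTRY_V1.unpack(data)
    # pad with sidedata offset/length = 0 and "inline" compression
    # modes so callers can treat v1 and v2 entries uniformly
    return fields + (0, 0, COMP_MODE_INLINE, COMP_MODE_INLINE)

entry = unpack_v1(b"\0" * INDEX_ENTRY_V1.size)
assert len(entry) == 12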
158
159     def pack_header(self, header):
160         """pack header information as binary"""
161         v_fmt = revlog_constants.INDEX_HEADER
162         return v_fmt.pack(header)
163
164     def entry_binary(self, rev):
165         """return the raw binary string representing a revision"""
166         entry = self[rev]
167         p = revlog_constants.INDEX_ENTRY_V1.pack(*entry[:8])
168         if rev == 0:
169             p = p[revlog_constants.INDEX_HEADER.size :]
170         return p
171
172
173 class IndexObject(BaseIndexObject):
174     def __init__(self, data):
175         assert len(data) % self.entry_size == 0, (
176             len(data),
177             self.entry_size,
178             len(data) % self.entry_size,
179         )
180         self._data = data
181         self._lgt = len(data) // self.entry_size
182         self._extra = []
183
184     def _calculate_index(self, i):
185         return i * self.entry_size
186
187     def __delitem__(self, i):
188         if not isinstance(i, slice) or not i.stop == -1 or i.step is not None:
189             raise ValueError(b"deleting slices only supports a:-1 with step 1")
190         i = i.start
191         self._check_index(i)
192         self._stripnodes(i)
193         if i < self._lgt:
194             self._data = self._data[: i * self.entry_size]
195             self._lgt = i
196             self._extra = []
197         else:
198             self._extra = self._extra[: i - self._lgt]
199
200
201 class PersistentNodeMapIndexObject(IndexObject):
202     """A debug-oriented class to test persistent nodemap
203
204     We need a simple python object to test API and higher-level behavior. See
205     the Rust implementation for more serious usage. This should be used only
206     through the dedicated `devel.persistent-nodemap` config.
207     """
208
209     def nodemap_data_all(self):
210         """Return bytes containing a full serialization of a nodemap
211
212         The nodemap should be valid for the full set of revisions in the
213         index."""
214         return nodemaputil.persistent_data(self)
215
216     def nodemap_data_incremental(self):
217         """Return bytes containing an incremental update to persistent nodemap
218
219         This contains the data for an append-only update of the data provided
220         in the last call to `update_nodemap_data`.
221         """
222         if self._nm_root is None:
223             return None
224         docket = self._nm_docket
225         changed, data = nodemaputil.update_persistent_data(
226             self, self._nm_root, self._nm_max_idx, self._nm_docket.tip_rev
227         )
228
229         self._nm_root = self._nm_max_idx = self._nm_docket = None
230         return docket, changed, data
231
232     def update_nodemap_data(self, docket, nm_data):
233         """provide full block of persisted binary data for a nodemap
234
235         The data are expected to come from disk. See `nodemap_data_all` for a
236         producer of such data."""
237         if nm_data is not None:
238             self._nm_root, self._nm_max_idx = nodemaputil.parse_data(nm_data)
239             if self._nm_root:
240                 self._nm_docket = docket
241             else:
242                 self._nm_root = self._nm_max_idx = self._nm_docket = None
243
244
245 class InlinedIndexObject(BaseIndexObject):
246     def __init__(self, data, inline=0):
247         self._data = data
248         self._lgt = self._inline_scan(None)
249         self._inline_scan(self._lgt)
250         self._extra = []
251
252     def _inline_scan(self, lgt):
253         off = 0
254         if lgt is not None:
255             self._offsets = [0] * lgt
256         count = 0
257         while off <= len(self._data) - self.entry_size:
258             start = off + self.big_int_size
259             (s,) = struct.unpack(
260                 b'>i',
261                 self._data[start : start + self.int_size],
262             )
263             if lgt is not None:
264                 self._offsets[count] = off
265                 count += 1
266             off += self.entry_size + s
267         if off != len(self._data):
268             raise ValueError(b"corrupted data")
269         return count
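The scan above relies on the inline layout: each fixed-size index entry is immediately followed by its variable-length data chunk, whose compressed length sits right after the 8-byte offset/flags field. A standalone sketch of the same walk, with ENTRY_SIZE, BIG_INT_SIZE, and INT_SIZE as assumed stand-ins for the class attributes used here:

import struct

ENTRY_SIZE = 64    # assumed v1 entry size
BIG_INT_SIZE = 8   # the leading offset/flags field
INT_SIZE = 4

def inline_offsets(data):
    """Yield the byte offset of every entry in an inline revlog blob."""
    off = 0
    while off <= len(data) - ENTRY_SIZE:
        yield off
        start = off + BIG_INT_SIZE
        (comp_len,) = struct.unpack(b">i", data[start : start + INT_SIZE])
        # skip the fixed entry plus its trailing data chunk
        off += ENTRY_SIZE + comp_len
    if off != len(data):
        raise ValueError("corrupted inline revlog")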
270
271     def __delitem__(self, i):
272         if not isinstance(i, slice) or not i.stop == -1 or i.step is not None:
273             raise ValueError(b"deleting slices only supports a:-1 with step 1")
274         i = i.start
275         self._check_index(i)
276         self._stripnodes(i)
277         if i < self._lgt:
278             self._offsets = self._offsets[:i]
279             self._lgt = i
280             self._extra = []
281         else:
282             self._extra = self._extra[: i - self._lgt]
283
284     def _calculate_index(self, i):
285         return self._offsets[i]
286
287
288 def parse_index2(data, inline, revlogv2=False):
289     if not inline:
290         cls = IndexObject2 if revlogv2 else IndexObject
291         return cls(data), None
292     cls = InlinedIndexObject2 if revlogv2 else InlinedIndexObject
293     return cls(data, inline), (0, data)
294
295
296 class Index2Mixin(object):
297     index_format = revlog_constants.INDEX_ENTRY_V2
298
299     def replace_sidedata_info(
300         self, rev, sidedata_offset, sidedata_length, offset_flags
300         self,
301         rev,
302         sidedata_offset,
303         sidedata_length,
304         offset_flags,
305         compression_mode,
301     ):
306     ):
307         """
308         Replace an existing index entry's sidedata offset and length with new
309         ones.
310         This cannot be used outside of the context of sidedata rewriting,
311         inside the transaction that creates the revision `rev`.
312         """
313         if rev < 0:
314             raise KeyError
315         self._check_index(rev)
316         if rev < self._lgt:
317             msg = b"cannot rewrite entries outside of this transaction"
318             raise KeyError(msg)
319         else:
320             entry = list(self[rev])
321             entry[0] = offset_flags
322             entry[8] = sidedata_offset
323             entry[9] = sidedata_length
324             entry[11] = compression_mode
325             entry = tuple(entry)
326             new = self._pack_entry(entry)
327             self._extra[rev - self._lgt] = new
328
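This is the hook used by the sidedata-rewriting path this changeset touches: besides the rewritten chunk's offset and length, the caller now also records how that chunk was compressed (entry item 11). A hypothetical caller sketch; the helper name, its parameters, and the COMP_MODE_PLAIN value are illustrative stand-ins, not names taken from this diff:

COMP_MODE_PLAIN = 0  # assumed value; "stored uncompressed"

def record_rewritten_sidedata(index, rev, flags, offset, length, comp_mode):
    # item 0: refreshed offset/flags field, items 8/9: new chunk
    # location, item 11: how the rewritten chunk is compressed on disk
    index.replace_sidedata_info(rev, offset, length, flags, comp_mode)

# e.g. record_rewritten_sidedata(index, rev, flags, off, size, COMP_MODE_PLAIN)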
329     def _unpack_entry(self, data):
330         data = self.index_format.unpack(data)
331         entry = data[:10]
332         data_comp = data[10] & 3
333         sidedata_comp = (data[10] & (3 << 2)) >> 2
334         return entry + (data_comp, sidedata_comp)
335
336     def _pack_entry(self, entry):
337         data = entry[:10]
338         data_comp = entry[10] & 3
339         sidedata_comp = (entry[11] & 3) << 2
340         data += (data_comp | sidedata_comp,)
341
342         return self.index_format.pack(*data)
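Items 10 and 11 are two 2-bit values sharing a single byte of the v2 entry: the data compression mode occupies bits 0-1 and the sidedata compression mode bits 2-3. A worked example of that packing:

def pack_comp_modes(data_mode, sidedata_mode):
    assert 0 <= data_mode <= 3 and 0 <= sidedata_mode <= 3
    return (data_mode & 3) | ((sidedata_mode & 3) << 2)

def unpack_comp_modes(byte):
    return byte & 3, (byte & (3 << 2)) >> 2

packed = pack_comp_modes(2, 1)  # e.g. inline data, default sidedata
assert unpack_comp_modes(packed) == (2, 1)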
343
344     def entry_binary(self, rev):
345         """return the raw binary string representing a revision"""
346         entry = self[rev]
347         return self._pack_entry(entry)
348
349     def pack_header(self, header):
350         """pack header information as binary"""
351         msg = 'version header should go in the docket, not the index: %d'
352         msg %= header
353         raise error.ProgrammingError(msg)
354
355
356 class IndexObject2(Index2Mixin, IndexObject):
357     pass
358
359
360 class InlinedIndexObject2(Index2Mixin, InlinedIndexObject):
361     def _inline_scan(self, lgt):
362         sidedata_length_pos = 72
363         off = 0
364         if lgt is not None:
365             self._offsets = [0] * lgt
366         count = 0
367         while off <= len(self._data) - self.entry_size:
368             start = off + self.big_int_size
369             (data_size,) = struct.unpack(
370                 b'>i',
371                 self._data[start : start + self.int_size],
372             )
373             start = off + sidedata_length_pos
374             (side_data_size,) = struct.unpack(
375                 b'>i', self._data[start : start + self.int_size]
376             )
377             if lgt is not None:
378                 self._offsets[count] = off
379                 count += 1
380             off += self.entry_size + data_size + side_data_size
381         if off != len(self._data):
382             raise ValueError(b"corrupted data")
383         return count
384
385
386 def parse_index_devel_nodemap(data, inline):
387     """like parse_index2, but always returns a PersistentNodeMapIndexObject"""
388     return PersistentNodeMapIndexObject(data), None
389
390
391 def parse_dirstate(dmap, copymap, st):
392     parents = [st[:20], st[20:40]]
393     # dereference fields so they will be local in loop
394     format = b">cllll"
395     e_size = struct.calcsize(format)
396     pos1 = 40
397     l = len(st)
398
399     # the inner loop
400     while pos1 < l:
401         pos2 = pos1 + e_size
402         e = _unpack(b">cllll", st[pos1:pos2])  # a literal here is faster
403         pos1 = pos2 + e[4]
404         f = st[pos2:pos1]
405         if b'\0' in f:
406             f, c = f.split(b'\0')
407             copymap[f] = c
408         dmap[f] = e[:4]
409     return parents
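Each dirstate record is a ">cllll" header (state byte, mode, size, mtime, file name length) followed by the file name, with an optional NUL-separated copy source appended to the name. A standalone sketch parsing a single record under those assumptions:

import struct

def parse_one(st, pos):
    """Return (name, copy_source, entry, next_pos) for the record at pos."""
    header = struct.Struct(b">cllll")
    state, mode, size, mtime, flen = header.unpack(
        st[pos : pos + header.size]
    )
    name = st[pos + header.size : pos + header.size + flen]
    copy = None
    if b"\0" in name:
        # the copy source rides along after a NUL separator
        name, copy = name.split(b"\0")
    return name, copy, (state, mode, size, mtime), pos + header.size + flen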
410
411
412 def pack_dirstate(dmap, copymap, pl, now):
413     now = int(now)
414     cs = stringio()
415     write = cs.write
416     write(b"".join(pl))
417     for f, e in pycompat.iteritems(dmap):
418         if e[0] == b'n' and e[3] == now:
419             # The file was last modified "simultaneously" with the current
420             # write to dirstate (i.e. within the same second for file-
421             # systems with a granularity of 1 sec). This commonly happens
422             # for at least a couple of files on 'update'.
423             # The user could change the file without changing its size
424             # within the same second. Invalidate the file's mtime in
425             # dirstate, forcing future 'status' calls to compare the
426             # contents of the file if the size is the same. This prevents
427             # mistakenly treating such files as clean.
428             e = dirstatetuple(e[0], e[1], e[2], -1)
429             dmap[f] = e
430
431         if f in copymap:
432             f = b"%s\0%s" % (f, copymap[f])
433         e = _pack(b">cllll", e[0], e[1], e[2], e[3], len(f))
434         write(e)
435         write(f)
436     return cs.getvalue()
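The long comment above boils down to one rule: an mtime equal to the dirstate write time cannot be trusted, so it is stored as -1 ("unknown"), forcing a later 'status' to fall back to content comparison when sizes match. In miniature:

def sanitize_mtime(state, mtime, now):
    # an mtime recorded in the same second as the dirstate write is
    # ambiguous; mark it unknown so status re-checks the content
    if state == b'n' and mtime == int(now):
        return -1
    return mtime

assert sanitize_mtime(b'n', 1700000000, 1700000000) == -1
assert sanitize_mtime(b'n', 1699999999, 1700000000) == 1699999999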
@@ -1,3418 +1,3439 b''
1 # revlog.py - storage back-end for mercurial
2 #
3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 #
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
7
8 """Storage back-end for Mercurial.
9
10 This provides efficient delta storage with O(1) retrieve and append
11 and O(changes) merge between branches.
12 """
13
14 from __future__ import absolute_import
15
16 import binascii
17 import collections
18 import contextlib
19 import errno
20 import io
21 import os
22 import struct
23 import zlib
24
25 # import stuff from node for others to import from revlog
26 from .node import (
27     bin,
28     hex,
29     nullrev,
30     sha1nodeconstants,
31     short,
32     wdirrev,
33 )
34 from .i18n import _
35 from .pycompat import getattr
36 from .revlogutils.constants import (
37     ALL_KINDS,
38     COMP_MODE_DEFAULT,
39     COMP_MODE_INLINE,
40     COMP_MODE_PLAIN,
41     FEATURES_BY_VERSION,
42     FLAG_GENERALDELTA,
43     FLAG_INLINE_DATA,
44     INDEX_HEADER,
45     REVLOGV0,
46     REVLOGV1,
47     REVLOGV1_FLAGS,
48     REVLOGV2,
49     REVLOGV2_FLAGS,
50     REVLOG_DEFAULT_FLAGS,
51     REVLOG_DEFAULT_FORMAT,
52     REVLOG_DEFAULT_VERSION,
53     SUPPORTED_FLAGS,
54 )
55 from .revlogutils.flagutil import (
56     REVIDX_DEFAULT_FLAGS,
57     REVIDX_ELLIPSIS,
58     REVIDX_EXTSTORED,
59     REVIDX_FLAGS_ORDER,
60     REVIDX_HASCOPIESINFO,
61     REVIDX_ISCENSORED,
62     REVIDX_RAWTEXT_CHANGING_FLAGS,
63 )
64 from .thirdparty import attr
65 from . import (
66     ancestor,
67     dagop,
68     error,
69     mdiff,
70     policy,
71     pycompat,
72     templatefilters,
73     util,
74 )
75 from .interfaces import (
76     repository,
77     util as interfaceutil,
78 )
79 from .revlogutils import (
80     deltas as deltautil,
81     docket as docketutil,
82     flagutil,
83     nodemap as nodemaputil,
84     revlogv0,
85     sidedata as sidedatautil,
86 )
87 from .utils import (
88     storageutil,
89     stringutil,
90 )
91
92 # blanked usage of all the names to prevent pyflakes constraints
93 # We need these names available in the module for extensions.
94
95 REVLOGV0
96 REVLOGV1
97 REVLOGV2
98 FLAG_INLINE_DATA
99 FLAG_GENERALDELTA
100 REVLOG_DEFAULT_FLAGS
101 REVLOG_DEFAULT_FORMAT
102 REVLOG_DEFAULT_VERSION
103 REVLOGV1_FLAGS
104 REVLOGV2_FLAGS
105 REVIDX_ISCENSORED
106 REVIDX_ELLIPSIS
107 REVIDX_HASCOPIESINFO
108 REVIDX_EXTSTORED
109 REVIDX_DEFAULT_FLAGS
110 REVIDX_FLAGS_ORDER
111 REVIDX_RAWTEXT_CHANGING_FLAGS
112
113 parsers = policy.importmod('parsers')
114 rustancestor = policy.importrust('ancestor')
115 rustdagop = policy.importrust('dagop')
116 rustrevlog = policy.importrust('revlog')
117
118 # Aliased for performance.
119 _zlibdecompress = zlib.decompress
120
121 # max size of revlog with inline data
122 _maxinline = 131072
123 _chunksize = 1048576
124
125 # Flag processors for REVIDX_ELLIPSIS.
126 def ellipsisreadprocessor(rl, text):
127     return text, False
128
129
130 def ellipsiswriteprocessor(rl, text):
131     return text, False
132
133
134 def ellipsisrawprocessor(rl, text):
135     return False
136
137
138 ellipsisprocessor = (
139     ellipsisreadprocessor,
140     ellipsiswriteprocessor,
141     ellipsisrawprocessor,
142 )
143
144
145 def offset_type(offset, type):
146     if (type & ~flagutil.REVIDX_KNOWN_FLAGS) != 0:
147         raise ValueError(b'unknown revlog index flags')
148     return int(int(offset) << 16 | type)
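Field 0 of an index entry packs the data offset into the high bits and the 16-bit flag set into the low bits. A worked example; KNOWN_FLAGS is a stand-in for flagutil.REVIDX_KNOWN_FLAGS, whose real value is a subset of 0xFFFF:

KNOWN_FLAGS = 0xFFFF  # stand-in; the real mask covers only defined flags

def offset_type(offset, flags):
    if flags & ~KNOWN_FLAGS:
        raise ValueError("unknown revlog index flags")
    return (offset << 16) | flags

field = offset_type(1024, 0x0002)
assert field >> 16 == 1024        # recover the offset
assert field & 0xFFFF == 0x0002   # recover the flags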
149
150
151 def _verify_revision(rl, skipflags, state, node):
152     """Verify the integrity of the given revlog ``node`` while providing a hook
153     point for extensions to influence the operation."""
154     if skipflags:
155         state[b'skipread'].add(node)
156     else:
157         # Side-effect: read content and verify hash.
158         rl.revision(node)
159
160
161 # True if a fast implementation for persistent-nodemap is available
162 #
163 # We also consider we have a "fast" implementation in "pure" python because
164 # people using pure don't really have performance considerations (and a
165 # wheelbarrow of other slowness sources)
166 HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or util.safehasattr(
167     parsers, 'BaseIndexObject'
168 )
169
170
171 @attr.s(slots=True, frozen=True)
172 class _revisioninfo(object):
173     """Information about a revision that allows building its fulltext
174     node: expected hash of the revision
175     p1, p2: parent revs of the revision
176     btext: built text cache consisting of a one-element list
177     cachedelta: (baserev, uncompressed_delta) or None
178     flags: flags associated to the revision storage
179
180     One of btext[0] or cachedelta must be set.
181     """
182
183     node = attr.ib()
184     p1 = attr.ib()
185     p2 = attr.ib()
186     btext = attr.ib()
187     textlen = attr.ib()
188     cachedelta = attr.ib()
189     flags = attr.ib()
190
191
192 @interfaceutil.implementer(repository.irevisiondelta)
193 @attr.s(slots=True)
194 class revlogrevisiondelta(object):
195     node = attr.ib()
196     p1node = attr.ib()
197     p2node = attr.ib()
198     basenode = attr.ib()
199     flags = attr.ib()
200     baserevisionsize = attr.ib()
201     revision = attr.ib()
202     delta = attr.ib()
203     sidedata = attr.ib()
204     protocol_flags = attr.ib()
205     linknode = attr.ib(default=None)
206
207
208 @interfaceutil.implementer(repository.iverifyproblem)
209 @attr.s(frozen=True)
210 class revlogproblem(object):
211     warning = attr.ib(default=None)
212     error = attr.ib(default=None)
213     node = attr.ib(default=None)
214
215
216 def parse_index_v1(data, inline):
217     # call the C implementation to parse the index data
218     index, cache = parsers.parse_index2(data, inline)
219     return index, cache
220
221
222 def parse_index_v2(data, inline):
223     # call the C implementation to parse the index data
224     index, cache = parsers.parse_index2(data, inline, revlogv2=True)
225     return index, cache
226
227
228 if util.safehasattr(parsers, 'parse_index_devel_nodemap'):
229
230     def parse_index_v1_nodemap(data, inline):
231         index, cache = parsers.parse_index_devel_nodemap(data, inline)
232         return index, cache
233
234
235 else:
236     parse_index_v1_nodemap = None
237
238
239 def parse_index_v1_mixed(data, inline):
240     index, cache = parse_index_v1(data, inline)
241     return rustrevlog.MixedIndex(index), cache
242
243
244 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
245 # signed integer)
246 _maxentrysize = 0x7FFFFFFF
247
248
249 class revlog(object):
250     """
251     the underlying revision storage object
252
253     A revlog consists of two parts, an index and the revision data.
254
255     The index is a file with a fixed record size containing
256     information on each revision, including its nodeid (hash), the
257     nodeids of its parents, the position and offset of its data within
258     the data file, and the revision it's based on. Finally, each entry
259     contains a linkrev entry that can serve as a pointer to external
260     data.
261
262     The revision data itself is a linear collection of data chunks.
263     Each chunk represents a revision and is usually represented as a
264     delta against the previous chunk. To bound lookup time, runs of
265     deltas are limited to about 2 times the length of the original
266     version data. This makes retrieval of a version proportional to
267     its size, or O(1) relative to the number of revisions.
268
269     Both pieces of the revlog are written to in an append-only
270     fashion, which means we never need to rewrite a file to insert or
271     remove data, and can use some simple techniques to avoid the need
272     for locking while reading.
273
274     If checkambig, indexfile is opened with checkambig=True at
275     writing, to avoid file stat ambiguity.
276
277     If mmaplargeindex is True, and an mmapindexthreshold is set, the
278     index will be mmapped rather than read if it is larger than the
279     configured threshold.
280
281     If censorable is True, the revlog can have censored revisions.
282
283     If `upperboundcomp` is not None, this is the expected maximal gain from
284     compression for the data content.
285
286     `concurrencychecker` is an optional function that receives 3 arguments: a
287     file handle, a filename, and an expected position. It should check whether
288     the current position in the file handle is valid, and log/warn/fail (by
289     raising).
290
291
292     Internal details
293     ----------------
294
295     A large part of the revlog logic deals with revisions' "index entries", tuple
296     objects that contain the same "items" whatever the revlog version.
297     Different versions will have different ways of storing these items (sometimes
298     not having them at all), but the tuple will always be the same. New fields
299     are usually added at the end to avoid breaking existing code that relies
300     on the existing order. The fields are defined as follows:
301
302     [0] offset:
303         The byte index of the start of revision data chunk.
304         That value is shifted up by 16 bits. Use "offset = field >> 16" to
305         retrieve it.
306
307         flags:
308             A flag field that carries special information or changes the behavior
309             of the revision. (see `REVIDX_*` constants for details)
310             The flag field only occupies the first 16 bits of this field,
311             use "flags = field & 0xFFFF" to retrieve the value.
312
313     [1] compressed length:
314         The size, in bytes, of the chunk on disk
315
316     [2] uncompressed length:
317         The size, in bytes, of the full revision once reconstructed.
318
319     [3] base rev:
320         Either the base of the revision delta chain (without general
321         delta), or the base of the delta (stored in the data chunk)
322         with general delta.
323
324     [4] link rev:
325         Changelog revision number of the changeset introducing this
326         revision.
327
328     [5] parent 1 rev:
329         Revision number of the first parent
330
331     [6] parent 2 rev:
332         Revision number of the second parent
333
334     [7] node id:
335         The node id of the current revision
336
337     [8] sidedata offset:
338         The byte index of the start of the revision's side-data chunk.
339
340     [9] sidedata chunk length:
341         The size, in bytes, of the revision's side-data chunk.
342
343     [10] data compression mode:
344         two bits that detail the way the data chunk is compressed on disk.
345         (see "COMP_MODE_*" constants for details). For revlog version 0 and
346         1 this will always be COMP_MODE_INLINE.
347
348     [11] side-data compression mode:
349         two bits that detail the way the sidedata chunk is compressed on disk.
350         (see "COMP_MODE_*" constants for details)
351     """
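For readability, the twelve documented items can be given names; this namedtuple is illustrative only and is not an identifier used by revlog:

from collections import namedtuple

IndexEntry = namedtuple(
    "IndexEntry",
    [
        "offset_flags",        # [0] offset << 16 | flags
        "comp_len",            # [1] compressed length on disk
        "uncomp_len",          # [2] reconstructed length
        "base_rev",            # [3] delta chain / delta base
        "link_rev",            # [4] changelog revision
        "p1_rev",              # [5] first parent
        "p2_rev",              # [6] second parent
        "node_id",             # [7] node hash
        "sidedata_offset",     # [8]
        "sidedata_len",        # [9]
        "data_comp_mode",      # [10] 2-bit COMP_MODE_* value
        "sidedata_comp_mode",  # [11] 2-bit COMP_MODE_* value
    ],
)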
352
353     _flagserrorclass = error.RevlogError
354
355     def __init__(
356         self,
357         opener,
358         target,
359         radix,
360         postfix=None,  # only exist for `tmpcensored` now
361         checkambig=False,
362         mmaplargeindex=False,
363         censorable=False,
364         upperboundcomp=None,
365         persistentnodemap=False,
366         concurrencychecker=None,
367         trypending=False,
368     ):
369         """
370         create a revlog object
371
372         opener is a function that abstracts the file opening operation
373         and can be used to implement COW semantics or the like.
374
375         `target`: a (KIND, ID) tuple that identifies the content stored in
376         this revlog. It helps the rest of the code to understand what the revlog
377         is about without having to resort to heuristics and index filename
378         analysis. Note that this must reliably be set by normal code, but
379         that test, debug, or performance measurement code might not set it to an
380         accurate value.
381         """
382         self.upperboundcomp = upperboundcomp
383
384         self.radix = radix
385
386         self._docket_file = None
387         self._indexfile = None
388         self._datafile = None
389         self._nodemap_file = None
390         self.postfix = postfix
391         self._trypending = trypending
392         self.opener = opener
393         if persistentnodemap:
394             self._nodemap_file = nodemaputil.get_nodemap_file(self)
395
396         assert target[0] in ALL_KINDS
397         assert len(target) == 2
398         self.target = target
399         # When True, indexfile is opened with checkambig=True at writing, to
400         # avoid file stat ambiguity.
401         self._checkambig = checkambig
402         self._mmaplargeindex = mmaplargeindex
403         self._censorable = censorable
404         # 3-tuple of (node, rev, text) for a raw revision.
405         self._revisioncache = None
406         # Maps rev to chain base rev.
407         self._chainbasecache = util.lrucachedict(100)
408         # 2-tuple of (offset, data) of raw data from the revlog at an offset.
409         self._chunkcache = (0, b'')
410         # How much data to read and cache into the raw revlog data cache.
411         self._chunkcachesize = 65536
412         self._maxchainlen = None
413         self._deltabothparents = True
414         self.index = None
415         self._docket = None
416         self._nodemap_docket = None
417         # Mapping of partial identifiers to full nodes.
418         self._pcache = {}
419         # Mapping of revision integer to full node.
420         self._compengine = b'zlib'
421         self._compengineopts = {}
422         self._maxdeltachainspan = -1
423         self._withsparseread = False
424         self._sparserevlog = False
425         self.hassidedata = False
426         self._srdensitythreshold = 0.50
427         self._srmingapsize = 262144
428
429         # Make copy of flag processors so each revlog instance can support
430         # custom flags.
431         self._flagprocessors = dict(flagutil.flagprocessors)
432
433         # 2-tuple of file handles being used for active writing.
434         self._writinghandles = None
435         # prevent nesting of addgroup
436         self._adding_group = None
437
438         self._loadindex()
439
440         self._concurrencychecker = concurrencychecker
441
442     def _init_opts(self):
443         """process options (from above/config) to set up associated default revlog mode
444
445         These values might be affected when actually reading on disk information.
446
447         The relevant values are returned for use in _loadindex().
448
449         * newversionflags:
450             version header to use if we need to create a new revlog
451
452         * mmapindexthreshold:
453             minimal index size at which to start using mmap
454
455         * force_nodemap:
456             force the usage of a "development" version of the nodemap code
457         """
458         mmapindexthreshold = None
459         opts = self.opener.options
460
461         if b'revlogv2' in opts:
462             new_header = REVLOGV2 | FLAG_INLINE_DATA
463         elif b'revlogv1' in opts:
464             new_header = REVLOGV1 | FLAG_INLINE_DATA
465             if b'generaldelta' in opts:
466                 new_header |= FLAG_GENERALDELTA
467         elif b'revlogv0' in self.opener.options:
468             new_header = REVLOGV0
469         else:
470             new_header = REVLOG_DEFAULT_VERSION
471
472         if b'chunkcachesize' in opts:
473             self._chunkcachesize = opts[b'chunkcachesize']
474         if b'maxchainlen' in opts:
475             self._maxchainlen = opts[b'maxchainlen']
476         if b'deltabothparents' in opts:
477             self._deltabothparents = opts[b'deltabothparents']
478         self._lazydelta = bool(opts.get(b'lazydelta', True))
479         self._lazydeltabase = False
480         if self._lazydelta:
481             self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
482         if b'compengine' in opts:
483             self._compengine = opts[b'compengine']
484         if b'zlib.level' in opts:
485             self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
486         if b'zstd.level' in opts:
487             self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
488         if b'maxdeltachainspan' in opts:
489             self._maxdeltachainspan = opts[b'maxdeltachainspan']
490         if self._mmaplargeindex and b'mmapindexthreshold' in opts:
491             mmapindexthreshold = opts[b'mmapindexthreshold']
492         self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
493         withsparseread = bool(opts.get(b'with-sparse-read', False))
494         # sparse-revlog forces sparse-read
495         self._withsparseread = self._sparserevlog or withsparseread
496         if b'sparse-read-density-threshold' in opts:
497             self._srdensitythreshold = opts[b'sparse-read-density-threshold']
498         if b'sparse-read-min-gap-size' in opts:
499             self._srmingapsize = opts[b'sparse-read-min-gap-size']
500         if opts.get(b'enableellipsis'):
501             self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
502
503         # revlog v0 doesn't have flag processors
504         for flag, processor in pycompat.iteritems(
505             opts.get(b'flagprocessors', {})
506         ):
507             flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
508
509         if self._chunkcachesize <= 0:
510             raise error.RevlogError(
511                 _(b'revlog chunk cache size %r is not greater than 0')
512                 % self._chunkcachesize
513             )
514         elif self._chunkcachesize & (self._chunkcachesize - 1):
515             raise error.RevlogError(
516                 _(b'revlog chunk cache size %r is not a power of 2')
517                 % self._chunkcachesize
518             )
519         force_nodemap = opts.get(b'devel-force-nodemap', False)
520         return new_header, mmapindexthreshold, force_nodemap
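The power-of-two validation above uses the classic bit trick: n & (n - 1) clears the lowest set bit, so the result is zero exactly when n has a single bit set. For example:

def is_power_of_two(n):
    # n & (n - 1) drops the lowest set bit; zero means only one bit was set
    return n > 0 and (n & (n - 1)) == 0

assert is_power_of_two(65536)
assert not is_power_of_two(65537)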
521
522     def _get_data(self, filepath, mmap_threshold, size=None):
523         """return a file content with or without mmap
524
525         If the file is missing return the empty string"""
526         try:
527             with self.opener(filepath) as fp:
528                 if mmap_threshold is not None:
529                     file_size = self.opener.fstat(fp).st_size
530                     if file_size >= mmap_threshold:
531                         if size is not None:
532                             # avoid potential mmap crash
533                             size = min(file_size, size)
534                         # TODO: should .close() to release resources without
535                         # relying on Python GC
536                         if size is None:
537                             return util.buffer(util.mmapread(fp))
538                         else:
539                             return util.buffer(util.mmapread(fp, size))
540                 if size is None:
541                     return fp.read()
542                 else:
543                     return fp.read(size)
544         except IOError as inst:
545             if inst.errno != errno.ENOENT:
546                 raise
547             return b''
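The same read strategy, sketched with only the standard library (the opener, util.buffer, and util.mmapread used above are Mercurial internals): mmap files at or above the threshold, plain read() otherwise, and treat a missing file as empty.

import errno
import mmap
import os

def read_with_threshold(path, threshold):
    try:
        with open(path, "rb") as fp:
            size = os.fstat(fp.fileno()).st_size
            # empty files cannot be mmapped, so fall through to read()
            if threshold is not None and 0 < size and size >= threshold:
                return mmap.mmap(fp.fileno(), size, access=mmap.ACCESS_READ)
            return fp.read()
    except IOError as inst:
        if inst.errno != errno.ENOENT:
            raise
        return b""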
548
549     def _loadindex(self):
550
551         new_header, mmapindexthreshold, force_nodemap = self._init_opts()
552
553         if self.postfix is not None:
554             entry_point = b'%s.i.%s' % (self.radix, self.postfix)
555         elif self._trypending and self.opener.exists(b'%s.i.a' % self.radix):
556             entry_point = b'%s.i.a' % self.radix
557         else:
558             entry_point = b'%s.i' % self.radix
559
560         entry_data = b''
561         self._initempty = True
562         entry_data = self._get_data(entry_point, mmapindexthreshold)
563         if len(entry_data) > 0:
564             header = INDEX_HEADER.unpack(entry_data[:4])[0]
565             self._initempty = False
566         else:
567             header = new_header
568
569         self._format_flags = header & ~0xFFFF
570         self._format_version = header & 0xFFFF
571
572         supported_flags = SUPPORTED_FLAGS.get(self._format_version)
573         if supported_flags is None:
574             msg = _(b'unknown version (%d) in revlog %s')
575             msg %= (self._format_version, self.display_id)
576             raise error.RevlogError(msg)
577         elif self._format_flags & ~supported_flags:
578             msg = _(b'unknown flags (%#04x) in version %d revlog %s')
579             display_flag = self._format_flags >> 16
580             msg %= (display_flag, self._format_version, self.display_id)
581             raise error.RevlogError(msg)
582
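The 4-byte header splits into feature flags (high 16 bits) and format version (low 16 bits). A self-contained example; the REVLOGV1 and FLAG_INLINE_DATA values below mirror the constants imported at the top of this file:

import struct

FLAG_INLINE_DATA = 1 << 16
REVLOGV1 = 1

header = FLAG_INLINE_DATA | REVLOGV1
raw = struct.pack(b">I", header)

unpacked = struct.unpack(b">I", raw)[0]
format_flags = unpacked & ~0xFFFF    # high 16 bits: feature flags
format_version = unpacked & 0xFFFF   # low 16 bits: revlog version
assert format_version == REVLOGV1
assert format_flags == FLAG_INLINE_DATA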
583         features = FEATURES_BY_VERSION[self._format_version]
584         self._inline = features[b'inline'](self._format_flags)
585         self._generaldelta = features[b'generaldelta'](self._format_flags)
586         self.hassidedata = features[b'sidedata']
587
588         if not features[b'docket']:
589             self._indexfile = entry_point
590             index_data = entry_data
591         else:
592             self._docket_file = entry_point
593             if self._initempty:
594                 self._docket = docketutil.default_docket(self, header)
595             else:
596                 self._docket = docketutil.parse_docket(
597                     self, entry_data, use_pending=self._trypending
598                 )
599             self._indexfile = self._docket.index_filepath()
600             index_data = b''
601             index_size = self._docket.index_end
602             if index_size > 0:
603                 index_data = self._get_data(
604                     self._indexfile, mmapindexthreshold, size=index_size
605                 )
606                 if len(index_data) < index_size:
607                     msg = _(b'too few index data for %s: got %d, expected %d')
608                     msg %= (self.display_id, len(index_data), index_size)
609                     raise error.RevlogError(msg)
610
611             self._inline = False
612             # generaldelta implied by version 2 revlogs.
613             self._generaldelta = True
614             # the logic for persistent nodemap will be dealt with within the
615             # main docket, so disable it for now.
616             self._nodemap_file = None
617
618         if self.postfix is None:
619             self._datafile = b'%s.d' % self.radix
620         else:
621             self._datafile = b'%s.d.%s' % (self.radix, self.postfix)
622
623         self.nodeconstants = sha1nodeconstants
624         self.nullid = self.nodeconstants.nullid
625
626         # sparse-revlog can't be on without general-delta (issue6056)
627         if not self._generaldelta:
628             self._sparserevlog = False
629
630         self._storedeltachains = True
631
632         devel_nodemap = (
633             self._nodemap_file
634             and force_nodemap
635             and parse_index_v1_nodemap is not None
636         )
637
638         use_rust_index = False
639         if rustrevlog is not None:
640             if self._nodemap_file is not None:
641                 use_rust_index = True
642             else:
643                 use_rust_index = self.opener.options.get(b'rust.index')
644
645         self._parse_index = parse_index_v1
646         if self._format_version == REVLOGV0:
647             self._parse_index = revlogv0.parse_index_v0
648         elif self._format_version == REVLOGV2:
649             self._parse_index = parse_index_v2
650         elif devel_nodemap:
651             self._parse_index = parse_index_v1_nodemap
652         elif use_rust_index:
653             self._parse_index = parse_index_v1_mixed
654         try:
655             d = self._parse_index(index_data, self._inline)
656             index, _chunkcache = d
657             use_nodemap = (
658                 not self._inline
659                 and self._nodemap_file is not None
660                 and util.safehasattr(index, 'update_nodemap_data')
661             )
662             if use_nodemap:
663                 nodemap_data = nodemaputil.persisted_data(self)
664                 if nodemap_data is not None:
665                     docket = nodemap_data[0]
666                     if (
667                         len(d[0]) > docket.tip_rev
668                         and d[0][docket.tip_rev][7] == docket.tip_node
669                     ):
670                         # no changelog tampering
671                         self._nodemap_docket = docket
672                         index.update_nodemap_data(*nodemap_data)
673         except (ValueError, IndexError):
674             raise error.RevlogError(
675                 _(b"index %s is corrupted") % self.display_id
676             )
677         self.index, self._chunkcache = d
678         if not self._chunkcache:
679             self._chunkclear()
680         # revnum -> (chain-length, sum-delta-length)
681 self._chaininfocache = util.lrucachedict(500)
681 self._chaininfocache = util.lrucachedict(500)
682 # revlog header -> revlog compressor
682 # revlog header -> revlog compressor
683 self._decompressors = {}
683 self._decompressors = {}
684
684
685 @util.propertycache
685 @util.propertycache
686 def revlog_kind(self):
686 def revlog_kind(self):
687 return self.target[0]
687 return self.target[0]
688
688
689 @util.propertycache
689 @util.propertycache
690 def display_id(self):
690 def display_id(self):
691 """The public facing "ID" of the revlog that we use in message"""
691 """The public facing "ID" of the revlog that we use in message"""
692 # Maybe we should build a user facing representation of
692 # Maybe we should build a user facing representation of
693 # revlog.target instead of using `self.radix`
693 # revlog.target instead of using `self.radix`
694 return self.radix
694 return self.radix
695
695
696 def _get_decompressor(self, t):
696 def _get_decompressor(self, t):
697 try:
697 try:
698 compressor = self._decompressors[t]
698 compressor = self._decompressors[t]
699 except KeyError:
699 except KeyError:
700 try:
700 try:
701 engine = util.compengines.forrevlogheader(t)
701 engine = util.compengines.forrevlogheader(t)
702 compressor = engine.revlogcompressor(self._compengineopts)
702 compressor = engine.revlogcompressor(self._compengineopts)
703 self._decompressors[t] = compressor
703 self._decompressors[t] = compressor
704 except KeyError:
704 except KeyError:
705 raise error.RevlogError(
705 raise error.RevlogError(
706 _(b'unknown compression type %s') % binascii.hexlify(t)
706 _(b'unknown compression type %s') % binascii.hexlify(t)
707 )
707 )
708 return compressor
708 return compressor
709
709
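    # A minimal usage sketch (not part of the revlog API contract): for a
    # revlog instance ``rl`` and a compressed chunk whose first byte is the
    # usual zlib header b'x', ``rl._get_decompressor(b'x')`` resolves the
    # matching compression engine once and caches it in ``rl._decompressors``,
    # so later chunks carrying the same header skip the engine lookup.
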
    @util.propertycache
    def _compressor(self):
        engine = util.compengines[self._compengine]
        return engine.revlogcompressor(self._compengineopts)

    @util.propertycache
    def _decompressor(self):
        """the default decompressor"""
        if self._docket is None:
            return None
        t = self._docket.default_compression_header
        c = self._get_decompressor(t)
        return c.decompress

    def _indexfp(self):
        """file object for the revlog's index file"""
        return self.opener(self._indexfile, mode=b"r")

    def __index_write_fp(self):
        # You should not use this directly; use `_writing` instead
        try:
            f = self.opener(
                self._indexfile, mode=b"r+", checkambig=self._checkambig
            )
            if self._docket is None:
                f.seek(0, os.SEEK_END)
            else:
                f.seek(self._docket.index_end, os.SEEK_SET)
            return f
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            return self.opener(
                self._indexfile, mode=b"w+", checkambig=self._checkambig
            )

    def __index_new_fp(self):
        # You should not use this unless you are upgrading from inline revlog
        return self.opener(
            self._indexfile,
            mode=b"w",
            checkambig=self._checkambig,
            atomictemp=True,
        )

    def _datafp(self, mode=b'r'):
        """file object for the revlog's data file"""
        return self.opener(self._datafile, mode=mode)

    @contextlib.contextmanager
    def _datareadfp(self, existingfp=None):
        """file object suitable to read data"""
        # Use explicit file handle, if given.
        if existingfp is not None:
            yield existingfp

        # Use a file handle being actively used for writes, if available.
        # There is some danger to doing this because reads will seek the
        # file. However, _writeentry() performs a SEEK_END before all writes,
        # so we should be safe.
        elif self._writinghandles:
            if self._inline:
                yield self._writinghandles[0]
            else:
                yield self._writinghandles[1]

        # Otherwise open a new file handle.
        else:
            if self._inline:
                func = self._indexfp
            else:
                func = self._datafp
            with func() as fp:
                yield fp

    def tiprev(self):
        return len(self.index) - 1

    def tip(self):
        return self.node(self.tiprev())

    def __contains__(self, rev):
        return 0 <= rev < len(self)

    def __len__(self):
        return len(self.index)

    def __iter__(self):
        return iter(pycompat.xrange(len(self)))

    def revs(self, start=0, stop=None):
        """iterate over all revs in this revlog (from start to stop)"""
        return storageutil.iterrevs(len(self), start=start, stop=stop)

    @property
    def nodemap(self):
        msg = (
            b"revlog.nodemap is deprecated, "
            b"use revlog.index.[has_node|rev|get_rev]"
        )
        util.nouideprecwarn(msg, b'5.3', stacklevel=2)
        return self.index.nodemap

    @property
    def _nodecache(self):
        msg = b"revlog._nodecache is deprecated, use revlog.index.nodemap"
        util.nouideprecwarn(msg, b'5.3', stacklevel=2)
        return self.index.nodemap

    def hasnode(self, node):
        try:
            self.rev(node)
            return True
        except KeyError:
            return False

    def candelta(self, baserev, rev):
        """whether two revisions (baserev, rev) can be delta-ed or not"""
        # Disable delta if either rev requires a content-changing flag
        # processor (ex. LFS). This is because such flag processors can alter
        # the rawtext content that the delta will be based on, and two clients
        # could have the same revlog node with different flags (i.e. different
        # rawtext contents) and the delta could be incompatible.
        if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
            self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
        ):
            return False
        return True

    def update_caches(self, transaction):
        if self._nodemap_file is not None:
            if transaction is None:
                nodemaputil.update_persistent_nodemap(self)
            else:
                nodemaputil.setup_persistent_nodemap(transaction, self)

    def clearcaches(self):
        self._revisioncache = None
        self._chainbasecache.clear()
        self._chunkcache = (0, b'')
        self._pcache = {}
        self._nodemap_docket = None
        self.index.clearcaches()
        # The python code is the one responsible for validating the docket, so
        # we end up having to refresh it here.
        use_nodemap = (
            not self._inline
            and self._nodemap_file is not None
            and util.safehasattr(self.index, 'update_nodemap_data')
        )
        if use_nodemap:
            nodemap_data = nodemaputil.persisted_data(self)
            if nodemap_data is not None:
                self._nodemap_docket = nodemap_data[0]
                self.index.update_nodemap_data(*nodemap_data)

    def rev(self, node):
        try:
            return self.index.rev(node)
        except TypeError:
            raise
        except error.RevlogError:
            # parsers.c radix tree lookup failed
            if (
                node == self.nodeconstants.wdirid
                or node in self.nodeconstants.wdirfilenodeids
            ):
                raise error.WdirUnsupported
            raise error.LookupError(node, self.display_id, _(b'no node'))

    # Accessors for index entries.

    # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
    # are flags.
    def start(self, rev):
        return int(self.index[rev][0] >> 16)

    def flags(self, rev):
        return self.index[rev][0] & 0xFFFF

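    # Illustrative sketch of the packing above (the values are hypothetical,
    # not from any real revlog): an entry whose first field is 0x2A0001
    # encodes offset 0x2A and flag bits 0x0001, since the field stores
    # ``(offset << 16) | flags``:
    #
    #     0x2A0001 >> 16      # -> 0x2A   (what ``start`` returns)
    #     0x2A0001 & 0xFFFF   # -> 0x0001 (what ``flags`` returns)
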
    def length(self, rev):
        return self.index[rev][1]

    def sidedata_length(self, rev):
        if not self.hassidedata:
            return 0
        return self.index[rev][9]

    def rawsize(self, rev):
        """return the length of the uncompressed text for a given revision"""
        l = self.index[rev][2]
        if l >= 0:
            return l

        t = self.rawdata(rev)
        return len(t)

    def size(self, rev):
        """length of non-raw text (processed by a "read" flag processor)"""
        # fast path: if no "read" flag processor could change the content,
        # size is rawsize. note: ELLIPSIS is known to not change the content.
        flags = self.flags(rev)
        if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
            return self.rawsize(rev)

        return len(self.revision(rev, raw=False))

    def chainbase(self, rev):
        base = self._chainbasecache.get(rev)
        if base is not None:
            return base

        index = self.index
        iterrev = rev
        base = index[iterrev][3]
        while base != iterrev:
            iterrev = base
            base = index[iterrev][3]

        self._chainbasecache[rev] = base
        return base

    def linkrev(self, rev):
        return self.index[rev][4]

    def parentrevs(self, rev):
        try:
            entry = self.index[rev]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise
        if entry[5] == nullrev:
            return entry[6], entry[5]
        else:
            return entry[5], entry[6]

    # fast parentrevs(rev) where rev isn't filtered
    _uncheckedparentrevs = parentrevs

    def node(self, rev):
        try:
            return self.index[rev][7]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

    # Derived from index values.

    def end(self, rev):
        return self.start(rev) + self.length(rev)

    def parents(self, node):
        i = self.index
        d = i[self.rev(node)]
        # inline node() to avoid function call overhead
        if d[5] == nullrev:
            return i[d[6]][7], i[d[5]][7]
        else:
            return i[d[5]][7], i[d[6]][7]

    def chainlen(self, rev):
        return self._chaininfo(rev)[0]

    def _chaininfo(self, rev):
        chaininfocache = self._chaininfocache
        if rev in chaininfocache:
            return chaininfocache[rev]
        index = self.index
        generaldelta = self._generaldelta
        iterrev = rev
        e = index[iterrev]
        clen = 0
        compresseddeltalen = 0
        while iterrev != e[3]:
            clen += 1
            compresseddeltalen += e[1]
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            if iterrev in chaininfocache:
                t = chaininfocache[iterrev]
                clen += t[0]
                compresseddeltalen += t[1]
                break
            e = index[iterrev]
        else:
            # Add text length of base since decompressing that also takes
            # work. For cache hits the length is already included.
            compresseddeltalen += e[1]
        r = (clen, compresseddeltalen)
        chaininfocache[rev] = r
        return r

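    # A minimal sketch under assumed data (a hypothetical revlog ``rl`` with
    # general delta, where rev 2 deltas against rev 1, which deltas against
    # the full text stored at rev 0): ``rl._chaininfo(2)`` walks 2 -> 1 -> 0
    # and returns ``(2, len0 + len1 + len2)``, i.e. two delta hops plus the
    # summed on-disk chunk lengths, including the base text.
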
    def _deltachain(self, rev, stoprev=None):
        """Obtain the delta chain for a revision.

        ``stoprev`` specifies a revision to stop at. If not specified, we
        stop at the base of the chain.

        Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
        revs in ascending order and ``stopped`` is a bool indicating whether
        ``stoprev`` was hit.
        """
        # Try C implementation.
        try:
            return self.index.deltachain(rev, stoprev, self._generaldelta)
        except AttributeError:
            pass

        chain = []

        # Alias to prevent attribute lookup in tight loop.
        index = self.index
        generaldelta = self._generaldelta

        iterrev = rev
        e = index[iterrev]
        while iterrev != e[3] and iterrev != stoprev:
            chain.append(iterrev)
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            e = index[iterrev]

        if iterrev == stoprev:
            stopped = True
        else:
            chain.append(iterrev)
            stopped = False

        chain.reverse()
        return chain, stopped

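    # A minimal sketch, assuming the same hypothetical chain as above
    # (rev 2 deltas against rev 1, rev 1 against full-text rev 0):
    #
    #     rl._deltachain(2)             # -> ([0, 1, 2], False)
    #     rl._deltachain(2, stoprev=1)  # -> ([2], True)
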
    def ancestors(self, revs, stoprev=0, inclusive=False):
        """Generate the ancestors of 'revs' in reverse revision order.
        Does not generate revs lower than stoprev.

        See the documentation for ancestor.lazyancestors for more details."""

        # first, make sure start revisions aren't filtered
        revs = list(revs)
        checkrev = self.node
        for r in revs:
            checkrev(r)
        # and we're sure ancestors aren't filtered as well

        if rustancestor is not None:
            lazyancestors = rustancestor.LazyAncestors
            arg = self.index
        else:
            lazyancestors = ancestor.lazyancestors
            arg = self._uncheckedparentrevs
        return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)

    def descendants(self, revs):
        return dagop.descendantrevs(revs, self.revs, self.parentrevs)

    def findcommonmissing(self, common=None, heads=None):
        """Return a tuple of the ancestors of common and the ancestors of heads
        that are not ancestors of common. In revset terminology, we return the
        tuple:

        ::common, (::heads) - (::common)

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [self.nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        # we want the ancestors, but inclusive
        class lazyset(object):
            def __init__(self, lazyvalues):
                self.addedvalues = set()
                self.lazyvalues = lazyvalues

            def __contains__(self, value):
                return value in self.addedvalues or value in self.lazyvalues

            def __iter__(self):
                added = self.addedvalues
                for r in added:
                    yield r
                for r in self.lazyvalues:
                    if r not in added:
                        yield r

            def add(self, value):
                self.addedvalues.add(value)

            def update(self, values):
                self.addedvalues.update(values)

        has = lazyset(self.ancestors(common))
        has.add(nullrev)
        has.update(common)

        # take all ancestors from heads that aren't in has
        missing = set()
        visit = collections.deque(r for r in heads if r not in has)
        while visit:
            r = visit.popleft()
            if r in missing:
                continue
            else:
                missing.add(r)
                for p in self.parentrevs(r):
                    if p not in has:
                        visit.append(p)
        missing = list(missing)
        missing.sort()
        return has, [self.node(miss) for miss in missing]

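    # A minimal sketch under assumed data: in a hypothetical linear revlog
    # ``rl`` with revs 0 <- 1 <- 2 <- 3, calling
    # ``rl.findcommonmissing(common=[rl.node(1)], heads=[rl.node(3)])``
    # yields a ``has`` set containing {nullrev, 0, 1} and the missing nodes
    # ``[rl.node(2), rl.node(3)]`` in topological order.
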
    def incrementalmissingrevs(self, common=None):
        """Return an object that can be used to incrementally compute the
        revision numbers of the ancestors of arbitrary sets that are not
        ancestors of common. This is an ancestor.incrementalmissingancestors
        object.

        'common' is a list of revision numbers. If common is not supplied, uses
        nullrev.
        """
        if common is None:
            common = [nullrev]

        if rustancestor is not None:
            return rustancestor.MissingAncestors(self.index, common)
        return ancestor.incrementalmissingancestors(self.parentrevs, common)

    def findmissingrevs(self, common=None, heads=None):
        """Return the revision numbers of the ancestors of heads that
        are not ancestors of common.

        More specifically, return a list of revision numbers corresponding to
        nodes N such that every N satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of revision numbers. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullrev]
        if heads is None:
            heads = self.headrevs()

        inc = self.incrementalmissingrevs(common=common)
        return inc.missingancestors(heads)

    def findmissing(self, common=None, heads=None):
        """Return the ancestors of heads that are not ancestors of common.

        More specifically, return a list of nodes N such that every N
        satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [self.nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        inc = self.incrementalmissingrevs(common=common)
        return [self.node(r) for r in inc.missingancestors(heads)]

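    # A minimal sketch under the same assumed linear history 0 <- 1 <- 2 <- 3:
    # ``rl.findmissing(common=[rl.node(1)], heads=[rl.node(3)])`` returns
    # ``[rl.node(2), rl.node(3)]``, while ``rl.findmissingrevs`` answers the
    # same question but takes and returns revision numbers instead of nodes.
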
    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

          1. N is a descendant of some node in 'roots'
          2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'. Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs. If 'roots' is
        unspecified, uses nullid as the only root. If 'heads' is
        unspecified, uses list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [self.nullid]  # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return (
                [self.node(r) for r in self],
                [self.nullid],
                list(self.heads()),
            )
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == self.nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n)  # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update(
                            [p for p in self.parents(n) if p != self.nullid]
                        )
                    elif n in heads:  # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [root for root in roots if root in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(root) for root in roots])
                else:
                    # No more roots? Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [self.nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev:  # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendants before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendants, it's not a root.
                    if (p[0] in descendants) or (p[1] in descendants):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendant if either of its parents are
                # descendants. (We seeded the dependents list with the roots
                # up there, remember?)
                if (p[0] in descendants) or (p[1] in descendants):
                    descendants.add(n)
                    isdescendant = True
            if isdescendant and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendants and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = True
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = True
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [head for head, flag in pycompat.iteritems(heads) if flag]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)

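    # A minimal sketch under assumed data (linear history 0 <- 1 <- 2):
    # ``rl.nodesbetween(roots=[rl.node(1)], heads=[rl.node(2)])`` returns
    # ``([rl.node(1), rl.node(2)], [rl.node(1)], [rl.node(2)])``: the
    # topological path plus the subsets of roots and heads actually reached.
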
    def headrevs(self, revs=None):
        if revs is None:
            try:
                return self.index.headrevs()
            except AttributeError:
                return self._headrevs()
        if rustdagop is not None:
            return rustdagop.headrevs(self.index, revs)
        return dagop.headrevs(revs, self._uncheckedparentrevs)

    def computephases(self, roots):
        return self.index.computephasesmapsets(roots)

    def _headrevs(self):
        count = len(self)
        if not count:
            return [nullrev]
        # we won't iterate over filtered revs, so nobody is a head at start
        ishead = [0] * (count + 1)
        index = self.index
        for r in self:
            ishead[r] = 1  # I may be a head
            e = index[r]
            ishead[e[5]] = ishead[e[6]] = 0  # my parents are not
        return [r for r, val in enumerate(ishead) if val]

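    # A minimal sketch under assumed data: with revs 0 <- 1 <- 2 plus a
    # branch 1 <- 3, every rev starts out marked as a possible head and each
    # entry clears its parents, so ``rl._headrevs()`` returns [2, 3]; revs 0
    # and 1 are cleared because they appear as parents of later entries.
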
    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            if not len(self):
                return [self.nullid]
            return [self.node(r) for r in self.headrevs()]

        if start is None:
            start = nullrev
        else:
            start = self.rev(start)

        stoprevs = {self.rev(n) for n in stop or []}

        revs = dagop.headrevssubset(
            self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
        )

        return [self.node(rev) for rev in revs]

    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

    def commonancestorsheads(self, a, b):
        """calculate all the heads of the common ancestors of nodes a and b"""
        a, b = self.rev(a), self.rev(b)
        ancs = self._commonancestorsheads(a, b)
        return pycompat.maplist(self.node, ancs)

    def _commonancestorsheads(self, *revs):
        """calculate all the heads of the common ancestors of revs"""
        try:
            ancs = self.index.commonancestorsheads(*revs)
        except (AttributeError, OverflowError):  # C implementation failed
            ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
        return ancs

    def isancestor(self, a, b):
        """return True if node a is an ancestor of node b

        A revision is considered an ancestor of itself."""
        a, b = self.rev(a), self.rev(b)
        return self.isancestorrev(a, b)

    def isancestorrev(self, a, b):
        """return True if revision a is an ancestor of revision b

        A revision is considered an ancestor of itself.

        The implementation of this is trivial but the use of
        reachableroots is not."""
        if a == nullrev:
            return True
        elif a == b:
            return True
        elif a > b:
            return False
        return bool(self.reachableroots(a, [b], [a], includepath=False))

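    # A minimal sketch of the fast-path checks above: nullrev is an ancestor
    # of everything, every rev is its own ancestor, and because revision
    # numbers are topologically ordered, ``a > b`` can never mean "a is an
    # ancestor of b". Only the remaining case needs the reachableroots walk:
    #
    #     rl.isancestorrev(0, 2)  # graph walk via reachableroots
    #     rl.isancestorrev(2, 0)  # -> False immediately, since 2 > 0
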
    def reachableroots(self, minroot, heads, roots, includepath=False):
        """return (heads(::(<roots> and <roots>::<heads>)))

        If includepath is True, return (<roots>::<heads>)."""
        try:
            return self.index.reachableroots2(
                minroot, heads, roots, includepath
            )
        except AttributeError:
            return dagop._reachablerootspure(
                self.parentrevs, minroot, roots, heads, includepath
            )

    def ancestor(self, a, b):
        """calculate the "best" common ancestor of nodes a and b"""

        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.ancestors(a, b)
        except (AttributeError, OverflowError):
            ancs = ancestor.ancestors(self.parentrevs, a, b)
        if ancs:
            # choose a consistent winner when there's a tie
            return min(map(self.node, ancs))
        return self.nullid

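    # A minimal sketch under assumed data: if two nodes sit on either side of
    # a criss-cross merge, the ancestor computation can return more than one
    # equally "best" common ancestor; taking ``min`` over the binary node
    # values is an arbitrary but deterministic tie-break, so every client
    # picks the same ancestor for the same graph.
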
    def _match(self, id):
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == self.nodeconstants.nodelen:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node)  # quick search the index
                return node
            except error.LookupError:
                pass  # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if b"%d" % rev != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 2 * self.nodeconstants.nodelen:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (TypeError, error.LookupError):
                pass

    def _partialmatch(self, id):
        # we don't care about wdirfilenodeids as they should always be full
        # hashes
        maybewdir = self.nodeconstants.wdirhex.startswith(id)
        try:
            partial = self.index.partialmatch(id)
            if partial and self.hasnode(partial):
                if maybewdir:
                    # single 'ff...' match in radix tree, ambiguous with wdir
                    raise error.RevlogError
                return partial
            if maybewdir:
                # no 'ff...' match in radix tree, wdir identified
                raise error.WdirUnsupported
            return None
        except error.RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fast path: for unfiltered changelog, radix tree is accurate
            if not getattr(self, 'filteredrevs', None):
                raise error.AmbiguousPrefixLookupError(
                    id, self.display_id, _(b'ambiguous identifier')
                )
            # fall through to slow path that filters hidden revisions
        except (AttributeError, ValueError):
            # we are pure python, or key was too short to search radix tree
            pass

        if id in self._pcache:
            return self._pcache[id]

        if len(id) <= 40:
            try:
                # hex(node)[:...]
                l = len(id) // 2  # grab an even number of digits
                prefix = bin(id[: l * 2])
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [
                    n for n in nl if hex(n).startswith(id) and self.hasnode(n)
                ]
                if self.nodeconstants.nullhex.startswith(id):
                    nl.append(self.nullid)
                if len(nl) > 0:
                    if len(nl) == 1 and not maybewdir:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise error.AmbiguousPrefixLookupError(
                        id, self.display_id, _(b'ambiguous identifier')
                    )
                if maybewdir:
                    raise error.WdirUnsupported
                return None
            except TypeError:
                pass

1576 def lookup(self, id):
1576 def lookup(self, id):
1577 """locate a node based on:
1577 """locate a node based on:
1578 - revision number or str(revision number)
1578 - revision number or str(revision number)
1579 - nodeid or subset of hex nodeid
1579 - nodeid or subset of hex nodeid
1580 """
1580 """
1581 n = self._match(id)
1581 n = self._match(id)
1582 if n is not None:
1582 if n is not None:
1583 return n
1583 return n
1584 n = self._partialmatch(id)
1584 n = self._partialmatch(id)
1585 if n:
1585 if n:
1586 return n
1586 return n
1587
1587
1588 raise error.LookupError(id, self.display_id, _(b'no match found'))
1588 raise error.LookupError(id, self.display_id, _(b'no match found'))
1589
1589
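    # Hedged usage sketch (hypothetical identifiers, not from the original
    # file): lookup() accepts every form _match() handles plus unambiguous
    # hex prefixes resolved through _partialmatch():
    #
    #   rl.lookup(42)          # revision number
    #   rl.lookup(b'1a2b3c')   # hex prefix matching exactly one stored node
    #
    # An ambiguous prefix raises error.AmbiguousPrefixLookupError, and an
    # unknown identifier raises error.LookupError.
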
    def shortest(self, node, minlength=1):
        """Find the shortest unambiguous prefix that matches node."""

        def isvalid(prefix):
            try:
                matchednode = self._partialmatch(prefix)
            except error.AmbiguousPrefixLookupError:
                return False
            except error.WdirUnsupported:
                # single 'ff...' match
                return True
            if matchednode is None:
                raise error.LookupError(node, self.display_id, _(b'no node'))
            return True

        def maybewdir(prefix):
            return all(c == b'f' for c in pycompat.iterbytestr(prefix))

        hexnode = hex(node)

        def disambiguate(hexnode, minlength):
            """Disambiguate against wdirid."""
            for length in range(minlength, len(hexnode) + 1):
                prefix = hexnode[:length]
                if not maybewdir(prefix):
                    return prefix

        if not getattr(self, 'filteredrevs', None):
            try:
                length = max(self.index.shortest(node), minlength)
                return disambiguate(hexnode, length)
            except error.RevlogError:
                if node != self.nodeconstants.wdirid:
                    raise error.LookupError(
                        node, self.display_id, _(b'no node')
                    )
            except AttributeError:
                # Fall through to pure code
                pass

        if node == self.nodeconstants.wdirid:
            for length in range(minlength, len(hexnode) + 1):
                prefix = hexnode[:length]
                if isvalid(prefix):
                    return prefix

        for length in range(minlength, len(hexnode) + 1):
            prefix = hexnode[:length]
            if isvalid(prefix):
                return disambiguate(hexnode, length)

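    # Worked example (hypothetical hashes, for illustration): if the revlog
    # stores exactly two nodes whose hashes begin 1a2b4... and 1a2b5..., then
    # shortest() for the first returns b'1a2b4': every shorter prefix (b'1',
    # b'1a', b'1a2', b'1a2b') matches both nodes and is therefore ambiguous.
    # disambiguate() additionally skips prefixes made only of 'f' characters,
    # since those could also denote the working-directory pseudo-node.
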
    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different from what is stored.
        """
        p1, p2 = self.parents(node)
        return storageutil.hashrevisionsha1(text, p1, p2) != node

    def _cachesegment(self, offset, data):
        """Add a segment to the revlog cache.

        Accepts an absolute offset and the data that is at that location.
        """
        o, d = self._chunkcache
        # try to add to existing cache
        if o + len(d) == offset and len(d) + len(data) < _chunksize:
            self._chunkcache = o, d + data
        else:
            self._chunkcache = offset, data

    def _readsegment(self, offset, length, df=None):
        """Load a segment of raw data from the revlog.

        Accepts an absolute offset, length to read, and an optional existing
        file handle to read from.

        If an existing file handle is passed, it will be seeked and the
        original seek position will NOT be restored.

        Returns a str or buffer of raw byte data.

        Raises if the requested number of bytes could not be read.
        """
        # Cache data both forward and backward around the requested
        # data, in a fixed size window. This helps speed up operations
        # involving reading the revlog backwards.
        cachesize = self._chunkcachesize
        realoffset = offset & ~(cachesize - 1)
        reallength = (
            (offset + length + cachesize) & ~(cachesize - 1)
        ) - realoffset
        with self._datareadfp(df) as df:
            df.seek(realoffset)
            d = df.read(reallength)

        self._cachesegment(realoffset, d)
        if offset != realoffset or reallength != length:
            startoffset = offset - realoffset
            if len(d) - startoffset < length:
                raise error.RevlogError(
                    _(
                        b'partial read of revlog %s; expected %d bytes from '
                        b'offset %d, got %d'
                    )
                    % (
                        self._indexfile if self._inline else self._datafile,
                        length,
                        offset,
                        len(d) - startoffset,
                    )
                )

            return util.buffer(d, startoffset, length)

        if len(d) < length:
            raise error.RevlogError(
                _(
                    b'partial read of revlog %s; expected %d bytes from offset '
                    b'%d, got %d'
                )
                % (
                    self._indexfile if self._inline else self._datafile,
                    length,
                    offset,
                    len(d),
                )
            )

        return d

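    # Worked example of the windowing arithmetic above (illustrative values,
    # assuming the default cachesize of 65536, which is always a power of
    # two): a request for offset=70000, length=100 yields
    #
    #   realoffset = 70000 & ~65535                           = 65536
    #   reallength = ((70000 + 100 + 65536) & ~65535) - 65536 = 65536
    #
    # so one aligned 64KiB window [65536, 131072) is read and cached, and
    # util.buffer(d, 70000 - 65536, 100) is returned to the caller.
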
    def _getsegment(self, offset, length, df=None):
        """Obtain a segment of raw data from the revlog.

        Accepts an absolute offset, length of bytes to obtain, and an
        optional file handle to the already-opened revlog. If the file
        handle is used, its original seek position will not be preserved.

        Requests for data may be returned from a cache.

        Returns a str or a buffer instance of raw byte data.
        """
        o, d = self._chunkcache
        l = len(d)

        # is it in the cache?
        cachestart = offset - o
        cacheend = cachestart + length
        if cachestart >= 0 and cacheend <= l:
            if cachestart == 0 and cacheend == l:
                return d  # avoid a copy
            return util.buffer(d, cachestart, cacheend - cachestart)

        return self._readsegment(offset, length, df=df)

    def _getsegmentforrevs(self, startrev, endrev, df=None):
        """Obtain a segment of raw data corresponding to a range of revisions.

        Accepts the start and end revisions and an optional already-open
        file handle to be used for reading. If the file handle is read, its
        seek position will not be preserved.

        Requests for data may be satisfied by a cache.

        Returns a 2-tuple of (offset, data) for the requested range of
        revisions. Offset is the integer offset from the beginning of the
        revlog and data is a str or buffer of the raw byte data.

        Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
        to determine where each revision's data begins and ends.
        """
        # Inlined self.start(startrev) & self.end(endrev) for perf reasons
        # (functions are expensive).
        index = self.index
        istart = index[startrev]
        start = int(istart[0] >> 16)
        if startrev == endrev:
            end = start + istart[1]
        else:
            iend = index[endrev]
            end = int(iend[0] >> 16) + iend[1]

        if self._inline:
            start += (startrev + 1) * self.index.entry_size
            end += (endrev + 1) * self.index.entry_size
        length = end - start

        return start, self._getsegment(start, length, df=df)

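    # Illustration of the inline adjustment above (assumed numbers): in an
    # inline revlog the data chunks are interleaved with the fixed-size index
    # entries in the .i file, so rev r's chunk physically starts after r + 1
    # index entries. With entry_size = 64, rev 0's chunk, whose logical
    # start() is 0, actually begins at byte (0 + 1) * 64 = 64 of the index
    # file, rev 1's chunk is shifted by a further 64 bytes, and so on.
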
    def _chunk(self, rev, df=None):
        """Obtain a single decompressed chunk for a revision.

        Accepts an integer revision and an optional already-open file handle
        to be used for reading. If used, the seek position of the file will not
        be preserved.

        Returns a str holding uncompressed data for the requested revision.
        """
        compression_mode = self.index[rev][10]
        data = self._getsegmentforrevs(rev, rev, df=df)[1]
        if compression_mode == COMP_MODE_PLAIN:
            return data
        elif compression_mode == COMP_MODE_DEFAULT:
            return self._decompressor(data)
        elif compression_mode == COMP_MODE_INLINE:
            return self.decompress(data)
        else:
            msg = 'unknown compression mode %d'
            msg %= compression_mode
            raise error.RevlogError(msg)

    def _chunks(self, revs, df=None, targetsize=None):
        """Obtain decompressed chunks for the specified revisions.

        Accepts an iterable of numeric revisions that are assumed to be in
        ascending order. Also accepts an optional already-open file handle
        to be used for reading. If used, the seek position of the file will
        not be preserved.

        This function is similar to calling ``self._chunk()`` multiple times,
        but is faster.

        Returns a list with decompressed data for each requested revision.
        """
        if not revs:
            return []
        start = self.start
        length = self.length
        inline = self._inline
        iosize = self.index.entry_size
        buffer = util.buffer

        l = []
        ladd = l.append

        if not self._withsparseread:
            slicedchunks = (revs,)
        else:
            slicedchunks = deltautil.slicechunk(
                self, revs, targetsize=targetsize
            )

        for revschunk in slicedchunks:
            firstrev = revschunk[0]
            # Skip trailing revisions with empty diff
            for lastrev in revschunk[::-1]:
                if length(lastrev) != 0:
                    break

            try:
                offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
            except OverflowError:
                # issue4215 - we can't cache a run of chunks greater than
                # 2G on Windows
                return [self._chunk(rev, df=df) for rev in revschunk]

            decomp = self.decompress
            # self._decompressor might be None, but will not be used in that
            # case
            def_decomp = self._decompressor
            for rev in revschunk:
                chunkstart = start(rev)
                if inline:
                    chunkstart += (rev + 1) * iosize
                chunklength = length(rev)
                comp_mode = self.index[rev][10]
                c = buffer(data, chunkstart - offset, chunklength)
                if comp_mode == COMP_MODE_PLAIN:
                    ladd(c)
                elif comp_mode == COMP_MODE_INLINE:
                    ladd(decomp(c))
                elif comp_mode == COMP_MODE_DEFAULT:
                    ladd(def_decomp(c))
                else:
                    msg = 'unknown compression mode %d'
                    msg %= comp_mode
                    raise error.RevlogError(msg)

        return l

    def _chunkclear(self):
        """Clear the raw chunk cache."""
        self._chunkcache = (0, b'')

    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            return nullrev
        elif self._generaldelta:
            return base
        else:
            return rev - 1

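    # Semantics of the base field above, for illustration: index[rev][3] ==
    # rev marks a full snapshot, reported as a nullrev delta parent.
    # Otherwise a generaldelta revlog stores the real delta base in the
    # index, while a legacy (non-generaldelta) revlog always deltas against
    # the previous revision, rev - 1, regardless of what the base field says.
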
    def issnapshot(self, rev):
        """tells whether rev is a snapshot"""
        if not self._sparserevlog:
            return self.deltaparent(rev) == nullrev
        elif util.safehasattr(self.index, b'issnapshot'):
            # assign the method directly to cache both the attribute test
            # and the lookup
            self.issnapshot = self.index.issnapshot
            return self.issnapshot(rev)
        if rev == nullrev:
            return True
        entry = self.index[rev]
        base = entry[3]
        if base == rev:
            return True
        if base == nullrev:
            return True
        p1 = entry[5]
        p2 = entry[6]
        if base == p1 or base == p2:
            return False
        return self.issnapshot(base)

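    # Example of the pure-python fallback above (hypothetical sparse-revlog
    # layout): if rev 9 deltas against rev 5, and rev 5 is neither of rev 9's
    # parents, the recursion asks whether rev 5 is itself a snapshot. A rev
    # that deltas against one of its parents is an ordinary delta, while a
    # rev whose base is itself or nullrev is a full snapshot, which
    # terminates the recursion.
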
    def snapshotdepth(self, rev):
        """number of snapshots in the chain before this one"""
        if not self.issnapshot(rev):
            raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
        return len(self._deltachain(rev)[0]) - 1

    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions

        The delta calculated is in binary form and is intended to be written to
        revlog data directly. So this function needs raw revision data.
        """
        if rev1 != nullrev and self.deltaparent(rev2) == rev1:
            return bytes(self._chunk(rev2))

        return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))

    def _processflags(self, text, flags, operation, raw=False):
        """deprecated entry point to access flag processors"""
        msg = b'_processflag(...) use the specialized variant'
        util.nouideprecwarn(msg, b'5.2', stacklevel=2)
        if raw:
            return text, flagutil.processflagsraw(self, text, flags)
        elif operation == b'read':
            return flagutil.processflagsread(self, text, flags)
        else:  # write operation
            return flagutil.processflagswrite(self, text, flags)

    def revision(self, nodeorrev, _df=None, raw=False):
        """return an uncompressed revision of a given node or revision
        number.

        _df - an existing file handle to read from. (internal-only)
        raw - an optional argument specifying if the revision data is to be
        treated as raw data when applying flag transforms. 'raw' should be set
        to True when generating changegroups or in debug commands.
        """
        if raw:
            msg = (
                b'revlog.revision(..., raw=True) is deprecated, '
                b'use revlog.rawdata(...)'
            )
            util.nouideprecwarn(msg, b'5.2', stacklevel=2)
        return self._revisiondata(nodeorrev, _df, raw=raw)[0]

    def sidedata(self, nodeorrev, _df=None):
        """a map of extra data related to the changeset but not part of the hash

        This function currently returns a dictionary. However, a more advanced
        mapping object will likely be used in the future for more
        efficient/lazy code.
        """
        return self._revisiondata(nodeorrev, _df)[1]

    def _revisiondata(self, nodeorrev, _df=None, raw=False):
        # deal with <nodeorrev> argument type
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        # fast path the special `nullid` rev
        if node == self.nullid:
            return b"", {}

        # ``rawtext`` is the text as stored inside the revlog. Might be the
        # revision or might need to be processed to retrieve the revision.
        rev, rawtext, validated = self._rawtext(node, rev, _df=_df)

        if self.hassidedata:
            if rev is None:
                rev = self.rev(node)
            sidedata = self._sidedata(rev)
        else:
            sidedata = {}

        if raw and validated:
            # if we don't want to process the raw text and the raw
            # text is cached, we can exit early.
            return rawtext, sidedata
        if rev is None:
            rev = self.rev(node)
        # the revlog's flags for this revision
        # (they usually alter its state or content)
        flags = self.flags(rev)

        if validated and flags == REVIDX_DEFAULT_FLAGS:
            # no extra flags set, no flag processor runs, text = rawtext
            return rawtext, sidedata

        if raw:
            validatehash = flagutil.processflagsraw(self, rawtext, flags)
            text = rawtext
        else:
            r = flagutil.processflagsread(self, rawtext, flags)
            text, validatehash = r
        if validatehash:
            self.checkhash(text, node, rev=rev)
        if not validated:
            self._revisioncache = (node, rev, rawtext)

        return text, sidedata

    def _rawtext(self, node, rev, _df=None):
        """return the possibly unvalidated rawtext for a revision

        returns (rev, rawtext, validated)
        """

        # revision in the cache (could be useful to apply delta)
        cachedrev = None
        # An intermediate text to apply deltas to
        basetext = None

        # Check if we have the entry in cache
        # The cache entry looks like (node, rev, rawtext)
        if self._revisioncache:
            if self._revisioncache[0] == node:
                return (rev, self._revisioncache[2], True)
            cachedrev = self._revisioncache[1]

        if rev is None:
            rev = self.rev(node)

        chain, stopped = self._deltachain(rev, stoprev=cachedrev)
        if stopped:
            basetext = self._revisioncache[2]

        # drop cache to save memory, the caller is expected to
        # update self._revisioncache after validating the text
        self._revisioncache = None

        targetsize = None
        rawsize = self.index[rev][2]
        if 0 <= rawsize:
            targetsize = 4 * rawsize

        bins = self._chunks(chain, df=_df, targetsize=targetsize)
        if basetext is None:
            basetext = bytes(bins[0])
            bins = bins[1:]

        rawtext = mdiff.patches(basetext, bins)
        del basetext  # let us have a chance to free memory early
        return (rev, rawtext, False)

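    # Reconstruction sketch for _rawtext above (hypothetical chain, for
    # illustration): if _deltachain(9) returns ([5, 8, 9], False), rev 5 is a
    # full snapshot, bins holds the three decompressed chunks, and
    #
    #   rawtext = mdiff.patches(bytes(bins[0]), bins[1:])
    #
    # applies the deltas for revs 8 and 9 on top of rev 5's full text. When
    # the cached revision lies on the chain (stopped is True), the chain is
    # cut there, the cached rawtext becomes basetext, and every chunk in bins
    # is applied as a delta on top of it.
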
    def _sidedata(self, rev):
        """Return the sidedata for a given revision number."""
        index_entry = self.index[rev]
        sidedata_offset = index_entry[8]
        sidedata_size = index_entry[9]

        if self._inline:
            sidedata_offset += self.index.entry_size * (1 + rev)
        if sidedata_size == 0:
            return {}

        comp_segment = self._getsegment(sidedata_offset, sidedata_size)
        comp = self.index[rev][11]
        if comp == COMP_MODE_PLAIN:
            segment = comp_segment
        elif comp == COMP_MODE_DEFAULT:
            segment = self._decompressor(comp_segment)
        elif comp == COMP_MODE_INLINE:
            segment = self.decompress(comp_segment)
        else:
            msg = 'unknown compression mode %d'
            msg %= comp
            raise error.RevlogError(msg)

        sidedata = sidedatautil.deserialize_sidedata(segment)
        return sidedata

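    # Note, for illustration: sidedata gets the same three-way compression
    # dispatch as revision chunks. index[rev][11] records whether the stored
    # sidedata segment is plain bytes (COMP_MODE_PLAIN), compressed with the
    # revlog's default engine with no per-chunk header (COMP_MODE_DEFAULT),
    # or self-describing via its first header byte (COMP_MODE_INLINE). The
    # same inline offset shift as for data chunks applies before reading.
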
    def rawdata(self, nodeorrev, _df=None):
        """return the uncompressed raw data of a given node or revision number.

        _df - an existing file handle to read from. (internal-only)
        """
        return self._revisiondata(nodeorrev, _df, raw=True)[0]

    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        return storageutil.hashrevisionsha1(text, p1, p2)

    def checkhash(self, text, node, p1=None, p2=None, rev=None):
        """Check node hash integrity.

        Available as a function so that subclasses can extend hash mismatch
        behaviors as needed.
        """
        try:
            if p1 is None and p2 is None:
                p1, p2 = self.parents(node)
            if node != self.hash(text, p1, p2):
                # Clear the revision cache on hash failure. The revision cache
                # only stores the raw revision and clearing the cache does have
                # the side-effect that we won't have a cache hit when the raw
                # revision data is accessed. But this case should be rare and
                # it is extra work to teach the cache about the hash
                # verification state.
                if self._revisioncache and self._revisioncache[0] == node:
                    self._revisioncache = None

                revornode = rev
                if revornode is None:
                    revornode = templatefilters.short(hex(node))
                raise error.RevlogError(
                    _(b"integrity check failed on %s:%s")
                    % (self.display_id, pycompat.bytestr(revornode))
                )
        except error.RevlogError:
            if self._censorable and storageutil.iscensoredtext(text):
                raise error.CensoredNodeError(self.display_id, node, text)
            raise

    def _enforceinlinesize(self, tr):
        """Check if the revlog is too big for inline and convert if so.

        This should be called after revisions are added to the revlog. If the
        revlog has grown too large to be an inline revlog, it will convert it
        to use multiple index and data files.
        """
        tiprev = len(self) - 1
        total_size = self.start(tiprev) + self.length(tiprev)
        if not self._inline or total_size < _maxinline:
            return

        troffset = tr.findoffset(self._indexfile)
        if troffset is None:
            raise error.RevlogError(
                _(b"%s not found in the transaction") % self._indexfile
            )
        trindex = 0
        tr.add(self._datafile, 0)

        existing_handles = False
        if self._writinghandles is not None:
            existing_handles = True
            fp = self._writinghandles[0]
            fp.flush()
            fp.close()
            # We can't use the cached file handle after close(). So prevent
            # its usage.
            self._writinghandles = None

        new_dfh = self._datafp(b'w+')
        new_dfh.truncate(0)  # drop any potentially existing data
        try:
            with self._indexfp() as read_ifh:
                for r in self:
                    new_dfh.write(self._getsegmentforrevs(r, r, df=read_ifh)[1])
                    if troffset <= self.start(r):
                        trindex = r
                new_dfh.flush()

            with self.__index_new_fp() as fp:
                self._format_flags &= ~FLAG_INLINE_DATA
                self._inline = False
                for i in self:
                    e = self.index.entry_binary(i)
                    if i == 0 and self._docket is None:
                        header = self._format_flags | self._format_version
                        header = self.index.pack_header(header)
                        e = header + e
                    fp.write(e)
                if self._docket is not None:
                    self._docket.index_end = fp.tell()
                # the temp file replaces the real index when we exit the
                # context manager

            tr.replace(self._indexfile, trindex * self.index.entry_size)
            nodemaputil.setup_persistent_nodemap(tr, self)
            self._chunkclear()

            if existing_handles:
                # switched from inline to conventional; reopen the index
                ifh = self.__index_write_fp()
                self._writinghandles = (ifh, new_dfh)
                new_dfh = None
        finally:
            if new_dfh is not None:
                new_dfh.close()

    def _nodeduplicatecallback(self, transaction, node):
        """called when trying to add a node already stored."""

    @contextlib.contextmanager
    def _writing(self, transaction):
        if self._trypending:
            msg = b'try to write in a `trypending` revlog: %s'
            msg %= self.display_id
            raise error.ProgrammingError(msg)
        if self._writinghandles is not None:
            yield
        else:
            r = len(self)
            dsize = 0
            if r:
                dsize = self.end(r - 1)
            dfh = None
            if not self._inline:
                try:
                    dfh = self._datafp(b"r+")
                    if self._docket is None:
                        dfh.seek(0, os.SEEK_END)
                    else:
                        dfh.seek(self._docket.data_end, os.SEEK_SET)
                except IOError as inst:
                    if inst.errno != errno.ENOENT:
                        raise
                    dfh = self._datafp(b"w+")
                transaction.add(self._datafile, dsize)
            try:
                isize = r * self.index.entry_size
                ifh = self.__index_write_fp()
                if self._inline:
                    transaction.add(self._indexfile, dsize + isize)
                else:
                    transaction.add(self._indexfile, isize)
                try:
                    self._writinghandles = (ifh, dfh)
                    try:
                        yield
                        if self._docket is not None:
                            self._write_docket(transaction)
                    finally:
                        self._writinghandles = None
                finally:
                    ifh.close()
            finally:
                if dfh is not None:
                    dfh.close()

    def _write_docket(self, transaction):
        """write the current docket on disk

        Exists as a method to help the changelog implement its transaction
        logic.

        We could also imagine using the same transaction logic for all
        revlogs, since dockets are cheap."""
        self._docket.write(transaction)

    def addrevision(
        self,
        text,
        transaction,
        link,
        p1,
        p2,
        cachedelta=None,
        node=None,
        flags=REVIDX_DEFAULT_FLAGS,
        deltacomputer=None,
        sidedata=None,
    ):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
            computed by default as hash(text, p1, p2), however subclasses might
            use a different hashing method (and override checkhash() in that
            case)
        flags - the known flags to set on the revision
        deltacomputer - an optional deltacomputer instance shared between
            multiple calls
        """
        if link == nullrev:
            raise error.RevlogError(
                _(b"attempted to add linkrev -1 to %s") % self.display_id
            )

        if sidedata is None:
            sidedata = {}
        elif sidedata and not self.hassidedata:
            raise error.ProgrammingError(
                _(b"trying to add sidedata to a revlog that does not support them")
            )

        if flags:
            node = node or self.hash(text, p1, p2)

        rawtext, validatehash = flagutil.processflagswrite(self, text, flags)

        # If the flag processor modifies the revision data, ignore any provided
        # cachedelta.
        if rawtext != text:
            cachedelta = None

        if len(rawtext) > _maxentrysize:
            raise error.RevlogError(
                _(
                    b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
                )
                % (self.display_id, len(rawtext))
            )

        node = node or self.hash(rawtext, p1, p2)
        rev = self.index.get_rev(node)
        if rev is not None:
            return rev

        if validatehash:
            self.checkhash(rawtext, node, p1=p1, p2=p2)

        return self.addrawrevision(
            rawtext,
            transaction,
            link,
            p1,
            p2,
            node,
            flags,
            cachedelta=cachedelta,
            deltacomputer=deltacomputer,
            sidedata=sidedata,
        )

    def addrawrevision(
        self,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        node,
        flags,
        cachedelta=None,
        deltacomputer=None,
        sidedata=None,
    ):
        """add a raw revision with known flags, node and parents
        useful when reusing a revision not stored in this revlog (ex: received
        over the wire, or read from an external bundle).
        """
        with self._writing(transaction):
            return self._addrevision(
                node,
                rawtext,
                transaction,
                link,
                p1,
                p2,
                flags,
                cachedelta,
                deltacomputer=deltacomputer,
                sidedata=sidedata,
            )

    def compress(self, data):
        """Generate a possibly-compressed representation of data."""
        if not data:
            return b'', data

        compressed = self._compressor.compress(data)

        if compressed:
            # The revlog compressor added the header in the returned data.
            return b'', compressed

        if data[0:1] == b'\0':
            return b'', data
        return b'u', data

    def decompress(self, data):
        """Decompress a revlog chunk.

        The chunk is expected to begin with a header identifying the
        format type so it can be routed to an appropriate decompressor.
        """
        if not data:
            return data

        # Revlogs are read much more frequently than they are written and many
        # chunks only take microseconds to decompress, so performance is
        # important here.
        #
        # We can make a few assumptions about revlogs:
        #
        # 1) the majority of chunks will be compressed (as opposed to inline
        #    raw data).
        # 2) decompressing *any* data will likely be at least 10x slower than
        #    returning raw inline data.
        # 3) we want to prioritize common and officially supported compression
        #    engines
        #
        # It follows that we want to optimize for "decompress compressed data
        # when encoded with common and officially supported compression engines"
        # case over "raw data" and "data encoded by less common or non-official
        # compression engines." That is why we have the inline lookup first
        # followed by the compengines lookup.
        #
        # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
        # compressed chunks. And this matters for changelog and manifest reads.
        t = data[0:1]

        if t == b'x':
            try:
                return _zlibdecompress(data)
            except zlib.error as e:
                raise error.RevlogError(
                    _(b'revlog decompress error: %s')
                    % stringutil.forcebytestr(e)
                )
        # '\0' is more common than 'u' so it goes first.
        elif t == b'\0':
            return data
        elif t == b'u':
            return util.buffer(data, 1)

        compressor = self._get_decompressor(t)

        return compressor.decompress(data)

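    # Round-trip sketch of the two methods above (illustrative, assuming the
    # default zlib compressor; `rl` is a hypothetical revlog):
    #
    #   header, packed = rl.compress(rawtext)
    #   stored = header + packed
    #
    # stored then starts with b'x' (zlib, header is b''), with b'u' (the
    # compressor declined, text stored verbatim after the marker), or with
    # b'\0' (incompressible data that already begins with the null header).
    # decompress(stored) dispatches on that first byte and returns rawtext
    # in every case.
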
    def _addrevision(
        self,
        node,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        flags,
        cachedelta,
        alwayscache=False,
        deltacomputer=None,
        sidedata=None,
    ):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.

        note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

        if "deltacomputer" is not provided or None, a defaultdeltacomputer will
        be used.

        invariants:
        - rawtext is optional (can be None); if not set, cachedelta must be set.
          if both are set, they must correspond to each other.
        """
        if node == self.nullid:
            raise error.RevlogError(
                _(b"%s: attempt to add null revision") % self.display_id
            )
        if (
            node == self.nodeconstants.wdirid
            or node in self.nodeconstants.wdirfilenodeids
        ):
            raise error.RevlogError(
                _(b"%s: attempt to add wdir revision") % self.display_id
            )
        if self._writinghandles is None:
            msg = b'adding revision outside `revlog._writing` context'
            raise error.ProgrammingError(msg)

        if self._inline:
            fh = self._writinghandles[0]
        else:
            fh = self._writinghandles[1]

        btext = [rawtext]

        curr = len(self)
        prev = curr - 1

        offset = self._get_data_offset(prev)

        if self._concurrencychecker:
            ifh, dfh = self._writinghandles
            if self._inline:
                # offset is "as if" it were in the .d file, so we need to add on
                # the size of the entry metadata.
                self._concurrencychecker(
                    ifh, self._indexfile, offset + curr * self.index.entry_size
                )
            else:
                # Entries in the .i are a consistent size.
                self._concurrencychecker(
                    ifh, self._indexfile, curr * self.index.entry_size
                )
                self._concurrencychecker(dfh, self._datafile, offset)

        p1r, p2r = self.rev(p1), self.rev(p2)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if rawtext is None:
            # need rawtext size, before changed by flag processors, which is
            # the non-raw size. use revlog explicitly to avoid filelog's extra
2502 # the non-raw size. use revlog explicitly to avoid filelog's extra
2503 # logic that might remove metadata size.
2503 # logic that might remove metadata size.
2504 textlen = mdiff.patchedsize(
2504 textlen = mdiff.patchedsize(
2505 revlog.size(self, cachedelta[0]), cachedelta[1]
2505 revlog.size(self, cachedelta[0]), cachedelta[1]
2506 )
2506 )
2507 else:
2507 else:
2508 textlen = len(rawtext)
2508 textlen = len(rawtext)
2509
2509
2510 if deltacomputer is None:
2510 if deltacomputer is None:
2511 deltacomputer = deltautil.deltacomputer(self)
2511 deltacomputer = deltautil.deltacomputer(self)
2512
2512
2513 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
2513 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
2514
2514
2515 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2515 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2516
2516
2517 compression_mode = COMP_MODE_INLINE
2517 compression_mode = COMP_MODE_INLINE
2518 if self._docket is not None:
2518 if self._docket is not None:
2519 h, d = deltainfo.data
2519 h, d = deltainfo.data
2520 if not h and not d:
2520 if not h and not d:
2521 # no data to store at all... declare it uncompressed
2521 # no data to store at all... declare it uncompressed
2522 compression_mode = COMP_MODE_PLAIN
2522 compression_mode = COMP_MODE_PLAIN
2523 elif not h:
2523 elif not h:
2524 t = d[0:1]
2524 t = d[0:1]
2525 if t == b'\0':
2525 if t == b'\0':
2526 compression_mode = COMP_MODE_PLAIN
2526 compression_mode = COMP_MODE_PLAIN
2527 elif t == self._docket.default_compression_header:
2527 elif t == self._docket.default_compression_header:
2528 compression_mode = COMP_MODE_DEFAULT
2528 compression_mode = COMP_MODE_DEFAULT
2529 elif h == b'u':
2529 elif h == b'u':
2530 # we have a more efficient way to declare uncompressed
2530 # we have a more efficient way to declare uncompressed
2531 h = b''
2531 h = b''
2532 compression_mode = COMP_MODE_PLAIN
2532 compression_mode = COMP_MODE_PLAIN
2533 deltainfo = deltautil.drop_u_compression(deltainfo)
2533 deltainfo = deltautil.drop_u_compression(deltainfo)
2534
2534
2535 sidedata_compression_mode = COMP_MODE_INLINE
2535 sidedata_compression_mode = COMP_MODE_INLINE
2536 if sidedata and self.hassidedata:
2536 if sidedata and self.hassidedata:
2537 sidedata_compression_mode = COMP_MODE_PLAIN
2537 sidedata_compression_mode = COMP_MODE_PLAIN
2538 serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
2538 serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
2539 sidedata_offset = offset + deltainfo.deltalen
2539 sidedata_offset = offset + deltainfo.deltalen
2540 h, comp_sidedata = self.compress(serialized_sidedata)
2540 h, comp_sidedata = self.compress(serialized_sidedata)
2541 if (
2541 if (
2542 h != b'u'
2542 h != b'u'
2543 and comp_sidedata[0:1] != b'\0'
2543 and comp_sidedata[0:1] != b'\0'
2544 and len(comp_sidedata) < len(serialized_sidedata)
2544 and len(comp_sidedata) < len(serialized_sidedata)
2545 ):
2545 ):
2546 assert not h
2546 assert not h
2547 if (
2547 if (
2548 comp_sidedata[0:1]
2548 comp_sidedata[0:1]
2549 == self._docket.default_compression_header
2549 == self._docket.default_compression_header
2550 ):
2550 ):
2551 sidedata_compression_mode = COMP_MODE_DEFAULT
2551 sidedata_compression_mode = COMP_MODE_DEFAULT
2552 serialized_sidedata = comp_sidedata
2552 serialized_sidedata = comp_sidedata
2553 else:
2553 else:
2554 sidedata_compression_mode = COMP_MODE_INLINE
2554 sidedata_compression_mode = COMP_MODE_INLINE
2555 serialized_sidedata = comp_sidedata
2555 serialized_sidedata = comp_sidedata
2556 else:
2556 else:
2557 serialized_sidedata = b""
2557 serialized_sidedata = b""
2558 # Don't store the offset if the sidedata is empty, that way
2558 # Don't store the offset if the sidedata is empty, that way
2559 # we can easily detect empty sidedata, and it will be no different
2559 # we can easily detect empty sidedata, and it will be no different
2560 # from sidedata we add manually.
2560 # from sidedata we add manually.
2561 sidedata_offset = 0
2561 sidedata_offset = 0
2562
2562
2563 e = (
2563 e = (
2564 offset_type(offset, flags),
2564 offset_type(offset, flags),
2565 deltainfo.deltalen,
2565 deltainfo.deltalen,
2566 textlen,
2566 textlen,
2567 deltainfo.base,
2567 deltainfo.base,
2568 link,
2568 link,
2569 p1r,
2569 p1r,
2570 p2r,
2570 p2r,
2571 node,
2571 node,
2572 sidedata_offset,
2572 sidedata_offset,
2573 len(serialized_sidedata),
2573 len(serialized_sidedata),
2574 compression_mode,
2574 compression_mode,
2575 sidedata_compression_mode,
2575 sidedata_compression_mode,
2576 )
2576 )
2577
2577
2578 self.index.append(e)
2578 self.index.append(e)
2579 entry = self.index.entry_binary(curr)
2579 entry = self.index.entry_binary(curr)
2580 if curr == 0 and self._docket is None:
2580 if curr == 0 and self._docket is None:
2581 header = self._format_flags | self._format_version
2581 header = self._format_flags | self._format_version
2582 header = self.index.pack_header(header)
2582 header = self.index.pack_header(header)
2583 entry = header + entry
2583 entry = header + entry
2584 self._writeentry(
2584 self._writeentry(
2585 transaction,
2585 transaction,
2586 entry,
2586 entry,
2587 deltainfo.data,
2587 deltainfo.data,
2588 link,
2588 link,
2589 offset,
2589 offset,
2590 serialized_sidedata,
2590 serialized_sidedata,
2591 )
2591 )
2592
2592
2593 rawtext = btext[0]
2593 rawtext = btext[0]
2594
2594
2595 if alwayscache and rawtext is None:
2595 if alwayscache and rawtext is None:
2596 rawtext = deltacomputer.buildtext(revinfo, fh)
2596 rawtext = deltacomputer.buildtext(revinfo, fh)
2597
2597
2598 if type(rawtext) == bytes: # only accept immutable objects
2598 if type(rawtext) == bytes: # only accept immutable objects
2599 self._revisioncache = (node, curr, rawtext)
2599 self._revisioncache = (node, curr, rawtext)
2600 self._chainbasecache[curr] = deltainfo.chainbase
2600 self._chainbasecache[curr] = deltainfo.chainbase
2601 return curr
2601 return curr
2602
2602
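# Illustrative sketch (not part of revlog.py): the sidedata storage-mode
# decision used in `_addrevision` above, as a standalone function. The
# COMP_MODE_* values and `default_header` are assumptions standing in for
# the real constants and `self._docket.default_compression_header`.
COMP_MODE_PLAIN, COMP_MODE_DEFAULT, COMP_MODE_INLINE = 0, 1, 2


def pick_sidedata_mode(serialized, compress, default_header):
    """Return (mode, payload) for one serialized sidedata blob."""
    if not serialized:
        return COMP_MODE_PLAIN, b''
    h, comp = compress(serialized)
    # keep the compressed form only when it is unambiguous (no b'u'
    # marker, no leading NUL) and actually smaller
    if h != b'u' and comp[0:1] != b'\0' and len(comp) < len(serialized):
        if comp[0:1] == default_header:
            # matches the revlog-wide default engine: no per-chunk header
            return COMP_MODE_DEFAULT, comp
        # the engine header is carried inline in the payload itself
        return COMP_MODE_INLINE, comp
    # compression did not pay off: store the plain serialization
    return COMP_MODE_PLAIN, serialized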
2603 def _get_data_offset(self, prev):
2603 def _get_data_offset(self, prev):
2604 """Returns the current offset in the (in-transaction) data file.
2604 """Returns the current offset in the (in-transaction) data file.
2605 Versions < 2 of the revlog can get this in O(1); revlog v2 needs a docket
2605 Versions < 2 of the revlog can get this in O(1); revlog v2 needs a docket
2606 file to store that information: since sidedata can be rewritten to the
2606 file to store that information: since sidedata can be rewritten to the
2607 end of the data file within a transaction, you can have cases where, for
2607 end of the data file within a transaction, you can have cases where, for
2608 example, rev `n` does not have sidedata while rev `n - 1` does, leading
2608 example, rev `n` does not have sidedata while rev `n - 1` does, leading
2609 to `n - 1`'s sidedata being written after `n`'s data.
2609 to `n - 1`'s sidedata being written after `n`'s data.
2610
2610
2611 TODO cache this in a docket file before getting out of experimental."""
2611 TODO cache this in a docket file before getting out of experimental."""
2612 if self._docket is None:
2612 if self._docket is None:
2613 return self.end(prev)
2613 return self.end(prev)
2614 else:
2614 else:
2615 return self._docket.data_end
2615 return self._docket.data_end
2616
2616
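# Illustrative sketch (not part of revlog.py) of the rule implemented by
# `_get_data_offset` above: without a docket the write offset is derived
# from the last revision; with one, only the docket's cursor is trusted,
# since sidedata rewrites may already have appended past `end(prev)`.
def next_data_offset(docket, end_of, prev):
    if docket is None:
        return end_of(prev)  # append-only file: O(1) from the index
    return docket.data_end  # revlog v2: explicit, docket-tracked cursor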
2617 def _writeentry(self, transaction, entry, data, link, offset, sidedata):
2617 def _writeentry(self, transaction, entry, data, link, offset, sidedata):
2618 # Files opened in a+ mode have inconsistent behavior on various
2618 # Files opened in a+ mode have inconsistent behavior on various
2619 # platforms. Windows requires that a file positioning call be made
2619 # platforms. Windows requires that a file positioning call be made
2620 # when the file handle transitions between reads and writes. See
2620 # when the file handle transitions between reads and writes. See
2621 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2621 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2622 # platforms, Python or the platform itself can be buggy. Some versions
2622 # platforms, Python or the platform itself can be buggy. Some versions
2623 # of Solaris have been observed to not append at the end of the file
2623 # of Solaris have been observed to not append at the end of the file
2624 # if the file was seeked to before the end. See issue4943 for more.
2624 # if the file was seeked to before the end. See issue4943 for more.
2625 #
2625 #
2626 # We work around this issue by inserting a seek() before writing.
2626 # We work around this issue by inserting a seek() before writing.
2627 # Note: This is likely not necessary on Python 3. However, because
2627 # Note: This is likely not necessary on Python 3. However, because
2628 # the file handle is reused for reads and may be seeked there, we need
2628 # the file handle is reused for reads and may be seeked there, we need
2629 # to be careful before changing this.
2629 # to be careful before changing this.
2630 if self._writinghandles is None:
2630 if self._writinghandles is None:
2631 msg = b'adding revision outside `revlog._writing` context'
2631 msg = b'adding revision outside `revlog._writing` context'
2632 raise error.ProgrammingError(msg)
2632 raise error.ProgrammingError(msg)
2633 ifh, dfh = self._writinghandles
2633 ifh, dfh = self._writinghandles
2634 if self._docket is None:
2634 if self._docket is None:
2635 ifh.seek(0, os.SEEK_END)
2635 ifh.seek(0, os.SEEK_END)
2636 else:
2636 else:
2637 ifh.seek(self._docket.index_end, os.SEEK_SET)
2637 ifh.seek(self._docket.index_end, os.SEEK_SET)
2638 if dfh:
2638 if dfh:
2639 if self._docket is None:
2639 if self._docket is None:
2640 dfh.seek(0, os.SEEK_END)
2640 dfh.seek(0, os.SEEK_END)
2641 else:
2641 else:
2642 dfh.seek(self._docket.data_end, os.SEEK_SET)
2642 dfh.seek(self._docket.data_end, os.SEEK_SET)
2643
2643
2644 curr = len(self) - 1
2644 curr = len(self) - 1
2645 if not self._inline:
2645 if not self._inline:
2646 transaction.add(self._datafile, offset)
2646 transaction.add(self._datafile, offset)
2647 transaction.add(self._indexfile, curr * len(entry))
2647 transaction.add(self._indexfile, curr * len(entry))
2648 if data[0]:
2648 if data[0]:
2649 dfh.write(data[0])
2649 dfh.write(data[0])
2650 dfh.write(data[1])
2650 dfh.write(data[1])
2651 if sidedata:
2651 if sidedata:
2652 dfh.write(sidedata)
2652 dfh.write(sidedata)
2653 ifh.write(entry)
2653 ifh.write(entry)
2654 else:
2654 else:
2655 offset += curr * self.index.entry_size
2655 offset += curr * self.index.entry_size
2656 transaction.add(self._indexfile, offset)
2656 transaction.add(self._indexfile, offset)
2657 ifh.write(entry)
2657 ifh.write(entry)
2658 ifh.write(data[0])
2658 ifh.write(data[0])
2659 ifh.write(data[1])
2659 ifh.write(data[1])
2660 if sidedata:
2660 if sidedata:
2661 ifh.write(sidedata)
2661 ifh.write(sidedata)
2662 self._enforceinlinesize(transaction)
2662 self._enforceinlinesize(transaction)
2663 if self._docket is not None:
2663 if self._docket is not None:
2664 self._docket.index_end = self._writinghandles[0].tell()
2664 self._docket.index_end = self._writinghandles[0].tell()
2665 self._docket.data_end = self._writinghandles[1].tell()
2665 self._docket.data_end = self._writinghandles[1].tell()
2666
2666
2667 nodemaputil.setup_persistent_nodemap(transaction, self)
2667 nodemaputil.setup_persistent_nodemap(transaction, self)
2668
2668
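# Illustrative sketch (not part of revlog.py) of the append discipline
# described above: before writing through an a+ handle that is also used
# for reads, seek explicitly to the intended end rather than trusting the
# implicit append position. `known_end` plays the role of the docket's
# index_end / data_end fields.
import os


def append_at(fh, payload, known_end=None):
    if known_end is None:
        fh.seek(0, os.SEEK_END)  # plain append: jump to the real end
    else:
        fh.seek(known_end, os.SEEK_SET)  # docket: trust the recorded end
    fh.write(payload)
    return fh.tell()  # new end, e.g. to store back into the docket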
2669 def addgroup(
2669 def addgroup(
2670 self,
2670 self,
2671 deltas,
2671 deltas,
2672 linkmapper,
2672 linkmapper,
2673 transaction,
2673 transaction,
2674 alwayscache=False,
2674 alwayscache=False,
2675 addrevisioncb=None,
2675 addrevisioncb=None,
2676 duplicaterevisioncb=None,
2676 duplicaterevisioncb=None,
2677 ):
2677 ):
2678 """
2678 """
2679 add a delta group
2679 add a delta group
2680
2680
2681 given a set of deltas, add them to the revision log. the
2681 given a set of deltas, add them to the revision log. the
2682 first delta is against its parent, which should be in our
2682 first delta is against its parent, which should be in our
2683 log, the rest are against the previous delta.
2683 log, the rest are against the previous delta.
2684
2684
2685 If ``addrevisioncb`` is defined, it will be called with arguments of
2685 If ``addrevisioncb`` is defined, it will be called with arguments of
2686 this revlog and the revision number that was added.
2686 this revlog and the revision number that was added.
2687 """
2687 """
2688
2688
2689 if self._adding_group:
2689 if self._adding_group:
2690 raise error.ProgrammingError(b'cannot nest addgroup() calls')
2690 raise error.ProgrammingError(b'cannot nest addgroup() calls')
2691
2691
2692 self._adding_group = True
2692 self._adding_group = True
2693 empty = True
2693 empty = True
2694 try:
2694 try:
2695 with self._writing(transaction):
2695 with self._writing(transaction):
2696 deltacomputer = deltautil.deltacomputer(self)
2696 deltacomputer = deltautil.deltacomputer(self)
2697 # loop through our set of deltas
2697 # loop through our set of deltas
2698 for data in deltas:
2698 for data in deltas:
2699 (
2699 (
2700 node,
2700 node,
2701 p1,
2701 p1,
2702 p2,
2702 p2,
2703 linknode,
2703 linknode,
2704 deltabase,
2704 deltabase,
2705 delta,
2705 delta,
2706 flags,
2706 flags,
2707 sidedata,
2707 sidedata,
2708 ) = data
2708 ) = data
2709 link = linkmapper(linknode)
2709 link = linkmapper(linknode)
2710 flags = flags or REVIDX_DEFAULT_FLAGS
2710 flags = flags or REVIDX_DEFAULT_FLAGS
2711
2711
2712 rev = self.index.get_rev(node)
2712 rev = self.index.get_rev(node)
2713 if rev is not None:
2713 if rev is not None:
2714 # this can happen if two branches make the same change
2714 # this can happen if two branches make the same change
2715 self._nodeduplicatecallback(transaction, rev)
2715 self._nodeduplicatecallback(transaction, rev)
2716 if duplicaterevisioncb:
2716 if duplicaterevisioncb:
2717 duplicaterevisioncb(self, rev)
2717 duplicaterevisioncb(self, rev)
2718 empty = False
2718 empty = False
2719 continue
2719 continue
2720
2720
2721 for p in (p1, p2):
2721 for p in (p1, p2):
2722 if not self.index.has_node(p):
2722 if not self.index.has_node(p):
2723 raise error.LookupError(
2723 raise error.LookupError(
2724 p, self.radix, _(b'unknown parent')
2724 p, self.radix, _(b'unknown parent')
2725 )
2725 )
2726
2726
2727 if not self.index.has_node(deltabase):
2727 if not self.index.has_node(deltabase):
2728 raise error.LookupError(
2728 raise error.LookupError(
2729 deltabase, self.display_id, _(b'unknown delta base')
2729 deltabase, self.display_id, _(b'unknown delta base')
2730 )
2730 )
2731
2731
2732 baserev = self.rev(deltabase)
2732 baserev = self.rev(deltabase)
2733
2733
2734 if baserev != nullrev and self.iscensored(baserev):
2734 if baserev != nullrev and self.iscensored(baserev):
2735 # if base is censored, delta must be full replacement in a
2735 # if base is censored, delta must be full replacement in a
2736 # single patch operation
2736 # single patch operation
2737 hlen = struct.calcsize(b">lll")
2737 hlen = struct.calcsize(b">lll")
2738 oldlen = self.rawsize(baserev)
2738 oldlen = self.rawsize(baserev)
2739 newlen = len(delta) - hlen
2739 newlen = len(delta) - hlen
2740 if delta[:hlen] != mdiff.replacediffheader(
2740 if delta[:hlen] != mdiff.replacediffheader(
2741 oldlen, newlen
2741 oldlen, newlen
2742 ):
2742 ):
2743 raise error.CensoredBaseError(
2743 raise error.CensoredBaseError(
2744 self.display_id, self.node(baserev)
2744 self.display_id, self.node(baserev)
2745 )
2745 )
2746
2746
2747 if not flags and self._peek_iscensored(baserev, delta):
2747 if not flags and self._peek_iscensored(baserev, delta):
2748 flags |= REVIDX_ISCENSORED
2748 flags |= REVIDX_ISCENSORED
2749
2749
2750 # We assume consumers of addrevisioncb will want to retrieve
2750 # We assume consumers of addrevisioncb will want to retrieve
2751 # the added revision, which will require a call to
2751 # the added revision, which will require a call to
2752 # revision(). revision() will fast path if there is a cache
2752 # revision(). revision() will fast path if there is a cache
2753 # hit. So, we tell _addrevision() to always cache in this case.
2753 # hit. So, we tell _addrevision() to always cache in this case.
2754 # We're only using addgroup() in the context of changegroup
2754 # We're only using addgroup() in the context of changegroup
2755 # generation so the revision data can always be handled as raw
2755 # generation so the revision data can always be handled as raw
2756 # by the flagprocessor.
2756 # by the flagprocessor.
2757 rev = self._addrevision(
2757 rev = self._addrevision(
2758 node,
2758 node,
2759 None,
2759 None,
2760 transaction,
2760 transaction,
2761 link,
2761 link,
2762 p1,
2762 p1,
2763 p2,
2763 p2,
2764 flags,
2764 flags,
2765 (baserev, delta),
2765 (baserev, delta),
2766 alwayscache=alwayscache,
2766 alwayscache=alwayscache,
2767 deltacomputer=deltacomputer,
2767 deltacomputer=deltacomputer,
2768 sidedata=sidedata,
2768 sidedata=sidedata,
2769 )
2769 )
2770
2770
2771 if addrevisioncb:
2771 if addrevisioncb:
2772 addrevisioncb(self, rev)
2772 addrevisioncb(self, rev)
2773 empty = False
2773 empty = False
2774 finally:
2774 finally:
2775 self._adding_group = False
2775 self._adding_group = False
2776 return not empty
2776 return not empty
2777
2777
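# Illustrative sketch (not part of revlog.py): the shape of one element
# of the `deltas` iterable consumed by `addgroup` above. The helper and
# its defaults are invented; only the 8-tuple layout comes from the
# unpacking in the loop above.
def make_delta_entry(node, p1, p2, linknode, deltabase, delta,
                     flags=0, sidedata=None):
    return (node, p1, p2, linknode, deltabase, delta, flags, sidedata)


# The first delta is against a parent already in the log; later ones
# are typically against the previously added revision.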
2778 def iscensored(self, rev):
2778 def iscensored(self, rev):
2779 """Check if a file revision is censored."""
2779 """Check if a file revision is censored."""
2780 if not self._censorable:
2780 if not self._censorable:
2781 return False
2781 return False
2782
2782
2783 return self.flags(rev) & REVIDX_ISCENSORED
2783 return self.flags(rev) & REVIDX_ISCENSORED
2784
2784
2785 def _peek_iscensored(self, baserev, delta):
2785 def _peek_iscensored(self, baserev, delta):
2786 """Quickly check if a delta produces a censored revision."""
2786 """Quickly check if a delta produces a censored revision."""
2787 if not self._censorable:
2787 if not self._censorable:
2788 return False
2788 return False
2789
2789
2790 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
2790 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
2791
2791
2792 def getstrippoint(self, minlink):
2792 def getstrippoint(self, minlink):
2793 """find the minimum rev that must be stripped to strip the linkrev
2793 """find the minimum rev that must be stripped to strip the linkrev
2794
2794
2795 Returns a tuple containing the minimum rev and a set of all revs that
2795 Returns a tuple containing the minimum rev and a set of all revs that
2796 have linkrevs that will be broken by this strip.
2796 have linkrevs that will be broken by this strip.
2797 """
2797 """
2798 return storageutil.resolvestripinfo(
2798 return storageutil.resolvestripinfo(
2799 minlink,
2799 minlink,
2800 len(self) - 1,
2800 len(self) - 1,
2801 self.headrevs(),
2801 self.headrevs(),
2802 self.linkrev,
2802 self.linkrev,
2803 self.parentrevs,
2803 self.parentrevs,
2804 )
2804 )
2805
2805
2806 def strip(self, minlink, transaction):
2806 def strip(self, minlink, transaction):
2807 """truncate the revlog on the first revision with a linkrev >= minlink
2807 """truncate the revlog on the first revision with a linkrev >= minlink
2808
2808
2809 This function is called when we're stripping revision minlink and
2809 This function is called when we're stripping revision minlink and
2810 its descendants from the repository.
2810 its descendants from the repository.
2811
2811
2812 We have to remove all revisions with linkrev >= minlink, because
2812 We have to remove all revisions with linkrev >= minlink, because
2813 the equivalent changelog revisions will be renumbered after the
2813 the equivalent changelog revisions will be renumbered after the
2814 strip.
2814 strip.
2815
2815
2816 So we truncate the revlog on the first of these revisions, and
2816 So we truncate the revlog on the first of these revisions, and
2817 trust that the caller has saved the revisions that shouldn't be
2817 trust that the caller has saved the revisions that shouldn't be
2818 removed and that it'll re-add them after this truncation.
2818 removed and that it'll re-add them after this truncation.
2819 """
2819 """
2820 if len(self) == 0:
2820 if len(self) == 0:
2821 return
2821 return
2822
2822
2823 rev, _ = self.getstrippoint(minlink)
2823 rev, _ = self.getstrippoint(minlink)
2824 if rev == len(self):
2824 if rev == len(self):
2825 return
2825 return
2826
2826
2827 # first truncate the files on disk
2827 # first truncate the files on disk
2828 data_end = self.start(rev)
2828 data_end = self.start(rev)
2829 if not self._inline:
2829 if not self._inline:
2830 transaction.add(self._datafile, data_end)
2830 transaction.add(self._datafile, data_end)
2831 end = rev * self.index.entry_size
2831 end = rev * self.index.entry_size
2832 else:
2832 else:
2833 end = data_end + (rev * self.index.entry_size)
2833 end = data_end + (rev * self.index.entry_size)
2834
2834
2835 transaction.add(self._indexfile, end)
2835 transaction.add(self._indexfile, end)
2836 if self._docket is not None:
2836 if self._docket is not None:
2837 # XXX we could leverage the docket while stripping. However, it is
2837 # XXX we could leverage the docket while stripping. However, it is
2838 # not powerful enough at the time of this comment
2838 # not powerful enough at the time of this comment
2839 self._docket.index_end = end
2839 self._docket.index_end = end
2840 self._docket.data_end = data_end
2840 self._docket.data_end = data_end
2841 self._docket.write(transaction, stripping=True)
2841 self._docket.write(transaction, stripping=True)
2842
2842
2843 # then reset internal state in memory to forget those revisions
2843 # then reset internal state in memory to forget those revisions
2844 self._revisioncache = None
2844 self._revisioncache = None
2845 self._chaininfocache = util.lrucachedict(500)
2845 self._chaininfocache = util.lrucachedict(500)
2846 self._chunkclear()
2846 self._chunkclear()
2847
2847
2848 del self.index[rev:-1]
2848 del self.index[rev:-1]
2849
2849
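# Illustrative sketch (not part of revlog.py): the truncation offsets
# computed by `strip` above. `entry_size` and `start_of(rev)` stand in
# for self.index.entry_size and self.start(rev).
def strip_offsets(rev, entry_size, start_of, inline):
    data_end = start_of(rev)  # first byte of the first stripped revision
    if not inline:
        # separate .i/.d files: truncate each one independently
        return data_end, rev * entry_size
    # inline revlog: data and index entries are interleaved in .i
    return data_end, data_end + rev * entry_size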
2850 def checksize(self):
2850 def checksize(self):
2851 """Check size of index and data files
2851 """Check size of index and data files
2852
2852
2853 return a (dd, di) tuple.
2853 return a (dd, di) tuple.
2854 - dd: extra bytes for the "data" file
2854 - dd: extra bytes for the "data" file
2855 - di: extra bytes for the "index" file
2855 - di: extra bytes for the "index" file
2856
2856
2857 A healthy revlog will return (0, 0).
2857 A healthy revlog will return (0, 0).
2858 """
2858 """
2859 expected = 0
2859 expected = 0
2860 if len(self):
2860 if len(self):
2861 expected = max(0, self.end(len(self) - 1))
2861 expected = max(0, self.end(len(self) - 1))
2862
2862
2863 try:
2863 try:
2864 with self._datafp() as f:
2864 with self._datafp() as f:
2865 f.seek(0, io.SEEK_END)
2865 f.seek(0, io.SEEK_END)
2866 actual = f.tell()
2866 actual = f.tell()
2867 dd = actual - expected
2867 dd = actual - expected
2868 except IOError as inst:
2868 except IOError as inst:
2869 if inst.errno != errno.ENOENT:
2869 if inst.errno != errno.ENOENT:
2870 raise
2870 raise
2871 dd = 0
2871 dd = 0
2872
2872
2873 try:
2873 try:
2874 f = self.opener(self._indexfile)
2874 f = self.opener(self._indexfile)
2875 f.seek(0, io.SEEK_END)
2875 f.seek(0, io.SEEK_END)
2876 actual = f.tell()
2876 actual = f.tell()
2877 f.close()
2877 f.close()
2878 s = self.index.entry_size
2878 s = self.index.entry_size
2879 i = max(0, actual // s)
2879 i = max(0, actual // s)
2880 di = actual - (i * s)
2880 di = actual - (i * s)
2881 if self._inline:
2881 if self._inline:
2882 databytes = 0
2882 databytes = 0
2883 for r in self:
2883 for r in self:
2884 databytes += max(0, self.length(r))
2884 databytes += max(0, self.length(r))
2885 dd = 0
2885 dd = 0
2886 di = actual - len(self) * s - databytes
2886 di = actual - len(self) * s - databytes
2887 except IOError as inst:
2887 except IOError as inst:
2888 if inst.errno != errno.ENOENT:
2888 if inst.errno != errno.ENOENT:
2889 raise
2889 raise
2890 di = 0
2890 di = 0
2891
2891
2892 return (dd, di)
2892 return (dd, di)
2893
2893
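# Illustrative sketch (not part of revlog.py): how a caller might act on
# the (dd, di) pair documented above; `rl` is a hypothetical revlog.
def report_excess(rl):
    dd, di = rl.checksize()
    if (dd, di) == (0, 0):
        return 'revlog is healthy'
    return 'data file off by %d bytes, index by %d bytes' % (dd, di)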
2894 def files(self):
2894 def files(self):
2895 res = [self._indexfile]
2895 res = [self._indexfile]
2896 if not self._inline:
2896 if not self._inline:
2897 res.append(self._datafile)
2897 res.append(self._datafile)
2898 return res
2898 return res
2899
2899
2900 def emitrevisions(
2900 def emitrevisions(
2901 self,
2901 self,
2902 nodes,
2902 nodes,
2903 nodesorder=None,
2903 nodesorder=None,
2904 revisiondata=False,
2904 revisiondata=False,
2905 assumehaveparentrevisions=False,
2905 assumehaveparentrevisions=False,
2906 deltamode=repository.CG_DELTAMODE_STD,
2906 deltamode=repository.CG_DELTAMODE_STD,
2907 sidedata_helpers=None,
2907 sidedata_helpers=None,
2908 ):
2908 ):
2909 if nodesorder not in (b'nodes', b'storage', b'linear', None):
2909 if nodesorder not in (b'nodes', b'storage', b'linear', None):
2910 raise error.ProgrammingError(
2910 raise error.ProgrammingError(
2911 b'unhandled value for nodesorder: %s' % nodesorder
2911 b'unhandled value for nodesorder: %s' % nodesorder
2912 )
2912 )
2913
2913
2914 if nodesorder is None and not self._generaldelta:
2914 if nodesorder is None and not self._generaldelta:
2915 nodesorder = b'storage'
2915 nodesorder = b'storage'
2916
2916
2917 if (
2917 if (
2918 not self._storedeltachains
2918 not self._storedeltachains
2919 and deltamode != repository.CG_DELTAMODE_PREV
2919 and deltamode != repository.CG_DELTAMODE_PREV
2920 ):
2920 ):
2921 deltamode = repository.CG_DELTAMODE_FULL
2921 deltamode = repository.CG_DELTAMODE_FULL
2922
2922
2923 return storageutil.emitrevisions(
2923 return storageutil.emitrevisions(
2924 self,
2924 self,
2925 nodes,
2925 nodes,
2926 nodesorder,
2926 nodesorder,
2927 revlogrevisiondelta,
2927 revlogrevisiondelta,
2928 deltaparentfn=self.deltaparent,
2928 deltaparentfn=self.deltaparent,
2929 candeltafn=self.candelta,
2929 candeltafn=self.candelta,
2930 rawsizefn=self.rawsize,
2930 rawsizefn=self.rawsize,
2931 revdifffn=self.revdiff,
2931 revdifffn=self.revdiff,
2932 flagsfn=self.flags,
2932 flagsfn=self.flags,
2933 deltamode=deltamode,
2933 deltamode=deltamode,
2934 revisiondata=revisiondata,
2934 revisiondata=revisiondata,
2935 assumehaveparentrevisions=assumehaveparentrevisions,
2935 assumehaveparentrevisions=assumehaveparentrevisions,
2936 sidedata_helpers=sidedata_helpers,
2936 sidedata_helpers=sidedata_helpers,
2937 )
2937 )
2938
2938
2939 DELTAREUSEALWAYS = b'always'
2939 DELTAREUSEALWAYS = b'always'
2940 DELTAREUSESAMEREVS = b'samerevs'
2940 DELTAREUSESAMEREVS = b'samerevs'
2941 DELTAREUSENEVER = b'never'
2941 DELTAREUSENEVER = b'never'
2942
2942
2943 DELTAREUSEFULLADD = b'fulladd'
2943 DELTAREUSEFULLADD = b'fulladd'
2944
2944
2945 DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}
2945 DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}
2946
2946
2947 def clone(
2947 def clone(
2948 self,
2948 self,
2949 tr,
2949 tr,
2950 destrevlog,
2950 destrevlog,
2951 addrevisioncb=None,
2951 addrevisioncb=None,
2952 deltareuse=DELTAREUSESAMEREVS,
2952 deltareuse=DELTAREUSESAMEREVS,
2953 forcedeltabothparents=None,
2953 forcedeltabothparents=None,
2954 sidedata_helpers=None,
2954 sidedata_helpers=None,
2955 ):
2955 ):
2956 """Copy this revlog to another, possibly with format changes.
2956 """Copy this revlog to another, possibly with format changes.
2957
2957
2958 The destination revlog will contain the same revisions and nodes.
2958 The destination revlog will contain the same revisions and nodes.
2959 However, it may not be bit-for-bit identical due to e.g. delta encoding
2959 However, it may not be bit-for-bit identical due to e.g. delta encoding
2960 differences.
2960 differences.
2961
2961
2962 The ``deltareuse`` argument controls how deltas from the existing revlog
2962 The ``deltareuse`` argument controls how deltas from the existing revlog
2963 are preserved in the destination revlog. The argument can have the
2963 are preserved in the destination revlog. The argument can have the
2964 following values:
2964 following values:
2965
2965
2966 DELTAREUSEALWAYS
2966 DELTAREUSEALWAYS
2967 Deltas will always be reused (if possible), even if the destination
2967 Deltas will always be reused (if possible), even if the destination
2968 revlog would not select the same revisions for the delta. This is the
2968 revlog would not select the same revisions for the delta. This is the
2969 fastest mode of operation.
2969 fastest mode of operation.
2970 DELTAREUSESAMEREVS
2970 DELTAREUSESAMEREVS
2971 Deltas will be reused if the destination revlog would pick the same
2971 Deltas will be reused if the destination revlog would pick the same
2972 revisions for the delta. This mode strikes a balance between speed
2972 revisions for the delta. This mode strikes a balance between speed
2973 and optimization.
2973 and optimization.
2974 DELTAREUSENEVER
2974 DELTAREUSENEVER
2975 Deltas will never be reused. This is the slowest mode of execution.
2975 Deltas will never be reused. This is the slowest mode of execution.
2976 This mode can be used to recompute deltas (e.g. if the diff/delta
2976 This mode can be used to recompute deltas (e.g. if the diff/delta
2977 algorithm changes).
2977 algorithm changes).
2978 DELTAREUSEFULLADD
2978 DELTAREUSEFULLADD
2979 Revisions will be re-added as if they were new content. This is
2979 Revisions will be re-added as if they were new content. This is
2980 slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
2980 slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
2981 e.g. large file detection and handling.
2981 e.g. large file detection and handling.
2982
2982
2983 Delta computation can be slow, so the choice of delta reuse policy can
2983 Delta computation can be slow, so the choice of delta reuse policy can
2984 significantly affect run time.
2984 significantly affect run time.
2985
2985
2986 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2986 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2987 two extremes. Deltas will be reused if they are appropriate. But if a
2987 two extremes. Deltas will be reused if they are appropriate. But if a
2988 better delta base could be chosen, it will be. This means if you
2988 better delta base could be chosen, it will be. This means if you
2989 are converting a non-generaldelta revlog to a generaldelta revlog,
2989 are converting a non-generaldelta revlog to a generaldelta revlog,
2990 deltas will be recomputed if the delta's parent isn't a parent of the
2990 deltas will be recomputed if the delta's parent isn't a parent of the
2991 revision.
2991 revision.
2992
2992
2993 In addition to the delta policy, the ``forcedeltabothparents``
2993 In addition to the delta policy, the ``forcedeltabothparents``
2994 argument controls whether to force computing deltas against both parents
2994 argument controls whether to force computing deltas against both parents
2995 for merges. If None, the destination revlog's existing setting is kept.
2995 for merges. If None, the destination revlog's existing setting is kept.
2996
2996
2997 See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
2997 See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
2998 `sidedata_helpers`.
2998 `sidedata_helpers`.
2999 """
2999 """
3000 if deltareuse not in self.DELTAREUSEALL:
3000 if deltareuse not in self.DELTAREUSEALL:
3001 raise ValueError(
3001 raise ValueError(
3002 _(b'value for deltareuse invalid: %s') % deltareuse
3002 _(b'value for deltareuse invalid: %s') % deltareuse
3003 )
3003 )
3004
3004
3005 if len(destrevlog):
3005 if len(destrevlog):
3006 raise ValueError(_(b'destination revlog is not empty'))
3006 raise ValueError(_(b'destination revlog is not empty'))
3007
3007
3008 if getattr(self, 'filteredrevs', None):
3008 if getattr(self, 'filteredrevs', None):
3009 raise ValueError(_(b'source revlog has filtered revisions'))
3009 raise ValueError(_(b'source revlog has filtered revisions'))
3010 if getattr(destrevlog, 'filteredrevs', None):
3010 if getattr(destrevlog, 'filteredrevs', None):
3011 raise ValueError(_(b'destination revlog has filtered revisions'))
3011 raise ValueError(_(b'destination revlog has filtered revisions'))
3012
3012
3013 # lazydelta and lazydeltabase control whether to reuse a cached delta,
3013 # lazydelta and lazydeltabase control whether to reuse a cached delta,
3014 # if possible.
3014 # if possible.
3015 oldlazydelta = destrevlog._lazydelta
3015 oldlazydelta = destrevlog._lazydelta
3016 oldlazydeltabase = destrevlog._lazydeltabase
3016 oldlazydeltabase = destrevlog._lazydeltabase
3017 oldamd = destrevlog._deltabothparents
3017 oldamd = destrevlog._deltabothparents
3018
3018
3019 try:
3019 try:
3020 if deltareuse == self.DELTAREUSEALWAYS:
3020 if deltareuse == self.DELTAREUSEALWAYS:
3021 destrevlog._lazydeltabase = True
3021 destrevlog._lazydeltabase = True
3022 destrevlog._lazydelta = True
3022 destrevlog._lazydelta = True
3023 elif deltareuse == self.DELTAREUSESAMEREVS:
3023 elif deltareuse == self.DELTAREUSESAMEREVS:
3024 destrevlog._lazydeltabase = False
3024 destrevlog._lazydeltabase = False
3025 destrevlog._lazydelta = True
3025 destrevlog._lazydelta = True
3026 elif deltareuse == self.DELTAREUSENEVER:
3026 elif deltareuse == self.DELTAREUSENEVER:
3027 destrevlog._lazydeltabase = False
3027 destrevlog._lazydeltabase = False
3028 destrevlog._lazydelta = False
3028 destrevlog._lazydelta = False
3029
3029
3030 destrevlog._deltabothparents = forcedeltabothparents or oldamd
3030 destrevlog._deltabothparents = forcedeltabothparents or oldamd
3031
3031
3032 self._clone(
3032 self._clone(
3033 tr,
3033 tr,
3034 destrevlog,
3034 destrevlog,
3035 addrevisioncb,
3035 addrevisioncb,
3036 deltareuse,
3036 deltareuse,
3037 forcedeltabothparents,
3037 forcedeltabothparents,
3038 sidedata_helpers,
3038 sidedata_helpers,
3039 )
3039 )
3040
3040
3041 finally:
3041 finally:
3042 destrevlog._lazydelta = oldlazydelta
3042 destrevlog._lazydelta = oldlazydelta
3043 destrevlog._lazydeltabase = oldlazydeltabase
3043 destrevlog._lazydeltabase = oldlazydeltabase
3044 destrevlog._deltabothparents = oldamd
3044 destrevlog._deltabothparents = oldamd
3045
3045
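# Illustrative sketch (not part of revlog.py): recomputing every delta
# while cloning, per the policy table in the docstring above. `src`,
# `dst` and `tr` are assumed to be an existing revlog, an empty
# destination revlog, and an open transaction.
def recompute_deltas(src, dst, tr):
    seen = []
    # DELTAREUSENEVER forces the delta computer to rebuild each delta,
    # e.g. after a change to the diff/delta algorithm.
    src.clone(
        tr,
        dst,
        addrevisioncb=lambda rl, rev, node: seen.append(rev),
        deltareuse=src.DELTAREUSENEVER,
    )
    return seen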
3046 def _clone(
3046 def _clone(
3047 self,
3047 self,
3048 tr,
3048 tr,
3049 destrevlog,
3049 destrevlog,
3050 addrevisioncb,
3050 addrevisioncb,
3051 deltareuse,
3051 deltareuse,
3052 forcedeltabothparents,
3052 forcedeltabothparents,
3053 sidedata_helpers,
3053 sidedata_helpers,
3054 ):
3054 ):
3055 """perform the core duty of `revlog.clone` after parameter processing"""
3055 """perform the core duty of `revlog.clone` after parameter processing"""
3056 deltacomputer = deltautil.deltacomputer(destrevlog)
3056 deltacomputer = deltautil.deltacomputer(destrevlog)
3057 index = self.index
3057 index = self.index
3058 for rev in self:
3058 for rev in self:
3059 entry = index[rev]
3059 entry = index[rev]
3060
3060
3061 # Some classes override linkrev to take filtered revs into
3061 # Some classes override linkrev to take filtered revs into
3062 # account. Use raw entry from index.
3062 # account. Use raw entry from index.
3063 flags = entry[0] & 0xFFFF
3063 flags = entry[0] & 0xFFFF
3064 linkrev = entry[4]
3064 linkrev = entry[4]
3065 p1 = index[entry[5]][7]
3065 p1 = index[entry[5]][7]
3066 p2 = index[entry[6]][7]
3066 p2 = index[entry[6]][7]
3067 node = entry[7]
3067 node = entry[7]
3068
3068
3069 # (Possibly) reuse the delta from the revlog if allowed and
3069 # (Possibly) reuse the delta from the revlog if allowed and
3070 # the revlog chunk is a delta.
3070 # the revlog chunk is a delta.
3071 cachedelta = None
3071 cachedelta = None
3072 rawtext = None
3072 rawtext = None
3073 if deltareuse == self.DELTAREUSEFULLADD:
3073 if deltareuse == self.DELTAREUSEFULLADD:
3074 text, sidedata = self._revisiondata(rev)
3074 text, sidedata = self._revisiondata(rev)
3075
3075
3076 if sidedata_helpers is not None:
3076 if sidedata_helpers is not None:
3077 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3077 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3078 self, sidedata_helpers, sidedata, rev
3078 self, sidedata_helpers, sidedata, rev
3079 )
3079 )
3080 flags = flags | new_flags[0] & ~new_flags[1]
3080 flags = flags | new_flags[0] & ~new_flags[1]
3081
3081
3082 destrevlog.addrevision(
3082 destrevlog.addrevision(
3083 text,
3083 text,
3084 tr,
3084 tr,
3085 linkrev,
3085 linkrev,
3086 p1,
3086 p1,
3087 p2,
3087 p2,
3088 cachedelta=cachedelta,
3088 cachedelta=cachedelta,
3089 node=node,
3089 node=node,
3090 flags=flags,
3090 flags=flags,
3091 deltacomputer=deltacomputer,
3091 deltacomputer=deltacomputer,
3092 sidedata=sidedata,
3092 sidedata=sidedata,
3093 )
3093 )
3094 else:
3094 else:
3095 if destrevlog._lazydelta:
3095 if destrevlog._lazydelta:
3096 dp = self.deltaparent(rev)
3096 dp = self.deltaparent(rev)
3097 if dp != nullrev:
3097 if dp != nullrev:
3098 cachedelta = (dp, bytes(self._chunk(rev)))
3098 cachedelta = (dp, bytes(self._chunk(rev)))
3099
3099
3100 sidedata = None
3100 sidedata = None
3101 if not cachedelta:
3101 if not cachedelta:
3102 rawtext, sidedata = self._revisiondata(rev)
3102 rawtext, sidedata = self._revisiondata(rev)
3103 if sidedata is None:
3103 if sidedata is None:
3104 sidedata = self.sidedata(rev)
3104 sidedata = self.sidedata(rev)
3105
3105
3106 if sidedata_helpers is not None:
3106 if sidedata_helpers is not None:
3107 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3107 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3108 self, sidedata_helpers, sidedata, rev
3108 self, sidedata_helpers, sidedata, rev
3109 )
3109 )
3110 flags = flags | new_flags[0] & ~new_flags[1]
3110 flags = flags | new_flags[0] & ~new_flags[1]
3111
3111
3112 with destrevlog._writing(tr):
3112 with destrevlog._writing(tr):
3113 destrevlog._addrevision(
3113 destrevlog._addrevision(
3114 node,
3114 node,
3115 rawtext,
3115 rawtext,
3116 tr,
3116 tr,
3117 linkrev,
3117 linkrev,
3118 p1,
3118 p1,
3119 p2,
3119 p2,
3120 flags,
3120 flags,
3121 cachedelta,
3121 cachedelta,
3122 deltacomputer=deltacomputer,
3122 deltacomputer=deltacomputer,
3123 sidedata=sidedata,
3123 sidedata=sidedata,
3124 )
3124 )
3125
3125
3126 if addrevisioncb:
3126 if addrevisioncb:
3127 addrevisioncb(self, rev, node)
3127 addrevisioncb(self, rev, node)
3128
3128
3129 def censorrevision(self, tr, censornode, tombstone=b''):
3129 def censorrevision(self, tr, censornode, tombstone=b''):
3130 if self._format_version == REVLOGV0:
3130 if self._format_version == REVLOGV0:
3131 raise error.RevlogError(
3131 raise error.RevlogError(
3132 _(b'cannot censor with version %d revlogs')
3132 _(b'cannot censor with version %d revlogs')
3133 % self._format_version
3133 % self._format_version
3134 )
3134 )
3135
3135
3136 censorrev = self.rev(censornode)
3136 censorrev = self.rev(censornode)
3137 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
3137 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
3138
3138
3139 if len(tombstone) > self.rawsize(censorrev):
3139 if len(tombstone) > self.rawsize(censorrev):
3140 raise error.Abort(
3140 raise error.Abort(
3141 _(b'censor tombstone must be no longer than censored data')
3141 _(b'censor tombstone must be no longer than censored data')
3142 )
3142 )
3143
3143
3144 # Rewriting the revlog in place is hard. Our strategy for censoring is
3144 # Rewriting the revlog in place is hard. Our strategy for censoring is
3145 # to create a new revlog, copy all revisions to it, then replace the
3145 # to create a new revlog, copy all revisions to it, then replace the
3146 # revlogs on transaction close.
3146 # revlogs on transaction close.
3147 #
3147 #
3148 # This is a bit dangerous. We could easily have a mismatch of state.
3148 # This is a bit dangerous. We could easily have a mismatch of state.
3149 newrl = revlog(
3149 newrl = revlog(
3150 self.opener,
3150 self.opener,
3151 target=self.target,
3151 target=self.target,
3152 radix=self.radix,
3152 radix=self.radix,
3153 postfix=b'tmpcensored',
3153 postfix=b'tmpcensored',
3154 censorable=True,
3154 censorable=True,
3155 )
3155 )
3156 newrl._format_version = self._format_version
3156 newrl._format_version = self._format_version
3157 newrl._format_flags = self._format_flags
3157 newrl._format_flags = self._format_flags
3158 newrl._generaldelta = self._generaldelta
3158 newrl._generaldelta = self._generaldelta
3159 newrl._parse_index = self._parse_index
3159 newrl._parse_index = self._parse_index
3160
3160
3161 for rev in self.revs():
3161 for rev in self.revs():
3162 node = self.node(rev)
3162 node = self.node(rev)
3163 p1, p2 = self.parents(node)
3163 p1, p2 = self.parents(node)
3164
3164
3165 if rev == censorrev:
3165 if rev == censorrev:
3166 newrl.addrawrevision(
3166 newrl.addrawrevision(
3167 tombstone,
3167 tombstone,
3168 tr,
3168 tr,
3169 self.linkrev(censorrev),
3169 self.linkrev(censorrev),
3170 p1,
3170 p1,
3171 p2,
3171 p2,
3172 censornode,
3172 censornode,
3173 REVIDX_ISCENSORED,
3173 REVIDX_ISCENSORED,
3174 )
3174 )
3175
3175
3176 if newrl.deltaparent(rev) != nullrev:
3176 if newrl.deltaparent(rev) != nullrev:
3177 raise error.Abort(
3177 raise error.Abort(
3178 _(
3178 _(
3179 b'censored revision stored as delta; '
3179 b'censored revision stored as delta; '
3180 b'cannot censor'
3180 b'cannot censor'
3181 ),
3181 ),
3182 hint=_(
3182 hint=_(
3183 b'censoring of revlogs is not '
3183 b'censoring of revlogs is not '
3184 b'fully implemented; please report '
3184 b'fully implemented; please report '
3185 b'this bug'
3185 b'this bug'
3186 ),
3186 ),
3187 )
3187 )
3188 continue
3188 continue
3189
3189
3190 if self.iscensored(rev):
3190 if self.iscensored(rev):
3191 if self.deltaparent(rev) != nullrev:
3191 if self.deltaparent(rev) != nullrev:
3192 raise error.Abort(
3192 raise error.Abort(
3193 _(
3193 _(
3194 b'cannot censor due to censored '
3194 b'cannot censor due to censored '
3195 b'revision having delta stored'
3195 b'revision having delta stored'
3196 )
3196 )
3197 )
3197 )
3198 rawtext = self._chunk(rev)
3198 rawtext = self._chunk(rev)
3199 else:
3199 else:
3200 rawtext = self.rawdata(rev)
3200 rawtext = self.rawdata(rev)
3201
3201
3202 newrl.addrawrevision(
3202 newrl.addrawrevision(
3203 rawtext, tr, self.linkrev(rev), p1, p2, node, self.flags(rev)
3203 rawtext, tr, self.linkrev(rev), p1, p2, node, self.flags(rev)
3204 )
3204 )
3205
3205
3206 tr.addbackup(self._indexfile, location=b'store')
3206 tr.addbackup(self._indexfile, location=b'store')
3207 if not self._inline:
3207 if not self._inline:
3208 tr.addbackup(self._datafile, location=b'store')
3208 tr.addbackup(self._datafile, location=b'store')
3209
3209
3210 self.opener.rename(newrl._indexfile, self._indexfile)
3210 self.opener.rename(newrl._indexfile, self._indexfile)
3211 if not self._inline:
3211 if not self._inline:
3212 self.opener.rename(newrl._datafile, self._datafile)
3212 self.opener.rename(newrl._datafile, self._datafile)
3213
3213
3214 self.clearcaches()
3214 self.clearcaches()
3215 self._loadindex()
3215 self._loadindex()
3216
3216
3217 def verifyintegrity(self, state):
3217 def verifyintegrity(self, state):
3218 """Verifies the integrity of the revlog.
3218 """Verifies the integrity of the revlog.
3219
3219
3220 Yields ``revlogproblem`` instances describing problems that are
3220 Yields ``revlogproblem`` instances describing problems that are
3221 found.
3221 found.
3222 """
3222 """
3223 dd, di = self.checksize()
3223 dd, di = self.checksize()
3224 if dd:
3224 if dd:
3225 yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
3225 yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
3226 if di:
3226 if di:
3227 yield revlogproblem(error=_(b'index contains %d extra bytes') % di)
3227 yield revlogproblem(error=_(b'index contains %d extra bytes') % di)
3228
3228
3229 version = self._format_version
3229 version = self._format_version
3230
3230
3231 # The verifier tells us what version revlog we should be.
3231 # The verifier tells us what version revlog we should be.
3232 if version != state[b'expectedversion']:
3232 if version != state[b'expectedversion']:
3233 yield revlogproblem(
3233 yield revlogproblem(
3234 warning=_(b"warning: '%s' uses revlog format %d; expected %d")
3234 warning=_(b"warning: '%s' uses revlog format %d; expected %d")
3235 % (self.display_id, version, state[b'expectedversion'])
3235 % (self.display_id, version, state[b'expectedversion'])
3236 )
3236 )
3237
3237
3238 state[b'skipread'] = set()
3238 state[b'skipread'] = set()
3239 state[b'safe_renamed'] = set()
3239 state[b'safe_renamed'] = set()
3240
3240
3241 for rev in self:
3241 for rev in self:
3242 node = self.node(rev)
3242 node = self.node(rev)
3243
3243
3244 # Verify contents. 4 cases to care about:
3244 # Verify contents. 4 cases to care about:
3245 #
3245 #
3246 # common: the most common case
3246 # common: the most common case
3247 # rename: with a rename
3247 # rename: with a rename
3248 # meta: file content starts with b'\1\n', the metadata
3248 # meta: file content starts with b'\1\n', the metadata
3249 # header defined in filelog.py, but without a rename
3249 # header defined in filelog.py, but without a rename
3250 # ext: content stored externally
3250 # ext: content stored externally
3251 #
3251 #
3252 # More formally, their differences are shown below:
3252 # More formally, their differences are shown below:
3253 #
3253 #
3254 # | common | rename | meta | ext
3254 # | common | rename | meta | ext
3255 # -------------------------------------------------------
3255 # -------------------------------------------------------
3256 # flags() | 0 | 0 | 0 | not 0
3256 # flags() | 0 | 0 | 0 | not 0
3257 # renamed() | False | True | False | ?
3257 # renamed() | False | True | False | ?
3258 # rawtext[0:2]=='\1\n'| False | True | True | ?
3258 # rawtext[0:2]=='\1\n'| False | True | True | ?
3259 #
3259 #
3260 # "rawtext" means the raw text stored in revlog data, which
3260 # "rawtext" means the raw text stored in revlog data, which
3261 # could be retrieved by "rawdata(rev)". "text"
3261 # could be retrieved by "rawdata(rev)". "text"
3262 # mentioned below is "revision(rev)".
3262 # mentioned below is "revision(rev)".
3263 #
3263 #
3264 # There are 3 different lengths stored physically:
3264 # There are 3 different lengths stored physically:
3265 # 1. L1: rawsize, stored in revlog index
3265 # 1. L1: rawsize, stored in revlog index
3266 # 2. L2: len(rawtext), stored in revlog data
3266 # 2. L2: len(rawtext), stored in revlog data
3267 # 3. L3: len(text), stored in revlog data if flags==0, or
3267 # 3. L3: len(text), stored in revlog data if flags==0, or
3268 # possibly somewhere else if flags!=0
3268 # possibly somewhere else if flags!=0
3269 #
3269 #
3270 # L1 should be equal to L2. L3 could be different from them.
3270 # L1 should be equal to L2. L3 could be different from them.
3271 # "text" may or may not affect commit hash depending on flag
3271 # "text" may or may not affect commit hash depending on flag
3272 # processors (see flagutil.addflagprocessor).
3272 # processors (see flagutil.addflagprocessor).
3273 #
3273 #
3274 # | common | rename | meta | ext
3274 # | common | rename | meta | ext
3275 # -------------------------------------------------
3275 # -------------------------------------------------
3276 # rawsize() | L1 | L1 | L1 | L1
3276 # rawsize() | L1 | L1 | L1 | L1
3277 # size() | L1 | L2-LM | L1(*) | L1 (?)
3277 # size() | L1 | L2-LM | L1(*) | L1 (?)
3278 # len(rawtext) | L2 | L2 | L2 | L2
3278 # len(rawtext) | L2 | L2 | L2 | L2
3279 # len(text) | L2 | L2 | L2 | L3
3279 # len(text) | L2 | L2 | L2 | L3
3280 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
3280 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
3281 #
3281 #
3282 # LM: length of metadata, depending on rawtext
3282 # LM: length of metadata, depending on rawtext
3283 # (*): not ideal, see comment in filelog.size
3283 # (*): not ideal, see comment in filelog.size
3284 # (?): could be "- len(meta)" if the resolved content has
3284 # (?): could be "- len(meta)" if the resolved content has
3285 # rename metadata
3285 # rename metadata
3286 #
3286 #
3287 # Checks needed to be done:
3287 # Checks needed to be done:
3288 # 1. length check: L1 == L2, in all cases.
3288 # 1. length check: L1 == L2, in all cases.
3289 # 2. hash check: depending on flag processor, we may need to
3289 # 2. hash check: depending on flag processor, we may need to
3290 # use either "text" (external), or "rawtext" (in revlog).
3290 # use either "text" (external), or "rawtext" (in revlog).
3291
3291
3292 try:
3292 try:
3293 skipflags = state.get(b'skipflags', 0)
3293 skipflags = state.get(b'skipflags', 0)
3294 if skipflags:
3294 if skipflags:
3295 skipflags &= self.flags(rev)
3295 skipflags &= self.flags(rev)
3296
3296
3297 _verify_revision(self, skipflags, state, node)
3297 _verify_revision(self, skipflags, state, node)
3298
3298
3299 l1 = self.rawsize(rev)
3299 l1 = self.rawsize(rev)
3300 l2 = len(self.rawdata(node))
3300 l2 = len(self.rawdata(node))
3301
3301
3302 if l1 != l2:
3302 if l1 != l2:
3303 yield revlogproblem(
3303 yield revlogproblem(
3304 error=_(b'unpacked size is %d, %d expected') % (l2, l1),
3304 error=_(b'unpacked size is %d, %d expected') % (l2, l1),
3305 node=node,
3305 node=node,
3306 )
3306 )
3307
3307
3308 except error.CensoredNodeError:
3308 except error.CensoredNodeError:
3309 if state[b'erroroncensored']:
3309 if state[b'erroroncensored']:
3310 yield revlogproblem(
3310 yield revlogproblem(
3311 error=_(b'censored file data'), node=node
3311 error=_(b'censored file data'), node=node
3312 )
3312 )
3313 state[b'skipread'].add(node)
3313 state[b'skipread'].add(node)
3314 except Exception as e:
3314 except Exception as e:
3315 yield revlogproblem(
3315 yield revlogproblem(
3316 error=_(b'unpacking %s: %s')
3316 error=_(b'unpacking %s: %s')
3317 % (short(node), stringutil.forcebytestr(e)),
3317 % (short(node), stringutil.forcebytestr(e)),
3318 node=node,
3318 node=node,
3319 )
3319 )
3320 state[b'skipread'].add(node)
3320 state[b'skipread'].add(node)
3321
3321
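# Illustrative sketch (not part of revlog.py): the L1 == L2 length check
# from the table above, for a single revision. `rl` is a hypothetical
# revlog-like object exposing rawsize() and rawdata().
def check_lengths(rl, rev, node):
    l1 = rl.rawsize(rev)  # L1: rawsize recorded in the index
    l2 = len(rl.rawdata(node))  # L2: length of the stored rawtext
    if l1 != l2:
        return 'unpacked size is %d, %d expected' % (l2, l1)
    return None  # lengths agree; hash checks are handled separately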
3322 def storageinfo(
3322 def storageinfo(
3323 self,
3323 self,
3324 exclusivefiles=False,
3324 exclusivefiles=False,
3325 sharedfiles=False,
3325 sharedfiles=False,
3326 revisionscount=False,
3326 revisionscount=False,
3327 trackedsize=False,
3327 trackedsize=False,
3328 storedsize=False,
3328 storedsize=False,
3329 ):
3329 ):
3330 d = {}
3330 d = {}
3331
3331
3332 if exclusivefiles:
3332 if exclusivefiles:
3333 d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
3333 d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
3334 if not self._inline:
3334 if not self._inline:
3335 d[b'exclusivefiles'].append((self.opener, self._datafile))
3335 d[b'exclusivefiles'].append((self.opener, self._datafile))
3336
3336
3337 if sharedfiles:
3337 if sharedfiles:
3338 d[b'sharedfiles'] = []
3338 d[b'sharedfiles'] = []
3339
3339
3340 if revisionscount:
3340 if revisionscount:
3341 d[b'revisionscount'] = len(self)
3341 d[b'revisionscount'] = len(self)
3342
3342
3343 if trackedsize:
3343 if trackedsize:
3344 d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))
3344 d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))
3345
3345
3346 if storedsize:
3346 if storedsize:
3347 d[b'storedsize'] = sum(
3347 d[b'storedsize'] = sum(
3348 self.opener.stat(path).st_size for path in self.files()
3348 self.opener.stat(path).st_size for path in self.files()
3349 )
3349 )
3350
3350
3351 return d
3351 return d
3352
3352
3353 def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
3353 def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
3354 if not self.hassidedata:
3354 if not self.hassidedata:
3355 return
3355 return
3356 # revlog formats with sidedata support do not support inline data
3356 # revlog formats with sidedata support do not support inline data
3357 assert not self._inline
3357 assert not self._inline
3358 if not helpers[1] and not helpers[2]:
3358 if not helpers[1] and not helpers[2]:
3359 # Nothing to generate or remove
3359 # Nothing to generate or remove
3360 return
3360 return
3361
3361
3362 new_entries = []
3362 new_entries = []
3363 # append the new sidedata
3363 # append the new sidedata
3364 with self._writing(transaction):
3364 with self._writing(transaction):
3365 ifh, dfh = self._writinghandles
3365 ifh, dfh = self._writinghandles
3366 if self._docket is not None:
3366 if self._docket is not None:
3367 dfh.seek(self._docket.data_end, os.SEEK_SET)
3367 dfh.seek(self._docket.data_end, os.SEEK_SET)
3368 else:
3368 else:
3369 dfh.seek(0, os.SEEK_END)
3369 dfh.seek(0, os.SEEK_END)
3370
3370
3371 current_offset = dfh.tell()
3371 current_offset = dfh.tell()
3372 for rev in range(startrev, endrev + 1):
3372 for rev in range(startrev, endrev + 1):
3373 entry = self.index[rev]
3373 entry = self.index[rev]
3374 new_sidedata, flags = sidedatautil.run_sidedata_helpers(
3374 new_sidedata, flags = sidedatautil.run_sidedata_helpers(
3375 store=self,
3375 store=self,
3376 sidedata_helpers=helpers,
3376 sidedata_helpers=helpers,
3377 sidedata={},
3377 sidedata={},
3378 rev=rev,
3378 rev=rev,
3379 )
3379 )
3380
3380
3381 serialized_sidedata = sidedatautil.serialize_sidedata(
3381 serialized_sidedata = sidedatautil.serialize_sidedata(
3382 new_sidedata
3382 new_sidedata
3383 )
3383 )
3384
3385 sidedata_compression_mode = COMP_MODE_INLINE
3386 if serialized_sidedata and self.hassidedata:
3387 sidedata_compression_mode = COMP_MODE_PLAIN
3388 h, comp_sidedata = self.compress(serialized_sidedata)
3389 if (
3390 h != b'u'
3391 and comp_sidedata[0:1] != b'\0'
3392 and len(comp_sidedata) < len(serialized_sidedata)
3393 ):
3394 assert not h
3395 if (
3396 comp_sidedata[0:1]
3397 == self._docket.default_compression_header
3398 ):
3399 sidedata_compression_mode = COMP_MODE_DEFAULT
3400 serialized_sidedata = comp_sidedata
3401 else:
3402 sidedata_compression_mode = COMP_MODE_INLINE
3403 serialized_sidedata = comp_sidedata
3384 if entry[8] != 0 or entry[9] != 0:
3404 if entry[8] != 0 or entry[9] != 0:
3385 # rewriting entries that already have sidedata is not
3405 # rewriting entries that already have sidedata is not
3386 # supported yet, because it introduces garbage data in the
3406 # supported yet, because it introduces garbage data in the
3387 # revlog.
3407 # revlog.
3388 msg = b"rewriting existing sidedata is not supported yet"
3408 msg = b"rewriting existing sidedata is not supported yet"
3389 raise error.Abort(msg)
3409 raise error.Abort(msg)
3390
3410
3391 # Apply (potential) flags to add and to remove after running
3411 # Apply (potential) flags to add and to remove after running
3392 # the sidedata helpers
3412 # the sidedata helpers
3393 new_offset_flags = entry[0] | flags[0] & ~flags[1]
3413 new_offset_flags = entry[0] | flags[0] & ~flags[1]
3394 entry_update = (
3414 entry_update = (
3395 current_offset,
3415 current_offset,
3396 len(serialized_sidedata),
3416 len(serialized_sidedata),
3397 new_offset_flags,
3417 new_offset_flags,
3418 sidedata_compression_mode,
3398 )
3419 )
3399
3420
3400 # the sidedata computation might have moved the file cursors around
3421 # the sidedata computation might have moved the file cursors around
3401 dfh.seek(current_offset, os.SEEK_SET)
3422 dfh.seek(current_offset, os.SEEK_SET)
3402 dfh.write(serialized_sidedata)
3423 dfh.write(serialized_sidedata)
3403 new_entries.append(entry_update)
3424 new_entries.append(entry_update)
3404 current_offset += len(serialized_sidedata)
3425 current_offset += len(serialized_sidedata)
3405 if self._docket is not None:
3426 if self._docket is not None:
3406 self._docket.data_end = dfh.tell()
3427 self._docket.data_end = dfh.tell()
3407
3428
3408 # rewrite the new index entries
3429 # rewrite the new index entries
3409 ifh.seek(startrev * self.index.entry_size)
3430 ifh.seek(startrev * self.index.entry_size)
3410 for i, e in enumerate(new_entries):
3431 for i, e in enumerate(new_entries):
3411 rev = startrev + i
3432 rev = startrev + i
3412 self.index.replace_sidedata_info(rev, *e)
3433 self.index.replace_sidedata_info(rev, *e)
3413 packed = self.index.entry_binary(rev)
3434 packed = self.index.entry_binary(rev)
3414 if rev == 0 and self._docket is None:
3435 if rev == 0 and self._docket is None:
3415 header = self._format_flags | self._format_version
3436 header = self._format_flags | self._format_version
3416 header = self.index.pack_header(header)
3437 header = self.index.pack_header(header)
3417 packed = header + packed
3438 packed = header + packed
3418 ifh.write(packed)
3439 ifh.write(packed)
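# Illustrative sketch (not part of revlog.py): the per-revision update
# appended to `new_entries` above. The field names are descriptive labels
# for the 4-tuple handed to index.replace_sidedata_info().
def sidedata_entry_update(offset, serialized, offset_flags, comp_mode):
    return (
        offset,  # where the (possibly compressed) sidedata was written
        len(serialized),  # its on-disk length
        offset_flags,  # entry[0] with helper-requested flags applied
        comp_mode,  # COMP_MODE_PLAIN / _DEFAULT / _INLINE, chosen above
    )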